[ovs-dev] [PATCH ovn v5 5/5] northd: Flood ARPs to routers for "unreachable" addresses.

Mark Michelson mmichels at redhat.com
Tue Apr 20 15:56:18 UTC 2021


Previously, ARP TPAs were filtered down only to "reachable" addresses.
Reachable addresses are all router interface addresses, as well as NAT
external addresses and load balancer VIPs that are within the subnet
handled by a router's port.

However, it is possible that in some configurations, CMSes purposely
configure NAT or load balancer addresses on a router that are outside
the router's subnets, and they expect the router to respond to ARPs for
those addresses.

This commit adds a higher priority flow to logical switches that makes
it so ARPs targeted at "unreachable" addresses are flooded to all ports.
This way, the ARPs can reach the router appropriately and receive a
response.

Reported at: https://bugzilla.redhat.com/show_bug.cgi?id=1929901

Signed-off-by: Mark Michelson <mmichels at redhat.com>
---
 northd/ovn-northd.8.xml |   8 ++
 northd/ovn-northd.c     | 158 +++++++++++++++++++++++++++-------------
 northd/ovn_northd.dl    |  98 ++++++++++++++++++++-----
 3 files changed, 194 insertions(+), 70 deletions(-)

diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index 8197aa513..ed8c44940 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -1535,6 +1535,14 @@ output;
         logical ports.
       </li>
 
+      <li>
+        Priority-80 flows for each IP address/VIP/NAT address configured
+        outside its owning router port's subnet. These flows match ARP
+        requests and ND packets for the specific IP addresses.  Matched packets
+        are forwarded only to the <code>MC_FLOOD</code> multicast group which
+        contains all connected logical ports.
+      </li>
+
       <li>
         Priority-75 flows for each port connected to a logical router
         matching self originated ARP request/ND packets.  These packets
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index d36a3c29b..bfacb7464 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -6480,44 +6480,51 @@ build_lswitch_rport_arp_req_self_orig_flow(struct ovn_port *op,
     ds_destroy(&match);
 }
 
-/*
- * Ingress table 19: Flows that forward ARP/ND requests only to the routers
- * that own the addresses. Other ARP/ND packets are still flooded in the
- * switching domain as regular broadcast.
- */
 static void
-build_lswitch_rport_arp_req_flow_for_ip(struct sset *ips,
-                                        int addr_family,
-                                        struct ovn_port *patch_op,
-                                        struct ovn_datapath *od,
-                                        uint32_t priority,
-                                        struct hmap *lflows,
-                                        const struct ovsdb_idl_row *stage_hint)
+arp_nd_ns_match(struct sset *ips, int addr_family, struct ds *match)
 {
-    struct ds match   = DS_EMPTY_INITIALIZER;
-    struct ds actions = DS_EMPTY_INITIALIZER;
 
     /* Packets received from VXLAN tunnels have already been through the
      * router pipeline so we should skip them. Normally this is done by the
      * multicast_group implementation (VXLAN packets skip table 32 which
      * delivers to patch ports) but we're bypassing multicast_groups.
      */
-    ds_put_cstr(&match, FLAGBIT_NOT_VXLAN " && ");
+    ds_put_cstr(match, FLAGBIT_NOT_VXLAN " && ");
 
     if (addr_family == AF_INET) {
-        ds_put_cstr(&match, "arp.op == 1 && arp.tpa == { ");
+        ds_put_cstr(match, "arp.op == 1 && arp.tpa == { ");
     } else {
-        ds_put_cstr(&match, "nd_ns && nd.target == { ");
+        ds_put_cstr(match, "nd_ns && nd.target == { ");
     }
 
     const char *ip_address;
     SSET_FOR_EACH (ip_address, ips) {
-        ds_put_format(&match, "%s, ", ip_address);
+        ds_put_format(match, "%s, ", ip_address);
     }
 
-    ds_chomp(&match, ' ');
-    ds_chomp(&match, ',');
-    ds_put_cstr(&match, "}");
+    ds_chomp(match, ' ');
+    ds_chomp(match, ',');
+    ds_put_cstr(match, "}");
+}
+
+/*
+ * Ingress table 19: Flows that forward ARP/ND requests only to the routers
+ * that own the addresses. Other ARP/ND packets are still flooded in the
+ * switching domain as regular broadcast.
+ */
+static void
+build_lswitch_rport_arp_req_flow_for_reachable_ip(struct sset *ips,
+                                                  int addr_family,
+                                                  struct ovn_port *patch_op,
+                                                  struct ovn_datapath *od,
+                                                  uint32_t priority,
+                                                  struct hmap *lflows,
+                                                  const struct ovsdb_idl_row *stage_hint)
+{
+    struct ds match   = DS_EMPTY_INITIALIZER;
+    struct ds actions = DS_EMPTY_INITIALIZER;
+
+    arp_nd_ns_match(ips, addr_family, &match);
 
     /* Send a the packet to the router pipeline.  If the switch has non-router
      * ports then flood it there as well.
@@ -6540,6 +6547,32 @@ build_lswitch_rport_arp_req_flow_for_ip(struct sset *ips,
     ds_destroy(&actions);
 }
 
+/*
+ * Ingress table 19: Flows that forward ARP/ND requests for "unreachable" IPs
+ * (NAT or load balancer IPs configured on a router that are outside the router's
+ * configured subnets).
+ * These ARP/ND packets are flooded in the switching domain as regular broadcast.
+ */
+static void
+build_lswitch_rport_arp_req_flow_for_unreachable_ip(struct sset *ips,
+                                                    int addr_family,
+                                                    struct ovn_datapath *od,
+                                                    uint32_t priority,
+                                                    struct hmap *lflows,
+                                                    const struct ovsdb_idl_row *stage_hint)
+{
+    struct ds match = DS_EMPTY_INITIALIZER;
+
+    arp_nd_ns_match(ips, addr_family, &match);
+
+    ovn_lflow_add_unique_with_hint(lflows, od, S_SWITCH_IN_L2_LKUP,
+                                   priority, ds_cstr(&match),
+                                   "outport = \""MC_FLOOD"\"; output;",
+                                   stage_hint);
+
+    ds_destroy(&match);
+}
+
 /*
  * Ingress table 19: Flows that forward ARP/ND requests only to the routers
  * that own the addresses.
@@ -6566,39 +6599,48 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op,
      * router port.
      * Priority: 80.
      */
-    struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4);
-    struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6);
+    struct sset lb_ips_v4 = SSET_INITIALIZER(&lb_ips_v4);
+    struct sset lb_ips_v6 = SSET_INITIALIZER(&lb_ips_v6);
 
-    get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6);
+    get_router_load_balancer_ips(op->od, &lb_ips_v4, &lb_ips_v6);
+
+    struct sset reachable_ips_v4 = SSET_INITIALIZER(&reachable_ips_v4);
+    struct sset reachable_ips_v6 = SSET_INITIALIZER(&reachable_ips_v6);
+    struct sset unreachable_ips_v4 = SSET_INITIALIZER(&unreachable_ips_v4);
+    struct sset unreachable_ips_v6 = SSET_INITIALIZER(&unreachable_ips_v6);
 
     const char *ip_addr;
     const char *ip_addr_next;
-    SSET_FOR_EACH_SAFE (ip_addr, ip_addr_next, &all_ips_v4) {
+    SSET_FOR_EACH_SAFE (ip_addr, ip_addr_next, &lb_ips_v4) {
         ovs_be32 ipv4_addr;
 
         /* Check if the ovn port has a network configured on which we could
          * expect ARP requests for the LB VIP.
          */
-        if (ip_parse(ip_addr, &ipv4_addr) &&
-                lrouter_port_ipv4_reachable(op, ipv4_addr)) {
-            continue;
+        if (ip_parse(ip_addr, &ipv4_addr)) {
+            if (lrouter_port_ipv4_reachable(op, ipv4_addr)) {
+                sset_add(&reachable_ips_v4, ip_addr);
+            } else {
+                sset_add(&unreachable_ips_v4, ip_addr);
+            }
         }
-
-        sset_delete(&all_ips_v4, SSET_NODE_FROM_NAME(ip_addr));
     }
-    SSET_FOR_EACH_SAFE (ip_addr, ip_addr_next, &all_ips_v6) {
+    SSET_FOR_EACH_SAFE (ip_addr, ip_addr_next, &lb_ips_v6) {
         struct in6_addr ipv6_addr;
 
         /* Check if the ovn port has a network configured on which we could
          * expect NS requests for the LB VIP.
          */
-        if (ipv6_parse(ip_addr, &ipv6_addr) &&
-                lrouter_port_ipv6_reachable(op, &ipv6_addr)) {
-            continue;
+        if (ipv6_parse(ip_addr, &ipv6_addr)) {
+            if (lrouter_port_ipv6_reachable(op, &ipv6_addr)) {
+                sset_add(&reachable_ips_v6, ip_addr);
+            } else {
+                sset_add(&unreachable_ips_v6, ip_addr);
+            }
         }
-
-        sset_delete(&all_ips_v6, SSET_NODE_FROM_NAME(ip_addr));
     }
+    sset_destroy(&lb_ips_v4);
+    sset_destroy(&lb_ips_v6);
 
     for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
         struct ovn_nat *nat_entry = &op->od->nat_entries[i];
@@ -6619,37 +6661,53 @@ build_lswitch_rport_arp_req_flows(struct ovn_port *op,
             struct in6_addr *addr = &nat_entry->ext_addrs.ipv6_addrs[0].addr;
 
             if (lrouter_port_ipv6_reachable(op, addr)) {
-                sset_add(&all_ips_v6, nat->external_ip);
+                sset_add(&reachable_ips_v6, nat->external_ip);
+            } else {
+                sset_add(&unreachable_ips_v6, nat->external_ip);
             }
         } else {
             ovs_be32 addr = nat_entry->ext_addrs.ipv4_addrs[0].addr;
 
             if (lrouter_port_ipv4_reachable(op, addr)) {
-                sset_add(&all_ips_v4, nat->external_ip);
+                sset_add(&reachable_ips_v4, nat->external_ip);
+            } else {
+                sset_add(&unreachable_ips_v4, nat->external_ip);
             }
         }
     }
 
     for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
-        sset_add(&all_ips_v4, op->lrp_networks.ipv4_addrs[i].addr_s);
+        sset_add(&reachable_ips_v4, op->lrp_networks.ipv4_addrs[i].addr_s);
     }
     for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
-        sset_add(&all_ips_v6, op->lrp_networks.ipv6_addrs[i].addr_s);
+        sset_add(&reachable_ips_v6, op->lrp_networks.ipv6_addrs[i].addr_s);
     }
 
-    if (!sset_is_empty(&all_ips_v4)) {
-        build_lswitch_rport_arp_req_flow_for_ip(&all_ips_v4, AF_INET, sw_op,
-                                                sw_od, 80, lflows,
-                                                stage_hint);
+    if (!sset_is_empty(&reachable_ips_v4)) {
+        build_lswitch_rport_arp_req_flow_for_reachable_ip(&reachable_ips_v4, AF_INET, sw_op,
+                                                          sw_od, 80, lflows,
+                                                          stage_hint);
+    }
+    if (!sset_is_empty(&reachable_ips_v6)) {
+        build_lswitch_rport_arp_req_flow_for_reachable_ip(&reachable_ips_v6, AF_INET6, sw_op,
+                                                          sw_od, 80, lflows,
+                                                          stage_hint);
     }
-    if (!sset_is_empty(&all_ips_v6)) {
-        build_lswitch_rport_arp_req_flow_for_ip(&all_ips_v6, AF_INET6, sw_op,
-                                                sw_od, 80, lflows,
-                                                stage_hint);
+    if (!sset_is_empty(&unreachable_ips_v4)) {
+        build_lswitch_rport_arp_req_flow_for_unreachable_ip(&unreachable_ips_v4, AF_INET,
+                                                            sw_od, 90, lflows,
+                                                            stage_hint);
+    }
+    if (!sset_is_empty(&unreachable_ips_v6)) {
+        build_lswitch_rport_arp_req_flow_for_unreachable_ip(&unreachable_ips_v6, AF_INET6,
+                                                            sw_od, 90, lflows,
+                                                            stage_hint);
     }
 
-    sset_destroy(&all_ips_v4);
-    sset_destroy(&all_ips_v6);
+    sset_destroy(&reachable_ips_v4);
+    sset_destroy(&reachable_ips_v6);
+    sset_destroy(&unreachable_ips_v4);
+    sset_destroy(&unreachable_ips_v6);
 
     /* Self originated ARP requests/ND need to be flooded as usual.
      *
diff --git a/northd/ovn_northd.dl b/northd/ovn_northd.dl
index 089947516..612fcb3d3 100644
--- a/northd/ovn_northd.dl
+++ b/northd/ovn_northd.dl
@@ -4091,9 +4091,13 @@ UniqueFlow[Flow{.logical_datapath = sw.ls._uuid,
  * router port.
  * Priority: 80.
  */
-function get_arp_forward_ips(rp: Ref<RouterPort>): (Set<string>, Set<string>) = {
-    var all_ips_v4 = set_empty();
-    var all_ips_v6 = set_empty();
+function get_arp_forward_ips(rp: Ref<RouterPort>):
+    (Set<string>, Set<string>, Set<string>, Set<string>) =
+{
+    var reachable_ips_v4 = set_empty();
+    var reachable_ips_v6 = set_empty();
+    var unreachable_ips_v4 = set_empty();
+    var unreachable_ips_v6 = set_empty();
 
     (var lb_ips_v4, var lb_ips_v6)
         = get_router_load_balancer_ips(deref(rp.router));
@@ -4103,7 +4107,9 @@ function get_arp_forward_ips(rp: Ref<RouterPort>): (Set<string>, Set<string>) =
          */
         match (ip_parse(a)) {
             Some{ipv4} -> if (lrouter_port_ip_reachable(rp, IPv4{ipv4})) {
-                all_ips_v4.insert(a)
+                reachable_ips_v4.insert(a)
+            } else {
+                unreachable_ips_v4.insert(a)
             },
             _ -> ()
         }
@@ -4114,7 +4120,9 @@ function get_arp_forward_ips(rp: Ref<RouterPort>): (Set<string>, Set<string>) =
          */
         match (ipv6_parse(a)) {
             Some{ipv6} -> if (lrouter_port_ip_reachable(rp, IPv6{ipv6})) {
-                all_ips_v6.insert(a)
+                reachable_ips_v6.insert(a)
+            } else {
+                unreachable_ips_v6.insert(a)
             },
             _ -> ()
         }
@@ -4127,22 +4135,45 @@ function get_arp_forward_ips(rp: Ref<RouterPort>): (Set<string>, Set<string>) =
              */
             if (lrouter_port_ip_reachable(rp, nat.external_ip)) {
                 match (nat.external_ip) {
-                    IPv4{_} -> all_ips_v4.insert(nat.nat.external_ip),
-                    IPv6{_} -> all_ips_v6.insert(nat.nat.external_ip)
+                    IPv4{_} -> reachable_ips_v4.insert(nat.nat.external_ip),
+                    IPv6{_} -> reachable_ips_v6.insert(nat.nat.external_ip)
+                }
+            } else {
+                match (nat.external_ip) {
+                    IPv4{_} -> unreachable_ips_v4.insert(nat.nat.external_ip),
+                    IPv6{_} -> unreachable_ips_v6.insert(nat.nat.external_ip),
                 }
             }
         }
     };
 
     for (a in rp.networks.ipv4_addrs) {
-        all_ips_v4.insert("${a.addr}")
+        reachable_ips_v4.insert("${a.addr}")
     };
     for (a in rp.networks.ipv6_addrs) {
-        all_ips_v6.insert("${a.addr}")
+        reachable_ips_v6.insert("${a.addr}")
     };
 
-    (all_ips_v4, all_ips_v6)
+    (reachable_ips_v4, reachable_ips_v6, unreachable_ips_v4, unreachable_ips_v6)
 }
+
+relation &SwitchPortARPForwards(
+    port: Ref<SwitchPort>,
+    reachable_ips_v4: Set<string>,
+    reachable_ips_v6: Set<string>,
+    unreachable_ips_v4: Set<string>,
+    unreachable_ips_v6: Set<string>
+)
+
+&SwitchPortARPForwards(.port = port,
+                       .reachable_ips_v4 = reachable_ips_v4,
+                       .reachable_ips_v6 = reachable_ips_v6,
+                       .unreachable_ips_v4 = unreachable_ips_v4,
+                       .unreachable_ips_v6 = unreachable_ips_v6) :-
+    port in &SwitchPort(.peer = Some{rp}),
+    rp.is_enabled(),
+    (var reachable_ips_v4, var reachable_ips_v6, var unreachable_ips_v4, var unreachable_ips_v6) = get_arp_forward_ips(rp).
+
 /* Packets received from VXLAN tunnels have already been through the
  * router pipeline so we should skip them. Normally this is done by the
  * multicast_group implementation (VXLAN packets skip table 32 which
@@ -4154,7 +4185,7 @@ AnnotatedFlow(.f = Flow{.logical_datapath = sw.ls._uuid,
                         .priority         = 80,
                         .__match          = fLAGBIT_NOT_VXLAN() ++
                                             " && arp.op == 1 && arp.tpa == { " ++
-                                            all_ips_v4.to_vec().join(", ") ++ "}",
+                                            ipv4.to_vec().join(", ") ++ "}",
                         .actions          = if (sw.has_non_router_port) {
                                                 "clone {outport = ${sp.json_name}; output; }; "
                                                 "outport = ${mc_flood_l2}; output;"
@@ -4163,17 +4194,17 @@ AnnotatedFlow(.f = Flow{.logical_datapath = sw.ls._uuid,
                                             },
                         .external_ids     = stage_hint(sp.lsp._uuid)},
               .shared = not sw.has_non_router_port) :-
-    sp in &SwitchPort(.sw = sw, .peer = Some{rp}),
-    rp.is_enabled(),
-    (var all_ips_v4, _) = get_arp_forward_ips(rp),
-    not all_ips_v4.is_empty(),
+    sp in &SwitchPort(.sw = sw),
+    &SwitchPortARPForwards(.port = sp, .reachable_ips_v4 = ipv4),
+    not ipv4.is_empty(),
     var mc_flood_l2 = json_string_escape(mC_FLOOD_L2().0).
+
 AnnotatedFlow(.f = Flow{.logical_datapath = sw.ls._uuid,
                         .stage            = s_SWITCH_IN_L2_LKUP(),
                         .priority         = 80,
                         .__match          = fLAGBIT_NOT_VXLAN() ++
                                             " && nd_ns && nd.target == { " ++
-                                            all_ips_v6.to_vec().join(", ") ++ "}",
+                                            ipv6.to_vec().join(", ") ++ "}",
                         .actions          = if (sw.has_non_router_port) {
                                                 "clone {outport = ${sp.json_name}; output; }; "
                                                 "outport = ${mc_flood_l2}; output;"
@@ -4182,12 +4213,39 @@ AnnotatedFlow(.f = Flow{.logical_datapath = sw.ls._uuid,
                                             },
                         .external_ids     = stage_hint(sp.lsp._uuid)},
               .shared = not sw.has_non_router_port) :-
-    sp in &SwitchPort(.sw = sw, .peer = Some{rp}),
-    rp.is_enabled(),
-    (_, var all_ips_v6) = get_arp_forward_ips(rp),
-    not all_ips_v6.is_empty(),
+    sp in &SwitchPort(.sw = sw),
+    &SwitchPortARPForwards(.port = sp, .reachable_ips_v6 = ipv6),
+    not ipv6.is_empty(),
     var mc_flood_l2 = json_string_escape(mC_FLOOD_L2().0).
 
+AnnotatedFlow(.f = Flow{.logical_datapath = sw.ls._uuid,
+                        .stage            = s_SWITCH_IN_L2_LKUP(),
+                        .priority         = 90,
+                        .__match          = fLAGBIT_NOT_VXLAN() ++
+                                            " && arp.op == 1 && arp.tpa == { " ++
+                                            ipv4.to_vec().join(", ") ++ "}",
+                        .actions          = "outport = ${flood}; output;",
+                        .external_ids     = stage_hint(sp.lsp._uuid)},
+              .shared = not sw.has_non_router_port) :-
+    sp in &SwitchPort(.sw = sw),
+    &SwitchPortARPForwards(.port = sp, .unreachable_ips_v4 = ipv4),
+    not ipv4.is_empty(),
+    var flood = json_string_escape(mC_FLOOD().0).
+
+AnnotatedFlow(.f = Flow{.logical_datapath = sw.ls._uuid,
+                        .stage            = s_SWITCH_IN_L2_LKUP(),
+                        .priority         = 90,
+                        .__match          = fLAGBIT_NOT_VXLAN() ++
+                                            " && nd_ns && nd.target == { " ++
+                                            ipv6.to_vec().join(", ") ++ "}",
+                        .actions          = "outport = ${flood}; output;",
+                        .external_ids     = stage_hint(sp.lsp._uuid)},
+              .shared = not sw.has_non_router_port) :-
+    sp in &SwitchPort(.sw = sw),
+    &SwitchPortARPForwards(.port = sp, .unreachable_ips_v6 = ipv6),
+    not ipv6.is_empty(),
+    var flood = json_string_escape(mC_FLOOD().0).
+
 for (SwitchPortNewDynamicAddress(.port = &SwitchPort{.lsp = lsp, .json_name = json_name, .sw = &sw},
                                  .address = Some{addrs})
      if lsp.__type != "external") {
-- 
2.29.2



More information about the dev mailing list