[ovs-dev] [PATCH ovn] ovn-northd: Fix multiple ARP replies for SNAT entries configured on a distributed router.

numans at ovn.org numans at ovn.org
Mon Sep 14 09:20:53 UTC 2020


From: Numan Siddique <numans at ovn.org>

The commit in the Fixes tag, while addressing the issue to send ARP replies for
the SNAT entries, didn't take into account the gateway router port scenario.
Because of this, all the chassis which have bridge mappings configured reply
to ARP request for SNAT entries. This patch fixes the issue by adding
"is_chassis_resident()" condition for such flows so that only the gateway chassis
which claims the gateway router port responds to the ARP request.

Note: This patch doesn't require any changes to ovn-northd.8.xml as it was already
documented with the desired flows for SNAT entries.

Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2020-September/050679.html
Reported-by: Chris <kklimonda at syntaxhighlighted.com>
Fixes: e2aa124ff7c2("ovn-northd: Add ARP responder flows for SNAT entries.")
Signed-off-by: Numan Siddique <numans at ovn.org>
---
 northd/ovn-northd.c |  18 ++-
 tests/ovn-northd.at |   8 +-
 tests/ovn.at        | 260 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 280 insertions(+), 6 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index f394f17c3..2569212c7 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -8856,7 +8856,11 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
                              ext_addrs->ipv4_addrs[0].addr_s;
 
             if (!strcmp(nat->type, "snat")) {
-                if (sset_contains(&snat_ips, ext_addr)) {
+                /* If its a router with distributed gateway port then don't
+                 * add the ARP flow for SNAT entries. Otherwise all the
+                 * chassis (which has ovn-bridge-mappings) configured will
+                 * reply for the ARPrequest to the snat external_ip. */
+                if (od->l3dgw_port || sset_contains(&snat_ips, ext_addr)) {
                     continue;
                 }
                 sset_add(&snat_ips, ext_addr);
@@ -9237,6 +9241,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
             continue;
         }
 
+        struct sset sset_snat_ips = SSET_INITIALIZER(&sset_snat_ips);
         for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
             struct ovn_nat *nat_entry = &op->od->nat_entries[i];
             const struct nbrec_nat *nat = nat_entry->nb;
@@ -9246,8 +9251,15 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
                 continue;
             }
 
+            struct lport_addresses *ext_addrs = &nat_entry->ext_addrs;
+            char *ext_addr = nat_entry_is_v6(nat_entry) ?
+                             ext_addrs->ipv6_addrs[0].addr_s :
+                             ext_addrs->ipv4_addrs[0].addr_s;
             if (!strcmp(nat->type, "snat")) {
-                continue;
+                if (sset_contains(&sset_snat_ips, ext_addr)) {
+                    continue;
+                }
+                sset_add(&sset_snat_ips, ext_addr);
             }
 
             /* Mac address to use when replying to ARP/NS. */
@@ -9289,7 +9301,6 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
             /* Respond to ARP/NS requests on the chassis that binds the gw
              * port. Drop the ARP/NS requests on other chassis.
              */
-            struct lport_addresses *ext_addrs = &nat_entry->ext_addrs;
             if (nat_entry_is_v6(nat_entry)) {
                 build_lrouter_nd_flow(op->od, op, "nd_na",
                                       ext_addrs->ipv6_addrs[0].addr_s,
@@ -9312,6 +9323,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
                                        &nat->header_, lflows);
             }
         }
+        sset_destroy(&sset_snat_ips);
     }
 
     /* DHCPv6 reply handling */
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 5f531f5a6..4fe788e89 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -1863,9 +1863,6 @@ action=(xreg0[[0..47]] = 00:00:00:00:01:00; next;)
 # Priority 90 flows (per router).
 AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_ip_input.*priority=90" | grep "arp\|nd" | sort], [0], [dnl
   table=3 (lr_in_ip_input     ), priority=90   , dnl
-match=(arp.op == 1 && arp.tpa == 43.43.43.150), dnl
-action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.150; outport = inport; flags.loopback = 1; output;)
-  table=3 (lr_in_ip_input     ), priority=90   , dnl
 match=(arp.op == 1 && arp.tpa == 43.43.43.2), dnl
 action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.2; outport = inport; flags.loopback = 1; output;)
   table=3 (lr_in_ip_input     ), priority=90   , dnl
@@ -1891,6 +1888,8 @@ action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = fe80::200:ff:fe00:100
 # Priority 91 drop flows (per distributed gw port), if port is not resident.
 AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_ip_input.*priority=91" | grep "arp\|nd" | sort], [0], [dnl
   table=3 (lr_in_ip_input     ), priority=91   , dnl
+match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.150), action=(drop;)
+  table=3 (lr_in_ip_input     ), priority=91   , dnl
 match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.2), dnl
 action=(drop;)
   table=3 (lr_in_ip_input     ), priority=91   , dnl
@@ -1904,6 +1903,9 @@ action=(drop;)
 # Priority 92 ARP/NS responders (per distributed gw port), if port is resident.
 AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_ip_input.*priority=92" | grep "arp\|nd" | sort], [0], [dnl
   table=3 (lr_in_ip_input     ), priority=92   , dnl
+match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.150 && is_chassis_resident("cr-lrp-public")), dnl
+action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.150; outport = inport; flags.loopback = 1; output;)
+  table=3 (lr_in_ip_input     ), priority=92   , dnl
 match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.2 && is_chassis_resident("cr-lrp-public")), dnl
 action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.2; outport = inport; flags.loopback = 1; output;)
   table=3 (lr_in_ip_input     ), priority=92   , dnl
diff --git a/tests/ovn.at b/tests/ovn.at
index 5aef4b082..41fe577ff 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -21738,6 +21738,266 @@ done
 OVN_CLEANUP([hv1])
 AT_CLEANUP
 
+AT_SETUP([ovn -- ARP replies for SNAT external ips])
+ovn_start
+
+net_add n1
+
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+ovs-vsctl -- add-port br-int hv1-vif1 -- \
+    set interface hv1-vif1 external-ids:iface-id=sw0-port1 \
+    options:tx_pcap=hv1/vif1-tx.pcap \
+    options:rxq_pcap=hv1/vif1-rx.pcap \
+    ofport-request=1
+ovs-vsctl -- add-port br-int hv1-vif2 -- \
+    set interface hv1-vif2 external-ids:iface-id=sw3-port1 \
+    options:tx_pcap=hv1/vif2-tx.pcap \
+    options:rxq_pcap=hv1/vif2-rx.pcap \
+    ofport-request=2
+AT_CHECK([ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=physnet1:br-phys])
+ovs-vsctl set open . external_ids:ovn-enable-lflow-cache=false
+
+sim_add hv2
+as hv2
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.2
+ovs-vsctl -- add-port br-int hv2-vif1 -- \
+    set interface hv2-vif1 external-ids:iface-id=sw0-port2 \
+    options:tx_pcap=hv2/vif1-tx.pcap \
+    options:rxq_pcap=hv2/vif1-rx.pcap \
+    ofport-request=1
+AT_CHECK([ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=physnet1:br-phys])
+ovs-vsctl set open . external_ids:ovn-enable-lflow-cache=false
+
+sim_add hv3
+as hv3
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.3
+ovs-vsctl -- add-port br-int hv3-vif1 -- \
+    set interface hv3-vif1 external-ids:iface-id=sw1-port1 \
+    options:tx_pcap=hv3/vif1-tx.pcap \
+    options:rxq_pcap=hv3/vif1-rx.pcap \
+    ofport-request=1
+AT_CHECK([ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=physnet1:br-phys])
+ovs-vsctl set open . external_ids:ovn-enable-lflow-cache=false
+
+ovn-nbctl ls-add sw0
+ovn-nbctl lsp-add sw0 sw0-port1
+ovn-nbctl lsp-set-addresses sw0-port1 "10:54:00:00:00:03 10.0.0.3 1000::3"
+ovn-nbctl lsp-add sw0 sw0-port2
+ovn-nbctl lsp-set-addresses sw0-port2 "10:54:00:00:00:04 10.0.0.4 1000::4"
+
+ovn-nbctl ls-add sw1
+ovn-nbctl lsp-add sw1 sw1-port1
+ovn-nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3"
+
+ovn-nbctl lr-add lr0
+ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
+ovn-nbctl lsp-add sw0 sw0-lr0
+ovn-nbctl lsp-set-type sw0-lr0 router
+ovn-nbctl lsp-set-addresses sw0-lr0 router
+ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
+
+ovn-nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
+ovn-nbctl lsp-add sw1 sw1-lr0
+ovn-nbctl lsp-set-type sw1-lr0 router
+ovn-nbctl lsp-set-addresses sw1-lr0 router
+ovn-nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
+
+ovn-nbctl ls-add public
+ovn-nbctl lrp-add lr0 lr0-public 00:00:20:20:12:13 172.16.0.100/24 3000::a/64
+ovn-nbctl lsp-add public public-lr0
+ovn-nbctl lsp-set-type public-lr0 router
+ovn-nbctl lsp-set-addresses public-lr0 router
+ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public
+
+# localnet port
+ovn-nbctl lsp-add public ln-public
+ovn-nbctl lsp-set-type ln-public localnet
+ovn-nbctl lsp-set-addresses ln-public unknown
+ovn-nbctl lsp-set-options ln-public network_name=physnet1
+
+# schedule the gw router port to a chassis.
+ovn-nbctl lrp-set-gateway-chassis lr0-public hv1 20
+
+# Create NAT entries for the ports
+
+# sw0-port1
+ovn-nbctl lr-nat-add lr0 dnat_and_snat 172.16.0.110 10.0.0.3 sw0-port1 30:54:00:00:00:03
+ovn-nbctl lr-nat-add lr0 dnat_and_snat 3000::c 1000::3 sw0-port1 40:54:00:00:00:03
+# sw1-port1
+ovn-nbctl lr-nat-add lr0 dnat_and_snat 172.16.0.120 20.0.0.3 sw1-port1 30:54:00:00:00:04
+ovn-nbctl lr-nat-add lr0 dnat_and_snat 3000::d 2000::3 sw1-port1 40:54:00:00:00:04
+
+# Add snat entriess
+ovn-nbctl lr-nat-add lr0 snat 172.16.0.100 10.0.0.0/24
+ovn-nbctl lr-nat-add lr0 snat 172.16.0.101 10.0.0.10
+ovn-nbctl lr-nat-add lr0 snat 172.16.0.102 10.0.0.11
+ovn-nbctl lr-nat-add lr0 snat 172.16.0.100 20.0.0.0/24
+
+ovn-nbctl ls-add sw3
+ovn-nbctl lsp-add sw3 sw3-port1
+ovn-nbctl lsp-set-addresses sw3-port1 "20:14:00:00:00:03 30.0.0.3 3000::3"
+
+ovn-nbctl lr-add lr1
+ovn-nbctl lrp-add lr1 lr1-sw3 00:00:00:10:ff:03 30.0.0.1/24 3000::a/64
+ovn-nbctl lsp-add sw3 sw3-lr1
+ovn-nbctl lsp-set-type sw3-lr1 router
+ovn-nbctl lsp-set-addresses sw3-lr1 router
+ovn-nbctl lsp-set-options sw3-lr1 router-port=lr1-sw3
+
+ovn-nbctl ls-add join
+
+# Connect lr1 to join
+ovn-nbctl lrp-add lr1 lr1-join 00:00:04:01:02:03 170.0.0.1/24
+ovn-nbctl lsp-add join join-lr1
+ovn-nbctl lsp-set-type join-lr1 router
+ovn-nbctl lsp-set-addresses join-lr1 router
+ovn-nbctl lsp-set-options join-lr1 router-port=lr1-join
+
+# Create GW router
+ovn-nbctl lr-add gw_router
+# connect gw_router to join
+ovn-nbctl lrp-add gw_router gw_router-join 00:00:03:11:12:13 170.0.0.2/24
+ovn-nbctl lsp-add join join-gw_router
+ovn-nbctl lsp-set-type join-gw_router router
+ovn-nbctl lsp-set-addresses join-gw_router router
+ovn-nbctl lsp-set-options join-gw_router router-port=gw_router-join
+
+# Connect gw_router to public
+ovn-nbctl lrp-add gw_router gw_router-public 00:00:30:30:32:33 172.16.0.200/24 3000::b/64
+ovn-nbctl lsp-add public public-gw_router
+ovn-nbctl lsp-set-type public-gw_router router
+ovn-nbctl lsp-set-addresses public-gw_router router
+ovn-nbctl lsp-set-options public-gw_router router-port=gw_router-public
+
+# Pin gw_router to hv3
+ovn-nbctl set logical_router gw_router options:chassis=hv3
+
+# Add snat entries on gw_router
+ovn-nbctl lr-nat-add gw_router snat 172.16.0.200 30.0.0.0/24
+ovn-nbctl lr-nat-add gw_router snat 172.16.0.201 30.0.0.3
+
+ovn-nbctl --wait=hv sync
+
+# Create an interface in br-phys in hv2 and send ARP request for 172.16.0.100
+as hv2
+ovs-vsctl -- add-port br-phys hv2-phys1 -- \
+    set interface hv2-phys1 options:tx_pcap=hv2/phys1-tx.pcap \
+    options:rxq_pcap=hv2/phys1-rx.pcap \
+    ofport-request=1
+
+reset_pcap_file() {
+    local iface=$1
+    local pcap_file=$2
+    ovs-vsctl -- set Interface $iface options:tx_pcap=dummy-tx.pcap \
+options:rxq_pcap=dummy-rx.pcap
+    rm -f ${pcap_file}*.pcap
+    ovs-vsctl -- set Interface $iface options:tx_pcap=${pcap_file}-tx.pcap \
+options:rxq_pcap=${pcap_file}-rx.pcap
+}
+
+ip_to_hex() {
+    printf "%02x%02x%02x%02x" "$@"
+}
+
+send_arp_request() {
+    local eth_src=$1 spa=$2 tpa=$3
+    local eth_dst=ffffffffffff
+    local eth_type=0806
+    local eth=${eth_dst}${eth_src}${eth_type}
+
+    local arp=0001080006040001${eth_src}${spa}${eth_dst}${tpa}
+
+    local request=${eth}${arp}
+    as hv2 ovs-appctl netdev-dummy/receive hv2-phys1 $request
+}
+
+test_arp_response () {
+    local router_mac=$1 router_ip=$2 gw=$3 nongw1=$4 nongw2=$5
+
+    echo "Checking arp reply for IP - $router_ip"
+    as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
+    as hv2 reset_pcap_file br-phys_n1 hv2/br-phys_n1
+    as hv3 reset_pcap_file br-phys_n1 hv3/br-phys_n1
+    as hv2 reset_pcap_file hv2-phys1 hv2/phys1
+
+    local src_mac=000200100011
+    src_ip=$(ip_to_hex 172 16 0 40)
+    send_arp_request ${src_mac} ${src_ip} ${router_ip}
+    arp_reply=${src_mac}${router_mac}08060001080006040002${router_mac}
+    arp_reply=${arp_reply}${router_ip}${src_mac}${src_ip}
+
+    OVS_WAIT_UNTIL([
+        test $($PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv2/phys1-tx.pcap | wc -l) -ge 1
+    ])
+
+    AT_CHECK([$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv2/phys1-tx.pcap | \
+    grep -c $arp_reply], [0], [1
+])
+
+    # $gw phys1-n1 should see the response because $gw ovn-controller responds
+    # to arp request.
+    AT_CHECK([$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" $gw/br-phys_n1-tx.pcap | \
+    grep -c $arp_reply], [0], [1
+])
+
+    # $nongw1 and $nongw1 phys1-n1 should not see the response.
+    $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" $nongw1/br-phys_n1-tx.pcap
+    AT_CHECK([$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" $nongw1/br-phys_n1-tx.pcap | \
+    grep -c $arp_reply], [1], [0
+])
+
+    AT_CHECK([$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" $nongw2/br-phys_n1-tx.pcap | \
+    grep -c $arp_reply], [1], [0
+])
+}
+
+# Send ARP request for the IPs which belongs to lr0 having
+# distributed gw router port - lr0-public.
+test_arp_response 000020201213 $(ip_to_hex 172 16 0 100) hv1 hv2 hv3
+test_arp_response 000020201213 $(ip_to_hex 172 16 0 101) hv1 hv2 hv3
+test_arp_response 000020201213 $(ip_to_hex 172 16 0 102) hv1 hv2 hv3
+
+# Send ARP request for the IP which belongs to gw_router
+test_arp_response 000030303233 $(ip_to_hex 172 16 0 200) hv3 hv1 hv2
+test_arp_response 000030303233 $(ip_to_hex 172 16 0 201) hv3 hv1 hv2
+
+# Make hv3 claim the cr-lr0-public
+ovn-nbctl lrp-set-gateway-chassis lr0-public hv1 20
+ovn-nbctl lrp-set-gateway-chassis lr0-public hv2 30
+ovn-nbctl lrp-set-gateway-chassis lr0-public hv3 40
+
+hv3_uuid=$(ovn-sbctl --bare --columns _uuid list chassis hv3)
+
+OVS_WAIT_UNTIL([
+    cr_lr0_public_ch=$(ovn-sbctl --bare --columns chassis list port_binding cr-lr0-public)
+    test $cr_lr0_public_ch = $hv3_uuid
+])
+
+test_arp_response 000020201213 $(ip_to_hex 172 16 0 100) hv3 hv1 hv2
+test_arp_response 000020201213 $(ip_to_hex 172 16 0 101) hv3 hv1 hv2
+test_arp_response 000020201213 $(ip_to_hex 172 16 0 102) hv3 hv1 hv2
+
+# Schedule gw_router on hv1.
+ovn-nbctl set logical_router gw_router options:chassis=hv1
+hv1_uuid=$(ovn-sbctl --bare --columns _uuid list chassis hv1)
+
+OVS_WAIT_UNTIL([
+    gw_router_ch=$(ovn-sbctl --bare --columns chassis list port_binding gw_router-public)
+    test $gw_router_ch = $hv1_uuid
+])
+
+# Send ARP request for the IP which belongs to gw_router
+test_arp_response 000030303233 $(ip_to_hex 172 16 0 200) hv1 hv2 hv3
+test_arp_response 000030303233 $(ip_to_hex 172 16 0 201) hv1 hv2 hv3
+
+OVN_CLEANUP([hv1],[hv2],[hv3])
+AT_CLEANUP
+
 # Duplicate ACLs (same match with same action) should work as expected.
 # Conflict ACLs (same match with different actions) behavior is unpredictable
 # (only one of them would work).
-- 
2.26.2



More information about the dev mailing list