[ovs-dev] [PATCH ovn] ovn-northd: Fix multiple ARP replies for SNAT entries configured on a distributed router.

Dumitru Ceara dceara at redhat.com
Mon Sep 14 09:47:18 UTC 2020


On 9/14/20 11:20 AM, numans at ovn.org wrote:
> From: Numan Siddique <numans at ovn.org>
> 
> The commit in the Fixes tag, while addressing the issue to send ARP replies for
> the SNAT entries, didn't take into account the gateway router port scenario.
> Because of this, all the chassis which have bridge mappings configured reply
> to ARP request for SNAT entries. This patch fixes the issue by adding
> "is_chassis_resident()" condition for such flows so that only the gateway chassis
> which claims the gateway router port responds to the ARP request.
> 
> Note: This patch doesn't require any changes to ovn-northd.8.xml as it was already
> documented with the desired flows for SNAT entries.
> 
> Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2020-September/050679.html
> Reported-by: Chris <kklimonda at syntaxhighlighted.com>
> Fixes: e2aa124ff7c2("ovn-northd: Add ARP responder flows for SNAT entries.")
> Signed-off-by: Numan Siddique <numans at ovn.org>
> ---

Hi Numan,

Please see my comments below.

>  northd/ovn-northd.c |  18 ++-
>  tests/ovn-northd.at |   8 +-
>  tests/ovn.at        | 260 ++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 280 insertions(+), 6 deletions(-)
> 
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index f394f17c3..2569212c7 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -8856,7 +8856,11 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
>                               ext_addrs->ipv4_addrs[0].addr_s;
>  
>              if (!strcmp(nat->type, "snat")) {
> -                if (sset_contains(&snat_ips, ext_addr)) {
> +                /* If its a router with distributed gateway port then don't
> +                 * add the ARP flow for SNAT entries. Otherwise all the
> +                 * chassis (which has ovn-bridge-mappings) configured will

Nit: s/has/have

> +                 * reply for the ARPrequest to the snat external_ip. */

Nit: s/ARPrequest/ARP request

> +                if (od->l3dgw_port || sset_contains(&snat_ips, ext_addr)) {
>                      continue;
>                  }
>                  sset_add(&snat_ips, ext_addr);

This can be rewritten to avoid additional hmap operations:

if (od->l3dgw_port || !sset_add(&snat_ips, ext_addr)) {
    continue;
}

> @@ -9237,6 +9241,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
>              continue;
>          }
>  
> +        struct sset sset_snat_ips = SSET_INITIALIZER(&sset_snat_ips);
>          for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
>              struct ovn_nat *nat_entry = &op->od->nat_entries[i];
>              const struct nbrec_nat *nat = nat_entry->nb;
> @@ -9246,8 +9251,15 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
>                  continue;
>              }
>  
> +            struct lport_addresses *ext_addrs = &nat_entry->ext_addrs;
> +            char *ext_addr = nat_entry_is_v6(nat_entry) ?
> +                             ext_addrs->ipv6_addrs[0].addr_s :
> +                             ext_addrs->ipv4_addrs[0].addr_s;

As Ilya pointed out on a different patch, coding style[1] says:
"Break long lines before the ternary operators ? and :, rather than
after them". This should be:

            char *ext_addr = (nat_entry_is_v6(nat_entry)
                              ? ext_addrs->ipv6_addrs[0].addr_s
                              : ext_addrs->ipv4_addrs[0].addr_s);

[1]
https://docs.ovn.org/en/latest/internals/contributing/coding-style.html#expressions

>              if (!strcmp(nat->type, "snat")) {
> -                continue;
> +                if (sset_contains(&sset_snat_ips, ext_addr)) {
> +                    continue;
> +                }
> +                sset_add(&sset_snat_ips, ext_addr);

To avoid additional hmap operations this can be:

if (!sset_add(&sset_snat_ips, ext_addr)) {
    continue;
}

Also, we walk the nat entries twice and the snat_ips once for each
logical router port IP. Why don't we build the snat_ips set earlier and
use it in all cases, e.g., here, instead of an array we could build the
snat_ips set:

https://github.com/ovn-org/ovn/blob/64cc065e2c59c0696edeef738180989d993ceceb/northd/ovn-northd.c#L9116

Thanks,
Dumitru

>              }
>  
>              /* Mac address to use when replying to ARP/NS. */
> @@ -9289,7 +9301,6 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
>              /* Respond to ARP/NS requests on the chassis that binds the gw
>               * port. Drop the ARP/NS requests on other chassis.
>               */
> -            struct lport_addresses *ext_addrs = &nat_entry->ext_addrs;
>              if (nat_entry_is_v6(nat_entry)) {
>                  build_lrouter_nd_flow(op->od, op, "nd_na",
>                                        ext_addrs->ipv6_addrs[0].addr_s,
> @@ -9312,6 +9323,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
>                                         &nat->header_, lflows);
>              }
>          }
> +        sset_destroy(&sset_snat_ips);
>      }
>  
>      /* DHCPv6 reply handling */
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index 5f531f5a6..4fe788e89 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -1863,9 +1863,6 @@ action=(xreg0[[0..47]] = 00:00:00:00:01:00; next;)
>  # Priority 90 flows (per router).
>  AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_ip_input.*priority=90" | grep "arp\|nd" | sort], [0], [dnl
>    table=3 (lr_in_ip_input     ), priority=90   , dnl
> -match=(arp.op == 1 && arp.tpa == 43.43.43.150), dnl
> -action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.150; outport = inport; flags.loopback = 1; output;)
> -  table=3 (lr_in_ip_input     ), priority=90   , dnl
>  match=(arp.op == 1 && arp.tpa == 43.43.43.2), dnl
>  action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.2; outport = inport; flags.loopback = 1; output;)
>    table=3 (lr_in_ip_input     ), priority=90   , dnl
> @@ -1891,6 +1888,8 @@ action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = fe80::200:ff:fe00:100
>  # Priority 91 drop flows (per distributed gw port), if port is not resident.
>  AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_ip_input.*priority=91" | grep "arp\|nd" | sort], [0], [dnl
>    table=3 (lr_in_ip_input     ), priority=91   , dnl
> +match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.150), action=(drop;)
> +  table=3 (lr_in_ip_input     ), priority=91   , dnl
>  match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.2), dnl
>  action=(drop;)
>    table=3 (lr_in_ip_input     ), priority=91   , dnl
> @@ -1904,6 +1903,9 @@ action=(drop;)
>  # Priority 92 ARP/NS responders (per distributed gw port), if port is resident.
>  AT_CHECK([ovn-sbctl lflow-list | grep -E "lr_in_ip_input.*priority=92" | grep "arp\|nd" | sort], [0], [dnl
>    table=3 (lr_in_ip_input     ), priority=92   , dnl
> +match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.150 && is_chassis_resident("cr-lrp-public")), dnl
> +action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.150; outport = inport; flags.loopback = 1; output;)
> +  table=3 (lr_in_ip_input     ), priority=92   , dnl
>  match=(inport == "lrp-public" && arp.op == 1 && arp.tpa == 43.43.43.2 && is_chassis_resident("cr-lrp-public")), dnl
>  action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa = arp.spa; arp.spa = 43.43.43.2; outport = inport; flags.loopback = 1; output;)
>    table=3 (lr_in_ip_input     ), priority=92   , dnl
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 5aef4b082..41fe577ff 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -21738,6 +21738,266 @@ done
>  OVN_CLEANUP([hv1])
>  AT_CLEANUP
>  
> +AT_SETUP([ovn -- ARP replies for SNAT external ips])
> +ovn_start
> +
> +net_add n1
> +
> +sim_add hv1
> +as hv1
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.1
> +ovs-vsctl -- add-port br-int hv1-vif1 -- \
> +    set interface hv1-vif1 external-ids:iface-id=sw0-port1 \
> +    options:tx_pcap=hv1/vif1-tx.pcap \
> +    options:rxq_pcap=hv1/vif1-rx.pcap \
> +    ofport-request=1
> +ovs-vsctl -- add-port br-int hv1-vif2 -- \
> +    set interface hv1-vif2 external-ids:iface-id=sw3-port1 \
> +    options:tx_pcap=hv1/vif2-tx.pcap \
> +    options:rxq_pcap=hv1/vif2-rx.pcap \
> +    ofport-request=2
> +AT_CHECK([ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=physnet1:br-phys])
> +ovs-vsctl set open . external_ids:ovn-enable-lflow-cache=false
> +
> +sim_add hv2
> +as hv2
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.2
> +ovs-vsctl -- add-port br-int hv2-vif1 -- \
> +    set interface hv2-vif1 external-ids:iface-id=sw0-port2 \
> +    options:tx_pcap=hv2/vif1-tx.pcap \
> +    options:rxq_pcap=hv2/vif1-rx.pcap \
> +    ofport-request=1
> +AT_CHECK([ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=physnet1:br-phys])
> +ovs-vsctl set open . external_ids:ovn-enable-lflow-cache=false
> +
> +sim_add hv3
> +as hv3
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.3
> +ovs-vsctl -- add-port br-int hv3-vif1 -- \
> +    set interface hv3-vif1 external-ids:iface-id=sw1-port1 \
> +    options:tx_pcap=hv3/vif1-tx.pcap \
> +    options:rxq_pcap=hv3/vif1-rx.pcap \
> +    ofport-request=1
> +AT_CHECK([ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=physnet1:br-phys])
> +ovs-vsctl set open . external_ids:ovn-enable-lflow-cache=false
> +
> +ovn-nbctl ls-add sw0
> +ovn-nbctl lsp-add sw0 sw0-port1
> +ovn-nbctl lsp-set-addresses sw0-port1 "10:54:00:00:00:03 10.0.0.3 1000::3"
> +ovn-nbctl lsp-add sw0 sw0-port2
> +ovn-nbctl lsp-set-addresses sw0-port2 "10:54:00:00:00:04 10.0.0.4 1000::4"
> +
> +ovn-nbctl ls-add sw1
> +ovn-nbctl lsp-add sw1 sw1-port1
> +ovn-nbctl lsp-set-addresses sw1-port1 "40:54:00:00:00:03 20.0.0.3 2000::3"
> +
> +ovn-nbctl lr-add lr0
> +ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::a/64
> +ovn-nbctl lsp-add sw0 sw0-lr0
> +ovn-nbctl lsp-set-type sw0-lr0 router
> +ovn-nbctl lsp-set-addresses sw0-lr0 router
> +ovn-nbctl lsp-set-options sw0-lr0 router-port=lr0-sw0
> +
> +ovn-nbctl lrp-add lr0 lr0-sw1 00:00:00:00:ff:02 20.0.0.1/24 2000::a/64
> +ovn-nbctl lsp-add sw1 sw1-lr0
> +ovn-nbctl lsp-set-type sw1-lr0 router
> +ovn-nbctl lsp-set-addresses sw1-lr0 router
> +ovn-nbctl lsp-set-options sw1-lr0 router-port=lr0-sw1
> +
> +ovn-nbctl ls-add public
> +ovn-nbctl lrp-add lr0 lr0-public 00:00:20:20:12:13 172.16.0.100/24 3000::a/64
> +ovn-nbctl lsp-add public public-lr0
> +ovn-nbctl lsp-set-type public-lr0 router
> +ovn-nbctl lsp-set-addresses public-lr0 router
> +ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public
> +
> +# localnet port
> +ovn-nbctl lsp-add public ln-public
> +ovn-nbctl lsp-set-type ln-public localnet
> +ovn-nbctl lsp-set-addresses ln-public unknown
> +ovn-nbctl lsp-set-options ln-public network_name=physnet1
> +
> +# schedule the gw router port to a chassis.
> +ovn-nbctl lrp-set-gateway-chassis lr0-public hv1 20
> +
> +# Create NAT entries for the ports
> +
> +# sw0-port1
> +ovn-nbctl lr-nat-add lr0 dnat_and_snat 172.16.0.110 10.0.0.3 sw0-port1 30:54:00:00:00:03
> +ovn-nbctl lr-nat-add lr0 dnat_and_snat 3000::c 1000::3 sw0-port1 40:54:00:00:00:03
> +# sw1-port1
> +ovn-nbctl lr-nat-add lr0 dnat_and_snat 172.16.0.120 20.0.0.3 sw1-port1 30:54:00:00:00:04
> +ovn-nbctl lr-nat-add lr0 dnat_and_snat 3000::d 2000::3 sw1-port1 40:54:00:00:00:04
> +
> +# Add snat entriess
> +ovn-nbctl lr-nat-add lr0 snat 172.16.0.100 10.0.0.0/24
> +ovn-nbctl lr-nat-add lr0 snat 172.16.0.101 10.0.0.10
> +ovn-nbctl lr-nat-add lr0 snat 172.16.0.102 10.0.0.11
> +ovn-nbctl lr-nat-add lr0 snat 172.16.0.100 20.0.0.0/24
> +
> +ovn-nbctl ls-add sw3
> +ovn-nbctl lsp-add sw3 sw3-port1
> +ovn-nbctl lsp-set-addresses sw3-port1 "20:14:00:00:00:03 30.0.0.3 3000::3"
> +
> +ovn-nbctl lr-add lr1
> +ovn-nbctl lrp-add lr1 lr1-sw3 00:00:00:10:ff:03 30.0.0.1/24 3000::a/64
> +ovn-nbctl lsp-add sw3 sw3-lr1
> +ovn-nbctl lsp-set-type sw3-lr1 router
> +ovn-nbctl lsp-set-addresses sw3-lr1 router
> +ovn-nbctl lsp-set-options sw3-lr1 router-port=lr1-sw3
> +
> +ovn-nbctl ls-add join
> +
> +# Connect lr1 to join
> +ovn-nbctl lrp-add lr1 lr1-join 00:00:04:01:02:03 170.0.0.1/24
> +ovn-nbctl lsp-add join join-lr1
> +ovn-nbctl lsp-set-type join-lr1 router
> +ovn-nbctl lsp-set-addresses join-lr1 router
> +ovn-nbctl lsp-set-options join-lr1 router-port=lr1-join
> +
> +# Create GW router
> +ovn-nbctl lr-add gw_router
> +# connect gw_router to join
> +ovn-nbctl lrp-add gw_router gw_router-join 00:00:03:11:12:13 170.0.0.2/24
> +ovn-nbctl lsp-add join join-gw_router
> +ovn-nbctl lsp-set-type join-gw_router router
> +ovn-nbctl lsp-set-addresses join-gw_router router
> +ovn-nbctl lsp-set-options join-gw_router router-port=gw_router-join
> +
> +# Connect gw_router to public
> +ovn-nbctl lrp-add gw_router gw_router-public 00:00:30:30:32:33 172.16.0.200/24 3000::b/64
> +ovn-nbctl lsp-add public public-gw_router
> +ovn-nbctl lsp-set-type public-gw_router router
> +ovn-nbctl lsp-set-addresses public-gw_router router
> +ovn-nbctl lsp-set-options public-gw_router router-port=gw_router-public
> +
> +# Pin gw_router to hv3
> +ovn-nbctl set logical_router gw_router options:chassis=hv3
> +
> +# Add snat entries on gw_router
> +ovn-nbctl lr-nat-add gw_router snat 172.16.0.200 30.0.0.0/24
> +ovn-nbctl lr-nat-add gw_router snat 172.16.0.201 30.0.0.3
> +
> +ovn-nbctl --wait=hv sync
> +
> +# Create an interface in br-phys in hv2 and send ARP request for 172.16.0.100
> +as hv2
> +ovs-vsctl -- add-port br-phys hv2-phys1 -- \
> +    set interface hv2-phys1 options:tx_pcap=hv2/phys1-tx.pcap \
> +    options:rxq_pcap=hv2/phys1-rx.pcap \
> +    ofport-request=1
> +
> +reset_pcap_file() {
> +    local iface=$1
> +    local pcap_file=$2
> +    ovs-vsctl -- set Interface $iface options:tx_pcap=dummy-tx.pcap \
> +options:rxq_pcap=dummy-rx.pcap
> +    rm -f ${pcap_file}*.pcap
> +    ovs-vsctl -- set Interface $iface options:tx_pcap=${pcap_file}-tx.pcap \
> +options:rxq_pcap=${pcap_file}-rx.pcap
> +}
> +
> +ip_to_hex() {
> +    printf "%02x%02x%02x%02x" "$@"
> +}
> +
> +send_arp_request() {
> +    local eth_src=$1 spa=$2 tpa=$3
> +    local eth_dst=ffffffffffff
> +    local eth_type=0806
> +    local eth=${eth_dst}${eth_src}${eth_type}
> +
> +    local arp=0001080006040001${eth_src}${spa}${eth_dst}${tpa}
> +
> +    local request=${eth}${arp}
> +    as hv2 ovs-appctl netdev-dummy/receive hv2-phys1 $request
> +}
> +
> +test_arp_response () {
> +    local router_mac=$1 router_ip=$2 gw=$3 nongw1=$4 nongw2=$5
> +
> +    echo "Checking arp reply for IP - $router_ip"
> +    as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
> +    as hv2 reset_pcap_file br-phys_n1 hv2/br-phys_n1
> +    as hv3 reset_pcap_file br-phys_n1 hv3/br-phys_n1
> +    as hv2 reset_pcap_file hv2-phys1 hv2/phys1
> +
> +    local src_mac=000200100011
> +    src_ip=$(ip_to_hex 172 16 0 40)
> +    send_arp_request ${src_mac} ${src_ip} ${router_ip}
> +    arp_reply=${src_mac}${router_mac}08060001080006040002${router_mac}
> +    arp_reply=${arp_reply}${router_ip}${src_mac}${src_ip}
> +
> +    OVS_WAIT_UNTIL([
> +        test $($PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv2/phys1-tx.pcap | wc -l) -ge 1
> +    ])
> +
> +    AT_CHECK([$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv2/phys1-tx.pcap | \
> +    grep -c $arp_reply], [0], [1
> +])
> +
> +    # $gw phys1-n1 should see the response because $gw ovn-controller responds
> +    # to arp request.
> +    AT_CHECK([$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" $gw/br-phys_n1-tx.pcap | \
> +    grep -c $arp_reply], [0], [1
> +])
> +
> +    # $nongw1 and $nongw1 phys1-n1 should not see the response.
> +    $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" $nongw1/br-phys_n1-tx.pcap
> +    AT_CHECK([$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" $nongw1/br-phys_n1-tx.pcap | \
> +    grep -c $arp_reply], [1], [0
> +])
> +
> +    AT_CHECK([$PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" $nongw2/br-phys_n1-tx.pcap | \
> +    grep -c $arp_reply], [1], [0
> +])
> +}
> +
> +# Send ARP request for the IPs which belongs to lr0 having
> +# distributed gw router port - lr0-public.
> +test_arp_response 000020201213 $(ip_to_hex 172 16 0 100) hv1 hv2 hv3
> +test_arp_response 000020201213 $(ip_to_hex 172 16 0 101) hv1 hv2 hv3
> +test_arp_response 000020201213 $(ip_to_hex 172 16 0 102) hv1 hv2 hv3
> +
> +# Send ARP request for the IP which belongs to gw_router
> +test_arp_response 000030303233 $(ip_to_hex 172 16 0 200) hv3 hv1 hv2
> +test_arp_response 000030303233 $(ip_to_hex 172 16 0 201) hv3 hv1 hv2
> +
> +# Make hv3 claim the cr-lr0-public
> +ovn-nbctl lrp-set-gateway-chassis lr0-public hv1 20
> +ovn-nbctl lrp-set-gateway-chassis lr0-public hv2 30
> +ovn-nbctl lrp-set-gateway-chassis lr0-public hv3 40
> +
> +hv3_uuid=$(ovn-sbctl --bare --columns _uuid list chassis hv3)
> +
> +OVS_WAIT_UNTIL([
> +    cr_lr0_public_ch=$(ovn-sbctl --bare --columns chassis list port_binding cr-lr0-public)
> +    test $cr_lr0_public_ch = $hv3_uuid
> +])
> +
> +test_arp_response 000020201213 $(ip_to_hex 172 16 0 100) hv3 hv1 hv2
> +test_arp_response 000020201213 $(ip_to_hex 172 16 0 101) hv3 hv1 hv2
> +test_arp_response 000020201213 $(ip_to_hex 172 16 0 102) hv3 hv1 hv2
> +
> +# Schedule gw_router on hv1.
> +ovn-nbctl set logical_router gw_router options:chassis=hv1
> +hv1_uuid=$(ovn-sbctl --bare --columns _uuid list chassis hv1)
> +
> +OVS_WAIT_UNTIL([
> +    gw_router_ch=$(ovn-sbctl --bare --columns chassis list port_binding gw_router-public)
> +    test $gw_router_ch = $hv1_uuid
> +])
> +
> +# Send ARP request for the IP which belongs to gw_router
> +test_arp_response 000030303233 $(ip_to_hex 172 16 0 200) hv1 hv2 hv3
> +test_arp_response 000030303233 $(ip_to_hex 172 16 0 201) hv1 hv2 hv3
> +
> +OVN_CLEANUP([hv1],[hv2],[hv3])
> +AT_CLEANUP
> +
>  # Duplicate ACLs (same match with same action) should work as expected.
>  # Conflict ACLs (same match with different actions) behavior is unpredictable
>  # (only one of them would work).
> 



More information about the dev mailing list