[ovs-dev] [PATCH ovn] northd: fix check_pkt_larger ingress flows with FIP

Lorenzo Bianconi lorenzo.bianconi at redhat.com
Wed Nov 3 13:49:20 UTC 2021


Add missing icmp{4,6}_error flows for ingress traffic destinated to FIP
associated to the interface.

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi at redhat.com>
---
 northd/northd.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++---
 tests/ovn.at    | 58 ++++++++++++++++++++++-----------
 2 files changed, 121 insertions(+), 24 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index da4f9cd14..dedd38167 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -12530,11 +12530,82 @@ build_lrouter_out_snat_flow(struct hmap *lflows, struct ovn_datapath *od,
     }
 }
 
+static void
+build_lrouter_ingress_nat_check_pkt_len(struct hmap *lflows,
+                                        const struct nbrec_nat *nat,
+                                        struct ovn_datapath *od, bool is_v6,
+                                        struct ds *match, struct ds *actions,
+                                        int mtu, struct shash *meter_groups)
+{
+        ds_clear(match);
+        ds_put_format(match, "inport == %s && "REGBIT_PKT_LARGER
+                      " && "REGBIT_EGRESS_LOOPBACK" == 0",
+                      od->l3dgw_ports[0]->json_key);
+
+        ds_clear(actions);
+        if (!is_v6) {
+            ds_put_format(match, " && ip4 && ip4.dst == %s", nat->external_ip);
+            /* Set icmp4.frag_mtu to gw_mtu */
+            ds_put_format(actions,
+                "icmp4_error {"
+                REGBIT_EGRESS_LOOPBACK" = 1; "
+                REGBIT_PKT_LARGER" = 0; "
+                "eth.dst = eth.src; "
+                "eth.src = %s; "
+                "ip4.dst = ip4.src; "
+                "ip4.src = %s; "
+                "ip.ttl = 254; "
+                "icmp4.type = 3; /* Destination Unreachable. */ "
+                "icmp4.code = 4; /* Frag Needed and DF was Set. */ "
+                "icmp4.frag_mtu = %d; "
+                "outport = %s; flags.loopback = 1; output; };",
+                nat->external_mac,
+                nat->external_ip,
+                mtu, od->l3dgw_ports[0]->json_key);
+            ovn_lflow_add_with_hint__(lflows, od, S_ROUTER_IN_IP_INPUT, 160,
+                                      ds_cstr(match), ds_cstr(actions),
+                                      NULL,
+                                      copp_meter_get(
+                                            COPP_ICMP4_ERR,
+                                            od->nbr->copp,
+                                            meter_groups),
+                                      &nat->header_);
+        } else {
+            ds_put_format(match, " && ip6 && ip6.dst == %s", nat->external_ip);
+            /* Set icmp6.frag_mtu to gw_mtu */
+            ds_put_format(actions,
+                "icmp6_error {"
+                REGBIT_EGRESS_LOOPBACK" = 1; "
+                REGBIT_PKT_LARGER" = 0; "
+                "eth.dst = eth.src; "
+                "eth.src = %s; "
+                "ip6.dst = ip6.src; "
+                "ip6.src = %s; "
+                "ip.ttl = 254; "
+                "icmp6.type = 2; /* Packet Too Big. */ "
+                "icmp6.code = 0; "
+                "icmp6.frag_mtu = %d; "
+                "outport = %s; flags.loopback = 1; output; };",
+                nat->external_mac,
+                nat->external_ip,
+                mtu, od->l3dgw_ports[0]->json_key);
+            ovn_lflow_add_with_hint__(lflows, od, S_ROUTER_IN_IP_INPUT, 160,
+                                      ds_cstr(match), ds_cstr(actions),
+                                      NULL,
+                                      copp_meter_get(
+                                            COPP_ICMP6_ERR,
+                                            od->nbr->copp,
+                                            meter_groups),
+                                      &nat->header_);
+        }
+}
+
 static void
 build_lrouter_ingress_flow(struct hmap *lflows, struct ovn_datapath *od,
                            const struct nbrec_nat *nat, struct ds *match,
                            struct ds *actions, struct eth_addr mac,
-                           bool distributed, bool is_v6)
+                           bool distributed, bool is_v6,
+                           struct shash *meter_groups)
 {
     if (od->n_l3dgw_ports && !strcmp(nat->type, "snat")) {
         ds_clear(match);
@@ -12558,7 +12629,8 @@ build_lrouter_ingress_flow(struct hmap *lflows, struct ovn_datapath *od,
         */
         ds_clear(actions);
 
-        build_check_pkt_len_action_string(od->l3dgw_ports[0], actions);
+        int gw_mtu = build_check_pkt_len_action_string(od->l3dgw_ports[0],
+                                                       actions);
         ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
                       od->l3dgw_ports[0]->lrp_networks.ea_s);
 
@@ -12572,6 +12644,11 @@ build_lrouter_ingress_flow(struct hmap *lflows, struct ovn_datapath *od,
         ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ADMISSION, 50,
                                 ds_cstr(match), ds_cstr(actions),
                                 &nat->header_);
+        if (gw_mtu) {
+            build_lrouter_ingress_nat_check_pkt_len(lflows, nat, od, is_v6,
+                                                    match, actions, gw_mtu,
+                                                    meter_groups);
+        }
     }
 }
 
@@ -12669,7 +12746,7 @@ lrouter_check_nat_entry(struct ovn_datapath *od, const struct nbrec_nat *nat,
 static void
 build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od, struct hmap *lflows,
                                 struct hmap *ports, struct ds *match,
-                                struct ds *actions)
+                                struct ds *actions, struct shash *meter_groups)
 {
     if (!od->nbr) {
         return;
@@ -12777,7 +12854,7 @@ build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od, struct hmap *lflows,
 
         /* S_ROUTER_IN_ADMISSION - S_ROUTER_IN_IP_INPUT */
         build_lrouter_ingress_flow(lflows, od, nat, match, actions,
-                                   mac, distributed, is_v6);
+                                   mac, distributed, is_v6, meter_groups);
 
         /* Ingress Gateway Redirect Table: For NAT on a distributed
          * router, add flows that are specific to a NAT rule.  These
@@ -12948,7 +13025,7 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od,
     build_misc_local_traffic_drop_flows_for_lrouter(od, lsi->lflows);
     build_lrouter_arp_nd_for_datapath(od, lsi->lflows, lsi->meter_groups);
     build_lrouter_nat_defrag_and_lb(od, lsi->lflows, lsi->ports, &lsi->match,
-                                    &lsi->actions);
+                                    &lsi->actions, lsi->meter_groups);
 }
 
 /* Helper function to combine all lflow generation which is iterated by port.
diff --git a/tests/ovn.at b/tests/ovn.at
index 5458552d3..528e50a6f 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -16801,6 +16801,8 @@ ovn_start
 ovn-nbctl ls-add sw0
 ovn-nbctl lsp-add sw0 sw0-port1
 ovn-nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:01 10.0.0.3 1000::3"
+ovn-nbctl lsp-add sw0 sw0-port2
+ovn-nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:02 10.0.0.4 1000::4"
 
 ovn-nbctl lr-add lr0
 ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::1/64
@@ -16824,7 +16826,9 @@ ovn-nbctl lsp-set-options ln-public network_name=phys
 
 ovn-nbctl lrp-set-gateway-chassis lr0-public hv1 20
 ovn-nbctl lr-nat-add lr0 snat 172.168.0.100 10.0.0.0/24
+ovn-nbctl lr-nat-add lr0 dnat_and_snat 172.168.0.110 10.0.0.4 sw0-port2 f0:00:00:01:02:04
 ovn-nbctl lr-nat-add lr0 snat 2000::1 1000::/64
+ovn-nbctl lr-nat-add lr0 dnat_and_snat 2000::2 1000::4 sw0-port2 f0:00:00:01:02:04
 
 net_add n1
 
@@ -16838,6 +16842,11 @@ ovs-vsctl -- add-port br-int hv1-vif1 -- \
     options:tx_pcap=hv1/vif1-tx.pcap \
     options:rxq_pcap=hv1/vif1-rx.pcap \
     ofport-request=1
+ovs-vsctl -- add-port br-int hv1-vif2 -- \
+    set interface hv1-vif2 external-ids:iface-id=sw0-port2 \
+    options:tx_pcap=hv1/vif2-tx.pcap \
+    options:rxq_pcap=hv1/vif2-rx.pcap \
+    ofport-request=1
 
 reset_pcap_file() {
      local iface=$1
@@ -16936,16 +16945,19 @@ test_ip_packet_larger() {
 
 # IPv4 ingress traffic generated outside the cluster
 test_ip_packet_larger_ext() {
-    local mtu=$1
+    local idx=$1
+    local checksum=$4
+    local mtu=$5
+    local reply_checksum=$6
 
     # Send ip packet from sw0-port1 to outside
     src_mac="00000012af11" # external mac
-    dst_mac="000020201213" # lr0-public mac
+    dst_mac="$2" # lr0-public mac
     src_ip=`ip_to_hex 172 168 0 4`
-    dst_ip=`ip_to_hex 172 168 0 100`
+    dst_ip="$3"
     # Set the packet length to 118.
     pkt_len=0076
-    packet=${dst_mac}${src_mac}08004500${pkt_len}00000000400120cf
+    packet=${dst_mac}${src_mac}08004500${pkt_len}000000004001${checksum}
     orig_packet_l3=${src_ip}${dst_ip}0900000000000000
     orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
     orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
@@ -16959,19 +16971,19 @@ test_ip_packet_larger_ext() {
     # optional_gw_ip_garp=ffffffffffff00002020121308060001080006040001000020201213aca80064000000000000aca80064
     ext_ip_garp=ffffffffffff00000012af110806000108000604000100000012af11aca80004000000000000aca80004
 
-    src_ip=`ip_to_hex 172 168 0 100`
+    src_ip="$3"
     dst_ip=`ip_to_hex 172 168 0 4`
     # pkt len should be 146 (28 (icmp packet) + 118 (orig ip + payload))
     reply_pkt_len=0092
     ip_csum=f397
-    icmp_reply=${src_mac}${dst_mac}08004500${reply_pkt_len}00004000fe0122b2
+    icmp_reply=${src_mac}${dst_mac}08004500${reply_pkt_len}00004000fe01${reply_checksum}
     icmp_reply=${icmp_reply}${src_ip}${dst_ip}0304${ip_csum}0000$(printf "%04x" $mtu)
-    icmp_reply=${icmp_reply}4500${pkt_len}00000000400120cf
+    icmp_reply=${icmp_reply}4500${pkt_len}000000004001${checksum}
     icmp_reply=${icmp_reply}${orig_packet_l3}
     echo $icmp_reply > br-phys_n1.expected
 
     as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
-    as hv1 reset_pcap_file hv1-vif1 hv1/vif1
+    as hv1 reset_pcap_file hv1-vif${idx} hv1/vif${idx}
 
     check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $ext_ip_garp
     sleep 1
@@ -17038,13 +17050,15 @@ test_ip6_packet_larger() {
 
 # IPv6 ingress traffic generated outside the cluster
 test_ip6_packet_larger_ext() {
-    local mtu=$1
+    local idx=$1
+    local mtu=$4
+    local icmp_checksum=$5
 
     local eth_src=00000012af11
-    local eth_dst=000020201213
+    local eth_dst=$2
 
     local ipv6_src=20000000000000000000000000000004
-    local ipv6_dst=20000000000000000000000000000001
+    local ipv6_dst=$3
 
     local payload=0000000000000000000000000000000000000000
     local payload=${payload}0000000000000000000000000000000000000000
@@ -17052,11 +17066,11 @@ test_ip6_packet_larger_ext() {
     local payload=${payload}0000000000000000000000000000000000000000
 
     local ip6_hdr=6000000000583afe${ipv6_src}${ipv6_dst}
-    local packet=${eth_dst}${eth_src}86dd${ip6_hdr}9000cc7662f00001${payload}
+    local packet=${eth_dst}${eth_src}86dd${ip6_hdr}9000${icmp_checksum}62f00001${payload}
 
     # Some ** ARP ** packets might still be received - ignore them
     as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
-    as hv1 reset_pcap_file hv1-vif1 hv1/vif1
+    as hv1 reset_pcap_file hv1-vif${idx} hv1/vif${idx}
 
     local na_ip6_hdr=6000000000203aff${ipv6_src}${ipv6_dst}
     local na=${eth_dst}${eth_src}86dd${na_ip6_hdr}8800d78440000000${ipv6_src}0201${eth_src}
@@ -17114,7 +17128,7 @@ for mtu in 100 500 118; do
     OVS_WAIT_FOR_OUTPUT([
         as hv1 ovs-ofctl dump-flows br-int > br-int-flows-$mtu
         AT_CAPTURE_FILE([br-int-flows-$mtu])
-        grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [3
+        grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [4
 ])
 
     AS_BOX([testing outgoing traffic mtu $mtu - IPv4])
@@ -17132,14 +17146,20 @@ AT_CAPTURE_FILE([ext-sbflows-100])
 OVS_WAIT_FOR_OUTPUT([
     as hv1 ovs-ofctl dump-flows br-int > ext-br-int-flows-100
     AT_CAPTURE_FILE([ext-br-int-flows-100])
-    grep "check_pkt_larger(118)" ext-br-int-flows-100 | wc -l], [0], [3
+    grep "check_pkt_larger(118)" ext-br-int-flows-100 | wc -l], [0], [4
 ])
 
 AS_BOX([testing ingress traffic mtu 100 - IPv4])
-test_ip_packet_larger_ext 100
+test_ip_packet_larger_ext 1 000020201213 $(ip_to_hex 172 168 0 100) 20cf 100 22b2
+
+AS_BOX([testing ingress traffic mtu 100 - IPv4 FIP])
+test_ip_packet_larger_ext 2 f00000010204 $(ip_to_hex 172 168 0 110) 20c5 100 22a8
 
 AS_BOX([testing ingress traffic mtu 100 - IPv6])
-test_ip6_packet_larger_ext 100
+test_ip6_packet_larger_ext 1 000020201213 20000000000000000000000000000001 100 cc76
+
+AS_BOX([testing ingress traffic mtu 100 - IPv6 FIP])
+test_ip6_packet_larger_ext 2 f00000010204 20000000000000000000000000000002 100 cc75
 
 ovn-nbctl lsp-del sw0-lr0
 
@@ -17200,10 +17220,10 @@ OVS_WAIT_FOR_OUTPUT([
 ])
 
 AS_BOX([testing ingress traffic mtu 100 for gw router - IPv4])
-test_ip_packet_larger_ext 100
+test_ip_packet_larger_ext 1 000020201213 $(ip_to_hex 172 168 0 100) 20cf 100 22b2
 
 AS_BOX([testing ingress traffic mtu 100 for gw router - IPv6])
-test_ip6_packet_larger_ext 100
+test_ip6_packet_larger_ext 1 000020201213 20000000000000000000000000000001 100 cc76
 
 OVN_CLEANUP([hv1])
 AT_CLEANUP
-- 
2.31.1



More information about the dev mailing list