[ovs-dev] [PATCH v2 ovn] northd: fix check_pkt_larger ingress flows with FIP

Numan Siddique numans at ovn.org
Thu Nov 4 20:58:46 UTC 2021


On Thu, Nov 4, 2021 at 12:07 PM Lorenzo Bianconi
<lorenzo.bianconi at redhat.com> wrote:
>
> Add missing icmp{4,6}_error flows for ingress traffic destinated to FIP
> associated to the interface
>
> Related bz: https://bugzilla.redhat.com/show_bug.cgi?id=2018179
> Tested-by: Eran Kuris <ekuris at redhat.com>
>
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi at redhat.com>

Thanks.

I applied this patch to the main branch and branch-21.09.

Thanks
Numan

> ---
> Changes since v1:
> - add missing northd documentation
> ---
>  northd/northd.c         | 87 ++++++++++++++++++++++++++++++++++++++---
>  northd/ovn-northd.8.xml | 55 +++++++++++++++++++++++++-
>  tests/ovn.at            | 58 ++++++++++++++++++---------
>  3 files changed, 174 insertions(+), 26 deletions(-)
>
> diff --git a/northd/northd.c b/northd/northd.c
> index 59286034d..ee4d4df26 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -12371,11 +12371,82 @@ build_lrouter_out_snat_flow(struct hmap *lflows, struct ovn_datapath *od,
>      }
>  }
>
> +static void
> +build_lrouter_ingress_nat_check_pkt_len(struct hmap *lflows,
> +                                        const struct nbrec_nat *nat,
> +                                        struct ovn_datapath *od, bool is_v6,
> +                                        struct ds *match, struct ds *actions,
> +                                        int mtu, struct shash *meter_groups)
> +{
> +        ds_clear(match);
> +        ds_put_format(match, "inport == %s && "REGBIT_PKT_LARGER
> +                      " && "REGBIT_EGRESS_LOOPBACK" == 0",
> +                      od->l3dgw_ports[0]->json_key);
> +
> +        ds_clear(actions);
> +        if (!is_v6) {
> +            ds_put_format(match, " && ip4 && ip4.dst == %s", nat->external_ip);
> +            /* Set icmp4.frag_mtu to gw_mtu */
> +            ds_put_format(actions,
> +                "icmp4_error {"
> +                REGBIT_EGRESS_LOOPBACK" = 1; "
> +                REGBIT_PKT_LARGER" = 0; "
> +                "eth.dst = eth.src; "
> +                "eth.src = %s; "
> +                "ip4.dst = ip4.src; "
> +                "ip4.src = %s; "
> +                "ip.ttl = 254; "
> +                "icmp4.type = 3; /* Destination Unreachable. */ "
> +                "icmp4.code = 4; /* Frag Needed and DF was Set. */ "
> +                "icmp4.frag_mtu = %d; "
> +                "outport = %s; flags.loopback = 1; output; };",
> +                nat->external_mac,
> +                nat->external_ip,
> +                mtu, od->l3dgw_ports[0]->json_key);
> +            ovn_lflow_add_with_hint__(lflows, od, S_ROUTER_IN_IP_INPUT, 160,
> +                                      ds_cstr(match), ds_cstr(actions),
> +                                      NULL,
> +                                      copp_meter_get(
> +                                            COPP_ICMP4_ERR,
> +                                            od->nbr->copp,
> +                                            meter_groups),
> +                                      &nat->header_);
> +        } else {
> +            ds_put_format(match, " && ip6 && ip6.dst == %s", nat->external_ip);
> +            /* Set icmp6.frag_mtu to gw_mtu */
> +            ds_put_format(actions,
> +                "icmp6_error {"
> +                REGBIT_EGRESS_LOOPBACK" = 1; "
> +                REGBIT_PKT_LARGER" = 0; "
> +                "eth.dst = eth.src; "
> +                "eth.src = %s; "
> +                "ip6.dst = ip6.src; "
> +                "ip6.src = %s; "
> +                "ip.ttl = 254; "
> +                "icmp6.type = 2; /* Packet Too Big. */ "
> +                "icmp6.code = 0; "
> +                "icmp6.frag_mtu = %d; "
> +                "outport = %s; flags.loopback = 1; output; };",
> +                nat->external_mac,
> +                nat->external_ip,
> +                mtu, od->l3dgw_ports[0]->json_key);
> +            ovn_lflow_add_with_hint__(lflows, od, S_ROUTER_IN_IP_INPUT, 160,
> +                                      ds_cstr(match), ds_cstr(actions),
> +                                      NULL,
> +                                      copp_meter_get(
> +                                            COPP_ICMP6_ERR,
> +                                            od->nbr->copp,
> +                                            meter_groups),
> +                                      &nat->header_);
> +        }
> +}
> +
>  static void
>  build_lrouter_ingress_flow(struct hmap *lflows, struct ovn_datapath *od,
>                             const struct nbrec_nat *nat, struct ds *match,
>                             struct ds *actions, struct eth_addr mac,
> -                           bool distributed, bool is_v6)
> +                           bool distributed, bool is_v6,
> +                           struct shash *meter_groups)
>  {
>      if (od->n_l3dgw_ports && !strcmp(nat->type, "snat")) {
>          ds_clear(match);
> @@ -12399,7 +12470,8 @@ build_lrouter_ingress_flow(struct hmap *lflows, struct ovn_datapath *od,
>          */
>          ds_clear(actions);
>
> -        build_check_pkt_len_action_string(od->l3dgw_ports[0], actions);
> +        int gw_mtu = build_check_pkt_len_action_string(od->l3dgw_ports[0],
> +                                                       actions);
>          ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
>                        od->l3dgw_ports[0]->lrp_networks.ea_s);
>
> @@ -12413,6 +12485,11 @@ build_lrouter_ingress_flow(struct hmap *lflows, struct ovn_datapath *od,
>          ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ADMISSION, 50,
>                                  ds_cstr(match), ds_cstr(actions),
>                                  &nat->header_);
> +        if (gw_mtu) {
> +            build_lrouter_ingress_nat_check_pkt_len(lflows, nat, od, is_v6,
> +                                                    match, actions, gw_mtu,
> +                                                    meter_groups);
> +        }
>      }
>  }
>
> @@ -12510,7 +12587,7 @@ lrouter_check_nat_entry(struct ovn_datapath *od, const struct nbrec_nat *nat,
>  static void
>  build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od, struct hmap *lflows,
>                                  struct hmap *ports, struct ds *match,
> -                                struct ds *actions)
> +                                struct ds *actions, struct shash *meter_groups)
>  {
>      if (!od->nbr) {
>          return;
> @@ -12618,7 +12695,7 @@ build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od, struct hmap *lflows,
>
>          /* S_ROUTER_IN_ADMISSION - S_ROUTER_IN_IP_INPUT */
>          build_lrouter_ingress_flow(lflows, od, nat, match, actions,
> -                                   mac, distributed, is_v6);
> +                                   mac, distributed, is_v6, meter_groups);
>
>          /* Ingress Gateway Redirect Table: For NAT on a distributed
>           * router, add flows that are specific to a NAT rule.  These
> @@ -12789,7 +12866,7 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od,
>      build_misc_local_traffic_drop_flows_for_lrouter(od, lsi->lflows);
>      build_lrouter_arp_nd_for_datapath(od, lsi->lflows, lsi->meter_groups);
>      build_lrouter_nat_defrag_and_lb(od, lsi->lflows, lsi->ports, &lsi->match,
> -                                    &lsi->actions);
> +                                    &lsi->actions, lsi->meter_groups);
>  }
>
>  /* Helper function to combine all lflow generation which is iterated by port.
> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> index b6a5f7767..e42c70be1 100644
> --- a/northd/ovn-northd.8.xml
> +++ b/northd/ovn-northd.8.xml
> @@ -2209,6 +2209,57 @@ next;
>      </p>
>
>      <ul>
> +      <li>
> +        <p>
> +          For each <code>dnat_and_snat</code> NAT rule on a distributed
> +          logical routers or gateway routers with gateway port
> +          configured with <code>options:gateway_mtu</code> to a valid integer
> +          value <var>M</var>, a priority-160 flow with the match
> +          <code>inport == <var>LRP</var> && REGBIT_PKT_LARGER
> +          && REGBIT_EGRESS_LOOPBACK == 0</code>, where <var>LRP</var>
> +          is the logical router port and applies the following action for ipv4
> +          and ipv6 respectively:
> +        </p>
> +
> +    <pre>
> +icmp4_error {
> +    icmp4.type = 3; /* Destination Unreachable. */
> +    icmp4.code = 4;  /* Frag Needed and DF was Set. */
> +    icmp4.frag_mtu = <var>M</var>;
> +    eth.dst = eth.src;
> +    eth.src = <var>E</var>;
> +    ip4.dst = ip4.src;
> +    ip4.src = <var>I</var>;
> +    ip.ttl = 255;
> +    REGBIT_EGRESS_LOOPBACK = 1;
> +    REGBIT_PKT_LARGER 0;
> +    outport = <var>LRP</var>;
> +    flags.loopback = 1;
> +    output;
> +};
> +
> +icmp6_error {
> +    icmp6.type = 2;
> +    icmp6.code = 0;
> +    icmp6.frag_mtu = <var>M</var>;
> +    eth.dst = eth.src;
> +    eth.src = <var>E</var>;
> +    ip6.dst = ip6.src;
> +    ip6.src = <var>I</var>;
> +    ip.ttl = 255;
> +    REGBIT_EGRESS_LOOPBACK = 1;
> +    REGBIT_PKT_LARGER 0;
> +    outport = <var>LRP</var>;
> +    flags.loopback = 1;
> +    output;
> +};
> +    </pre>
> +        <p>
> +          where <var>E</var> and <var>I</var> are the NAT rule external mac
> +          and IP respectively.
> +        </p>
> +      </li>
> +
>        <li>
>          <p>
>            For distributed logical routers or gateway routers with gateway port
> @@ -2221,7 +2272,7 @@ next;
>          </p>
>
>      <pre>
> -icmp4 {
> +icmp4_error {
>      icmp4.type = 3; /* Destination Unreachable. */
>      icmp4.code = 4;  /* Frag Needed and DF was Set. */
>      icmp4.frag_mtu = <var>M</var>;
> @@ -2234,7 +2285,7 @@ icmp4 {
>      next(pipeline=ingress, table=0);
>  };
>
> -icmp6 {
> +icmp6_error {
>      icmp6.type = 2;
>      icmp6.code = 0;
>      icmp6.frag_mtu = <var>M</var>;
> diff --git a/tests/ovn.at b/tests/ovn.at
> index ec4c877b2..6f6f5c2da 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -16801,6 +16801,8 @@ ovn_start
>  ovn-nbctl ls-add sw0
>  ovn-nbctl lsp-add sw0 sw0-port1
>  ovn-nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:01 10.0.0.3 1000::3"
> +ovn-nbctl lsp-add sw0 sw0-port2
> +ovn-nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:02 10.0.0.4 1000::4"
>
>  ovn-nbctl lr-add lr0
>  ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::1/64
> @@ -16824,7 +16826,9 @@ ovn-nbctl lsp-set-options ln-public network_name=phys
>
>  ovn-nbctl lrp-set-gateway-chassis lr0-public hv1 20
>  ovn-nbctl lr-nat-add lr0 snat 172.168.0.100 10.0.0.0/24
> +ovn-nbctl lr-nat-add lr0 dnat_and_snat 172.168.0.110 10.0.0.4 sw0-port2 f0:00:00:01:02:04
>  ovn-nbctl lr-nat-add lr0 snat 2000::1 1000::/64
> +ovn-nbctl lr-nat-add lr0 dnat_and_snat 2000::2 1000::4 sw0-port2 f0:00:00:01:02:04
>
>  net_add n1
>
> @@ -16838,6 +16842,11 @@ ovs-vsctl -- add-port br-int hv1-vif1 -- \
>      options:tx_pcap=hv1/vif1-tx.pcap \
>      options:rxq_pcap=hv1/vif1-rx.pcap \
>      ofport-request=1
> +ovs-vsctl -- add-port br-int hv1-vif2 -- \
> +    set interface hv1-vif2 external-ids:iface-id=sw0-port2 \
> +    options:tx_pcap=hv1/vif2-tx.pcap \
> +    options:rxq_pcap=hv1/vif2-rx.pcap \
> +    ofport-request=1
>
>  reset_pcap_file() {
>       local iface=$1
> @@ -16936,16 +16945,19 @@ test_ip_packet_larger() {
>
>  # IPv4 ingress traffic generated outside the cluster
>  test_ip_packet_larger_ext() {
> -    local mtu=$1
> +    local idx=$1
> +    local checksum=$4
> +    local mtu=$5
> +    local reply_checksum=$6
>
>      # Send ip packet from sw0-port1 to outside
>      src_mac="00000012af11" # external mac
> -    dst_mac="000020201213" # lr0-public mac
> +    dst_mac="$2" # lr0-public mac
>      src_ip=`ip_to_hex 172 168 0 4`
> -    dst_ip=`ip_to_hex 172 168 0 100`
> +    dst_ip="$3"
>      # Set the packet length to 118.
>      pkt_len=0076
> -    packet=${dst_mac}${src_mac}08004500${pkt_len}00000000400120cf
> +    packet=${dst_mac}${src_mac}08004500${pkt_len}000000004001${checksum}
>      orig_packet_l3=${src_ip}${dst_ip}0900000000000000
>      orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
>      orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> @@ -16959,19 +16971,19 @@ test_ip_packet_larger_ext() {
>      # optional_gw_ip_garp=ffffffffffff00002020121308060001080006040001000020201213aca80064000000000000aca80064
>      ext_ip_garp=ffffffffffff00000012af110806000108000604000100000012af11aca80004000000000000aca80004
>
> -    src_ip=`ip_to_hex 172 168 0 100`
> +    src_ip="$3"
>      dst_ip=`ip_to_hex 172 168 0 4`
>      # pkt len should be 146 (28 (icmp packet) + 118 (orig ip + payload))
>      reply_pkt_len=0092
>      ip_csum=f397
> -    icmp_reply=${src_mac}${dst_mac}08004500${reply_pkt_len}00004000fe0122b2
> +    icmp_reply=${src_mac}${dst_mac}08004500${reply_pkt_len}00004000fe01${reply_checksum}
>      icmp_reply=${icmp_reply}${src_ip}${dst_ip}0304${ip_csum}0000$(printf "%04x" $mtu)
> -    icmp_reply=${icmp_reply}4500${pkt_len}00000000400120cf
> +    icmp_reply=${icmp_reply}4500${pkt_len}000000004001${checksum}
>      icmp_reply=${icmp_reply}${orig_packet_l3}
>      echo $icmp_reply > br-phys_n1.expected
>
>      as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
> -    as hv1 reset_pcap_file hv1-vif1 hv1/vif1
> +    as hv1 reset_pcap_file hv1-vif${idx} hv1/vif${idx}
>
>      check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $ext_ip_garp
>      sleep 1
> @@ -17038,13 +17050,15 @@ test_ip6_packet_larger() {
>
>  # IPv6 ingress traffic generated outside the cluster
>  test_ip6_packet_larger_ext() {
> -    local mtu=$1
> +    local idx=$1
> +    local mtu=$4
> +    local icmp_checksum=$5
>
>      local eth_src=00000012af11
> -    local eth_dst=000020201213
> +    local eth_dst=$2
>
>      local ipv6_src=20000000000000000000000000000004
> -    local ipv6_dst=20000000000000000000000000000001
> +    local ipv6_dst=$3
>
>      local payload=0000000000000000000000000000000000000000
>      local payload=${payload}0000000000000000000000000000000000000000
> @@ -17052,11 +17066,11 @@ test_ip6_packet_larger_ext() {
>      local payload=${payload}0000000000000000000000000000000000000000
>
>      local ip6_hdr=6000000000583afe${ipv6_src}${ipv6_dst}
> -    local packet=${eth_dst}${eth_src}86dd${ip6_hdr}9000cc7662f00001${payload}
> +    local packet=${eth_dst}${eth_src}86dd${ip6_hdr}9000${icmp_checksum}62f00001${payload}
>
>      # Some ** ARP ** packets might still be received - ignore them
>      as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
> -    as hv1 reset_pcap_file hv1-vif1 hv1/vif1
> +    as hv1 reset_pcap_file hv1-vif${idx} hv1/vif${idx}
>
>      local na_ip6_hdr=6000000000203aff${ipv6_src}${ipv6_dst}
>      local na=${eth_dst}${eth_src}86dd${na_ip6_hdr}8800d78440000000${ipv6_src}0201${eth_src}
> @@ -17114,7 +17128,7 @@ for mtu in 100 500 118; do
>      OVS_WAIT_FOR_OUTPUT([
>          as hv1 ovs-ofctl dump-flows br-int > br-int-flows-$mtu
>          AT_CAPTURE_FILE([br-int-flows-$mtu])
> -        grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [3
> +        grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [4
>  ])
>
>      AS_BOX([testing outgoing traffic mtu $mtu - IPv4])
> @@ -17132,14 +17146,20 @@ AT_CAPTURE_FILE([ext-sbflows-100])
>  OVS_WAIT_FOR_OUTPUT([
>      as hv1 ovs-ofctl dump-flows br-int > ext-br-int-flows-100
>      AT_CAPTURE_FILE([ext-br-int-flows-100])
> -    grep "check_pkt_larger(118)" ext-br-int-flows-100 | wc -l], [0], [3
> +    grep "check_pkt_larger(118)" ext-br-int-flows-100 | wc -l], [0], [4
>  ])
>
>  AS_BOX([testing ingress traffic mtu 100 - IPv4])
> -test_ip_packet_larger_ext 100
> +test_ip_packet_larger_ext 1 000020201213 $(ip_to_hex 172 168 0 100) 20cf 100 22b2
> +
> +AS_BOX([testing ingress traffic mtu 100 - IPv4 FIP])
> +test_ip_packet_larger_ext 2 f00000010204 $(ip_to_hex 172 168 0 110) 20c5 100 22a8
>
>  AS_BOX([testing ingress traffic mtu 100 - IPv6])
> -test_ip6_packet_larger_ext 100
> +test_ip6_packet_larger_ext 1 000020201213 20000000000000000000000000000001 100 cc76
> +
> +AS_BOX([testing ingress traffic mtu 100 - IPv6 FIP])
> +test_ip6_packet_larger_ext 2 f00000010204 20000000000000000000000000000002 100 cc75
>
>  ovn-nbctl lsp-del sw0-lr0
>
> @@ -17200,10 +17220,10 @@ OVS_WAIT_FOR_OUTPUT([
>  ])
>
>  AS_BOX([testing ingress traffic mtu 100 for gw router - IPv4])
> -test_ip_packet_larger_ext 100
> +test_ip_packet_larger_ext 1 000020201213 $(ip_to_hex 172 168 0 100) 20cf 100 22b2
>
>  AS_BOX([testing ingress traffic mtu 100 for gw router - IPv6])
> -test_ip6_packet_larger_ext 100
> +test_ip6_packet_larger_ext 1 000020201213 20000000000000000000000000000001 100 cc76
>
>  OVN_CLEANUP([hv1])
>  AT_CLEANUP
> --
> 2.31.1
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>


More information about the dev mailing list