[ovs-dev] [PATCH v3 3/3] northd: add check_pkt_larger lflows for ingress traffic

Numan Siddique numans at ovn.org
Mon Jun 21 21:49:13 UTC 2021


On Fri, Jun 11, 2021 at 10:32 AM Lorenzo Bianconi
<lorenzo.bianconi at redhat.com> wrote:
>
> Introduce check_pkt_larger action for ingress traffic
> entering the cluster from a distributed gw router port
> or from a gw router. This patch enables pMTU discovery
> for ingress traffic.
>
> Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi at redhat.com>


Hi Lorenzo,

Please update the ovn nb logical flow documentation.

This patch calls check_pkt_larger() in two places - one in
'lr_in_admission' and the other
in 'lr_in_chk_pkt_len'.   Instead of this I'd suggest to first
determine if check pkt larger is
required or not by storing 1 in a register.
And then you can call check_pkt_larger() if the register bit is set.

Thanks
Numan

> ---
>  northd/ovn-northd.c | 166 ++++++++++++++++++++++++++------------------
>  tests/ovn.at        | 137 ++++++++++++++++++++++++++++++++++--
>  2 files changed, 231 insertions(+), 72 deletions(-)
>
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index 23367dbb0..a9b046898 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -9501,6 +9501,10 @@ build_adm_ctrl_flows_for_lrouter(
>      }
>  }
>
> +static void
> +build_check_pkt_len_action_string(struct ovn_port *op, int *pmtu,
> +                                  struct ds *actions);
> +
>  /* Logical router ingress Table 0: L2 Admission Control
>   * This table drops packets that the router shouldn’t see at all based
>   * on their Ethernet headers.
> @@ -9528,6 +9532,8 @@ build_adm_ctrl_flows_for_lrouter_port(
>           * the pipeline.
>           */
>          ds_clear(actions);
> +
> +        build_check_pkt_len_action_string(op, NULL, actions);
>          ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
>                        op->lrp_networks.ea_s);
>
> @@ -10422,32 +10428,110 @@ build_arp_resolve_flows_for_lrouter_port(
>
>  }
>
> +static void
> +build_icmperr_pkt_big_flows(struct ovn_port *op, int mtu, struct hmap *lflows,
> +                            struct ds *match, struct ds *actions,
> +                            enum ovn_stage stage)
> +{
> +    if (op->lrp_networks.ipv4_addrs) {
> +        ds_clear(match);
> +        ds_put_format(match,
> +                      "inport == %s && ip4 && "REGBIT_PKT_LARGER
> +                      " && !"REGBIT_EGRESS_LOOPBACK, op->json_key);
> +
> +        ds_clear(actions);
> +        /* Set icmp4.frag_mtu to gw_mtu */
> +        ds_put_format(actions,
> +            "icmp4_error {"
> +            REGBIT_EGRESS_LOOPBACK" = 1; "
> +            REGBIT_PKT_LARGER" = 0; "
> +            "eth.dst = %s; "
> +            "ip4.dst = ip4.src; "
> +            "ip4.src = %s; "
> +            "ip.ttl = 255; "
> +            "icmp4.type = 3; /* Destination Unreachable. */ "
> +            "icmp4.code = 4; /* Frag Needed and DF was Set. */ "
> +            "icmp4.frag_mtu = %d; "
> +            "next(pipeline=ingress, table=%d); };",
> +            op->lrp_networks.ea_s,
> +            op->lrp_networks.ipv4_addrs[0].addr_s,
> +            mtu, ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
> +        ovn_lflow_add_with_hint(lflows, op->od, stage, 150,
> +                                ds_cstr(match), ds_cstr(actions),
> +                                &op->nbrp->header_);
> +    }
> +
> +    if (op->lrp_networks.ipv6_addrs) {
> +        ds_clear(match);
> +        ds_put_format(match, "inport == %s && ip6 && "REGBIT_PKT_LARGER
> +                      " && !"REGBIT_EGRESS_LOOPBACK, op->json_key);
> +
> +        ds_clear(actions);
> +        /* Set icmp6.frag_mtu to gw_mtu */
> +        ds_put_format(actions,
> +            "icmp6_error {"
> +            REGBIT_EGRESS_LOOPBACK" = 1; "
> +            REGBIT_PKT_LARGER" = 0; "
> +            "eth.dst = %s; "
> +            "ip6.dst = ip6.src; "
> +            "ip6.src = %s; "
> +            "ip.ttl = 255; "
> +            "icmp6.type = 2; /* Packet Too Big. */ "
> +            "icmp6.code = 0; "
> +            "icmp6.frag_mtu = %d; "
> +            "next(pipeline=ingress, table=%d); };",
> +            op->lrp_networks.ea_s,
> +            op->lrp_networks.ipv6_addrs[0].addr_s,
> +            mtu, ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
> +        ovn_lflow_add_with_hint(lflows, op->od, stage, 150,
> +                                ds_cstr(match), ds_cstr(actions),
> +                                &op->nbrp->header_);
> +    }
> +}
> +
> +static void
> +build_check_pkt_len_action_string(struct ovn_port *op, int *pmtu,
> +                                  struct ds *actions)
> +{
> +    int gw_mtu = smap_get_int(&op->nbrp->options, "gateway_mtu", 0);
> +
> +    if (gw_mtu > 0) {
> +        /* Add the flows only if gateway_mtu is configured. */
> +        ds_put_format(actions,
> +                      REGBIT_PKT_LARGER" = check_pkt_larger(%d); ",
> +                      gw_mtu + VLAN_ETH_HEADER_LEN);
> +    }
> +    if (pmtu) {
> +        *pmtu = gw_mtu;
> +    }
> +}
> +
>  static void
>  build_check_pkt_len_flows_for_lrp(struct ovn_port *op,
>                                    struct hmap *lflows, struct hmap *ports,
>                                    struct ds *match, struct ds *actions)
>  {
> -    int gw_mtu = 0;
> +    int gw_mtu;
>
> -    if (op->nbrp) {
> -         gw_mtu = smap_get_int(&op->nbrp->options, "gateway_mtu", 0);
> -    }
> -    /* Add the flows only if gateway_mtu is configured. */
> +    ds_clear(actions);
> +    build_check_pkt_len_action_string(op, &gw_mtu, actions);
>      if (gw_mtu <= 0) {
>          return;
>      }
>
> +    ds_put_format(actions, "next;");
> +
>      ds_clear(match);
>      ds_put_format(match, "outport == %s", op->json_key);
>
> -    ds_clear(actions);
> -    ds_put_format(actions,
> -                  REGBIT_PKT_LARGER" = check_pkt_larger(%d);"
> -                  " next;", gw_mtu + VLAN_ETH_HEADER_LEN);
>      ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_CHK_PKT_LEN, 50,
>                              ds_cstr(match), ds_cstr(actions),
>                              &op->nbrp->header_);
>
> +    /* ingress traffic */
> +    build_icmperr_pkt_big_flows(op, gw_mtu, lflows, match, actions,
> +                                S_ROUTER_IN_IP_INPUT);
> +
>      for (size_t i = 0; i < op->od->nbr->n_ports; i++) {
>          struct ovn_port *rp = ovn_port_find(ports,
>                                              op->od->nbr->ports[i]->name);
> @@ -10455,63 +10539,9 @@ build_check_pkt_len_flows_for_lrp(struct ovn_port *op,
>              continue;
>          }
>
> -        if (rp->lrp_networks.ipv4_addrs) {
> -            ds_clear(match);
> -            ds_put_format(match, "inport == %s && outport == %s"
> -                          " && ip4 && "REGBIT_PKT_LARGER,
> -                          rp->json_key, op->json_key);
> -
> -            ds_clear(actions);
> -            /* Set icmp4.frag_mtu to gw_mtu */
> -            ds_put_format(actions,
> -                "icmp4_error {"
> -                REGBIT_EGRESS_LOOPBACK" = 1; "
> -                "eth.dst = %s; "
> -                "ip4.dst = ip4.src; "
> -                "ip4.src = %s; "
> -                "ip.ttl = 255; "
> -                "icmp4.type = 3; /* Destination Unreachable. */ "
> -                "icmp4.code = 4; /* Frag Needed and DF was Set. */ "
> -                "icmp4.frag_mtu = %d; "
> -                "next(pipeline=ingress, table=%d); };",
> -                rp->lrp_networks.ea_s,
> -                rp->lrp_networks.ipv4_addrs[0].addr_s,
> -                gw_mtu,
> -                ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
> -            ovn_lflow_add_with_hint(lflows, op->od,
> -                                    S_ROUTER_IN_LARGER_PKTS, 50,
> -                                    ds_cstr(match), ds_cstr(actions),
> -                                    &rp->nbrp->header_);
> -        }
> -
> -        if (rp->lrp_networks.ipv6_addrs) {
> -            ds_clear(match);
> -            ds_put_format(match, "inport == %s && outport == %s"
> -                          " && ip6 && "REGBIT_PKT_LARGER,
> -                          rp->json_key, op->json_key);
> -
> -            ds_clear(actions);
> -            /* Set icmp6.frag_mtu to gw_mtu */
> -            ds_put_format(actions,
> -                "icmp6_error {"
> -                REGBIT_EGRESS_LOOPBACK" = 1; "
> -                "eth.dst = %s; "
> -                "ip6.dst = ip6.src; "
> -                "ip6.src = %s; "
> -                "ip.ttl = 255; "
> -                "icmp6.type = 2; /* Packet Too Big. */ "
> -                "icmp6.code = 0; "
> -                "icmp6.frag_mtu = %d; "
> -                "next(pipeline=ingress, table=%d); };",
> -                rp->lrp_networks.ea_s,
> -                rp->lrp_networks.ipv6_addrs[0].addr_s,
> -                gw_mtu,
> -                ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
> -            ovn_lflow_add_with_hint(lflows, op->od,
> -                                    S_ROUTER_IN_LARGER_PKTS, 50,
> -                                    ds_cstr(match), ds_cstr(actions),
> -                                    &rp->nbrp->header_);
> -        }
> +        /* egress traffic */
> +        build_icmperr_pkt_big_flows(rp, gw_mtu, lflows, match, actions,
> +                                    S_ROUTER_IN_LARGER_PKTS);
>      }
>  }
>
> @@ -11579,8 +11609,10 @@ build_lrouter_ingress_flow(struct hmap *lflows, struct ovn_datapath *od,
>          * down in the pipeline.
>          */
>          ds_clear(actions);
> +
> +        build_check_pkt_len_action_string(od->l3dgw_port, NULL, actions);
>          ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
> -                    od->l3dgw_port->lrp_networks.ea_s);
> +                      od->l3dgw_port->lrp_networks.ea_s);
>
>          ds_clear(match);
>          ds_put_format(match,
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 5c3ed2633..cf3cbd4d3 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -16312,6 +16312,52 @@ test_ip_packet_larger() {
>      fi
>  }
>
> +test_ip_packet_larger_ext() {
> +    local mtu=$1
> +
> +    # Send ip packet from sw0-port1 to outside
> +    src_mac="00000012af11" # external mac
> +    dst_mac="000020201213" # lr0-public mac
> +    src_ip=`ip_to_hex 172 168 0 4`
> +    dst_ip=`ip_to_hex 172 168 0 100`
> +    # Set the packet length to 118.
> +    pkt_len=0076
> +    packet=${dst_mac}${src_mac}08004500${pkt_len}00000000400120cf
> +    orig_packet_l3=${src_ip}${dst_ip}0900000000000000
> +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> +    orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000
> +    packet=${packet}${orig_packet_l3}
> +
> +    gw_ip_garp=ffffffffffff00002020121308060001080006040001000020201213aca80064000000000000aca80064
> +    ext_ip_garp=ffffffffffff00000012af110806000108000604000100000012af11aca80004000000000000aca80004
> +
> +    src_ip=`ip_to_hex 172 168 0 100`
> +    dst_ip=`ip_to_hex 172 168 0 4`
> +    # pkt len should be 146 (28 (icmp packet) + 118 (orig ip + payload))
> +    reply_pkt_len=0092
> +    ip_csum=f397
> +    icmp_reply=${src_mac}${dst_mac}08004500${reply_pkt_len}00004000fe0122b2
> +    icmp_reply=${icmp_reply}${src_ip}${dst_ip}0304${ip_csum}0000$(printf "%04x" $mtu)
> +    icmp_reply=${icmp_reply}4500${pkt_len}00000000400120cf
> +    icmp_reply=${icmp_reply}${orig_packet_l3}
> +    echo $icmp_reply > br-phys_n1.expected
> +
> +    echo $gw_ip_garp >> br-phys_n1.expected
> +
> +    as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
> +    as hv1 reset_pcap_file hv1-vif1 hv1/vif1
> +
> +    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $ext_ip_garp
> +    sleep 1
> +    # Send packet from sw0-port1 to outside
> +    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $packet
> +
> +    OVN_CHECK_PACKETS([hv1/br-phys_n1-tx.pcap], [br-phys_n1.expected])
> +}
> +
>  test_ip6_packet_larger() {
>      local mtu=$1
>
> @@ -16327,7 +16373,7 @@ test_ip6_packet_larger() {
>      local payload=${payload}0000000000000000000000000000000000000000
>      local payload=${payload}0000000000000000000000000000000000000000
>
> -    local ip6_hdr=6000000000583aff${ipv6_src}${ipv6_dst}
> +    local ip6_hdr=6000000000583afe${ipv6_src}${ipv6_dst}
>      local packet=${eth_dst}${eth_src}86dd${ip6_hdr}8000ec7662f00001${payload}
>
>      as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
> @@ -16344,11 +16390,11 @@ test_ip6_packet_larger() {
>      mtu_needed=$(expr ${packet_bytes} - 18)
>      if test $mtu -lt $mtu_needed; then
>          # First construct the inner IPv6 packet.
> -        inner_ip6=6000000000583afe${ipv6_src}${ipv6_dst}
> +        inner_ip6=6000000000583afd${ipv6_src}${ipv6_dst}
>          inner_icmp6=8000000062f00001
>          inner_icmp6_and_payload=$(icmp6_csum_inplace ${inner_icmp6}${payload} ${inner_ip6})
>          inner_packet=${inner_ip6}${inner_icmp6_and_payload}
> -
> +
>          # Then the outer.
>          outer_ip6=6000000000883afe${ipv6_rt}${ipv6_src}
>          outer_icmp6_and_payload=$(icmp6_csum_inplace 020000000000$(printf "%04x" $mtu)${inner_packet} $outer_ip6)
> @@ -16366,6 +16412,53 @@ test_ip6_packet_larger() {
>      fi
>  }
>
> +test_ip6_packet_larger_ext() {
> +    local mtu=$1
> +
> +    local eth_src=00000012af11
> +    local eth_dst=000020201213
> +
> +    local ipv6_src=20000000000000000000000000000004
> +    local ipv6_dst=20000000000000000000000000000001
> +
> +    local payload=0000000000000000000000000000000000000000
> +    local payload=${payload}0000000000000000000000000000000000000000
> +    local payload=${payload}0000000000000000000000000000000000000000
> +    local payload=${payload}0000000000000000000000000000000000000000
> +
> +    local ip6_hdr=6000000000583afe${ipv6_src}${ipv6_dst}
> +    local packet=${eth_dst}${eth_src}86dd${ip6_hdr}9000cc7662f00001${payload}
> +
> +    local ns=ffffffffffff00002020121308060001080006040001000020201213aca80064000000000000aca80064
> +    echo $ns > br-phys_n1.expected
> +
> +    as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1
> +    as hv1 reset_pcap_file hv1-vif1 hv1/vif1
> +
> +    local na_ip6_hdr=6000000000203aff${ipv6_src}${ipv6_dst}
> +    local na=${eth_dst}${eth_src}86dd${na_ip6_hdr}8800d78440000000${ipv6_src}0201${eth_src}
> +    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $na
> +    sleep 1
> +    check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $packet
> +    AT_CAPTURE_FILE([trace-$mtu])
> +
> +    # First construct the inner IPv6 packet.
> +    inner_ip6=6000000000583afe${ipv6_src}${ipv6_dst}
> +    inner_icmp6=9000000062f00001
> +    inner_icmp6_and_payload=$(icmp6_csum_inplace ${inner_icmp6}${payload} ${inner_ip6})
> +    inner_packet=${inner_ip6}${inner_icmp6_and_payload}
> +
> +    # Then the outer.
> +    outer_ip6=6000000000883afe${ipv6_dst}${ipv6_src}
> +    outer_icmp6_and_payload=$(icmp6_csum_inplace 020000000000$(printf "%04x" $mtu)${inner_packet} $outer_ip6)
> +    outer_packet=${outer_ip6}${outer_icmp6_and_payload}
> +
> +    icmp6_reply=${eth_src}${eth_dst}86dd${outer_packet}
> +    echo $icmp6_reply >> br-phys_n1.expected
> +
> +    OVN_CHECK_PACKETS([hv1/br-phys_n1-tx.pcap], [br-phys_n1.expected])
> +}
> +
>  wait_for_ports_up
>  ovn-nbctl --wait=hv sync
>
> @@ -16398,7 +16491,7 @@ for mtu in 100 500 118; do
>      OVS_WAIT_FOR_OUTPUT([
>          as hv1 ovs-ofctl dump-flows br-int > br-int-flows-$mtu
>          AT_CAPTURE_FILE([br-int-flows-$mtu])
> -        grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [1
> +        grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [3
>  ])
>
>      AS_BOX([testing mtu $mtu - IPv4])
> @@ -16408,6 +16501,23 @@ for mtu in 100 500 118; do
>      test_ip6_packet_larger $mtu
>  done
>
> +AS_BOX([testing mtu $mtu])
> +check ovn-nbctl --wait=hv set logical_router_port lr0-public options:gateway_mtu=100
> +ovn-sbctl dump-flows > ext-sbflows-100
> +AT_CAPTURE_FILE([ext-sbflows-$mtu])
> +
> +OVS_WAIT_FOR_OUTPUT([
> +    as hv1 ovs-ofctl dump-flows br-int > ext-br-int-flows-100
> +    AT_CAPTURE_FILE([ext-br-int-flows-100])
> +    grep "check_pkt_larger(118)" ext-br-int-flows-100 | wc -l], [0], [3
> +])
> +
> +AS_BOX([testing ext mtu 100 - IPv4])
> +test_ip_packet_larger_ext 100
> +
> +AS_BOX([testing mtu 100 - IPv6])
> +test_ip6_packet_larger_ext 100
> +
>  ovn-nbctl lsp-del sw0-lr0
>
>  ovn-nbctl lr-del lr0
> @@ -16445,7 +16555,7 @@ for mtu in 100 500 118; do
>      OVS_WAIT_FOR_OUTPUT([
>          as hv1 ovs-ofctl dump-flows br-int > br-int-gw-flows-$mtu
>          AT_CAPTURE_FILE([br-int-gw-flows-$mtu])
> -        grep "check_pkt_larger($(expr $mtu + 18))" br-int-gw-flows-$mtu | wc -l], [0], [1
> +        grep "check_pkt_larger($(expr $mtu + 18))" br-int-gw-flows-$mtu | wc -l], [0], [3
>  ])
>
>      AS_BOX([testing gw mtu $mtu - IPv4])
> @@ -16455,6 +16565,23 @@ for mtu in 100 500 118; do
>      test_ip6_packet_larger $mtu
>  done
>
> +AS_BOX([testing gw mtu $mtu])
> +check ovn-nbctl --wait=hv set logical_router_port lr1-public options:gateway_mtu=100
> +ovn-sbctl dump-flows > ext-gw-sbflows-100
> +AT_CAPTURE_FILE([ext-gw-sbflows-$mtu])
> +
> +OVS_WAIT_FOR_OUTPUT([
> +    as hv1 ovs-ofctl dump-flows br-int > ext-br-int-gw-flows-100
> +    AT_CAPTURE_FILE([ext-br-int-gw-flows-100])
> +    grep "check_pkt_larger(118)" ext-br-int-gw-flows-100 | wc -l], [0], [3
> +])
> +
> +AS_BOX([testing gw ext mtu 100 - IPv4])
> +test_ip_packet_larger_ext 100
> +
> +AS_BOX([testing gw mtu 100 - IPv6])
> +test_ip6_packet_larger_ext 100
> +
>  OVN_CLEANUP([hv1])
>  AT_CLEANUP
>  ])
> --
> 2.31.1
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev


More information about the dev mailing list