[ovs-dev] [PATCH ovn] northd: Add support for NAT with multiple DGP.

Han Zhou hzhou at ovn.org
Tue Oct 5 07:23:55 UTC 2021


On Mon, Sep 20, 2021 at 7:30 AM Abhiram Sangana <sangana.abhiram at nutanix.com>
wrote:
>
> Currently, if multiple distributed gateway ports (DGP) are configured
> on a logical router, NAT is disabled as part of commit 15348b7b
> (northd: Multiple distributed gateway port support.)
>
> This patch updates the behavior by selectively applying NAT rules at DGPs.
> A NAT rule is applied on matching packets entering or leaving a specific
> DGP only if the external_ip of the rule belongs to the same subnet as the
> DGP.
>
> This patch also updates ovn-nbctl to accept multiple NAT rules of type
> `snat` with the same logical_ip but different external_ip for a logical
> router.
>
> Signed-off-by: Abhiram Sangana <sangana.abhiram at nutanix.com>

Thanks Abhiram for adding the NAT support. The approach looks good overall.
Please see my comments below.

> ---
>  NEWS                    |   1 +
>  northd/northd.c         | 155 +++++++++++++++++++++++------------
>  northd/ovn-northd.8.xml |  29 ++++---
>  ovn-architecture.7.xml  |   6 +-
>  ovn-nb.xml              |   4 +-
>  tests/ovn-nbctl.at      |  40 ++++++++-
>  tests/ovn-northd.at     | 175 +++++++++++++++++++++++++++++++++++++---
>  utilities/ovn-nbctl.c   | 156 ++++++++++++++++++++++++++++++++---
>  8 files changed, 473 insertions(+), 93 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index 8a21c029e..1fdc65bc0 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -12,6 +12,7 @@ OVN v21.09.0 - xx xxx xxxx
>    - Allow static routes without nexthops.
>    - Enabled logical dp groups as a default.  CMS should disable it if not
>      desired.
> +  - Support NAT with multiple distributed gateway ports on a logical
router.

Just a reminder that now it should be moved to the "post 21.09" section.

>
>  OVN v21.06.0 - 18 Jun 2021
>  -------------------------
> diff --git a/northd/northd.c b/northd/northd.c
> index d1b87891c..21a72d238 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -593,12 +593,11 @@ struct ovn_datapath {
>
>      /* Applies to only logical router datapath.
>       * True if logical router is a gateway router. i.e options:chassis
is set.
> -     * If this is true, then 'l3dgw_port' and 'l3redirect_port' will be
> -     * ignored. */
> +     * If this is true, then 'l3dgw_ports' will be ignored. */
>      bool is_gw_router;
>
> -    /* OVN northd only needs to know about the logical router gateway
port for
> -     * NAT on a distributed router.  The "distributed gateway ports" are
> +    /* OVN northd only needs to know about logical router gateway ports
for
> +     * NAT/LB on a distributed router.  The "distributed gateway ports"
are
>       * populated only when there is a gateway chassis or ha chassis group
>       * specified for some of the ports on the logical router. Otherwise
this
>       * will be NULL. */
> @@ -747,16 +746,6 @@ init_nat_entries(struct ovn_datapath *od)
>          return;
>      }
>
> -    if (od->n_l3dgw_ports > 1) {
> -        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> -        VLOG_WARN_RL(&rl, "NAT is configured on logical router %s, which
has %"
> -                     PRIuSIZE" distributed gateway ports. NAT is not
supported"
> -                     " yet when there is more than one distributed
gateway "
> -                     "port on the router.",
> -                     od->nbr->name, od->n_l3dgw_ports);
> -        return;
> -    }
> -
>      od->nat_entries = xmalloc(od->nbr->n_nat * sizeof *od->nat_entries);
>
>      for (size_t i = 0; i < od->nbr->n_nat; i++) {
> @@ -2681,8 +2670,11 @@ get_nat_addresses(const struct ovn_port *op,
size_t *n, bool routable_only)
>          /* Gratuitous ARP for centralized NAT rules on distributed
gateway
>           * ports should be restricted to the gateway chassis. */
>          if (op->od->n_l3dgw_ports) {
> +            const struct ovn_port *l3dgw_port = (is_l3dgw_port(op)
> +                                                 ? op
> +                                                 :
op->od->l3dgw_ports[0]);

I think we shouldn't hardcode to index 0 any more when NAT for multiple
DGPs is supported. In fact, if the code reaches here, is_l3dgw_port(op)
must be true. We should instead use ovs_assert(is_l3dgw_port(op)).

In addition, the addresses collected in this function will be used to send
GARP on the external networks. It would end up sending GARPs for all NAT
external IPs on all the external networks, which is wrong. We should check
here if the NAT external IP matches the l3dgw port's subnet, similar to the
check you added in extract_lrp_networks(). (Same checks needed when adding
support for LB IPs).

>              ds_put_format(&c_addresses, " is_chassis_resident(%s)",
> -                          op->od->l3dgw_ports[0]->cr_port->json_key);
> +                          l3dgw_port->cr_port->json_key);
>          }
>
>          addresses[n_nats++] = ds_steal_cstr(&c_addresses);
> @@ -3274,9 +3266,11 @@ ovn_port_update_sbrec(struct northd_context *ctx,
>                  }
>
>                  if (op->peer->od->n_l3dgw_ports) {
> +                    const struct ovn_port *l3dgw_port = (
> +                        is_l3dgw_port(op->peer) ? op->peer
> +                                                :
op->peer->od->l3dgw_ports[0]);
>                      ds_put_format(&garp_info, " is_chassis_resident(%s)",
> -                                  op->peer->od->l3dgw_ports[0]
> -                                  ->cr_port->json_key);
> +                                  l3dgw_port->cr_port->json_key);
>                  }
>
>                  n_nats++;
> @@ -9966,8 +9960,11 @@ build_lrouter_port_nat_arp_nd_flow(struct ovn_port
*op,
>           * Also need to avoid generation of multiple ARP responses
>           * from different chassis. */
>          if (op->od->n_l3dgw_ports) {
> +            const struct ovn_port *l3dgw_port = (is_l3dgw_port(op)
> +                                                 ? op
> +                                                 :
op->od->l3dgw_ports[0]);

We should assert here as well. It is ensured by the caller the op is a
l3dgw port.  Even the above "if" condition is unnecessary.

In addition, we should check here if the NAT external IP matches the l3dgw
port's subnet before adding the ARP responder flow.

>              ds_put_format(&match, "is_chassis_resident(%s)",
> -                          op->od->l3dgw_ports[0]->cr_port->json_key);
> +                          l3dgw_port->cr_port->json_key);
>          }
>      }
>
> @@ -11368,7 +11365,7 @@ build_check_pkt_len_flows_for_lrouter(
>   *
>   * For traffic with outport equal to the l3dgw_port
>   * on a distributed router, this table redirects a subset
> - * of the traffic to the l3redirect_port which represents
> + * of the traffic to the chassis-redirect port which represents
>   * the central instance of the l3dgw_port.
>   */
>  static void
> @@ -12116,7 +12113,8 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op,
>  static void
>  build_lrouter_in_unsnat_flow(struct hmap *lflows, struct ovn_datapath
*od,
>                               const struct nbrec_nat *nat, struct ds
*match,
> -                             struct ds *actions, bool distributed, bool
is_v6)
> +                             struct ds *actions, bool distributed, bool
is_v6,
> +                             struct ovn_port *l3dgw_port)
>  {
>      /* Ingress UNSNAT table: It is for already established connections'
>      * reverse traffic. i.e., SNAT has already been done in egress
> @@ -12155,12 +12153,12 @@ build_lrouter_in_unsnat_flow(struct hmap
*lflows, struct ovn_datapath *od,
>          ds_clear(actions);
>          ds_put_format(match, "ip && ip%s.dst == %s && inport == %s",
>                        is_v6 ? "6" : "4", nat->external_ip,
> -                      od->l3dgw_ports[0]->json_key);
> -        if (!distributed && od->n_l3dgw_ports) {
> +                      l3dgw_port->json_key);
> +        if (!distributed) {
>              /* Flows for NAT rules that are centralized are only
>              * programmed on the gateway chassis. */
>              ds_put_format(match, " && is_chassis_resident(%s)",
> -                          od->l3dgw_ports[0]->cr_port->json_key);
> +                          l3dgw_port->cr_port->json_key);
>          }
>
>          if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
> @@ -12180,7 +12178,8 @@ static void
>  build_lrouter_in_dnat_flow(struct hmap *lflows, struct ovn_datapath *od,
>                             const struct nbrec_nat *nat, struct ds *match,
>                             struct ds *actions, bool distributed,
> -                           ovs_be32 mask, bool is_v6)
> +                           ovs_be32 mask, bool is_v6,
> +                           struct ovn_port *l3dgw_port)
>  {
>      /* Ingress DNAT table: Packets enter the pipeline with destination
>      * IP address that needs to be DNATted from a external IP address
> @@ -12232,12 +12231,12 @@ build_lrouter_in_dnat_flow(struct hmap *lflows,
struct ovn_datapath *od,
>              ds_clear(match);
>              ds_put_format(match, "ip && ip%s.dst == %s && inport == %s",
>                            is_v6 ? "6" : "4", nat->external_ip,
> -                          od->l3dgw_ports[0]->json_key);
> +                          l3dgw_port->json_key);
>              if (!distributed && od->n_l3dgw_ports) {
>                  /* Flows for NAT rules that are centralized are only
>                  * programmed on the gateway chassis. */
>                  ds_put_format(match, " && is_chassis_resident(%s)",
> -                              od->l3dgw_ports[0]->cr_port->json_key);
> +                              l3dgw_port->cr_port->json_key);
>              }
>              ds_clear(actions);
>              if (nat->allowed_ext_ips || nat->exempted_ext_ips) {
> @@ -12267,7 +12266,8 @@ static void
>  build_lrouter_out_undnat_flow(struct hmap *lflows, struct ovn_datapath
*od,
>                                const struct nbrec_nat *nat, struct ds
*match,
>                                struct ds *actions, bool distributed,
> -                              struct eth_addr mac, bool is_v6)
> +                              struct eth_addr mac, bool is_v6,
> +                              struct ovn_port *l3dgw_port)
>  {
>      /* Egress UNDNAT table: It is for already established connections'
>      * reverse traffic. i.e., DNAT has already been done in ingress
> @@ -12284,12 +12284,12 @@ build_lrouter_out_undnat_flow(struct hmap
*lflows, struct ovn_datapath *od,
>      ds_clear(match);
>      ds_put_format(match, "ip && ip%s.src == %s && outport == %s",
>                    is_v6 ? "6" : "4", nat->logical_ip,
> -                  od->l3dgw_ports[0]->json_key);
> -    if (!distributed && od->n_l3dgw_ports) {
> +                  l3dgw_port->json_key);
> +    if (!distributed) {
>          /* Flows for NAT rules that are centralized are only
>          * programmed on the gateway chassis. */
>          ds_put_format(match, " && is_chassis_resident(%s)",
> -                      od->l3dgw_ports[0]->cr_port->json_key);
> +                      l3dgw_port->cr_port->json_key);
>      }
>      ds_clear(actions);
>      if (distributed) {
> @@ -12315,7 +12315,8 @@ build_lrouter_out_snat_flow(struct hmap *lflows,
struct ovn_datapath *od,
>                              const struct nbrec_nat *nat, struct ds
*match,
>                              struct ds *actions, bool distributed,
>                              struct eth_addr mac, ovs_be32 mask,
> -                            int cidr_bits, bool is_v6)
> +                            int cidr_bits, bool is_v6,
> +                            struct ovn_port *l3dgw_port)
>  {
>      /* Egress SNAT table: Packets enter the egress pipeline with
>      * source ip address that needs to be SNATted to a external ip
> @@ -12362,13 +12363,13 @@ build_lrouter_out_snat_flow(struct hmap
*lflows, struct ovn_datapath *od,
>          ds_clear(match);
>          ds_put_format(match, "ip && ip%s.src == %s && outport == %s",
>                        is_v6 ? "6" : "4", nat->logical_ip,
> -                      od->l3dgw_ports[0]->json_key);
> +                      l3dgw_port->json_key);
>          if (!distributed && od->n_l3dgw_ports) {
>              /* Flows for NAT rules that are centralized are only
>              * programmed on the gateway chassis. */
>              priority += 128;
>              ds_put_format(match, " && is_chassis_resident(%s)",
> -                          od->l3dgw_ports[0]->cr_port->json_key);
> +                          l3dgw_port->cr_port->json_key);
>          }
>          ds_clear(actions);
>
> @@ -12407,13 +12408,14 @@ static void
>  build_lrouter_ingress_flow(struct hmap *lflows, struct ovn_datapath *od,
>                             const struct nbrec_nat *nat, struct ds *match,
>                             struct ds *actions, struct eth_addr mac,
> -                           bool distributed, bool is_v6)
> +                           bool distributed, bool is_v6,
> +                           struct ovn_port *l3dgw_port)
>  {
>      if (od->n_l3dgw_ports && !strcmp(nat->type, "snat")) {
>          ds_clear(match);
>          ds_put_format(
>              match, "inport == %s && %s == %s",
> -            od->l3dgw_ports[0]->json_key,
> +            l3dgw_port->json_key,
>              is_v6 ? "ip6.src" : "ip4.src", nat->external_ip);
>          ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_INPUT,
>                                  120, ds_cstr(match), "next;",
> @@ -12431,16 +12433,16 @@ build_lrouter_ingress_flow(struct hmap *lflows,
struct ovn_datapath *od,
>          */
>          ds_clear(actions);
>
> -        build_check_pkt_len_action_string(od->l3dgw_ports[0], actions);
> +        build_check_pkt_len_action_string(l3dgw_port, actions);
>          ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
> -                      od->l3dgw_ports[0]->lrp_networks.ea_s);
> +                      l3dgw_port->lrp_networks.ea_s);
>
>          ds_clear(match);
>          ds_put_format(match,
>                        "eth.dst == "ETH_ADDR_FMT" && inport == %s"
>                        " && is_chassis_resident(\"%s\")",
>                        ETH_ADDR_ARGS(mac),
> -                      od->l3dgw_ports[0]->json_key,
> +                      l3dgw_port->json_key,
>                        nat->logical_port);
>          ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ADMISSION, 50,
>                                  ds_cstr(match), ds_cstr(actions),
> @@ -12451,7 +12453,8 @@ build_lrouter_ingress_flow(struct hmap *lflows,
struct ovn_datapath *od,
>  static int
>  lrouter_check_nat_entry(struct ovn_datapath *od, const struct nbrec_nat
*nat,
>                          ovs_be32 *mask, bool *is_v6, int *cidr_bits,
> -                        struct eth_addr *mac, bool *distributed)
> +                        struct eth_addr *mac, bool *distributed,
> +                        struct ovn_port **nat_l3dgw_port)
>  {
>      struct in6_addr ipv6, mask_v6, v6_exact = IN6ADDR_EXACT_INIT;
>      ovs_be32 ip;
> @@ -12485,6 +12488,55 @@ lrouter_check_nat_entry(struct ovn_datapath *od,
const struct nbrec_nat *nat,
>          *is_v6 = true;
>      }
>
> +    /* Get the l3dgw port (if present) corresponding to the external IP
> +     * of the NAT rule. */
> +    *nat_l3dgw_port = NULL;
> +
> +    for (size_t i = 0; i < od->n_l3dgw_ports; i++) {
> +        struct ovn_port *l3dgw_port = od->l3dgw_ports[i];
> +        struct lport_addresses lrp_networks;
> +
> +        if (!extract_lrp_networks(l3dgw_port->nbrp, &lrp_networks)) {
> +            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1,
1);
> +            VLOG_WARN_RL(&rl, "Extract addresses failed.");
> +            continue;
> +        }
> +
> +        if (*is_v6) {
> +            for (int j = 0; j < lrp_networks.n_ipv6_addrs; j++) {
> +                struct ipv6_netaddr *lrp6_addr =
&(lrp_networks.ipv6_addrs[j]);
> +                struct in6_addr ip6_mask =
ipv6_addr_bitand(&lrp6_addr->mask,
> +                                                            &ipv6);
> +
> +                if (ipv6_addr_equals(&ip6_mask, &(lrp6_addr->network))) {
> +                    destroy_lport_addresses(&lrp_networks);
> +                    *nat_l3dgw_port = l3dgw_port;
> +                    goto next;
> +                }
> +            }
> +        } else {
> +            for (int j = 0; j < lrp_networks.n_ipv4_addrs; j++) {
> +                struct ipv4_netaddr *lrp4_addr =
> +                                    &(lrp_networks.ipv4_addrs[j]);
> +
> +                if ((ip & lrp4_addr->mask) == lrp4_addr->network) {
> +                    destroy_lport_addresses(&lrp_networks);
> +                    *nat_l3dgw_port = l3dgw_port;
> +                    goto next;
> +                }
> +            }
> +        }
> +    }

In the above loop the lrp_networks may have memory leak, because
destroy_lport_addresses() is called at most once, but the
extract_lrp_networks() is called in every iteration.
For this reason, it is better not using "goto", but using "break" and do
some cleanup in the outer loop.

> +
> +next:
> +    if (od->n_l3dgw_ports && *nat_l3dgw_port == NULL) {
> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
> +        VLOG_WARN_RL(&rl, "Could not map NAT external ip: %s to a "
> +                     "distributed gateway port in router "UUID_FMT"",
> +                     nat->external_ip, UUID_ARGS(&od->key));
> +        return -EINVAL;
> +    }
> +
>      /* Check the validity of nat->logical_ip. 'logical_ip' can
>      * be a subnet when the type is "snat". */
>      if (*is_v6) {
> @@ -12581,7 +12633,7 @@ build_lrouter_nat_defrag_and_lb(struct
ovn_datapath *od, struct hmap *lflows,
>      ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 120, "nd_ns", "next;");
>
>      /* NAT rules are only valid on Gateway routers and routers with
> -     * l3dgw_port (router has a port with gateway chassis
> +     * l3dgw_ports (router has port(s) with gateway chassis
>       * specified). */
>      if (!od->is_gw_router && !od->n_l3dgw_ports) {
>          return;
> @@ -12600,18 +12652,19 @@ build_lrouter_nat_defrag_and_lb(struct
ovn_datapath *od, struct hmap *lflows,
>          bool is_v6, distributed;
>          ovs_be32 mask;
>          int cidr_bits;
> +        struct ovn_port *l3dgw_port;
>
>          if (lrouter_check_nat_entry(od, nat, &mask, &is_v6, &cidr_bits,
> -                                    &mac, &distributed) < 0) {
> +                                    &mac, &distributed, &l3dgw_port) <
0) {
>              continue;
>          }
>
>          /* S_ROUTER_IN_UNSNAT */
>          build_lrouter_in_unsnat_flow(lflows, od, nat, match, actions,
distributed,
> -                                     is_v6);
> +                                     is_v6, l3dgw_port);
>          /* S_ROUTER_IN_DNAT */
>          build_lrouter_in_dnat_flow(lflows, od, nat, match, actions,
distributed,
> -                                   mask, is_v6);
> +                                   mask, is_v6, l3dgw_port);
>
>          /* ARP resolve for NAT IPs. */
>          if (od->is_gw_router) {
> @@ -12624,14 +12677,14 @@ build_lrouter_nat_defrag_and_lb(struct
ovn_datapath *od, struct hmap *lflows,
>                  ds_clear(match);
>                  ds_put_format(
>                      match, "outport == %s && %s == %s",
> -                    od->l3dgw_ports[0]->json_key,
> +                    l3dgw_port->json_key,
>                      is_v6 ? REG_NEXT_HOP_IPV6 : REG_NEXT_HOP_IPV4,
>                      nat->external_ip);
>                  ds_clear(actions);
>                  ds_put_format(
>                      actions, "eth.dst = %s; next;",
>                      distributed ? nat->external_mac :
> -                    od->l3dgw_ports[0]->lrp_networks.ea_s);
> +                    l3dgw_port->lrp_networks.ea_s);
>                  ovn_lflow_add_with_hint(lflows, od,
>                                          S_ROUTER_IN_ARP_RESOLVE,
>                                          100, ds_cstr(match),
> @@ -12643,14 +12696,14 @@ build_lrouter_nat_defrag_and_lb(struct
ovn_datapath *od, struct hmap *lflows,
>
>          /* S_ROUTER_OUT_UNDNAT */
>          build_lrouter_out_undnat_flow(lflows, od, nat, match, actions,
distributed,
> -                                      mac, is_v6);
> +                                      mac, is_v6, l3dgw_port);
>          /* S_ROUTER_OUT_SNAT */
>          build_lrouter_out_snat_flow(lflows, od, nat, match, actions,
distributed,
> -                                    mac, mask, cidr_bits, is_v6);
> +                                    mac, mask, cidr_bits, is_v6,
l3dgw_port);
>
>          /* S_ROUTER_IN_ADMISSION - S_ROUTER_IN_IP_INPUT */
>          build_lrouter_ingress_flow(lflows, od, nat, match, actions,
> -                                   mac, distributed, is_v6);
> +                                   mac, distributed, is_v6, l3dgw_port);
>
>          /* Ingress Gateway Redirect Table: For NAT on a distributed
>           * router, add flows that are specific to a NAT rule.  These
> @@ -12667,7 +12720,7 @@ build_lrouter_nat_defrag_and_lb(struct
ovn_datapath *od, struct hmap *lflows,
>              ds_put_format(match,
>                            "ip%s.src == %s && outport == %s",
>                            is_v6 ? "6" : "4", nat->logical_ip,
> -                          od->l3dgw_ports[0]->json_key);
> +                          l3dgw_port->json_key);
>              /* Add a rule to drop traffic from a distributed NAT if
>               * the virtual port has not claimed yet becaused otherwise
>               * the traffic will be centralized misconfiguring the TOR
switch.
> @@ -12700,10 +12753,10 @@ build_lrouter_nat_defrag_and_lb(struct
ovn_datapath *od, struct hmap *lflows,
>              ds_put_format(match, "ip%s.dst == %s && outport == %s",
>                            is_v6 ? "6" : "4",
>                            nat->external_ip,
> -                          od->l3dgw_ports[0]->json_key);
> +                          l3dgw_port->json_key);
>              if (!distributed) {
>                  ds_put_format(match, " && is_chassis_resident(%s)",
> -                              od->l3dgw_ports[0]->cr_port->json_key);
> +                              l3dgw_port->cr_port->json_key);
>              } else {
>                  ds_put_format(match, " && is_chassis_resident(\"%s\")",
>                                nat->logical_port);
> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> index 39f4eaa0c..ff585e13f 100644
> --- a/northd/ovn-northd.8.xml
> +++ b/northd/ovn-northd.8.xml
> @@ -2837,10 +2837,10 @@ icmp6 {
>            ip4.dst == <var>B</var> && inport ==
<var>GW</var></code> or
>            <code>ip &&
>            ip6.dst == <var>B</var> && inport ==
<var>GW</var></code>
> -          where <var>GW</var> is the logical router gateway port, with an
> -          action <code>ct_snat;</code>. If the NAT rule is of type
> -          dnat_and_snat and has <code>stateless=true</code> in the
> -          options, then the action would be <code>ip4/6.dst=
> +          where <var>GW</var> is the logical router gateway port
corresponding
> +          to IP <var>B</var>, with an action <code>ct_snat;</code>. If
the NAT
> +          rule is of type dnat_and_snat and has
<code>stateless=true</code> in
> +          the options, then the action would be <code>ip4/6.dst=
>            (<var>B</var>)</code>.
>          </p>
>
> @@ -3115,9 +3115,10 @@ icmp6 {
>            to change the destination IP address of a packet from
<var>A</var> to
>            <var>B</var>, a priority-100 flow matches <code>ip &&
>            ip4.dst == <var>B</var> && inport ==
<var>GW</var></code>,
> -          where <var>GW</var> is the logical router gateway port, with an
> -          action <code>ct_dnat(<var>B</var>);</code>.  The match will
> -          include <code>ip6.dst == <var>B</var></code> in the IPv6 case.
> +          where <var>GW</var> is the logical router gateway port
corresponding
> +          to IP <var>A</var>, with an action
> +          <code>ct_dnat(<var>B</var>);</code>.  The match will include
> +          <code>ip6.dst == <var>B</var></code> in the IPv6 case.
>            If the NAT rule is of type dnat_and_snat and has
>            <code>stateless=true</code> in the options, then the action
>            would be <code>ip4/6.dst=(<var>B</var>)</code>.
> @@ -3876,10 +3877,11 @@ icmp6 {
>          flow with match <code>ip4.src == <var>B</var> &&
>          outport == <var>GW</var></code> &&
>          is_chassis_resident(<var>P</var>), where <var>GW</var> is
> -        the logical router distributed gateway port and <var>P</var>
> -        is the NAT logical port. IP traffic matching the above rule
> -        will be managed locally setting <code>reg1</code> to <var>C</var>
> -        and <code>eth.src</code> to <var>D</var>, where <var>C</var> is
NAT
> +        the logical router distributed gateway port corresponding to the
> +        NAT external IP and <var>P</var> is the NAT logical port. IP
traffic
> +        matching the above rule will be managed locally setting
> +        <code>reg1</code> to <var>C</var> and
> +        <code>eth.src</code> to <var>D</var>, where <var>C</var> is NAT
>          external ip and <var>D</var> is NAT external mac.
>        </li>
>
> @@ -4282,8 +4284,9 @@ nd_ns {
>            outport == <var>GW</var> &&
>            is_chassis_resident(<var>P</var>)</code>, where <var>E</var>
is the
>            external IP address specified in the NAT rule, <var>GW</var>
> -          is the logical router distributed gateway port. For
dnat_and_snat
> -          NAT rule, <var>P</var> is the logical port specified in the
NAT rule.
> +          is the logical router distributed gateway port corresponding
to the
> +          NAT external IP. For dnat_and_snat NAT rule, <var>P</var> is
the
> +          logical port specified in the NAT rule.
>            If <ref column="logical_port"
>            table="NAT" db="OVN_Northbound"/> column of
>            <ref table="NAT" db="OVN_Northbound"/> table is NOT set, then
> diff --git a/ovn-architecture.7.xml b/ovn-architecture.7.xml
> index 3d2910358..e1209dca8 100644
> --- a/ovn-architecture.7.xml
> +++ b/ovn-architecture.7.xml
> @@ -733,9 +733,9 @@
>
>    <p>
>      A logical router can have multiple distributed gateway ports, each
> -    connecting different external networks. However, some features, such
as NAT
> -    and load balancers, are not supported yet for logical routers with
more
> -    than one distributed gateway port configured.
> +    connecting different external networks. Load balancing is not yet
> +    supported for logical routers with more than one distributed gateway
> +    port configured.
>    </p>
>
>    <h4>Physical VLAN MTU Issues</h4>
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index 390cc5a44..bd366b5b0 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -2504,8 +2504,8 @@
>        <p>
>          There can be more than one distributed gateway ports configured
>          on each logical router, each connecting to different L2 segments.
> -        However, features such as NAT and load-balancer are not supported
> -        on logical routers with more than one distributed gateway ports.
> +        Load-balancing is not yet supported on logical routers with more
> +        than one distributed gateway ports.
>        </p>
>
>        <p>
> diff --git a/tests/ovn-nbctl.at b/tests/ovn-nbctl.at
> index 9b80ae410..c7a80338c 100644
> --- a/tests/ovn-nbctl.at
> +++ b/tests/ovn-nbctl.at
> @@ -743,7 +743,45 @@ AT_CHECK([ovn-nbctl lr-nat-update-ext-ip lr0 snat
192.168.16 allowed_range], [1]
>  [ovn-nbctl: 192.168.16: Invalid IP address or CIDR
>  ])
>
> -AT_CHECK([ovn-nbctl lr-nat-del lr0])])
> +AT_CHECK([ovn-nbctl lr-nat-del lr0])
> +
> +AT_CHECK([ovn-nbctl lrp-add lr0 lrp0 00:00:00:01:02:03 192.168.1.1/24])
> +AT_CHECK([ovn-nbctl lrp-add lr0 lrp1 00:00:00:01:02:04 172.64.1.1/24])
> +AT_CHECK([ovn-nbctl lrp-set-gateway-chassis lrp0 chassis1])
> +AT_CHECK([ovn-nbctl lrp-set-gateway-chassis lrp1 chassis2])
> +
> +AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 192.168.1.10 20.0.0.10])
> +AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 172.64.1.10 20.0.0.10])
> +AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 192.168.1.20 20.0.0.10], [1], [],
> +[ovn-nbctl: a NAT with this type (snat) and logical_ip (20.0.0.10)
already exists
> +])
> +AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat 172.64.1.20 20.0.0.20])
> +AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat 172.64.1.20 20.0.0.30], [1], [],
> +[ovn-nbctl: a NAT with this type (dnat) and external_ip (172.64.1.20)
already exists
> +])
> +AT_CHECK([ovn-nbctl lr-nat-list lr0], [0], [dnl
> +TYPE             EXTERNAL_IP        EXTERNAL_PORT    LOGICAL_IP
   EXTERNAL_MAC         LOGICAL_PORT
> +dnat             172.64.1.20                         20.0.0.20
> +snat             172.64.1.10                         20.0.0.10
> +snat             192.168.1.10                        20.0.0.10
> +])
> +AT_CHECK([ovn-nbctl lr-nat-del lr0 snat 20.0.0.10 172.64.1.10])
> +AT_CHECK([ovn-nbctl lr-nat-list lr0], [0], [dnl
> +TYPE             EXTERNAL_IP        EXTERNAL_PORT    LOGICAL_IP
   EXTERNAL_MAC         LOGICAL_PORT
> +dnat             172.64.1.20                         20.0.0.20
> +snat             192.168.1.10                        20.0.0.10
> +])
> +AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 172.64.1.10 20.0.0.10])
> +AT_CHECK([ovn-nbctl lr-nat-del lr0 snat 20.0.0.10])
> +AT_CHECK([ovn-nbctl lr-nat-list lr0], [0], [dnl
> +TYPE             EXTERNAL_IP        EXTERNAL_PORT    LOGICAL_IP
   EXTERNAL_MAC         LOGICAL_PORT
> +dnat             172.64.1.20                         20.0.0.20
> +])
> +AT_CHECK([ovn-nbctl lr-nat-del lr0 snat 20.0.0.10], [1], [],
> +[ovn-nbctl: no matching NAT with the type (snat) and logical_ip
(20.0.0.10)
> +])
> +AT_CHECK([ovn-nbctl lr-nat-del lr0])
> +])
>
>  dnl ---------------------------------------------------------------------
>
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index 5de554455..7d387ad6f 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -847,7 +847,7 @@ ovn_start
>  ovn-sbctl chassis-add gw1 geneve 127.0.0.1
>
>  ovn-nbctl lr-add R1
> -ovn-nbctl lrp-add R1 R1-S1 02:ac:10:01:00:01 172.16.1.1/24
> +ovn-nbctl lrp-add R1 R1-S1 02:ac:10:01:00:01 172.16.1.1/24 3000::a/64
>
>  ovn-nbctl ls-add S1
>  ovn-nbctl lsp-add S1 S1-R1
> @@ -888,13 +888,13 @@ ovn-nbctl lr-nat-del R1 dnat_and_snat  172.16.1.1
>
>  echo
>  echo "IPv6: stateful"
> -ovn-nbctl --wait=sb lr-nat-add R1 dnat_and_snat fd01::1 fd11::2
> +ovn-nbctl --wait=sb lr-nat-add R1 dnat_and_snat 3000::c 1000::3
>  check_flow_match_sets 2 2 3 0 0 0 0
> -ovn-nbctl lr-nat-del R1 dnat_and_snat  fd01::1
> +ovn-nbctl lr-nat-del R1 dnat_and_snat 3000::c
>
>  echo
>  echo "IPv6: stateless"
> -ovn-nbctl --wait=sb --stateless lr-nat-add R1 dnat_and_snat fd01::1
fd11::2
> +ovn-nbctl --wait=sb --stateless lr-nat-add R1 dnat_and_snat 3000::c
1000::3
>  check_flow_match_sets 2 0 1 0 0 2 2
>
>  AT_CLEANUP
> @@ -3952,16 +3952,16 @@ check ovn-nbctl lsp-set-type lrp1-attachment
router
>  check ovn-nbctl lsp-set-addresses lrp1-attachment 00:00:00:00:ff:02
>  check ovn-nbctl lsp-set-options lrp1-attachment router-port=lrp1
>
> -check ovn-nbctl lr-nat-add lr0 dnat 42.42.42.42 192.168.0.2
> +check ovn-nbctl lr-nat-add lr0 dnat 11.0.0.42 192.168.0.2
>  check ovn-nbctl --wait=sb sync
>
> -AT_CHECK([ovn-trace --minimal 'inport == "sw1-port1" && eth.src ==
50:54:00:00:00:03 && eth.dst == 00:00:00:00:ff:02 && ip4.dst == 42.42.42.42
&& ip4.src == 11.0.0.2 && ip.ttl == 64' | grep "output(\"sw0-port1\")"],
[0], [ignore])
> +AT_CHECK([ovn-trace --minimal 'inport == "sw1-port1" && eth.src ==
50:54:00:00:00:03 && eth.dst == 00:00:00:00:ff:02 && ip4.dst == 11.0.0.42
&& ip4.src == 11.0.0.2 && ip.ttl == 64' | grep "output(\"sw0-port1\")"],
[0], [ignore])
>
>  dnl If we remove the DNAT entry we will be unable to trace to the DNAT
address
> -check ovn-nbctl lr-nat-del lr0 dnat 42.42.42.42
> +check ovn-nbctl lr-nat-del lr0 dnat 11.0.0.42
>  check ovn-nbctl --wait=sb sync
>
> -AT_CHECK([ovn-trace --minimal 'inport == "sw1-port1" && eth.src ==
50:54:00:00:00:03 && eth.dst == 00:00:00:00:ff:02 && ip4.dst == 42.42.42.42
&& ip4.src == 11.0.0.2 && ip.ttl == 64' | grep "output(\"sw0-port1\")"],
[1], [ignore])
> +AT_CHECK([ovn-trace --minimal 'inport == "sw1-port1" && eth.src ==
50:54:00:00:00:03 && eth.dst == 00:00:00:00:ff:02 && ip4.dst == 11.0.0.42
&& ip4.src == 11.0.0.2 && ip.ttl == 64' | grep "output(\"sw0-port1\")"],
[1], [ignore])
>
>  AT_CLEANUP
>  ])
> @@ -3991,16 +3991,16 @@ check ovn-nbctl lsp-set-type lrp1-attachment
router
>  check ovn-nbctl lsp-set-addresses lrp1-attachment 00:00:00:00:ff:02
>  check ovn-nbctl lsp-set-options lrp1-attachment router-port=lrp1
>
> -check ovn-nbctl lr-nat-add lr0 dnat fd42::42 fd68::2
> +check ovn-nbctl lr-nat-add lr0 dnat fd11::42 fd68::2
>  check ovn-nbctl --wait=sb sync
>
> -AT_CHECK([ovn-trace --minimal 'inport == "sw1-port1" && eth.src ==
50:54:00:00:00:03 && eth.dst == 00:00:00:00:ff:02 && ip6.dst == fd42::42 &&
ip6.src == fd11::2 && ip.ttl == 64' | grep "output(\"sw0-port1\")"], [0],
[ignore])
> +AT_CHECK([ovn-trace --minimal 'inport == "sw1-port1" && eth.src ==
50:54:00:00:00:03 && eth.dst == 00:00:00:00:ff:02 && ip6.dst == fd11::42 &&
ip6.src == fd11::2 && ip.ttl == 64' | grep "output(\"sw0-port1\")"], [0],
[ignore])
>
>  dnl If we remove the DNAT entry we will be unable to trace to the DNAT
address
> -check ovn-nbctl lr-nat-del lr0 dnat fd42::42
> +check ovn-nbctl lr-nat-del lr0 dnat fd11::42
>  check ovn-nbctl --wait=sb sync
>
> -AT_CHECK([ovn-trace --minimal 'inport == "sw1-port1" && eth.src ==
50:54:00:00:00:03 && eth.dst == 00:00:00:00:ff:02 && ip6.dst == fd42::42 &&
ip6.src == fd11::2 && ip.ttl == 64' | grep "output(\"sw0-port1\")"], [1],
[ignore])
> +AT_CHECK([ovn-trace --minimal 'inport == "sw1-port1" && eth.src ==
50:54:00:00:00:03 && eth.dst == 00:00:00:00:ff:02 && ip6.dst == fd11::42 &&
ip6.src == fd11::2 && ip.ttl == 64' | grep "output(\"sw0-port1\")"], [1],
[ignore])
>
>  AT_CLEANUP
>  ])
> @@ -5223,3 +5223,154 @@ AT_CHECK([grep lr_in_gw_redirect lrflows | grep
cr-DR | sed 's/table=../table=??
>
>  AT_CLEANUP
>  ])
> +
> +AT_SETUP([ovn-northd -- lr multiple gw ports NAT])
> +AT_KEYWORDS([multiple-l3dgw-ports])
> +ovn_start
> +
> +# Logical network:
> +# 1 Logical Router, 3 bridged Logical Switches,
> +# 1 gateway chassis attached to each corresponding LRP.
> +#
> +#                | S1 (gw1)
> +#                |
> +#      ls  ----  DR -- S3 (gw3)
> +# (20.0.0.0/24)  |
> +#                | S2 (gw2)
> +#
> +# Validate SNAT, DNAT and DNAT_AND_SNAT behavior with multiple
> +# distributed gateway LRPs.
> +
> +check ovn-sbctl chassis-add gw1 geneve 127.0.0.1
> +check ovn-sbctl chassis-add gw2 geneve 128.0.0.1
> +check ovn-sbctl chassis-add gw3 geneve 129.0.0.1
> +
> +check ovn-nbctl lr-add DR
> +check ovn-nbctl lrp-add DR DR-S1 02:ac:10:01:00:01 172.16.1.1/24
> +check ovn-nbctl lrp-add DR DR-S2 03:ac:10:01:00:01 10.0.0.1/24
> +check ovn-nbctl lrp-add DR DR-S3 04:ac:10:01:00:01 192.168.0.1/24
> +check ovn-nbctl lrp-add DR DR-ls 05:ac:10:01:00:01 20.0.0.0/24
> +
> +check ovn-nbctl ls-add S1
> +check ovn-nbctl lsp-add S1 S1-DR
> +check ovn-nbctl lsp-set-type S1-DR router
> +check ovn-nbctl lsp-set-addresses S1-DR router
> +check ovn-nbctl --wait=sb lsp-set-options S1-DR router-port=DR-S1
> +
> +check ovn-nbctl ls-add S2
> +check ovn-nbctl lsp-add S2 S2-DR
> +check ovn-nbctl lsp-set-type S2-DR router
> +check ovn-nbctl lsp-set-addresses S2-DR router
> +check ovn-nbctl --wait=sb lsp-set-options S2-DR router-port=DR-S2
> +
> +check ovn-nbctl ls-add S3
> +check ovn-nbctl lsp-add S3 S3-DR
> +check ovn-nbctl lsp-set-type S3-DR router
> +check ovn-nbctl lsp-set-addresses S3-DR router
> +check ovn-nbctl --wait=sb lsp-set-options S3-DR router-port=DR-S3
> +
> +check ovn-nbctl ls-add  ls
> +check ovn-nbctl lsp-add ls ls-DR
> +check ovn-nbctl lsp-set-type ls-DR router
> +check ovn-nbctl lsp-set-addresses ls-DR router
> +check ovn-nbctl --wait=sb lsp-set-options ls-DR router-port=DR-ls
> +
> +check ovn-nbctl lrp-set-gateway-chassis DR-S1 gw1
> +check ovn-nbctl lrp-set-gateway-chassis DR-S2 gw2
> +check ovn-nbctl lrp-set-gateway-chassis DR-S3 gw3
> +
> +check ovn-nbctl --wait=sb sync
> +
> +# Configure SNAT
> +check ovn-nbctl lr-nat-add DR snat  172.16.1.10  20.0.0.10
> +check ovn-nbctl lr-nat-add DR snat  10.0.0.10    20.0.0.10
> +check ovn-nbctl lr-nat-add DR snat  192.168.0.10 20.0.0.10
> +check ovn-nbctl lr-nat-add DR snat  192.168.123.10 20.0.0.10
> +
> +ovn-sbctl dump-flows DR > lrflows
> +AT_CAPTURE_FILE([lrflows])
> +
> +check_lr_in_unsnat_flows() {
> +    AT_CHECK([grep lr_in_unsnat lrflows | grep ct_snat | sed
's/table=../table=??/' | sort], [0], [dnl
> +  table=??(lr_in_unsnat       ), priority=100  , match=(ip && ip4.dst ==
10.0.0.10 && inport == "DR-S2" && is_chassis_resident("cr-DR-S2")),
action=(ct_snat;)
> +  table=??(lr_in_unsnat       ), priority=100  , match=(ip && ip4.dst ==
172.16.1.10 && inport == "DR-S1" && is_chassis_resident("cr-DR-S1")),
action=(ct_snat;)
> +  table=??(lr_in_unsnat       ), priority=100  , match=(ip && ip4.dst ==
192.168.0.10 && inport == "DR-S3" && is_chassis_resident("cr-DR-S3")),
action=(ct_snat;)
> +])
> +}
> +
> +check_lr_out_snat_flows() {
> +    AT_CHECK([grep lr_out_snat lrflows | grep ct_snat | sed
's/table=../table=??/' | sort], [0], [dnl
> +  table=??(lr_out_snat        ), priority=161  , match=(ip && ip4.src ==
20.0.0.10 && outport == "DR-S1" && is_chassis_resident("cr-DR-S1")),
action=(ct_snat(172.16.1.10);)
> +  table=??(lr_out_snat        ), priority=161  , match=(ip && ip4.src ==
20.0.0.10 && outport == "DR-S2" && is_chassis_resident("cr-DR-S2")),
action=(ct_snat(10.0.0.10);)
> +  table=??(lr_out_snat        ), priority=161  , match=(ip && ip4.src ==
20.0.0.10 && outport == "DR-S3" && is_chassis_resident("cr-DR-S3")),
action=(ct_snat(192.168.0.10);)
> +])
> +}
> +
> +check_lr_in_unsnat_flows
> +check_lr_out_snat_flows
> +
> +check ovn-nbctl lr-nat-del DR snat 20.0.0.10
> +AT_CHECK([ovn-sbctl dump-flows DR | grep -e lr_in_unsnat -e lr_out_snat
| grep ct_snat | wc -l], [0], [0
> +])
> +
> +# Configure DNAT
> +check ovn-nbctl lr-nat-add DR dnat  172.16.1.10  20.0.0.10
> +check ovn-nbctl lr-nat-add DR dnat  10.0.0.10    20.0.0.10
> +check ovn-nbctl lr-nat-add DR dnat  192.168.0.10 20.0.0.10
> +check ovn-nbctl lr-nat-add DR dnat  192.168.123.10 20.0.0.10
> +
> +ovn-sbctl dump-flows DR > lrflows
> +AT_CAPTURE_FILE([lrflows])
> +
> +check_lr_in_dnat_flows() {
> +    AT_CHECK([grep lr_in_dnat lrflows | grep ct_dnat | sed
's/table=../table=??/' | sort], [0], [dnl
> +  table=??(lr_in_dnat         ), priority=100  , match=(ip && ip4.dst ==
10.0.0.10 && inport == "DR-S2" && is_chassis_resident("cr-DR-S2")),
action=(ct_dnat(20.0.0.10);)
> +  table=??(lr_in_dnat         ), priority=100  , match=(ip && ip4.dst ==
172.16.1.10 && inport == "DR-S1" && is_chassis_resident("cr-DR-S1")),
action=(ct_dnat(20.0.0.10);)
> +  table=??(lr_in_dnat         ), priority=100  , match=(ip && ip4.dst ==
192.168.0.10 && inport == "DR-S3" && is_chassis_resident("cr-DR-S3")),
action=(ct_dnat(20.0.0.10);)
> +])
> +}
> +
> +check_lr_out_undnat_flows() {
> +    AT_CHECK([grep lr_out_undnat lrflows | grep ct_dnat | sed
's/table=../table=??/' | sort], [0], [dnl
> +  table=??(lr_out_undnat      ), priority=100  , match=(ip && ip4.src ==
20.0.0.10 && outport == "DR-S1" && is_chassis_resident("cr-DR-S1")),
action=(ct_dnat;)
> +  table=??(lr_out_undnat      ), priority=100  , match=(ip && ip4.src ==
20.0.0.10 && outport == "DR-S2" && is_chassis_resident("cr-DR-S2")),
action=(ct_dnat;)
> +  table=??(lr_out_undnat      ), priority=100  , match=(ip && ip4.src ==
20.0.0.10 && outport == "DR-S3" && is_chassis_resident("cr-DR-S3")),
action=(ct_dnat;)
> +  table=??(lr_out_undnat      ), priority=50   , match=(ip),
action=(flags.loopback = 1; ct_dnat;)
> +])
> +}
> +
> +check_lr_in_dnat_flows
> +check_lr_out_undnat_flows
> +
> +check ovn-nbctl lr-nat-del DR dnat  172.16.1.10
> +check ovn-nbctl lr-nat-del DR dnat  10.0.0.10
> +check ovn-nbctl lr-nat-del DR dnat  192.168.0.10
> +check ovn-nbctl lr-nat-del DR dnat  192.168.123.10
> +
> +AT_CHECK([ovn-sbctl dump-flows DR | grep -e lr_in_dnat -e lr_out_undnat
| grep ct_dnat | wc -l], [0], [0
> +])
> +
> +# Configure DNAT_AND_SNAT
> +check ovn-nbctl lr-nat-add DR dnat_and_snat  172.16.1.10    20.0.0.10
> +check ovn-nbctl lr-nat-add DR dnat_and_snat  10.0.0.10      20.0.0.10
> +check ovn-nbctl lr-nat-add DR dnat_and_snat  192.168.0.10   20.0.0.10
> +check ovn-nbctl lr-nat-add DR dnat_and_snat  192.168.123.10   20.0.0.10
> +
> +ovn-sbctl dump-flows DR > lrflows
> +AT_CAPTURE_FILE([lrflows])
> +
> +check_lr_in_unsnat_flows
> +check_lr_out_snat_flows
> +check_lr_in_dnat_flows
> +check_lr_out_undnat_flows
> +
> +check ovn-nbctl lr-nat-del DR dnat_and_snat  172.16.1.10
> +check ovn-nbctl lr-nat-del DR dnat_and_snat  10.0.0.10
> +check ovn-nbctl lr-nat-del DR dnat_and_snat  192.168.0.10
> +check ovn-nbctl lr-nat-del DR dnat_and_snat  192.168.123.10
> +
> +AT_CHECK([ovn-sbctl dump-flows DR | grep -e lr_in_unsnat -e lr_out_snat
-e lr_in_dnat -e lr_out_undnat | grep ct_snat| wc -l], [0], [0
> +])
> +
> +AT_CLEANUP
> +])

In this test case it is better to check GARP related addresses (in SB
port-binding), and the ARP responder flows. Make sure the addresses/flows
match DGP's subnet.
Do we need an integration test (or even system test)?

> diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c
> index 10217dcd5..89507c617 100644
> --- a/utilities/ovn-nbctl.c
> +++ b/utilities/ovn-nbctl.c
> @@ -373,7 +373,7 @@ NAT commands:\n\
>    lr-nat-add ROUTER TYPE EXTERNAL_IP LOGICAL_IP [LOGICAL_PORT
EXTERNAL_MAC]\n\
>                              [EXTERNAL_PORT_RANGE]\n\
>                              add a NAT to ROUTER\n\
> -  lr-nat-del ROUTER [TYPE [IP]]\n\
> +  lr-nat-del ROUTER [TYPE [IP [EXTERNAL_IP]]]\n\

It's better to have same order of the parameters as in "lr-nat-add", i.e.
[[EXTERNAL_IP] LOGICAL_IP]. Otherwise it is easy to mislead users. The code
should be able to figure out, if there is only one IP, then it is
LOGICAL_IP, otherwise if there are two, then the first one is EXTERNAL IP.

In addition, the man page needs to be updated, too.

Thanks,
Han

>                              remove NATs from ROUTER\n\
>    lr-nat-list ROUTER        print NATs for ROUTER\n\
>  \n\
> @@ -4412,6 +4412,92 @@ nbctl_lr_route_del(struct ctl_context *ctx)
>      free(nexthop);
>  }
>
> +static bool
> +is_nat_rule_conflict(const struct nbrec_logical_router *lr,
> +                     char *new_external_ip, char *old_external_ip, bool
is_v6)
> +{
> +    int num_l3dgw_ports = 0;
> +    bool is_conflict = false;
> +
> +    struct lport_addresses old_external_ip_addr, new_external_ip_addr;
> +
> +    if (!extract_ip_addresses(new_external_ip, &new_external_ip_addr) ||
> +        !extract_ip_addresses(old_external_ip, &old_external_ip_addr)) {
> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
> +        VLOG_WARN_RL(&rl, "Extract addresses failed.");
> +        return true;
> +    }
> +
> +    if (is_v6) {
> +        ovs_assert(new_external_ip_addr.n_ipv6_addrs == 1);
> +        ovs_assert(old_external_ip_addr.n_ipv6_addrs == 1);
> +    } else {
> +        ovs_assert(new_external_ip_addr.n_ipv4_addrs == 1);
> +        ovs_assert(old_external_ip_addr.n_ipv4_addrs == 1);
> +    }
> +
> +    for (size_t i = 0; i < lr->n_ports; i++) {
> +        const struct nbrec_logical_router_port *lrp = lr->ports[i];
> +        const struct nbrec_logical_router_port *old_port = NULL;
> +        const struct nbrec_logical_router_port *new_port = NULL;
> +        if (lrp->n_gateway_chassis) {
> +            num_l3dgw_ports++;
> +            struct lport_addresses lrp_addrs;
> +            if (!extract_lrp_networks(lrp, &lrp_addrs)) {
> +                static struct vlog_rate_limit rl =
VLOG_RATE_LIMIT_INIT(1, 1);
> +                VLOG_WARN_RL(&rl, "Extract addresses failed.");
> +                continue;
> +            }
> +
> +            if (is_v6) {
> +                for (int j = 0; j < lrp_addrs.n_ipv6_addrs; j++) {
> +                    struct ipv6_netaddr *lrp6_addr =
&lrp_addrs.ipv6_addrs[j];
> +                    struct in6_addr new_ip6_mask, old_ip6_mask;
> +                    new_ip6_mask = ipv6_addr_bitand(
> +                        &lrp6_addr->mask,
> +                        &new_external_ip_addr.ipv6_addrs[0].addr);
> +                    old_ip6_mask = ipv6_addr_bitand(
> +                        &lrp6_addr->mask,
> +                        &old_external_ip_addr.ipv6_addrs[0].addr);
> +                    if (ipv6_addr_equals(&new_ip6_mask,
> +                                         &(lrp6_addr->network))) {
> +                        new_port = lrp;
> +                    }
> +                    if (ipv6_addr_equals(&old_ip6_mask,
> +                                         &(lrp6_addr->network))) {
> +                        old_port = lrp;
> +                    }
> +                }
> +            } else {
> +                for (int j = 0; j < lrp_addrs.n_ipv4_addrs; j++) {
> +                    uint32_t nw_addr =
ntohl(lrp_addrs.ipv4_addrs[j].network);
> +                    uint32_t mask = ntohl(lrp_addrs.ipv4_addrs[j].mask);
> +                    uint32_t new_ip, old_ip;
> +                    new_ip =
ntohl(new_external_ip_addr.ipv4_addrs[0].addr);
> +                    old_ip =
ntohl(old_external_ip_addr.ipv4_addrs[0].addr);
> +                    if ((new_ip & mask) == nw_addr) {
> +                        new_port = lrp;
> +                    }
> +                    if ((old_ip & mask) == nw_addr) {
> +                        old_port = lrp;
> +                    }
> +                }
> +            }
> +            if ((old_port || new_port) && (old_port == new_port)) {
> +                is_conflict = true;
> +            }
> +            destroy_lport_addresses(&lrp_addrs);
> +        }
> +    }
> +    destroy_lport_addresses(&old_external_ip_addr);
> +    destroy_lport_addresses(&new_external_ip_addr);
> +
> +    if (num_l3dgw_ports > 1 && !is_conflict) {
> +        return false;
> +    }
> +    return true;
> +}
> +
>  static bool
>  is_valid_port_range(const char *port_range)
>  {
> @@ -4472,6 +4558,7 @@ nbctl_pre_lr_nat_add(struct ctl_context *ctx)
>  {
>      ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_col_name);
>      ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_col_nat);
> +    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_col_ports);
>
>      ovsdb_idl_add_column(ctx->idl, &nbrec_logical_switch_port_col_name);
>
> @@ -4481,6 +4568,11 @@ nbctl_pre_lr_nat_add(struct ctl_context *ctx)
>      ovsdb_idl_add_column(ctx->idl, &nbrec_nat_col_logical_port);
>      ovsdb_idl_add_column(ctx->idl, &nbrec_nat_col_external_mac);
>      ovsdb_idl_add_column(ctx->idl, &nbrec_nat_col_options);
> +
> +    ovsdb_idl_add_column(ctx->idl,
&nbrec_logical_router_port_col_networks);
> +    ovsdb_idl_add_column(ctx->idl, &nbrec_logical_router_port_col_mac);
> +    ovsdb_idl_add_column(ctx->idl,
> +                         &nbrec_logical_router_port_col_gateway_chassis);
>  }
>
>  static void
> @@ -4650,12 +4742,16 @@ nbctl_lr_nat_add(struct ctl_context *ctx)
>                              should_return = true;
>                          }
>                  } else {
> -                    ctl_error(ctx, "a NAT with this type (%s) and %s
(%s) "
> -                              "already exists",
> -                              nat_type,
> -                              is_snat ? "logical_ip" : "external_ip",
> -                              is_snat ? new_logical_ip :
new_external_ip);
> -                    should_return = true;
> +                    if (!is_snat ||
> +                        is_nat_rule_conflict(lr, new_external_ip,
> +                                             old_external_ip, is_v6)) {
> +                        ctl_error(ctx, "a NAT with this type (%s) and %s
(%s) "
> +                                  "already exists",
> +                                  nat_type,
> +                                  is_snat ? "logical_ip" : "external_ip",
> +                                  is_snat ? new_logical_ip :
new_external_ip);
> +                        should_return = true;
> +                    }
>                  }
>              }
>          }
> @@ -4765,6 +4861,20 @@ nbctl_lr_nat_del(struct ctl_context *ctx)
>      }
>
>      int is_snat = !strcmp("snat", nat_type);
> +    char *nat_external_ip = NULL;
> +    if (ctx->argc == 5) {
> +        if (is_snat) {
> +            nat_external_ip = normalize_prefix_str(ctx->argv[4]);
> +            if (!nat_external_ip) {
> +                ctl_error(ctx, "%s: Invalid IP address or CIDR",
ctx->argv[4]);
> +            }
> +        } else {
> +            ctl_error(ctx, "%s type takes a maximum of one ip address",
> +                      nat_type);
> +        }
> +    }
> +    bool is_exist = false;
> +
>      /* Remove the matching NAT. */
>      for (size_t i = 0; i < lr->n_nat; i++) {
>          struct nbrec_nat *nat = lr->nat[i];
> @@ -4776,8 +4886,29 @@ nbctl_lr_nat_del(struct ctl_context *ctx)
>              continue;
>          }
>          if (!strcmp(nat_type, nat->type) && !strcmp(nat_ip, old_ip)) {
> -            nbrec_logical_router_update_nat_delvalue(lr, nat);
> -            should_return = true;
> +            if (nat_external_ip != NULL) {
> +                char *old_external_ip =
normalize_prefix_str(nat->external_ip);
> +                if (!old_external_ip) {
> +                    continue;
> +                }
> +                if (!strcmp(nat_external_ip, old_external_ip)) {
> +                    nbrec_logical_router_update_nat_delvalue(lr, nat);
> +                    free(old_external_ip);
> +                    is_exist = true;
> +                    should_return = true;
> +                }
> +            } else {
> +                nbrec_logical_router_update_nat_delvalue(lr, nat);
> +                /* When nat_type is snat and external_ip is not
specified, we
> +                 * need to iterate over all the rules and delete all nat
entries
> +                 * matching the logical ip. Hence don't set
should_return for
> +                 * snat case.
> +                 */
> +                if (!is_snat) {
> +                    should_return = true;
> +                }
> +                is_exist = true;
> +            }
>          }
>          free(old_ip);
>          if (should_return) {
> @@ -4785,13 +4916,16 @@ nbctl_lr_nat_del(struct ctl_context *ctx)
>          }
>      }
>
> -    if (must_exist) {
> +    if (must_exist && !is_exist) {
>          ctl_error(ctx, "no matching NAT with the type (%s) and %s (%s)",
>                    nat_type, is_snat ? "logical_ip" : "external_ip",
nat_ip);
>      }
>
>  cleanup:
>      free(nat_ip);
> +    if (nat_external_ip != NULL) {
> +        free(nat_external_ip);
> +    }
>  }
>
>  static void
> @@ -6964,7 +7098,7 @@ static const struct ctl_command_syntax
nbctl_commands[] = {
>        "[LOGICAL_PORT EXTERNAL_MAC] [EXTERNAL_PORT_RANGE]",
>        nbctl_pre_lr_nat_add, nbctl_lr_nat_add,
>        NULL, "--may-exist,--stateless,--portrange,--add-route", RW },
> -    { "lr-nat-del", 1, 3, "ROUTER [TYPE [IP]]",
> +    { "lr-nat-del", 1, 4, "ROUTER [TYPE [IP [EXTERNAL_IP]]]",
>        nbctl_pre_lr_nat_del, nbctl_lr_nat_del, NULL, "--if-exists", RW },
>      { "lr-nat-list", 1, 1, "ROUTER", nbctl_pre_lr_nat_list,
>        nbctl_lr_nat_list, NULL, "", RO },
> --
> 2.22.3
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev


More information about the dev mailing list