[ovs-dev] [OVN Patch v8 11/11] ovn-northd: move NAT, Defrag and lb to a function

anton.ivanov at cambridgegreys.com anton.ivanov at cambridgegreys.com
Tue Jan 5 17:49:38 UTC 2021


From: Anton Ivanov <anton.ivanov at cambridgegreys.com>

Signed-off-by: Anton Ivanov <anton.ivanov at cambridgegreys.com>
---
 northd/ovn-northd.c | 4128 +++++++++++++++++++++----------------------
 1 file changed, 2058 insertions(+), 2070 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 73ea6e096..cd13d9fdf 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -8927,2391 +8927,2380 @@ build_lrouter_force_snat_flows(struct hmap *lflows, struct ovn_datapath *od,
     ds_destroy(&actions);
 }
 
+/* Logical router ingress Table 0: L2 Admission Control
+ * Generic admission control flows (without inport check).
+ */
 static void
-build_lrouter_flows(struct hmap *datapaths,
-                    struct hmap *lflows, struct shash *meter_groups,
-                    struct hmap *lbs)
+build_adm_ctrl_flows_for_lrouter(
+        struct ovn_datapath *od, struct hmap *lflows)
 {
-    /* This flow table structure is documented in ovn-northd(8), so please
-     * update ovn-northd.8.xml if you change anything. */
-
-    struct ds match = DS_EMPTY_INITIALIZER;
-    struct ds actions = DS_EMPTY_INITIALIZER;
+    if (od->nbr) {
+        /* Logical VLANs not supported.
+         * Broadcast/multicast source address is invalid. */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
+                      "vlan.present || eth.src[40]", "drop;");
+    }
+}
 
-    struct ovn_datapath *od;
+/* Logical router ingress Table 0: L2 Admission Control
+ * This table drops packets that the router shouldn't see at all based
+ * on their Ethernet headers.
+ */
+static void
+build_adm_ctrl_flows_for_lrouter_port(
+        struct ovn_port *op, struct hmap *lflows,
+        struct ds *match, struct ds *actions)
+{
+    if (op->nbrp) {
+        if (!lrport_is_enabled(op->nbrp)) {
+            /* Drop packets from disabled logical ports (since logical flow
+             * tables are default-drop). */
+            return;
+        }
 
-    /* NAT, Defrag and load balancing. */
-    HMAP_FOR_EACH (od, key_node, datapaths) {
-        if (!od->nbr) {
-            continue;
+        if (op->derived) {
+            /* No ingress packets should be received on a chassisredirect
+             * port. */
+            return;
         }
 
-        /* Packets are allowed by default. */
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;");
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
-        ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
-        ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;");
-        ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;");
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 0, "1", "next;");
+        /* Store the ethernet address of the port receiving the packet.
+         * This will save us from having to match on inport further down in
+         * the pipeline.
+         */
+        ds_clear(actions);
+        ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
+                      op->lrp_networks.ea_s);
 
-        /* Send the IPv6 NS packets to next table. When ovn-controller
-         * generates IPv6 NS (for the action - nd_ns{}), the injected
-         * packet would go through conntrack - which is not required. */
-        ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 120, "nd_ns", "next;");
+        ds_clear(match);
+        ds_put_format(match, "eth.mcast && inport == %s", op->json_key);
+        ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
+                                ds_cstr(match), ds_cstr(actions),
+                                &op->nbrp->header_);
 
-        /* NAT rules are only valid on Gateway routers and routers with
-         * l3dgw_port (router has a port with gateway chassis
-         * specified). */
-        if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
-            continue;
+        ds_clear(match);
+        ds_put_format(match, "eth.dst == %s && inport == %s",
+                      op->lrp_networks.ea_s, op->json_key);
+        if (op->od->l3dgw_port && op == op->od->l3dgw_port
+            && op->od->l3redirect_port) {
+            /* Traffic with eth.dst = l3dgw_port->lrp_networks.ea_s
+             * should only be received on the gateway chassis. */
+            ds_put_format(match, " && is_chassis_resident(%s)",
+                          op->od->l3redirect_port->json_key);
         }
+        ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
+                                ds_cstr(match),  ds_cstr(actions),
+                                &op->nbrp->header_);
+    }
+}
 
-        struct sset nat_entries = SSET_INITIALIZER(&nat_entries);
 
-        bool dnat_force_snat_ip =
-            !lport_addresses_is_empty(&od->dnat_force_snat_addrs);
-        bool lb_force_snat_ip =
-            !lport_addresses_is_empty(&od->lb_force_snat_addrs);
+/* Logical router ingress Tables 1 and 2: Neighbor lookup and learning
+ * lflows for logical routers. */
+static void
+build_neigh_learning_flows_for_lrouter(
+        struct ovn_datapath *od, struct hmap *lflows,
+        struct ds *match, struct ds *actions)
+{
+    if (od->nbr) {
 
-        for (int i = 0; i < od->nbr->n_nat; i++) {
-            const struct nbrec_nat *nat;
+        /* Learn MAC bindings from ARP/IPv6 ND.
+         *
+         * For ARP packets, table LOOKUP_NEIGHBOR does a lookup for the
+         * (arp.spa, arp.sha) in the mac binding table using the 'lookup_arp'
+         * action and stores the result in REGBIT_LOOKUP_NEIGHBOR_RESULT bit.
+         * If "always_learn_from_arp_request" is set to false, it will also
+         * lookup for the (arp.spa) in the mac binding table using the
+         * "lookup_arp_ip" action for ARP request packets, and stores the
+         * result in REGBIT_LOOKUP_NEIGHBOR_IP_RESULT bit; or set that bit
+         * to "1" directly for ARP response packets.
+         *
+         * For IPv6 ND NA packets, table LOOKUP_NEIGHBOR does a lookup
+         * for the (nd.target, nd.tll) in the mac binding table using the
+         * 'lookup_nd' action and stores the result in
+         * REGBIT_LOOKUP_NEIGHBOR_RESULT bit. If
+         * "always_learn_from_arp_request" is set to false,
+         * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT bit is set.
+         *
+         * For IPv6 ND NS packets, table LOOKUP_NEIGHBOR does a lookup
+         * for the (ip6.src, nd.sll) in the mac binding table using the
+         * 'lookup_nd' action and stores the result in
+         * REGBIT_LOOKUP_NEIGHBOR_RESULT bit. If
+         * "always_learn_from_arp_request" is set to false, it will also lookup
+         * for the (ip6.src) in the mac binding table using the "lookup_nd_ip"
+         * action and stores the result in REGBIT_LOOKUP_NEIGHBOR_IP_RESULT
+         * bit.
+         *
+         * Table LEARN_NEIGHBOR learns the mac-binding using the action
+         * - 'put_arp/put_nd'. Learning mac-binding is skipped if
+         *   REGBIT_LOOKUP_NEIGHBOR_RESULT bit is set or
+         *   REGBIT_LOOKUP_NEIGHBOR_IP_RESULT is not set.
+         *
+         * */
 
-            nat = od->nbr->nat[i];
+        /* Flows for LOOKUP_NEIGHBOR. */
+        bool learn_from_arp_request = smap_get_bool(&od->nbr->options,
+            "always_learn_from_arp_request", true);
+        ds_clear(actions);
+        ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
+                      " = lookup_arp(inport, arp.spa, arp.sha); %snext;",
+                      learn_from_arp_request ? "" :
+                      REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ");
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100,
+                      "arp.op == 2", ds_cstr(actions));
 
-            ovs_be32 ip, mask;
-            struct in6_addr ipv6, mask_v6, v6_exact = IN6ADDR_EXACT_INIT;
-            bool is_v6 = false;
-            bool stateless = lrouter_nat_is_stateless(nat);
-            struct nbrec_address_set *allowed_ext_ips =
-                                      nat->allowed_ext_ips;
-            struct nbrec_address_set *exempted_ext_ips =
-                                      nat->exempted_ext_ips;
+        ds_clear(actions);
+        ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
+                      " = lookup_nd(inport, nd.target, nd.tll); %snext;",
+                      learn_from_arp_request ? "" :
+                      REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ");
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_na",
+                      ds_cstr(actions));
 
-            if (allowed_ext_ips && exempted_ext_ips) {
-                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
-                VLOG_WARN_RL(&rl, "NAT rule: "UUID_FMT" not applied, since "
-                             "both allowed and exempt external ips set",
-                             UUID_ARGS(&(nat->header_.uuid)));
-                continue;
-            }
+        ds_clear(actions);
+        ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
+                      " = lookup_nd(inport, ip6.src, nd.sll); %snext;",
+                      learn_from_arp_request ? "" :
+                      REGBIT_LOOKUP_NEIGHBOR_IP_RESULT
+                      " = lookup_nd_ip(inport, ip6.src); ");
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_ns",
+                      ds_cstr(actions));
 
-            char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
-            if (error || mask != OVS_BE32_MAX) {
-                free(error);
-                error = ipv6_parse_masked(nat->external_ip, &ipv6, &mask_v6);
-                if (error || memcmp(&mask_v6, &v6_exact, sizeof(mask_v6))) {
-                    /* Invalid for both IPv4 and IPv6 */
-                    static struct vlog_rate_limit rl =
-                        VLOG_RATE_LIMIT_INIT(5, 1);
-                    VLOG_WARN_RL(&rl, "bad external ip %s for nat",
-                                 nat->external_ip);
-                    free(error);
-                    continue;
-                }
-                /* It was an invalid IPv4 address, but valid IPv6.
-                 * Treat the rest of the handling of this NAT rule
-                 * as IPv6. */
-                is_v6 = true;
-            }
+        /* For other packet types, we can skip neighbor learning.
+         * So set REGBIT_LOOKUP_NEIGHBOR_RESULT to 1. */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 0, "1",
+                      REGBIT_LOOKUP_NEIGHBOR_RESULT" = 1; next;");
 
-            /* Check the validity of nat->logical_ip. 'logical_ip' can
-             * be a subnet when the type is "snat". */
-            int cidr_bits;
-            if (is_v6) {
-                error = ipv6_parse_masked(nat->logical_ip, &ipv6, &mask_v6);
-                cidr_bits = ipv6_count_cidr_bits(&mask_v6);
-            } else {
-                error = ip_parse_masked(nat->logical_ip, &ip, &mask);
-                cidr_bits = ip_count_cidr_bits(mask);
-            }
-            if (!strcmp(nat->type, "snat")) {
-                if (error) {
-                    /* Invalid for both IPv4 and IPv6 */
-                    static struct vlog_rate_limit rl =
-                        VLOG_RATE_LIMIT_INIT(5, 1);
-                    VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
-                                 "in router "UUID_FMT"",
-                                 nat->logical_ip, UUID_ARGS(&od->key));
-                    free(error);
-                    continue;
-                }
-            } else {
-                if (error || (!is_v6 && mask != OVS_BE32_MAX)
-                    || (is_v6 && memcmp(&mask_v6, &v6_exact,
-                                        sizeof mask_v6))) {
-                    /* Invalid for both IPv4 and IPv6 */
-                    static struct vlog_rate_limit rl =
-                        VLOG_RATE_LIMIT_INIT(5, 1);
-                    VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
-                        ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
-                    free(error);
-                    continue;
-                }
-            }
+        /* Flows for LEARN_NEIGHBOR. */
+        /* Skip Neighbor learning if not required. */
+        ds_clear(match);
+        ds_put_format(match, REGBIT_LOOKUP_NEIGHBOR_RESULT" == 1%s",
+                      learn_from_arp_request ? "" :
+                      " || "REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" == 0");
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 100,
+                      ds_cstr(match), "next;");
 
-            /* For distributed router NAT, determine whether this NAT rule
-             * satisfies the conditions for distributed NAT processing. */
-            bool distributed = false;
-            struct eth_addr mac;
-            if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") &&
-                nat->logical_port && nat->external_mac) {
-                if (eth_addr_from_string(nat->external_mac, &mac)) {
-                    distributed = true;
-                } else {
-                    static struct vlog_rate_limit rl =
-                        VLOG_RATE_LIMIT_INIT(5, 1);
-                    VLOG_WARN_RL(&rl, "bad mac %s for dnat in router "
-                        ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key));
-                    continue;
-                }
-            }
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
+                      "arp", "put_arp(inport, arp.spa, arp.sha); next;");
 
-            /* Ingress UNSNAT table: It is for already established connections'
-             * reverse traffic. i.e., SNAT has already been done in egress
-             * pipeline and now the packet has entered the ingress pipeline as
-             * part of a reply. We undo the SNAT here.
-             *
-             * Undoing SNAT has to happen before DNAT processing.  This is
-             * because when the packet was DNATed in ingress pipeline, it did
-             * not know about the possibility of eventual additional SNAT in
-             * egress pipeline. */
-            if (!strcmp(nat->type, "snat")
-                || !strcmp(nat->type, "dnat_and_snat")) {
-                if (!od->l3dgw_port) {
-                    /* Gateway router. */
-                    ds_clear(&match);
-                    ds_clear(&actions);
-                    ds_put_format(&match, "ip && ip%s.dst == %s",
-                                  is_v6 ? "6" : "4",
-                                  nat->external_ip);
-                    if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
-                       ds_put_format(&actions, "ip%s.dst=%s; next;",
-                                     is_v6 ? "6" : "4", nat->logical_ip);
-                    } else {
-                       ds_put_cstr(&actions, "ct_snat;");
-                    }
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
+                      "nd_na", "put_nd(inport, nd.target, nd.tll); next;");
 
-                    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT,
-                                            90, ds_cstr(&match),
-                                            ds_cstr(&actions),
-                                            &nat->header_);
-                } else {
-                    /* Distributed router. */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
+                      "nd_ns", "put_nd(inport, ip6.src, nd.sll); next;");
+    }
 
-                    /* Traffic received on l3dgw_port is subject to NAT. */
-                    ds_clear(&match);
-                    ds_clear(&actions);
-                    ds_put_format(&match, "ip && ip%s.dst == %s"
-                                          " && inport == %s",
-                                  is_v6 ? "6" : "4",
-                                  nat->external_ip,
-                                  od->l3dgw_port->json_key);
-                    if (!distributed && od->l3redirect_port) {
-                        /* Flows for NAT rules that are centralized are only
-                         * programmed on the gateway chassis. */
-                        ds_put_format(&match, " && is_chassis_resident(%s)",
-                                      od->l3redirect_port->json_key);
-                    }
+}
 
-                    if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
-                        ds_put_format(&actions, "ip%s.dst=%s; next;",
-                                      is_v6 ? "6" : "4", nat->logical_ip);
-                    } else {
-                        ds_put_cstr(&actions, "ct_snat;");
-                    }
+/* Logical router ingress Table 1: Neighbor lookup lflows
+ * for logical router ports. */
+static void
+build_neigh_learning_flows_for_lrouter_port(
+        struct ovn_port *op, struct hmap *lflows,
+        struct ds *match, struct ds *actions)
+{
+    if (op->nbrp) {
 
-                    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT,
-                                            100,
-                                            ds_cstr(&match), ds_cstr(&actions),
-                                            &nat->header_);
+        bool learn_from_arp_request = smap_get_bool(&op->od->nbr->options,
+            "always_learn_from_arp_request", true);
+
+        /* Check if we need to learn mac-binding from ARP requests. */
+        for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+            if (!learn_from_arp_request) {
+                /* ARP request to this address should always get learned,
+                 * so add a priority-110 flow to set
+                 * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT to 1. */
+                ds_clear(match);
+                ds_put_format(match,
+                              "inport == %s && arp.spa == %s/%u && "
+                              "arp.tpa == %s && arp.op == 1",
+                              op->json_key,
+                              op->lrp_networks.ipv4_addrs[i].network_s,
+                              op->lrp_networks.ipv4_addrs[i].plen,
+                              op->lrp_networks.ipv4_addrs[i].addr_s);
+                if (op->od->l3dgw_port && op == op->od->l3dgw_port
+                    && op->od->l3redirect_port) {
+                    ds_put_format(match, " && is_chassis_resident(%s)",
+                                  op->od->l3redirect_port->json_key);
                 }
+                const char *actions_s = REGBIT_LOOKUP_NEIGHBOR_RESULT
+                                  " = lookup_arp(inport, arp.spa, arp.sha); "
+                                  REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1;"
+                                  " next;";
+                ovn_lflow_add_with_hint(lflows, op->od,
+                                        S_ROUTER_IN_LOOKUP_NEIGHBOR, 110,
+                                        ds_cstr(match), actions_s,
+                                        &op->nbrp->header_);
+            }
+            ds_clear(match);
+            ds_put_format(match,
+                          "inport == %s && arp.spa == %s/%u && arp.op == 1",
+                          op->json_key,
+                          op->lrp_networks.ipv4_addrs[i].network_s,
+                          op->lrp_networks.ipv4_addrs[i].plen);
+            if (op->od->l3dgw_port && op == op->od->l3dgw_port
+                && op->od->l3redirect_port) {
+                ds_put_format(match, " && is_chassis_resident(%s)",
+                              op->od->l3redirect_port->json_key);
             }
+            ds_clear(actions);
+            ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
+                          " = lookup_arp(inport, arp.spa, arp.sha); %snext;",
+                          learn_from_arp_request ? "" :
+                          REGBIT_LOOKUP_NEIGHBOR_IP_RESULT
+                          " = lookup_arp_ip(inport, arp.spa); ");
+            ovn_lflow_add_with_hint(lflows, op->od,
+                                    S_ROUTER_IN_LOOKUP_NEIGHBOR, 100,
+                                    ds_cstr(match), ds_cstr(actions),
+                                    &op->nbrp->header_);
+        }
+    }
+}
 
-            /* Ingress DNAT table: Packets enter the pipeline with destination
-             * IP address that needs to be DNATted from a external IP address
-             * to a logical IP address. */
-            if (!strcmp(nat->type, "dnat")
-                || !strcmp(nat->type, "dnat_and_snat")) {
-                if (!od->l3dgw_port) {
-                    /* Gateway router. */
-                    /* Packet when it goes from the initiator to destination.
-                     * We need to set flags.loopback because the router can
-                     * send the packet back through the same interface. */
-                    ds_clear(&match);
-                    ds_put_format(&match, "ip && ip%s.dst == %s",
-                                  is_v6 ? "6" : "4",
-                                  nat->external_ip);
-                    ds_clear(&actions);
-                    if (allowed_ext_ips || exempted_ext_ips) {
-                        lrouter_nat_add_ext_ip_match(od, lflows, &match, nat,
-                                                     is_v6, true, mask);
-                    }
+/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: IPv6 Router
+ * Adv (RA) options and response. */
+static void
+build_ND_RA_flows_for_lrouter_port(
+        struct ovn_port *op, struct hmap *lflows,
+        struct ds *match, struct ds *actions)
+{
+    if (!op->nbrp || op->nbrp->peer || !op->peer) {
+        return;
+    }
 
-                    if (dnat_force_snat_ip) {
-                        /* Indicate to the future tables that a DNAT has taken
-                         * place and a force SNAT needs to be done in the
-                         * Egress SNAT table. */
-                        ds_put_format(&actions,
-                                      "flags.force_snat_for_dnat = 1; ");
-                    }
+    if (!op->lrp_networks.n_ipv6_addrs) {
+        return;
+    }
 
-                    if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
-                        ds_put_format(&actions, "flags.loopback = 1; "
-                                      "ip%s.dst=%s; next;",
-                                      is_v6 ? "6" : "4", nat->logical_ip);
-                    } else {
-                        ds_put_format(&actions, "flags.loopback = 1; "
-                                      "ct_dnat(%s", nat->logical_ip);
+    struct smap options;
+    smap_clone(&options, &op->sb->options);
 
-                        if (nat->external_port_range[0]) {
-                            ds_put_format(&actions, ",%s",
-                                          nat->external_port_range);
-                        }
-                        ds_put_format(&actions, ");");
-                    }
+    /* enable IPv6 prefix delegation */
+    bool prefix_delegation = smap_get_bool(&op->nbrp->options,
+                                           "prefix_delegation", false);
+    if (!lrport_is_enabled(op->nbrp)) {
+        prefix_delegation = false;
+    }
+    smap_add(&options, "ipv6_prefix_delegation",
+             prefix_delegation ? "true" : "false");
 
-                    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100,
-                                            ds_cstr(&match), ds_cstr(&actions),
-                                            &nat->header_);
-                } else {
-                    /* Distributed router. */
+    bool ipv6_prefix = smap_get_bool(&op->nbrp->options,
+                                     "prefix", false);
+    if (!lrport_is_enabled(op->nbrp)) {
+        ipv6_prefix = false;
+    }
+    smap_add(&options, "ipv6_prefix",
+             ipv6_prefix ? "true" : "false");
+    sbrec_port_binding_set_options(op->sb, &options);
 
-                    /* Traffic received on l3dgw_port is subject to NAT. */
-                    ds_clear(&match);
-                    ds_put_format(&match, "ip && ip%s.dst == %s"
-                                          " && inport == %s",
-                                  is_v6 ? "6" : "4",
-                                  nat->external_ip,
-                                  od->l3dgw_port->json_key);
-                    if (!distributed && od->l3redirect_port) {
-                        /* Flows for NAT rules that are centralized are only
-                         * programmed on the gateway chassis. */
-                        ds_put_format(&match, " && is_chassis_resident(%s)",
-                                      od->l3redirect_port->json_key);
-                    }
-                    ds_clear(&actions);
-                    if (allowed_ext_ips || exempted_ext_ips) {
-                        lrouter_nat_add_ext_ip_match(od, lflows, &match, nat,
-                                                     is_v6, true, mask);
-                    }
+    smap_destroy(&options);
 
-                    if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
-                        ds_put_format(&actions, "ip%s.dst=%s; next;",
-                                      is_v6 ? "6" : "4", nat->logical_ip);
-                    } else {
-                        ds_put_format(&actions, "ct_dnat(%s", nat->logical_ip);
-                        if (nat->external_port_range[0]) {
-                            ds_put_format(&actions, ",%s",
-                                          nat->external_port_range);
-                        }
-                        ds_put_format(&actions, ");");
-                    }
+    const char *address_mode = smap_get(
+        &op->nbrp->ipv6_ra_configs, "address_mode");
 
-                    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100,
-                                            ds_cstr(&match), ds_cstr(&actions),
-                                            &nat->header_);
-                }
-            }
+    if (!address_mode) {
+        return;
+    }
+    if (strcmp(address_mode, "slaac") &&
+        strcmp(address_mode, "dhcpv6_stateful") &&
+        strcmp(address_mode, "dhcpv6_stateless")) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+        VLOG_WARN_RL(&rl, "Invalid address mode [%s] defined",
+                     address_mode);
+        return;
+    }
 
-            /* ARP resolve for NAT IPs. */
-            if (od->l3dgw_port) {
-                if (!strcmp(nat->type, "snat")) {
-                    ds_clear(&match);
-                    ds_put_format(
-                        &match, "inport == %s && %s == %s",
-                        od->l3dgw_port->json_key,
-                        is_v6 ? "ip6.src" : "ip4.src", nat->external_ip);
-                    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_INPUT,
-                                            120, ds_cstr(&match), "next;",
-                                            &nat->header_);
-                }
+    if (smap_get_bool(&op->nbrp->ipv6_ra_configs, "send_periodic",
+                      false)) {
+        copy_ra_to_sb(op, address_mode);
+    }
 
-                if (!sset_contains(&nat_entries, nat->external_ip)) {
-                    ds_clear(&match);
-                    ds_put_format(
-                        &match, "outport == %s && %s == %s",
-                        od->l3dgw_port->json_key,
-                        is_v6 ? REG_NEXT_HOP_IPV6 : REG_NEXT_HOP_IPV4,
-                        nat->external_ip);
-                    ds_clear(&actions);
-                    ds_put_format(
-                        &actions, "eth.dst = %s; next;",
-                        distributed ? nat->external_mac :
-                        od->l3dgw_port->lrp_networks.ea_s);
-                    ovn_lflow_add_with_hint(lflows, od,
-                                            S_ROUTER_IN_ARP_RESOLVE,
-                                            100, ds_cstr(&match),
-                                            ds_cstr(&actions),
-                                            &nat->header_);
-                    sset_add(&nat_entries, nat->external_ip);
-                }
-            } else {
-                /* Add the NAT external_ip to the nat_entries even for
-                 * gateway routers. This is required for adding load balancer
-                 * flows.*/
-                sset_add(&nat_entries, nat->external_ip);
-            }
+    ds_clear(match);
+    ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && nd_rs",
+                          op->json_key);
+    ds_clear(actions);
 
-            /* Egress UNDNAT table: It is for already established connections'
-             * reverse traffic. i.e., DNAT has already been done in ingress
-             * pipeline and now the packet has entered the egress pipeline as
-             * part of a reply. We undo the DNAT here.
-             *
-             * Note that this only applies for NAT on a distributed router.
-             * Undo DNAT on a gateway router is done in the ingress DNAT
-             * pipeline stage. */
-            if (od->l3dgw_port && (!strcmp(nat->type, "dnat")
-                || !strcmp(nat->type, "dnat_and_snat"))) {
-                ds_clear(&match);
-                ds_put_format(&match, "ip && ip%s.src == %s"
-                                      " && outport == %s",
-                              is_v6 ? "6" : "4",
-                              nat->logical_ip,
-                              od->l3dgw_port->json_key);
-                if (!distributed && od->l3redirect_port) {
-                    /* Flows for NAT rules that are centralized are only
-                     * programmed on the gateway chassis. */
-                    ds_put_format(&match, " && is_chassis_resident(%s)",
-                                  od->l3redirect_port->json_key);
-                }
-                ds_clear(&actions);
-                if (distributed) {
-                    ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
-                                  ETH_ADDR_ARGS(mac));
-                }
+    const char *mtu_s = smap_get(
+        &op->nbrp->ipv6_ra_configs, "mtu");
 
-                if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
-                    ds_put_format(&actions, "ip%s.src=%s; next;",
-                                  is_v6 ? "6" : "4", nat->external_ip);
-                } else {
-                    ds_put_format(&actions, "ct_dnat;");
-                }
+    /* As per RFC 2460, 1280 is minimum IPv6 MTU. */
+    uint32_t mtu = (mtu_s && atoi(mtu_s) >= 1280) ? atoi(mtu_s) : 0;
 
-                ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_UNDNAT, 100,
-                                        ds_cstr(&match), ds_cstr(&actions),
-                                        &nat->header_);
-            }
+    ds_put_format(actions, REGBIT_ND_RA_OPTS_RESULT" = put_nd_ra_opts("
+                  "addr_mode = \"%s\", slla = %s",
+                  address_mode, op->lrp_networks.ea_s);
+    if (mtu > 0) {
+        ds_put_format(actions, ", mtu = %u", mtu);
+    }
 
-            /* Egress SNAT table: Packets enter the egress pipeline with
-             * source ip address that needs to be SNATted to a external ip
-             * address. */
-            if (!strcmp(nat->type, "snat")
-                || !strcmp(nat->type, "dnat_and_snat")) {
-                if (!od->l3dgw_port) {
-                    /* Gateway router. */
-                    ds_clear(&match);
-                    ds_put_format(&match, "ip && ip%s.src == %s",
-                                  is_v6 ? "6" : "4",
-                                  nat->logical_ip);
-                    ds_clear(&actions);
+    const char *prf = smap_get_def(
+        &op->nbrp->ipv6_ra_configs, "router_preference", "MEDIUM");
+    if (strcmp(prf, "MEDIUM")) {
+        ds_put_format(actions, ", router_preference = \"%s\"", prf);
+    }
 
-                    if (allowed_ext_ips || exempted_ext_ips) {
-                        lrouter_nat_add_ext_ip_match(od, lflows, &match, nat,
-                                                     is_v6, false, mask);
-                    }
+    bool add_rs_response_flow = false;
 
-                    if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
-                        ds_put_format(&actions, "ip%s.src=%s; next;",
-                                      is_v6 ? "6" : "4", nat->external_ip);
-                    } else {
-                        ds_put_format(&actions, "ct_snat(%s",
-                                      nat->external_ip);
+    for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+        if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
+            continue;
+        }
 
-                        if (nat->external_port_range[0]) {
-                            ds_put_format(&actions, ",%s",
-                                          nat->external_port_range);
-                        }
-                        ds_put_format(&actions, ");");
-                    }
+        ds_put_format(actions, ", prefix = %s/%u",
+                      op->lrp_networks.ipv6_addrs[i].network_s,
+                      op->lrp_networks.ipv6_addrs[i].plen);
 
-                    /* The priority here is calculated such that the
-                     * nat->logical_ip with the longest mask gets a higher
-                     * priority. */
-                    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT,
-                                            cidr_bits + 1,
-                                            ds_cstr(&match), ds_cstr(&actions),
-                                            &nat->header_);
-                } else {
-                    uint16_t priority = cidr_bits + 1;
+        add_rs_response_flow = true;
+    }
 
-                    /* Distributed router. */
-                    ds_clear(&match);
-                    ds_put_format(&match, "ip && ip%s.src == %s"
-                                          " && outport == %s",
-                                  is_v6 ? "6" : "4",
-                                  nat->logical_ip,
-                                  od->l3dgw_port->json_key);
-                    if (!distributed && od->l3redirect_port) {
-                        /* Flows for NAT rules that are centralized are only
-                         * programmed on the gateway chassis. */
-                        priority += 128;
-                        ds_put_format(&match, " && is_chassis_resident(%s)",
-                                      od->l3redirect_port->json_key);
-                    }
-                    ds_clear(&actions);
+    if (add_rs_response_flow) {
+        ds_put_cstr(actions, "); next;");
+        ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ND_RA_OPTIONS,
+                                50, ds_cstr(match), ds_cstr(actions),
+                                &op->nbrp->header_);
+        ds_clear(actions);
+        ds_clear(match);
+        ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && "
+                      "nd_ra && "REGBIT_ND_RA_OPTS_RESULT, op->json_key);
 
-                    if (allowed_ext_ips || exempted_ext_ips) {
-                        lrouter_nat_add_ext_ip_match(od, lflows, &match, nat,
-                                                     is_v6, false, mask);
-                    }
+        char ip6_str[INET6_ADDRSTRLEN + 1];
+        struct in6_addr lla;
+        in6_generate_lla(op->lrp_networks.ea, &lla);
+        memset(ip6_str, 0, sizeof(ip6_str));
+        ipv6_string_mapped(ip6_str, &lla);
+        ds_put_format(actions, "eth.dst = eth.src; eth.src = %s; "
+                      "ip6.dst = ip6.src; ip6.src = %s; "
+                      "outport = inport; flags.loopback = 1; "
+                      "output;",
+                      op->lrp_networks.ea_s, ip6_str);
+        ovn_lflow_add_with_hint(lflows, op->od,
+                                S_ROUTER_IN_ND_RA_RESPONSE, 50,
+                                ds_cstr(match), ds_cstr(actions),
+                                &op->nbrp->header_);
+    }
+}
 
-                    if (distributed) {
-                        ds_put_format(&actions, "eth.src = "ETH_ADDR_FMT"; ",
-                                      ETH_ADDR_ARGS(mac));
-                    }
+/* Logical router ingress tables ND_RA_OPTIONS & ND_RA_RESPONSE: RS
+ * responder default flows that just advance to next (priority 0). */
+static void
+build_ND_RA_flows_for_lrouter(struct ovn_datapath *od, struct hmap *lflows)
+{
+    if (od->nbr) {
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_OPTIONS, 0, "1", "next;");
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;");
+    }
+}
 
-                    if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
-                        ds_put_format(&actions, "ip%s.src=%s; next;",
-                                      is_v6 ? "6" : "4", nat->external_ip);
-                    } else {
-                        ds_put_format(&actions, "ct_snat(%s",
-                                      nat->external_ip);
-                        if (nat->external_port_range[0]) {
-                            ds_put_format(&actions, ",%s",
-                                          nat->external_port_range);
-                        }
-                        ds_put_format(&actions, ");");
-                    }
+/* Logical router ingress table IP_ROUTING : IP Routing.
+ *
+ * A packet that arrives at this table is an IP packet that should be
+ * routed to the address in 'ip[46].dst'.
+ *
+ * For regular routes without ECMP, table IP_ROUTING sets outport to the
+ * correct output port, eth.src to the output port's MAC address, and
+ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address
+ * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and
+ * advances to the next table.
+ *
+ * For ECMP routes, i.e. multiple routes with same policy and prefix, table
+ * IP_ROUTING remembers ECMP group id and selects a member id, and advances
+ * to table IP_ROUTING_ECMP, which sets outport, eth.src and
+ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 for the selected ECMP member.
+ */
+static void
+build_ip_routing_flows_for_lrouter_port(
+        struct ovn_port *op, struct hmap *lflows)
+{
+    if (op->nbrp) {
 
-                    /* The priority here is calculated such that the
-                     * nat->logical_ip with the longest mask gets a higher
-                     * priority. */
-                    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT,
-                                            priority, ds_cstr(&match),
-                                            ds_cstr(&actions),
-                                            &nat->header_);
-                }
-            }
+        for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+            add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
+                      op->lrp_networks.ipv4_addrs[i].network_s,
+                      op->lrp_networks.ipv4_addrs[i].plen, NULL, false,
+                      &op->nbrp->header_);
+        }
 
-            /* Logical router ingress table 0:
-             * For NAT on a distributed router, add rules allowing
-             * ingress traffic with eth.dst matching nat->external_mac
-             * on the l3dgw_port instance where nat->logical_port is
-             * resident. */
-            if (distributed) {
-                /* Store the ethernet address of the port receiving the packet.
-                 * This will save us from having to match on inport further
-                 * down in the pipeline.
-                 */
-                ds_clear(&actions);
-                ds_put_format(&actions, REG_INPORT_ETH_ADDR " = %s; next;",
-                              od->l3dgw_port->lrp_networks.ea_s);
+        for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+            add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
+                      op->lrp_networks.ipv6_addrs[i].network_s,
+                      op->lrp_networks.ipv6_addrs[i].plen, NULL, false,
+                      &op->nbrp->header_);
+        }
+    }
+}
 
-                ds_clear(&match);
-                ds_put_format(&match,
-                              "eth.dst == "ETH_ADDR_FMT" && inport == %s"
-                              " && is_chassis_resident(\"%s\")",
-                              ETH_ADDR_ARGS(mac),
-                              od->l3dgw_port->json_key,
-                              nat->logical_port);
-                ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ADMISSION, 50,
-                                        ds_cstr(&match), ds_cstr(&actions),
-                                        &nat->header_);
-            }
+static void
+build_static_route_flows_for_lrouter(
+        struct ovn_datapath *od, struct hmap *lflows,
+        struct hmap *ports)
+{
+    if (od->nbr) {
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING_ECMP, 150,
+                      REG_ECMP_GROUP_ID" == 0", "next;");
 
-            /* Ingress Gateway Redirect Table: For NAT on a distributed
-             * router, add flows that are specific to a NAT rule.  These
-             * flows indicate the presence of an applicable NAT rule that
-             * can be applied in a distributed manner.
-             * In particulr REG_SRC_IPV4/REG_SRC_IPV6 and eth.src are set to
-             * NAT external IP and NAT external mac so the ARP request
-             * generated in the following stage is sent out with proper IP/MAC
-             * src addresses.
-             */
-            if (distributed) {
-                ds_clear(&match);
-                ds_clear(&actions);
-                ds_put_format(&match,
-                              "ip%s.src == %s && outport == %s && "
-                              "is_chassis_resident(\"%s\")",
-                              is_v6 ? "6" : "4", nat->logical_ip,
-                              od->l3dgw_port->json_key, nat->logical_port);
-                ds_put_format(&actions, "eth.src = %s; %s = %s; next;",
-                              nat->external_mac,
-                              is_v6 ? REG_SRC_IPV6 : REG_SRC_IPV4,
-                              nat->external_ip);
-                ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT,
-                                        100, ds_cstr(&match),
-                                        ds_cstr(&actions), &nat->header_);
+        struct hmap ecmp_groups = HMAP_INITIALIZER(&ecmp_groups);
+        struct hmap unique_routes = HMAP_INITIALIZER(&unique_routes);
+        struct ovs_list parsed_routes = OVS_LIST_INITIALIZER(&parsed_routes);
+        struct ecmp_groups_node *group;
+        for (int i = 0; i < od->nbr->n_static_routes; i++) {
+            struct parsed_route *route =
+                parsed_routes_add(&parsed_routes, od->nbr->static_routes[i]);
+            if (!route) {
+                continue;
             }
-
-            /* Egress Loopback table: For NAT on a distributed router.
-             * If packets in the egress pipeline on the distributed
-             * gateway port have ip.dst matching a NAT external IP, then
-             * loop a clone of the packet back to the beginning of the
-             * ingress pipeline with inport = outport. */
-            if (od->l3dgw_port) {
-                /* Distributed router. */
-                ds_clear(&match);
-                ds_put_format(&match, "ip%s.dst == %s && outport == %s",
-                              is_v6 ? "6" : "4",
-                              nat->external_ip,
-                              od->l3dgw_port->json_key);
-                if (!distributed) {
-                    ds_put_format(&match, " && is_chassis_resident(%s)",
-                                  od->l3redirect_port->json_key);
-                } else {
-                    ds_put_format(&match, " && is_chassis_resident(\"%s\")",
-                                  nat->logical_port);
-                }
-
-                ds_clear(&actions);
-                ds_put_format(&actions,
-                              "clone { ct_clear; "
-                              "inport = outport; outport = \"\"; "
-                              "flags = 0; flags.loopback = 1; ");
-                for (int j = 0; j < MFF_N_LOG_REGS; j++) {
-                    ds_put_format(&actions, "reg%d = 0; ", j);
+            group = ecmp_groups_find(&ecmp_groups, route);
+            if (group) {
+                ecmp_groups_add_route(group, route);
+            } else {
+                const struct parsed_route *existed_route =
+                    unique_routes_remove(&unique_routes, route);
+                if (existed_route) {
+                    group = ecmp_groups_add(&ecmp_groups, existed_route);
+                    if (group) {
+                        ecmp_groups_add_route(group, route);
+                    }
+                } else {
+                    unique_routes_add(&unique_routes, route);
                 }
-                ds_put_format(&actions, REGBIT_EGRESS_LOOPBACK" = 1; "
-                              "next(pipeline=ingress, table=%d); };",
-                              ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
-                ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100,
-                                        ds_cstr(&match), ds_cstr(&actions),
-                                        &nat->header_);
             }
         }
-
-        /* Handle force SNAT options set in the gateway router. */
-        if (!od->l3dgw_port) {
-            if (dnat_force_snat_ip) {
-                if (od->dnat_force_snat_addrs.n_ipv4_addrs) {
-                    build_lrouter_force_snat_flows(lflows, od, "4",
-                        od->dnat_force_snat_addrs.ipv4_addrs[0].addr_s,
-                        "dnat");
-                }
-                if (od->dnat_force_snat_addrs.n_ipv6_addrs) {
-                    build_lrouter_force_snat_flows(lflows, od, "6",
-                        od->dnat_force_snat_addrs.ipv6_addrs[0].addr_s,
-                        "dnat");
-                }
-            }
-            if (lb_force_snat_ip) {
-                if (od->lb_force_snat_addrs.n_ipv4_addrs) {
-                    build_lrouter_force_snat_flows(lflows, od, "4",
-                        od->lb_force_snat_addrs.ipv4_addrs[0].addr_s, "lb");
-                }
-                if (od->lb_force_snat_addrs.n_ipv6_addrs) {
-                    build_lrouter_force_snat_flows(lflows, od, "6",
-                        od->lb_force_snat_addrs.ipv6_addrs[0].addr_s, "lb");
-                }
-            }
-
-            /* For gateway router, re-circulate every packet through
-            * the DNAT zone.  This helps with the following.
-            *
-            * Any packet that needs to be unDNATed in the reverse
-            * direction gets unDNATed. Ideally this could be done in
-            * the egress pipeline. But since the gateway router
-            * does not have any feature that depends on the source
-            * ip address being external IP address for IP routing,
-            * we can do it here, saving a future re-circulation. */
-            ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
-                          "ip", "flags.loopback = 1; ct_dnat;");
+        HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) {
+            /* add a flow in IP_ROUTING, and one flow for each member in
+             * IP_ROUTING_ECMP. */
+            build_ecmp_route_flow(lflows, od, ports, group);
         }
-
-        /* Load balancing and packet defrag are only valid on
-         * Gateway routers or router with gateway port. */
-        if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
-            sset_destroy(&nat_entries);
-            continue;
+        const struct unique_routes_node *ur;
+        HMAP_FOR_EACH (ur, hmap_node, &unique_routes) {
+            build_static_route_flow(lflows, od, ports, ur->route);
         }
+        ecmp_groups_destroy(&ecmp_groups);
+        unique_routes_destroy(&unique_routes);
+        parsed_routes_destroy(&parsed_routes);
+    }
+}
 
-        /* A set to hold all ips that need defragmentation and tracking. */
-        struct sset all_ips = SSET_INITIALIZER(&all_ips);
+/* IP Multicast lookup. Here we set the output port, adjust TTL and
+ * advance to next table (priority 500).
+ */
+static void
+build_mcast_lookup_flows_for_lrouter(
+        struct ovn_datapath *od, struct hmap *lflows,
+        struct ds *match, struct ds *actions)
+{
+    if (od->nbr) {
 
-        for (int i = 0; i < od->nbr->n_load_balancer; i++) {
-            struct nbrec_load_balancer *nb_lb = od->nbr->load_balancer[i];
-            struct ovn_northd_lb *lb =
-                ovn_northd_lb_find(lbs, &nb_lb->header_.uuid);
-            ovs_assert(lb);
+        /* Drop IPv6 multicast traffic that shouldn't be forwarded,
+         * i.e., router solicitation and router advertisement.
+         */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
+                      "nd_rs || nd_ra", "drop;");
+        if (!od->mcast_info.rtr.relay) {
+            return;
+        }
 
-            for (size_t j = 0; j < lb->n_vips; j++) {
-                struct ovn_lb_vip *lb_vip = &lb->vips[j];
-                struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[j];
-                ds_clear(&actions);
-                build_lb_vip_actions(lb_vip, lb_vip_nb, &actions,
-                                     lb->selection_fields, false);
+        struct ovn_igmp_group *igmp_group;
 
-                if (!sset_contains(&all_ips, lb_vip->vip_str)) {
-                    sset_add(&all_ips, lb_vip->vip_str);
-                    /* If there are any load balancing rules, we should send
-                     * the packet to conntrack for defragmentation and
-                     * tracking.  This helps with two things.
-                     *
-                     * 1. With tracking, we can send only new connections to
-                     *    pick a DNAT ip address from a group.
-                     * 2. If there are L4 ports in load balancing rules, we
-                     *    need the defragmentation to match on L4 ports. */
-                    ds_clear(&match);
-                    if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) {
-                        ds_put_format(&match, "ip && ip4.dst == %s",
-                                      lb_vip->vip_str);
-                    } else {
-                        ds_put_format(&match, "ip && ip6.dst == %s",
-                                      lb_vip->vip_str);
-                    }
-                    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG,
-                                            100, ds_cstr(&match), "ct_next;",
-                                            &nb_lb->header_);
-                }
+        LIST_FOR_EACH (igmp_group, list_node, &od->mcast_info.groups) {
+            ds_clear(match);
+            ds_clear(actions);
+            if (IN6_IS_ADDR_V4MAPPED(&igmp_group->address)) {
+                ds_put_format(match, "ip4 && ip4.dst == %s ",
+                            igmp_group->mcgroup.name);
+            } else {
+                ds_put_format(match, "ip6 && ip6.dst == %s ",
+                            igmp_group->mcgroup.name);
+            }
+            if (od->mcast_info.rtr.flood_static) {
+                ds_put_cstr(actions,
+                            "clone { "
+                                "outport = \""MC_STATIC"\"; "
+                                "ip.ttl--; "
+                                "next; "
+                            "};");
+            }
+            ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;",
+                          igmp_group->mcgroup.name);
+            ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 500,
+                                 ds_cstr(match), ds_cstr(actions));
+        }
 
-                /* Higher priority rules are added for load-balancing in DNAT
-                 * table.  For every match (on a VIP[:port]), we add two flows
-                 * via add_router_lb_flow().  One flow is for specific matching
-                 * on ct.new with an action of "ct_lb($targets);".  The other
-                 * flow is for ct.est with an action of "ct_dnat;". */
-                ds_clear(&match);
-                if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) {
-                    ds_put_format(&match, "ip && ip4.dst == %s",
-                                  lb_vip->vip_str);
-                } else {
-                    ds_put_format(&match, "ip && ip6.dst == %s",
-                                  lb_vip->vip_str);
-                }
+        /* If needed, flood unregistered multicast on statically configured
+         * ports. Otherwise drop any multicast traffic.
+         */
+        if (od->mcast_info.rtr.flood_static) {
+            ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
+                          "ip4.mcast || ip6.mcast",
+                          "clone { "
+                                "outport = \""MC_STATIC"\"; "
+                                "ip.ttl--; "
+                                "next; "
+                          "};");
+        } else {
+            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
+                          "ip4.mcast || ip6.mcast", "drop;");
+        }
+    }
+}
 
-                int prio = 110;
-                bool is_udp = nullable_string_is_equal(nb_lb->protocol, "udp");
-                bool is_sctp = nullable_string_is_equal(nb_lb->protocol,
-                                                        "sctp");
-                const char *proto = is_udp ? "udp" : is_sctp ? "sctp" : "tcp";
+/* Logical router ingress table POLICY: Policy.
+ *
+ * A packet that arrives at this table is an IP packet that should be
+ * permitted/denied/rerouted to the address in the rule's nexthop.
+ * This table sets outport to the correct out_port,
+ * eth.src to the output port's MAC address,
+ * and REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address
+ * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and
+ * advances to the next table for ARP/ND resolution. */
+static void
+build_ingress_policy_flows_for_lrouter(
+        struct ovn_datapath *od, struct hmap *lflows,
+        struct hmap *ports)
+{
+    if (od->nbr) {
+        /* This is a catch-all rule. It has the lowest priority (0),
+         * matches everything ("1") and passes through (next). */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1",
+                      REG_ECMP_GROUP_ID" = 0; next;");
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY_ECMP, 150,
+                      REG_ECMP_GROUP_ID" == 0", "next;");
 
-                if (lb_vip->vip_port) {
-                    ds_put_format(&match, " && %s && %s.dst == %d", proto,
-                                  proto, lb_vip->vip_port);
-                    prio = 120;
-                }
+        /* Convert routing policies to flows. */
+        uint16_t ecmp_group_id = 1;
+        for (int i = 0; i < od->nbr->n_policies; i++) {
+            const struct nbrec_logical_router_policy *rule
+                = od->nbr->policies[i];
+            bool is_ecmp_reroute =
+                (!strcmp(rule->action, "reroute") && rule->n_nexthops > 1);
 
-                if (od->l3redirect_port &&
-                    (lb_vip->n_backends || !lb_vip->empty_backend_rej)) {
-                    ds_put_format(&match, " && is_chassis_resident(%s)",
-                                  od->l3redirect_port->json_key);
-                }
-                add_router_lb_flow(lflows, od, &match, &actions, prio,
-                                   lb_force_snat_ip, lb_vip, proto,
-                                   nb_lb, meter_groups, &nat_entries);
+            if (is_ecmp_reroute) {
+                build_ecmp_routing_policy_flows(lflows, od, ports, rule,
+                                                ecmp_group_id);
+                ecmp_group_id++;
+            } else {
+                build_routing_policy_flow(lflows, od, ports, rule,
+                                          &rule->header_);
             }
         }
-        sset_destroy(&all_ips);
-        sset_destroy(&nat_entries);
     }
-
-    ds_destroy(&match);
-    ds_destroy(&actions);
 }
 
-/* Logical router ingress Table 0: L2 Admission Control
- * Generic admission control flows (without inport check).
- */
+/* Logical router ingress table ARP_RESOLVE: ARP Resolution. */
 static void
-build_adm_ctrl_flows_for_lrouter(
+build_arp_resolve_flows_for_lrouter(
         struct ovn_datapath *od, struct hmap *lflows)
 {
     if (od->nbr) {
-        /* Logical VLANs not supported.
-         * Broadcast/multicast source address is invalid. */
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
-                      "vlan.present || eth.src[40]", "drop;");
+        /* Multicast packets already have the outport set so just advance to
+         * next table (priority 500). */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 500,
+                      "ip4.mcast || ip6.mcast", "next;");
+
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4",
+                      "get_arp(outport, " REG_NEXT_HOP_IPV4 "); next;");
+
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6",
+                      "get_nd(outport, " REG_NEXT_HOP_IPV6 "); next;");
     }
 }
 
-/* Logical router ingress Table 0: L2 Admission Control
- * This table drops packets that the router shouldn’t see at all based
- * on their Ethernet headers.
- */
-static void
-build_adm_ctrl_flows_for_lrouter_port(
+/* Logical router ingress table ARP_RESOLVE: ARP Resolution.
+ *
+ * Any unicast packet that reaches this table is an IP packet whose
+ * next-hop IP address is in REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6
+ * (ip4.dst/ip6.dst is the final destination).
+ * This table resolves the IP address in
+ * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 into an output port in outport and
+ * an Ethernet address in eth.dst.
+ */
+static void
+build_arp_resolve_flows_for_lrouter_port(
         struct ovn_port *op, struct hmap *lflows,
+        struct hmap *ports,
         struct ds *match, struct ds *actions)
 {
-    if (op->nbrp) {
-        if (!lrport_is_enabled(op->nbrp)) {
-            /* Drop packets from disabled logical ports (since logical flow
-             * tables are default-drop). */
-            return;
-        }
+    if (op->nbsp && !lsp_is_enabled(op->nbsp)) {
+        return;
+    }
 
-        if (op->derived) {
-            /* No ingress packets should be received on a chassisredirect
-             * port. */
-            return;
-        }
+    if (op->nbrp) {
+        /* This is a logical router port. If next-hop IP address in
+         * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 matches IP address of this
+         * router port, then the packet is intended to eventually be sent
+         * to this logical port. Set the destination mac address using
+         * this port's mac address.
+         *
+         * The packet is still in peer's logical pipeline. So the match
+         * should be on peer's outport. */
+        if (op->peer && op->nbrp->peer) {
+            if (op->lrp_networks.n_ipv4_addrs) {
+                ds_clear(match);
+                ds_put_format(match, "outport == %s && "
+                              REG_NEXT_HOP_IPV4 "== ",
+                              op->peer->json_key);
+                op_put_v4_networks(match, op, false);
 
-        /* Store the ethernet address of the port receiving the packet.
-         * This will save us from having to match on inport further down in
-         * the pipeline.
-         */
-        ds_clear(actions);
-        ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
-                      op->lrp_networks.ea_s);
+                ds_clear(actions);
+                ds_put_format(actions, "eth.dst = %s; next;",
+                              op->lrp_networks.ea_s);
+                ovn_lflow_add_with_hint(lflows, op->peer->od,
+                                        S_ROUTER_IN_ARP_RESOLVE, 100,
+                                        ds_cstr(match), ds_cstr(actions),
+                                        &op->nbrp->header_);
+            }
 
-        ds_clear(match);
-        ds_put_format(match, "eth.mcast && inport == %s", op->json_key);
-        ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
-                                ds_cstr(match), ds_cstr(actions),
-                                &op->nbrp->header_);
+            if (op->lrp_networks.n_ipv6_addrs) {
+                ds_clear(match);
+                ds_put_format(match, "outport == %s && "
+                              REG_NEXT_HOP_IPV6 " == ",
+                              op->peer->json_key);
+                op_put_v6_networks(match, op);
 
-        ds_clear(match);
-        ds_put_format(match, "eth.dst == %s && inport == %s",
-                      op->lrp_networks.ea_s, op->json_key);
-        if (op->od->l3dgw_port && op == op->od->l3dgw_port
-            && op->od->l3redirect_port) {
-            /* Traffic with eth.dst = l3dgw_port->lrp_networks.ea_s
-             * should only be received on the gateway chassis. */
-            ds_put_format(match, " && is_chassis_resident(%s)",
-                          op->od->l3redirect_port->json_key);
+                ds_clear(actions);
+                ds_put_format(actions, "eth.dst = %s; next;",
+                              op->lrp_networks.ea_s);
+                ovn_lflow_add_with_hint(lflows, op->peer->od,
+                                        S_ROUTER_IN_ARP_RESOLVE, 100,
+                                        ds_cstr(match), ds_cstr(actions),
+                                        &op->nbrp->header_);
+            }
         }
-        ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
-                                ds_cstr(match),  ds_cstr(actions),
-                                &op->nbrp->header_);
-    }
-}
 
+        if (!op->derived && op->od->l3redirect_port) {
+            const char *redirect_type = smap_get(&op->nbrp->options,
+                                                 "redirect-type");
+            if (redirect_type && !strcasecmp(redirect_type, "bridged")) {
+                /* Packet is on a non-gateway chassis and
+                 * has an unresolved ARP on a network behind gateway
+                 * chassis attached router port. Since redirect type
+                 * is "bridged", instead of calling "get_arp"
+                 * on this node, we will redirect the packet to gateway
+                 * chassis, by setting destination mac to router port mac. */
+                ds_clear(match);
+                ds_put_format(match, "outport == %s && "
+                              "!is_chassis_resident(%s)", op->json_key,
+                              op->od->l3redirect_port->json_key);
+                ds_clear(actions);
+                ds_put_format(actions, "eth.dst = %s; next;",
+                              op->lrp_networks.ea_s);
 
-/* Logical router ingress Table 1 and 2: Neighbor lookup and learning
- * lflows for logical routers. */
-static void
-build_neigh_learning_flows_for_lrouter(
-        struct ovn_datapath *od, struct hmap *lflows,
-        struct ds *match, struct ds *actions)
-{
-    if (od->nbr) {
+                ovn_lflow_add_with_hint(lflows, op->od,
+                                        S_ROUTER_IN_ARP_RESOLVE, 50,
+                                        ds_cstr(match), ds_cstr(actions),
+                                        &op->nbrp->header_);
+            }
+        }
 
-        /* Learn MAC bindings from ARP/IPv6 ND.
-         *
-         * For ARP packets, table LOOKUP_NEIGHBOR does a lookup for the
-         * (arp.spa, arp.sha) in the mac binding table using the 'lookup_arp'
-         * action and stores the result in REGBIT_LOOKUP_NEIGHBOR_RESULT bit.
-         * If "always_learn_from_arp_request" is set to false, it will also
-         * lookup for the (arp.spa) in the mac binding table using the
-         * "lookup_arp_ip" action for ARP request packets, and stores the
-         * result in REGBIT_LOOKUP_NEIGHBOR_IP_RESULT bit; or set that bit
-         * to "1" directly for ARP response packets.
-         *
-         * For IPv6 ND NA packets, table LOOKUP_NEIGHBOR does a lookup
-         * for the (nd.target, nd.tll) in the mac binding table using the
-         * 'lookup_nd' action and stores the result in
-         * REGBIT_LOOKUP_NEIGHBOR_RESULT bit. If
-         * "always_learn_from_arp_request" is set to false,
-         * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT bit is set.
-         *
-         * For IPv6 ND NS packets, table LOOKUP_NEIGHBOR does a lookup
-         * for the (ip6.src, nd.sll) in the mac binding table using the
-         * 'lookup_nd' action and stores the result in
-         * REGBIT_LOOKUP_NEIGHBOR_RESULT bit. If
-         * "always_learn_from_arp_request" is set to false, it will also lookup
-         * for the (ip6.src) in the mac binding table using the "lookup_nd_ip"
-         * action and stores the result in REGBIT_LOOKUP_NEIGHBOR_IP_RESULT
-         * bit.
-         *
-         * Table LEARN_NEIGHBOR learns the mac-binding using the action
-         * - 'put_arp/put_nd'. Learning mac-binding is skipped if
-         *   REGBIT_LOOKUP_NEIGHBOR_RESULT bit is set or
-         *   REGBIT_LOOKUP_NEIGHBOR_IP_RESULT is not set.
+        /* Drop IP traffic destined to router owned IPs. Part of it is dropped
+         * in stage "lr_in_ip_input" but traffic that could have been unSNATed
+         * but didn't match any existing session might still end up here.
          *
-         * */
-
-        /* Flows for LOOKUP_NEIGHBOR. */
-        bool learn_from_arp_request = smap_get_bool(&od->nbr->options,
-            "always_learn_from_arp_request", true);
-        ds_clear(actions);
-        ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
-                      " = lookup_arp(inport, arp.spa, arp.sha); %snext;",
-                      learn_from_arp_request ? "" :
-                      REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ");
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100,
-                      "arp.op == 2", ds_cstr(actions));
+         * Priority 1.
+         */
+        build_lrouter_drop_own_dest(op, S_ROUTER_IN_ARP_RESOLVE, 1, true,
+                                    lflows);
+    } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp)
+               && strcmp(op->nbsp->type, "virtual")) {
+        /* This is a logical switch port that backs a VM or a container.
+         * Extract its addresses. For each of the addresses, go through all
+         * the router ports attached to the switch (to which this port
+         * connects) and if the address in question is reachable from the
+         * router port, add an ARP/ND entry in that router's pipeline. */
 
-        ds_clear(actions);
-        ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
-                      " = lookup_nd(inport, nd.target, nd.tll); %snext;",
-                      learn_from_arp_request ? "" :
-                      REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ");
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_na",
-                      ds_cstr(actions));
+        for (size_t i = 0; i < op->n_lsp_addrs; i++) {
+            const char *ea_s = op->lsp_addrs[i].ea_s;
+            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
+                const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
+                for (size_t k = 0; k < op->od->n_router_ports; k++) {
+                    /* Get the Logical_Router_Port that the
+                     * Logical_Switch_Port is connected to, as
+                     * 'peer'. */
+                    const char *peer_name = smap_get(
+                        &op->od->router_ports[k]->nbsp->options,
+                        "router-port");
+                    if (!peer_name) {
+                        continue;
+                    }
 
-        ds_clear(actions);
-        ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
-                      " = lookup_nd(inport, ip6.src, nd.sll); %snext;",
-                      learn_from_arp_request ? "" :
-                      REGBIT_LOOKUP_NEIGHBOR_IP_RESULT
-                      " = lookup_nd_ip(inport, ip6.src); ");
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_ns",
-                      ds_cstr(actions));
+                    struct ovn_port *peer = ovn_port_find(ports, peer_name);
+                    if (!peer || !peer->nbrp) {
+                        continue;
+                    }
 
-        /* For other packet types, we can skip neighbor learning.
-         * So set REGBIT_LOOKUP_NEIGHBOR_RESULT to 1. */
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 0, "1",
-                      REGBIT_LOOKUP_NEIGHBOR_RESULT" = 1; next;");
+                    if (!find_lrp_member_ip(peer, ip_s)) {
+                        continue;
+                    }
 
-        /* Flows for LEARN_NEIGHBOR. */
-        /* Skip Neighbor learning if not required. */
-        ds_clear(match);
-        ds_put_format(match, REGBIT_LOOKUP_NEIGHBOR_RESULT" == 1%s",
-                      learn_from_arp_request ? "" :
-                      " || "REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" == 0");
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 100,
-                      ds_cstr(match), "next;");
+                    ds_clear(match);
+                    ds_put_format(match, "outport == %s && "
+                                  REG_NEXT_HOP_IPV4 " == %s",
+                                  peer->json_key, ip_s);
 
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
-                      "arp", "put_arp(inport, arp.spa, arp.sha); next;");
+                    ds_clear(actions);
+                    ds_put_format(actions, "eth.dst = %s; next;", ea_s);
+                    ovn_lflow_add_with_hint(lflows, peer->od,
+                                            S_ROUTER_IN_ARP_RESOLVE, 100,
+                                            ds_cstr(match),
+                                            ds_cstr(actions),
+                                            &op->nbsp->header_);
+                }
+            }
 
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
-                      "nd_na", "put_nd(inport, nd.target, nd.tll); next;");
+            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
+                const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
+                for (size_t k = 0; k < op->od->n_router_ports; k++) {
+                    /* Get the Logical_Router_Port that the
+                     * Logical_Switch_Port is connected to, as
+                     * 'peer'. */
+                    const char *peer_name = smap_get(
+                        &op->od->router_ports[k]->nbsp->options,
+                        "router-port");
+                    if (!peer_name) {
+                        continue;
+                    }
 
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_LEARN_NEIGHBOR, 90,
-                      "nd_ns", "put_nd(inport, ip6.src, nd.sll); next;");
-    }
-
-}
+                    struct ovn_port *peer = ovn_port_find(ports, peer_name);
+                    if (!peer || !peer->nbrp) {
+                        continue;
+                    }
 
-/* Logical router ingress Table 1: Neighbor lookup lflows
- * for logical router ports. */
-static void
-build_neigh_learning_flows_for_lrouter_port(
-        struct ovn_port *op, struct hmap *lflows,
-        struct ds *match, struct ds *actions)
-{
-    if (op->nbrp) {
+                    if (!find_lrp_member_ip(peer, ip_s)) {
+                        continue;
+                    }
 
-        bool learn_from_arp_request = smap_get_bool(&op->od->nbr->options,
-            "always_learn_from_arp_request", true);
+                    ds_clear(match);
+                    ds_put_format(match, "outport == %s && "
+                                  REG_NEXT_HOP_IPV6 " == %s",
+                                  peer->json_key, ip_s);
 
-        /* Check if we need to learn mac-binding from ARP requests. */
-        for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
-            if (!learn_from_arp_request) {
-                /* ARP request to this address should always get learned,
-                 * so add a priority-110 flow to set
-                 * REGBIT_LOOKUP_NEIGHBOR_IP_RESULT to 1. */
-                ds_clear(match);
-                ds_put_format(match,
-                              "inport == %s && arp.spa == %s/%u && "
-                              "arp.tpa == %s && arp.op == 1",
-                              op->json_key,
-                              op->lrp_networks.ipv4_addrs[i].network_s,
-                              op->lrp_networks.ipv4_addrs[i].plen,
-                              op->lrp_networks.ipv4_addrs[i].addr_s);
-                if (op->od->l3dgw_port && op == op->od->l3dgw_port
-                    && op->od->l3redirect_port) {
-                    ds_put_format(match, " && is_chassis_resident(%s)",
-                                  op->od->l3redirect_port->json_key);
+                    ds_clear(actions);
+                    ds_put_format(actions, "eth.dst = %s; next;", ea_s);
+                    ovn_lflow_add_with_hint(lflows, peer->od,
+                                            S_ROUTER_IN_ARP_RESOLVE, 100,
+                                            ds_cstr(match),
+                                            ds_cstr(actions),
+                                            &op->nbsp->header_);
                 }
-                const char *actions_s = REGBIT_LOOKUP_NEIGHBOR_RESULT
-                                  " = lookup_arp(inport, arp.spa, arp.sha); "
-                                  REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1;"
-                                  " next;";
-                ovn_lflow_add_with_hint(lflows, op->od,
-                                        S_ROUTER_IN_LOOKUP_NEIGHBOR, 110,
-                                        ds_cstr(match), actions_s,
-                                        &op->nbrp->header_);
-            }
-            ds_clear(match);
-            ds_put_format(match,
-                          "inport == %s && arp.spa == %s/%u && arp.op == 1",
-                          op->json_key,
-                          op->lrp_networks.ipv4_addrs[i].network_s,
-                          op->lrp_networks.ipv4_addrs[i].plen);
-            if (op->od->l3dgw_port && op == op->od->l3dgw_port
-                && op->od->l3redirect_port) {
-                ds_put_format(match, " && is_chassis_resident(%s)",
-                              op->od->l3redirect_port->json_key);
             }
-            ds_clear(actions);
-            ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT
-                          " = lookup_arp(inport, arp.spa, arp.sha); %snext;",
-                          learn_from_arp_request ? "" :
-                          REGBIT_LOOKUP_NEIGHBOR_IP_RESULT
-                          " = lookup_arp_ip(inport, arp.spa); ");
-            ovn_lflow_add_with_hint(lflows, op->od,
-                                    S_ROUTER_IN_LOOKUP_NEIGHBOR, 100,
-                                    ds_cstr(match), ds_cstr(actions),
-                                    &op->nbrp->header_);
         }
-    }
-}
-
-/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: IPv6 Router
- * Adv (RA) options and response. */
-static void
-build_ND_RA_flows_for_lrouter_port(
-        struct ovn_port *op, struct hmap *lflows,
-        struct ds *match, struct ds *actions)
-{
-    if (!op->nbrp || op->nbrp->peer || !op->peer) {
-        return;
-    }
+    } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp)
+               && !strcmp(op->nbsp->type, "virtual")) {
+        /* This is a virtual port. Add ARP replies for the virtual ip with
+         * the mac of the present active virtual parent.
+         * If the logical port doesn't have virtual parent set in
+         * Port_Binding table, then add the flow to set eth.dst to
+         * 00:00:00:00:00:00 and advance to next table so that ARP is
+         * resolved by router pipeline using the arp{} action.
+         * The MAC_Binding entry for the virtual ip might be invalid. */
+        ovs_be32 ip;
 
-    if (!op->lrp_networks.n_ipv6_addrs) {
-        return;
-    }
+        const char *vip = smap_get(&op->nbsp->options,
+                                   "virtual-ip");
+        const char *virtual_parents = smap_get(&op->nbsp->options,
+                                               "virtual-parents");
+        if (!vip || !virtual_parents ||
+            !ip_parse(vip, &ip) || !op->sb) {
+            return;
+        }
 
-    struct smap options;
-    smap_clone(&options, &op->sb->options);
+        if (!op->sb->virtual_parent || !op->sb->virtual_parent[0] ||
+            !op->sb->chassis) {
+            /* The virtual port is not claimed yet. */
+            for (size_t i = 0; i < op->od->n_router_ports; i++) {
+                const char *peer_name = smap_get(
+                    &op->od->router_ports[i]->nbsp->options,
+                    "router-port");
+                if (!peer_name) {
+                    continue;
+                }
 
-    /* enable IPv6 prefix delegation */
-    bool prefix_delegation = smap_get_bool(&op->nbrp->options,
-                                           "prefix_delegation", false);
-    if (!lrport_is_enabled(op->nbrp)) {
-        prefix_delegation = false;
-    }
-    smap_add(&options, "ipv6_prefix_delegation",
-             prefix_delegation ? "true" : "false");
+                struct ovn_port *peer = ovn_port_find(ports, peer_name);
+                if (!peer || !peer->nbrp) {
+                    continue;
+                }
 
-    bool ipv6_prefix = smap_get_bool(&op->nbrp->options,
-                                     "prefix", false);
-    if (!lrport_is_enabled(op->nbrp)) {
-        ipv6_prefix = false;
-    }
-    smap_add(&options, "ipv6_prefix",
-             ipv6_prefix ? "true" : "false");
-    sbrec_port_binding_set_options(op->sb, &options);
+                if (find_lrp_member_ip(peer, vip)) {
+                    ds_clear(match);
+                    ds_put_format(match, "outport == %s && "
+                                  REG_NEXT_HOP_IPV4 " == %s",
+                                  peer->json_key, vip);
 
-    smap_destroy(&options);
+                    const char *arp_actions =
+                                  "eth.dst = 00:00:00:00:00:00; next;";
+                    ovn_lflow_add_with_hint(lflows, peer->od,
+                                            S_ROUTER_IN_ARP_RESOLVE, 100,
+                                            ds_cstr(match),
+                                            arp_actions,
+                                            &op->nbsp->header_);
+                    break;
+                }
+            }
+        } else {
+            struct ovn_port *vp =
+                ovn_port_find(ports, op->sb->virtual_parent);
+            if (!vp || !vp->nbsp) {
+                return;
+            }
 
-    const char *address_mode = smap_get(
-        &op->nbrp->ipv6_ra_configs, "address_mode");
+            for (size_t i = 0; i < vp->n_lsp_addrs; i++) {
+                bool found_vip_network = false;
+                const char *ea_s = vp->lsp_addrs[i].ea_s;
+                for (size_t j = 0; j < vp->od->n_router_ports; j++) {
+                    /* Get the Logical_Router_Port that the
+                     * Logical_Switch_Port is connected to, as
+                     * 'peer'. */
+                    const char *peer_name = smap_get(
+                        &vp->od->router_ports[j]->nbsp->options,
+                        "router-port");
+                    if (!peer_name) {
+                        continue;
+                    }
 
-    if (!address_mode) {
-        return;
-    }
-    if (strcmp(address_mode, "slaac") &&
-        strcmp(address_mode, "dhcpv6_stateful") &&
-        strcmp(address_mode, "dhcpv6_stateless")) {
-        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
-        VLOG_WARN_RL(&rl, "Invalid address mode [%s] defined",
-                     address_mode);
-        return;
-    }
+                    struct ovn_port *peer =
+                        ovn_port_find(ports, peer_name);
+                    if (!peer || !peer->nbrp) {
+                        continue;
+                    }
 
-    if (smap_get_bool(&op->nbrp->ipv6_ra_configs, "send_periodic",
-                      false)) {
-        copy_ra_to_sb(op, address_mode);
-    }
+                    if (!find_lrp_member_ip(peer, vip)) {
+                        continue;
+                    }
 
-    ds_clear(match);
-    ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && nd_rs",
-                          op->json_key);
-    ds_clear(actions);
+                    ds_clear(match);
+                    ds_put_format(match, "outport == %s && "
+                                  REG_NEXT_HOP_IPV4 " == %s",
+                                  peer->json_key, vip);
 
-    const char *mtu_s = smap_get(
-        &op->nbrp->ipv6_ra_configs, "mtu");
+                    ds_clear(actions);
+                    ds_put_format(actions, "eth.dst = %s; next;", ea_s);
+                    ovn_lflow_add_with_hint(lflows, peer->od,
+                                            S_ROUTER_IN_ARP_RESOLVE, 100,
+                                            ds_cstr(match),
+                                            ds_cstr(actions),
+                                            &op->nbsp->header_);
+                    found_vip_network = true;
+                    break;
+                }
 
-    /* As per RFC 2460, 1280 is minimum IPv6 MTU. */
-    uint32_t mtu = (mtu_s && atoi(mtu_s) >= 1280) ? atoi(mtu_s) : 0;
+                if (found_vip_network) {
+                    break;
+                }
+            }
+        }
+    } else if (lsp_is_router(op->nbsp)) {
+        /* This is a logical switch port that connects to a router. */
 
-    ds_put_format(actions, REGBIT_ND_RA_OPTS_RESULT" = put_nd_ra_opts("
-                  "addr_mode = \"%s\", slla = %s",
-                  address_mode, op->lrp_networks.ea_s);
-    if (mtu > 0) {
-        ds_put_format(actions, ", mtu = %u", mtu);
-    }
+        /* The peer of this switch port is the router port for which
+         * we need to add logical flows such that it can resolve
+         * ARP entries for all the other router ports connected to
+         * the switch in question. */
 
-    const char *prf = smap_get_def(
-        &op->nbrp->ipv6_ra_configs, "router_preference", "MEDIUM");
-    if (strcmp(prf, "MEDIUM")) {
-        ds_put_format(actions, ", router_preference = \"%s\"", prf);
-    }
+        const char *peer_name = smap_get(&op->nbsp->options,
+                                         "router-port");
+        if (!peer_name) {
+            return;
+        }
 
-    bool add_rs_response_flow = false;
+        struct ovn_port *peer = ovn_port_find(ports, peer_name);
+        if (!peer || !peer->nbrp) {
+            return;
+        }
 
-    for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
-        if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
-            continue;
+        if (peer->od->nbr &&
+            smap_get_bool(&peer->od->nbr->options,
+                          "dynamic_neigh_routers", false)) {
+            return;
         }
 
-        ds_put_format(actions, ", prefix = %s/%u",
-                      op->lrp_networks.ipv6_addrs[i].network_s,
-                      op->lrp_networks.ipv6_addrs[i].plen);
+        for (size_t i = 0; i < op->od->n_router_ports; i++) {
+            const char *router_port_name = smap_get(
+                                &op->od->router_ports[i]->nbsp->options,
+                                "router-port");
+            struct ovn_port *router_port = ovn_port_find(ports,
+                                                         router_port_name);
+            if (!router_port || !router_port->nbrp) {
+                continue;
+            }
 
-        add_rs_response_flow = true;
-    }
+            /* Skip the router port under consideration. */
+            if (router_port == peer) {
+               continue;
+            }
 
-    if (add_rs_response_flow) {
-        ds_put_cstr(actions, "); next;");
-        ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_ND_RA_OPTIONS,
-                                50, ds_cstr(match), ds_cstr(actions),
-                                &op->nbrp->header_);
-        ds_clear(actions);
-        ds_clear(match);
-        ds_put_format(match, "inport == %s && ip6.dst == ff02::2 && "
-                      "nd_ra && "REGBIT_ND_RA_OPTS_RESULT, op->json_key);
+            if (router_port->lrp_networks.n_ipv4_addrs) {
+                ds_clear(match);
+                ds_put_format(match, "outport == %s && "
+                              REG_NEXT_HOP_IPV4 " == ",
+                              peer->json_key);
+                op_put_v4_networks(match, router_port, false);
 
-        char ip6_str[INET6_ADDRSTRLEN + 1];
-        struct in6_addr lla;
-        in6_generate_lla(op->lrp_networks.ea, &lla);
-        memset(ip6_str, 0, sizeof(ip6_str));
-        ipv6_string_mapped(ip6_str, &lla);
-        ds_put_format(actions, "eth.dst = eth.src; eth.src = %s; "
-                      "ip6.dst = ip6.src; ip6.src = %s; "
-                      "outport = inport; flags.loopback = 1; "
-                      "output;",
-                      op->lrp_networks.ea_s, ip6_str);
-        ovn_lflow_add_with_hint(lflows, op->od,
-                                S_ROUTER_IN_ND_RA_RESPONSE, 50,
-                                ds_cstr(match), ds_cstr(actions),
-                                &op->nbrp->header_);
-    }
-}
+                ds_clear(actions);
+                ds_put_format(actions, "eth.dst = %s; next;",
+                                          router_port->lrp_networks.ea_s);
+                ovn_lflow_add_with_hint(lflows, peer->od,
+                                        S_ROUTER_IN_ARP_RESOLVE, 100,
+                                        ds_cstr(match), ds_cstr(actions),
+                                        &op->nbsp->header_);
+            }
 
-/* Logical router ingress table ND_RA_OPTIONS & ND_RA_RESPONSE: RS
- * responder, by default goto next. (priority 0). */
-static void
-build_ND_RA_flows_for_lrouter(struct ovn_datapath *od, struct hmap *lflows)
-{
-    if (od->nbr) {
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_OPTIONS, 0, "1", "next;");
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_ND_RA_RESPONSE, 0, "1", "next;");
+            if (router_port->lrp_networks.n_ipv6_addrs) {
+                ds_clear(match);
+                ds_put_format(match, "outport == %s && "
+                              REG_NEXT_HOP_IPV6 " == ",
+                              peer->json_key);
+                op_put_v6_networks(match, router_port);
+
+                ds_clear(actions);
+                ds_put_format(actions, "eth.dst = %s; next;",
+                              router_port->lrp_networks.ea_s);
+                ovn_lflow_add_with_hint(lflows, peer->od,
+                                        S_ROUTER_IN_ARP_RESOLVE, 100,
+                                        ds_cstr(match), ds_cstr(actions),
+                                        &op->nbsp->header_);
+            }
+        }
     }
+
 }
 
-/* Logical router ingress table IP_ROUTING : IP Routing.
+/* Logical router ingress table CHK_PKT_LEN: Check packet length.
  *
- * A packet that arrives at this table is an IP packet that should be
- * routed to the address in 'ip[46].dst'.
+ * For any packet with outport set to the distributed gateway
+ * router port, check the packet length and store the result in the
+ * 'REGBIT_PKT_LARGER' register bit.
  *
- * For regular routes without ECMP, table IP_ROUTING sets outport to the
- * correct output port, eth.src to the output port's MAC address, and
- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address
- * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and
- * advances to the next table.
+ * Logical router ingress table LARGER_PKTS: Handle larger packets.
  *
- * For ECMP routes, i.e. multiple routes with same policy and prefix, table
- * IP_ROUTING remembers ECMP group id and selects a member id, and advances
- * to table IP_ROUTING_ECMP, which sets outport, eth.src and
- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 for the selected ECMP member.
- */
+ * For any IPv4 (or IPv6) packet with outport set to the distributed
+ * gateway router port and the 'REGBIT_PKT_LARGER' register bit set,
+ * generate an ICMPv4 type 3 (Destination Unreachable) code 4
+ * (Fragmentation needed) or ICMPv6 type 2 (Packet Too Big) error.
+ * */
 static void
-build_ip_routing_flows_for_lrouter_port(
-        struct ovn_port *op, struct hmap *lflows)
+build_check_pkt_len_flows_for_lrouter(
+        struct ovn_datapath *od, struct hmap *lflows,
+        struct hmap *ports,
+        struct ds *match, struct ds *actions)
 {
-    if (op->nbrp) {
+    if (od->nbr) {
 
-        for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
-            add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
-                      op->lrp_networks.ipv4_addrs[i].network_s,
-                      op->lrp_networks.ipv4_addrs[i].plen, NULL, false,
-                      &op->nbrp->header_);
-        }
+        /* Packets are allowed by default. */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 0, "1",
+                      "next;");
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_LARGER_PKTS, 0, "1",
+                      "next;");
 
-        for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
-            add_route(lflows, op, op->lrp_networks.ipv6_addrs[i].addr_s,
-                      op->lrp_networks.ipv6_addrs[i].network_s,
-                      op->lrp_networks.ipv6_addrs[i].plen, NULL, false,
-                      &op->nbrp->header_);
+        if (od->l3dgw_port && od->l3redirect_port) {
+            int gw_mtu = 0;
+            if (od->l3dgw_port->nbrp) {
+                 gw_mtu = smap_get_int(&od->l3dgw_port->nbrp->options,
+                                       "gateway_mtu", 0);
+            }
+            /* Add the flows only if gateway_mtu is configured. */
+            if (gw_mtu <= 0) {
+                return;
+            }
+
+            ds_clear(match);
+            ds_put_format(match, "outport == %s", od->l3dgw_port->json_key);
+
+            ds_clear(actions);
+            ds_put_format(actions,
+                          REGBIT_PKT_LARGER" = check_pkt_larger(%d);"
+                          " next;", gw_mtu + VLAN_ETH_HEADER_LEN);
+            ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 50,
+                                    ds_cstr(match), ds_cstr(actions),
+                                    &od->l3dgw_port->nbrp->header_);
+
+            for (size_t i = 0; i < od->nbr->n_ports; i++) {
+                struct ovn_port *rp = ovn_port_find(ports,
+                                                    od->nbr->ports[i]->name);
+                if (!rp || rp == od->l3dgw_port) {
+                    continue;
+                }
+
+                if (rp->lrp_networks.ipv4_addrs) {
+                    ds_clear(match);
+                    ds_put_format(match, "inport == %s && outport == %s"
+                                  " && ip4 && "REGBIT_PKT_LARGER,
+                                  rp->json_key, od->l3dgw_port->json_key);
+
+                    ds_clear(actions);
+                    /* Set icmp4.frag_mtu to gw_mtu */
+                    ds_put_format(actions,
+                        "icmp4_error {"
+                        REGBIT_EGRESS_LOOPBACK" = 1; "
+                        "eth.dst = %s; "
+                        "ip4.dst = ip4.src; "
+                        "ip4.src = %s; "
+                        "ip.ttl = 255; "
+                        "icmp4.type = 3; /* Destination Unreachable. */ "
+                        "icmp4.code = 4; /* Frag Needed and DF was Set. */ "
+                        "icmp4.frag_mtu = %d; "
+                        "next(pipeline=ingress, table=%d); };",
+                        rp->lrp_networks.ea_s,
+                        rp->lrp_networks.ipv4_addrs[0].addr_s,
+                        gw_mtu,
+                        ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
+                    ovn_lflow_add_with_hint(lflows, od,
+                                            S_ROUTER_IN_LARGER_PKTS, 50,
+                                            ds_cstr(match), ds_cstr(actions),
+                                            &rp->nbrp->header_);
+                }
+
+                if (rp->lrp_networks.ipv6_addrs) {
+                    ds_clear(match);
+                    ds_put_format(match, "inport == %s && outport == %s"
+                                  " && ip6 && "REGBIT_PKT_LARGER,
+                                  rp->json_key, od->l3dgw_port->json_key);
+
+                    ds_clear(actions);
+                    /* Set icmp6.frag_mtu to gw_mtu */
+                    ds_put_format(actions,
+                        "icmp6_error {"
+                        REGBIT_EGRESS_LOOPBACK" = 1; "
+                        "eth.dst = %s; "
+                        "ip6.dst = ip6.src; "
+                        "ip6.src = %s; "
+                        "ip.ttl = 255; "
+                        "icmp6.type = 2; /* Packet Too Big. */ "
+                        "icmp6.code = 0; "
+                        "icmp6.frag_mtu = %d; "
+                        "next(pipeline=ingress, table=%d); };",
+                        rp->lrp_networks.ea_s,
+                        rp->lrp_networks.ipv6_addrs[0].addr_s,
+                        gw_mtu,
+                        ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
+                    ovn_lflow_add_with_hint(lflows, od,
+                                            S_ROUTER_IN_LARGER_PKTS, 50,
+                                            ds_cstr(match), ds_cstr(actions),
+                                            &rp->nbrp->header_);
+                }
+            }
         }
     }
 }
 
+/* Logical router ingress table GW_REDIRECT: Gateway redirect.
+ *
+ * For traffic with outport equal to the l3dgw_port
+ * on a distributed router, this table redirects a subset
+ * of the traffic to the l3redirect_port which represents
+ * the central instance of the l3dgw_port.
+ */
 static void
-build_static_route_flows_for_lrouter(
+build_gateway_redirect_flows_for_lrouter(
         struct ovn_datapath *od, struct hmap *lflows,
-        struct hmap *ports)
+        struct ds *match, struct ds *actions)
 {
     if (od->nbr) {
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING_ECMP, 150,
-                      REG_ECMP_GROUP_ID" == 0", "next;");
+        if (od->l3dgw_port && od->l3redirect_port) {
+            const struct ovsdb_idl_row *stage_hint = NULL;
 
-        struct hmap ecmp_groups = HMAP_INITIALIZER(&ecmp_groups);
-        struct hmap unique_routes = HMAP_INITIALIZER(&unique_routes);
-        struct ovs_list parsed_routes = OVS_LIST_INITIALIZER(&parsed_routes);
-        struct ecmp_groups_node *group;
-        for (int i = 0; i < od->nbr->n_static_routes; i++) {
-            struct parsed_route *route =
-                parsed_routes_add(&parsed_routes, od->nbr->static_routes[i]);
-            if (!route) {
-                continue;
+            if (od->l3dgw_port->nbrp) {
+                stage_hint = &od->l3dgw_port->nbrp->header_;
             }
-            group = ecmp_groups_find(&ecmp_groups, route);
-            if (group) {
-                ecmp_groups_add_route(group, route);
-            } else {
-                const struct parsed_route *existed_route =
-                    unique_routes_remove(&unique_routes, route);
-                if (existed_route) {
-                    group = ecmp_groups_add(&ecmp_groups, existed_route);
-                    if (group) {
-                        ecmp_groups_add_route(group, route);
-                    }
-                } else {
-                    unique_routes_add(&unique_routes, route);
-                }
-            }
-        }
-        HMAP_FOR_EACH (group, hmap_node, &ecmp_groups) {
-            /* add a flow in IP_ROUTING, and one flow for each member in
-             * IP_ROUTING_ECMP. */
-            build_ecmp_route_flow(lflows, od, ports, group);
-        }
-        const struct unique_routes_node *ur;
-        HMAP_FOR_EACH (ur, hmap_node, &unique_routes) {
-            build_static_route_flow(lflows, od, ports, ur->route);
+
+            /* For traffic with outport == l3dgw_port, if the
+             * packet did not match any higher priority redirect
+             * rule, then the traffic is redirected to the central
+             * instance of the l3dgw_port. */
+            ds_clear(match);
+            ds_put_format(match, "outport == %s",
+                          od->l3dgw_port->json_key);
+            ds_clear(actions);
+            ds_put_format(actions, "outport = %s; next;",
+                          od->l3redirect_port->json_key);
+            ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50,
+                                    ds_cstr(match), ds_cstr(actions),
+                                    stage_hint);
         }
-        ecmp_groups_destroy(&ecmp_groups);
-        unique_routes_destroy(&unique_routes);
-        parsed_routes_destroy(&parsed_routes);
+
+        /* Packets are allowed by default. */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;");
     }
 }
 
-/* IP Multicast lookup. Here we set the output port, adjust TTL and
- * advance to next table (priority 500).
- */
+/* Logical router ingress table ARP_REQUEST: ARP request.
+ *
+ * In the common case where the Ethernet destination has been resolved,
+ * this table outputs the packet (priority 0).  Otherwise, it composes
+ * and sends an ARP request or IPv6 ND NS (priority 100 or 200). */
 static void
-build_mcast_lookup_flows_for_lrouter(
+build_arp_request_flows_for_lrouter(
         struct ovn_datapath *od, struct hmap *lflows,
         struct ds *match, struct ds *actions)
 {
     if (od->nbr) {
+        for (int i = 0; i < od->nbr->n_static_routes; i++) {
+            const struct nbrec_logical_router_static_route *route;
 
-        /* Drop IPv6 multicast traffic that shouldn't be forwarded,
-         * i.e., router solicitation and router advertisement.
-         */
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
-                      "nd_rs || nd_ra", "drop;");
-        if (!od->mcast_info.rtr.relay) {
-            return;
-        }
-
-        struct ovn_igmp_group *igmp_group;
+            route = od->nbr->static_routes[i];
+            struct in6_addr gw_ip6;
+            unsigned int plen;
+            char *error = ipv6_parse_cidr(route->nexthop, &gw_ip6, &plen);
+            if (error || plen != 128) {
+                free(error);
+                continue;
+            }
 
-        LIST_FOR_EACH (igmp_group, list_node, &od->mcast_info.groups) {
             ds_clear(match);
+            ds_put_format(match, "eth.dst == 00:00:00:00:00:00 && "
+                          "ip6 && " REG_NEXT_HOP_IPV6 " == %s",
+                          route->nexthop);
+            struct in6_addr sn_addr;
+            struct eth_addr eth_dst;
+            in6_addr_solicited_node(&sn_addr, &gw_ip6);
+            ipv6_multicast_to_ethernet(&eth_dst, &sn_addr);
+
+            char sn_addr_s[INET6_ADDRSTRLEN + 1];
+            ipv6_string_mapped(sn_addr_s, &sn_addr);
+
             ds_clear(actions);
-            if (IN6_IS_ADDR_V4MAPPED(&igmp_group->address)) {
-                ds_put_format(match, "ip4 && ip4.dst == %s ",
-                            igmp_group->mcgroup.name);
-            } else {
-                ds_put_format(match, "ip6 && ip6.dst == %s ",
-                            igmp_group->mcgroup.name);
-            }
-            if (od->mcast_info.rtr.flood_static) {
-                ds_put_cstr(actions,
-                            "clone { "
-                                "outport = \""MC_STATIC"\"; "
-                                "ip.ttl--; "
-                                "next; "
-                            "};");
-            }
-            ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;",
-                          igmp_group->mcgroup.name);
-            ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 500,
-                                 ds_cstr(match), ds_cstr(actions));
-        }
+            ds_put_format(actions,
+                          "nd_ns { "
+                          "eth.dst = "ETH_ADDR_FMT"; "
+                          "ip6.dst = %s; "
+                          "nd.target = %s; "
+                          "output; "
+                          "};", ETH_ADDR_ARGS(eth_dst), sn_addr_s,
+                          route->nexthop);
 
-        /* If needed, flood unregistered multicast on statically configured
-         * ports. Otherwise drop any multicast traffic.
-         */
-        if (od->mcast_info.rtr.flood_static) {
-            ovn_lflow_add_unique(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
-                          "ip4.mcast || ip6.mcast",
-                          "clone { "
-                                "outport = \""MC_STATIC"\"; "
-                                "ip.ttl--; "
-                                "next; "
-                          "};");
-        } else {
-            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
-                          "ip4.mcast || ip6.mcast", "drop;");
+            ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ARP_REQUEST, 200,
+                                    ds_cstr(match), ds_cstr(actions),
+                                    &route->header_);
         }
+
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
+                      "eth.dst == 00:00:00:00:00:00 && ip4",
+                      "arp { "
+                      "eth.dst = ff:ff:ff:ff:ff:ff; "
+                      "arp.spa = " REG_SRC_IPV4 "; "
+                      "arp.tpa = " REG_NEXT_HOP_IPV4 "; "
+                      "arp.op = 1; " /* ARP request */
+                      "output; "
+                      "};");
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
+                      "eth.dst == 00:00:00:00:00:00 && ip6",
+                      "nd_ns { "
+                      "nd.target = " REG_NEXT_HOP_IPV6 "; "
+                      "output; "
+                      "};");
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
     }
 }
 
-/* Logical router ingress table POLICY: Policy.
+/* Logical router egress table DELIVERY: Delivery (priority 100-110).
  *
- * A packet that arrives at this table is an IP packet that should be
- * permitted/denied/rerouted to the address in the rule's nexthop.
- * This table sets outport to the correct out_port,
- * eth.src to the output port's MAC address,
- * and REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 to the next-hop IP address
- * (leaving 'ip[46].dst', the packet’s final destination, unchanged), and
- * advances to the next table for ARP/ND resolution. */
+ * Priority 100 rules deliver packets to enabled logical ports.
+ * Priority 110 rules match multicast packets and update the source
+ * mac before delivering to enabled logical ports. IP multicast traffic
+ * bypasses S_ROUTER_IN_IP_ROUTING route lookups.
+ */
 static void
-build_ingress_policy_flows_for_lrouter(
-        struct ovn_datapath *od, struct hmap *lflows,
-        struct hmap *ports)
+build_egress_delivery_flows_for_lrouter_port(
+        struct ovn_port *op, struct hmap *lflows,
+        struct ds *match, struct ds *actions)
 {
-    if (od->nbr) {
-        /* This is a catch-all rule. It has the lowest priority (0)
-         * does a match-all("1") and pass-through (next) */
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY, 0, "1",
-                      REG_ECMP_GROUP_ID" = 0; next;");
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_POLICY_ECMP, 150,
-                      REG_ECMP_GROUP_ID" == 0", "next;");
+    if (op->nbrp) {
+        if (!lrport_is_enabled(op->nbrp)) {
+            /* Drop packets to disabled logical ports (since logical flow
+             * tables are default-drop). */
+            return;
+        }
 
-        /* Convert routing policies to flows. */
-        uint16_t ecmp_group_id = 1;
-        for (int i = 0; i < od->nbr->n_policies; i++) {
-            const struct nbrec_logical_router_policy *rule
-                = od->nbr->policies[i];
-            bool is_ecmp_reroute =
-                (!strcmp(rule->action, "reroute") && rule->n_nexthops > 1);
+        if (op->derived) {
+            /* No egress packets should be processed in the context of
+             * a chassisredirect port.  The chassisredirect port should
+             * be replaced by the l3dgw port in the local output
+             * pipeline stage before egress processing. */
+            return;
+        }
 
-            if (is_ecmp_reroute) {
-                build_ecmp_routing_policy_flows(lflows, od, ports, rule,
-                                                ecmp_group_id);
-                ecmp_group_id++;
-            } else {
-                build_routing_policy_flow(lflows, od, ports, rule,
-                                          &rule->header_);
-            }
+        /* If multicast relay is enabled then also adjust source mac for IP
+         * multicast traffic.
+         */
+        if (op->od->mcast_info.rtr.relay) {
+            ds_clear(match);
+            ds_clear(actions);
+            ds_put_format(match, "(ip4.mcast || ip6.mcast) && outport == %s",
+                          op->json_key);
+            ds_put_format(actions, "eth.src = %s; output;",
+                          op->lrp_networks.ea_s);
+            ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 110,
+                          ds_cstr(match), ds_cstr(actions));
         }
+
+        ds_clear(match);
+        ds_put_format(match, "outport == %s", op->json_key);
+        ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
+                      ds_cstr(match), "output;");
     }
+
 }
 
-/* Local router ingress table ARP_RESOLVE: ARP Resolution. */
 static void
-build_arp_resolve_flows_for_lrouter(
+build_misc_local_traffic_drop_flows_for_lrouter(
         struct ovn_datapath *od, struct hmap *lflows)
 {
     if (od->nbr) {
-        /* Multicast packets already have the outport set so just advance to
-         * next table (priority 500). */
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 500,
-                      "ip4.mcast || ip6.mcast", "next;");
+        /* L3 admission control: drop multicast and broadcast source, localhost
+         * source or destination, and zero network source or destination
+         * (priority 100). */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
+                      "ip4.src_mcast ||"
+                      "ip4.src == 255.255.255.255 || "
+                      "ip4.src == 127.0.0.0/8 || "
+                      "ip4.dst == 127.0.0.0/8 || "
+                      "ip4.src == 0.0.0.0/8 || "
+                      "ip4.dst == 0.0.0.0/8",
+                      "drop;");
 
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip4",
-                      "get_arp(outport, " REG_NEXT_HOP_IPV4 "); next;");
+        /* Drop ARP packets (priority 85). ARP request packets for router's own
+         * IPs are handled with priority-90 flows.
+         * Drop IPv6 ND packets (priority 85). ND NS packets for router's own
+         * IPs are handled with priority-90 flows.
+         */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 85,
+                      "arp || nd", "drop;");
 
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "ip6",
-                      "get_nd(outport, " REG_NEXT_HOP_IPV6 "); next;");
-    }
-}
+        /* Allow IPv6 multicast traffic that's supposed to reach the
+         * router pipeline (e.g., router solicitations).
+         */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 84, "nd_rs || nd_ra",
+                      "next;");
+
+        /* Drop other reserved multicast. */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 83,
+                      "ip6.mcast_rsvd", "drop;");
+
+        /* Allow other multicast if relay enabled, else drop (priority 82). */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 82,
+                      "ip4.mcast || ip6.mcast",
+                      od->mcast_info.rtr.relay ? "next;" : "drop;");
+
+        /* Drop Ethernet local broadcast.  By definition this traffic should
+         * not be forwarded. */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
+                      "eth.bcast", "drop;");
+
+        /* TTL discard */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
+                      "ip4 && ip.ttl == {0, 1}", "drop;");
+
+        /* Pass other traffic not already handled to the next table for
+         * routing. */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
+    }
+}
 
-/* Local router ingress table ARP_RESOLVE: ARP Resolution.
- *
- * Any unicast packet that reaches this table is an IP packet whose
- * next-hop IP address is in REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6
- * (ip4.dst/ipv6.dst is the final destination).
- * This table resolves the IP address in
- * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 into an output port in outport and
- * an Ethernet address in eth.dst.
- */
 static void
-build_arp_resolve_flows_for_lrouter_port(
+build_dhcpv6_reply_flows_for_lrouter_port(
         struct ovn_port *op, struct hmap *lflows,
-        struct hmap *ports,
-        struct ds *match, struct ds *actions)
+        struct ds *match)
 {
-    if (op->nbsp && !lsp_is_enabled(op->nbsp)) {
-        return;
+    if (op->nbrp && (!op->derived)) {
+        for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+            ds_clear(match);
+            ds_put_format(match, "ip6.dst == %s && udp.src == 547 &&"
+                          " udp.dst == 546",
+                          op->lrp_networks.ipv6_addrs[i].addr_s);
+            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
+                          ds_cstr(match),
+                          "reg0 = 0; handle_dhcpv6_reply;");
+        }
     }
 
-    if (op->nbrp) {
-        /* This is a logical router port. If next-hop IP address in
-         * REG_NEXT_HOP_IPV4/REG_NEXT_HOP_IPV6 matches IP address of this
-         * router port, then the packet is intended to eventually be sent
-         * to this logical port. Set the destination mac address using
-         * this port's mac address.
-         *
-         * The packet is still in peer's logical pipeline. So the match
-         * should be on peer's outport. */
-        if (op->peer && op->nbrp->peer) {
-            if (op->lrp_networks.n_ipv4_addrs) {
-                ds_clear(match);
-                ds_put_format(match, "outport == %s && "
-                              REG_NEXT_HOP_IPV4 "== ",
-                              op->peer->json_key);
-                op_put_v4_networks(match, op, false);
+}
 
-                ds_clear(actions);
-                ds_put_format(actions, "eth.dst = %s; next;",
-                              op->lrp_networks.ea_s);
-                ovn_lflow_add_with_hint(lflows, op->peer->od,
-                                        S_ROUTER_IN_ARP_RESOLVE, 100,
-                                        ds_cstr(match), ds_cstr(actions),
-                                        &op->nbrp->header_);
-            }
+static void
+build_ipv6_input_flows_for_lrouter_port(
+        struct ovn_port *op, struct hmap *lflows,
+        struct ds *match, struct ds *actions)
+{
+    if (op->nbrp && (!op->derived)) {
+        /* No ingress packets are accepted on a chassisredirect
+         * port, so no need to program flows for that port. */
+        if (op->lrp_networks.n_ipv6_addrs) {
+            /* ICMPv6 echo reply.  These flows reply to echo requests
+             * received for the router's IP address. */
+            ds_clear(match);
+            ds_put_cstr(match, "ip6.dst == ");
+            op_put_v6_networks(match, op);
+            ds_put_cstr(match, " && icmp6.type == 128 && icmp6.code == 0");
 
-            if (op->lrp_networks.n_ipv6_addrs) {
-                ds_clear(match);
-                ds_put_format(match, "outport == %s && "
-                              REG_NEXT_HOP_IPV6 " == ",
-                              op->peer->json_key);
-                op_put_v6_networks(match, op);
+            const char *lrp_actions =
+                        "ip6.dst <-> ip6.src; "
+                        "ip.ttl = 255; "
+                        "icmp6.type = 129; "
+                        "flags.loopback = 1; "
+                        "next; ";
+            ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
+                                    ds_cstr(match), lrp_actions,
+                                    &op->nbrp->header_);
+        }
 
-                ds_clear(actions);
-                ds_put_format(actions, "eth.dst = %s; next;",
-                              op->lrp_networks.ea_s);
-                ovn_lflow_add_with_hint(lflows, op->peer->od,
-                                        S_ROUTER_IN_ARP_RESOLVE, 100,
-                                        ds_cstr(match), ds_cstr(actions),
-                                        &op->nbrp->header_);
+        /* ND reply.  These flows reply to ND solicitations for the
+         * router's own IP address. */
+        for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+            ds_clear(match);
+            if (op->od->l3dgw_port && op == op->od->l3dgw_port
+                && op->od->l3redirect_port) {
+                /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
+                 * should only be sent from the gateway chassis, so that
+                 * upstream MAC learning points to the gateway chassis.
+                 * Also need to avoid generation of multiple ND replies
+                 * from different chassis. */
+                ds_put_format(match, "is_chassis_resident(%s)",
+                              op->od->l3redirect_port->json_key);
             }
+
+            build_lrouter_nd_flow(op->od, op, "nd_na_router",
+                                  op->lrp_networks.ipv6_addrs[i].addr_s,
+                                  op->lrp_networks.ipv6_addrs[i].sn_addr_s,
+                                  REG_INPORT_ETH_ADDR, match, false, 90,
+                                  &op->nbrp->header_, lflows);
         }
 
-        if (!op->derived && op->od->l3redirect_port) {
-            const char *redirect_type = smap_get(&op->nbrp->options,
-                                                 "redirect-type");
-            if (redirect_type && !strcasecmp(redirect_type, "bridged")) {
-                /* Packet is on a non gateway chassis and
-                 * has an unresolved ARP on a network behind gateway
-                 * chassis attached router port. Since, redirect type
-                 * is "bridged", instead of calling "get_arp"
-                 * on this node, we will redirect the packet to gateway
-                 * chassis, by setting destination mac router port mac.*/
+        /* UDP/TCP port unreachable */
+        if (!smap_get(&op->od->nbr->options, "chassis")
+            && !op->od->l3dgw_port) {
+            for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
                 ds_clear(match);
-                ds_put_format(match, "outport == %s && "
-                              "!is_chassis_resident(%s)", op->json_key,
-                              op->od->l3redirect_port->json_key);
-                ds_clear(actions);
-                ds_put_format(actions, "eth.dst = %s; next;",
-                              op->lrp_networks.ea_s);
+                ds_put_format(match,
+                              "ip6 && ip6.dst == %s && !ip.later_frag && tcp",
+                              op->lrp_networks.ipv6_addrs[i].addr_s);
+                const char *action = "tcp_reset {"
+                                     "eth.dst <-> eth.src; "
+                                     "ip6.dst <-> ip6.src; "
+                                     "next; };";
+                ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+                                        80, ds_cstr(match), action,
+                                        &op->nbrp->header_);
 
-                ovn_lflow_add_with_hint(lflows, op->od,
-                                        S_ROUTER_IN_ARP_RESOLVE, 50,
-                                        ds_cstr(match), ds_cstr(actions),
+                ds_clear(match);
+                ds_put_format(match,
+                              "ip6 && ip6.dst == %s && !ip.later_frag && udp",
+                              op->lrp_networks.ipv6_addrs[i].addr_s);
+                action = "icmp6 {"
+                         "eth.dst <-> eth.src; "
+                         "ip6.dst <-> ip6.src; "
+                         "ip.ttl = 255; "
+                         "icmp6.type = 1; "
+                         "icmp6.code = 4; "
+                         "next; };";
+                ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+                                        80, ds_cstr(match), action,
+                                        &op->nbrp->header_);
+
+                ds_clear(match);
+                ds_put_format(match,
+                              "ip6 && ip6.dst == %s && !ip.later_frag",
+                              op->lrp_networks.ipv6_addrs[i].addr_s);
+                action = "icmp6 {"
+                         "eth.dst <-> eth.src; "
+                         "ip6.dst <-> ip6.src; "
+                         "ip.ttl = 255; "
+                         "icmp6.type = 1; "
+                         "icmp6.code = 3; "
+                         "next; };";
+                ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+                                        70, ds_cstr(match), action,
                                         &op->nbrp->header_);
             }
         }
 
-        /* Drop IP traffic destined to router owned IPs. Part of it is dropped
-         * in stage "lr_in_ip_input" but traffic that could have been unSNATed
-         * but didn't match any existing session might still end up here.
-         *
-         * Priority 1.
-         */
-        build_lrouter_drop_own_dest(op, S_ROUTER_IN_ARP_RESOLVE, 1, true,
-                                    lflows);
-    } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp)
-               && strcmp(op->nbsp->type, "virtual")) {
-        /* This is a logical switch port that backs a VM or a container.
-         * Extract its addresses. For each of the address, go through all
-         * the router ports attached to the switch (to which this port
-         * connects) and if the address in question is reachable from the
-         * router port, add an ARP/ND entry in that router's pipeline. */
+        /* ICMPv6 time exceeded */
+        for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+            /* skip link-local address */
+            if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
+                continue;
+            }
 
-        for (size_t i = 0; i < op->n_lsp_addrs; i++) {
-            const char *ea_s = op->lsp_addrs[i].ea_s;
-            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
-                const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
-                for (size_t k = 0; k < op->od->n_router_ports; k++) {
-                    /* Get the Logical_Router_Port that the
-                     * Logical_Switch_Port is connected to, as
-                     * 'peer'. */
-                    const char *peer_name = smap_get(
-                        &op->od->router_ports[k]->nbsp->options,
-                        "router-port");
-                    if (!peer_name) {
-                        continue;
-                    }
+            ds_clear(match);
+            ds_clear(actions);
 
-                    struct ovn_port *peer = ovn_port_find(ports, peer_name);
-                    if (!peer || !peer->nbrp) {
-                        continue;
-                    }
+            ds_put_format(match,
+                          "inport == %s && ip6 && "
+                          "ip6.src == %s/%d && "
+                          "ip.ttl == {0, 1} && !ip.later_frag",
+                          op->json_key,
+                          op->lrp_networks.ipv6_addrs[i].network_s,
+                          op->lrp_networks.ipv6_addrs[i].plen);
+            ds_put_format(actions,
+                          "icmp6 {"
+                          "eth.dst <-> eth.src; "
+                          "ip6.dst = ip6.src; "
+                          "ip6.src = %s; "
+                          "ip.ttl = 255; "
+                          "icmp6.type = 3; /* Time exceeded */ "
+                          "icmp6.code = 0; /* TTL exceeded in transit */ "
+                          "next; };",
+                          op->lrp_networks.ipv6_addrs[i].addr_s);
+            ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
+                                    ds_cstr(match), ds_cstr(actions),
+                                    &op->nbrp->header_);
+        }
+    }
 
-                    if (!find_lrp_member_ip(peer, ip_s)) {
-                        continue;
-                    }
+}
 
-                    ds_clear(match);
-                    ds_put_format(match, "outport == %s && "
-                                  REG_NEXT_HOP_IPV4 " == %s",
-                                  peer->json_key, ip_s);
+static void
+build_lrouter_arp_nd_for_datapath(struct ovn_datapath *od,
+                                  struct hmap *lflows)
+{
+    if (od->nbr) {
 
-                    ds_clear(actions);
-                    ds_put_format(actions, "eth.dst = %s; next;", ea_s);
-                    ovn_lflow_add_with_hint(lflows, peer->od,
-                                            S_ROUTER_IN_ARP_RESOLVE, 100,
-                                            ds_cstr(match),
-                                            ds_cstr(actions),
-                                            &op->nbsp->header_);
-                }
+        /* Priority-90-92 flows handle ARP requests and ND packets. Most are
+         * per logical port but DNAT addresses can be handled per datapath
+         * for non gateway router ports.
+         *
+         * Priority 91 and 92 flows are added for each gateway router
+         * port to handle the special cases. In case we get the packet
+         * on a regular port, just reply with the port's ETH address.
+         */
+        for (int i = 0; i < od->nbr->n_nat; i++) {
+            struct ovn_nat *nat_entry = &od->nat_entries[i];
+
+            /* Skip entries we failed to parse. */
+            if (!nat_entry_is_valid(nat_entry)) {
+                continue;
             }
 
-            for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
-                const char *ip_s = op->lsp_addrs[i].ipv6_addrs[j].addr_s;
-                for (size_t k = 0; k < op->od->n_router_ports; k++) {
-                    /* Get the Logical_Router_Port that the
-                     * Logical_Switch_Port is connected to, as
-                     * 'peer'. */
-                    const char *peer_name = smap_get(
-                        &op->od->router_ports[k]->nbsp->options,
-                        "router-port");
-                    if (!peer_name) {
-                        continue;
-                    }
+            /* Skip SNAT entries for now, we handle unique SNAT IPs separately
+             * below.
+             */
+            if (!strcmp(nat_entry->nb->type, "snat")) {
+                continue;
+            }
+            build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows);
+        }
 
-                    struct ovn_port *peer = ovn_port_find(ports, peer_name);
-                    if (!peer || !peer->nbrp) {
-                        continue;
-                    }
+        /* Now handle SNAT entries too, one per unique SNAT IP. */
+        struct shash_node *snat_snode;
+        SHASH_FOR_EACH (snat_snode, &od->snat_ips) {
+            struct ovn_snat_ip *snat_ip = snat_snode->data;
 
-                    if (!find_lrp_member_ip(peer, ip_s)) {
-                        continue;
-                    }
+            if (ovs_list_is_empty(&snat_ip->snat_entries)) {
+                continue;
+            }
 
-                    ds_clear(match);
-                    ds_put_format(match, "outport == %s && "
-                                  REG_NEXT_HOP_IPV6 " == %s",
-                                  peer->json_key, ip_s);
+            struct ovn_nat *nat_entry =
+                CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries),
+                             struct ovn_nat, ext_addr_list_node);
+            build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows);
+        }
+    }
+}
 
-                    ds_clear(actions);
-                    ds_put_format(actions, "eth.dst = %s; next;", ea_s);
-                    ovn_lflow_add_with_hint(lflows, peer->od,
-                                            S_ROUTER_IN_ARP_RESOLVE, 100,
-                                            ds_cstr(match),
-                                            ds_cstr(actions),
-                                            &op->nbsp->header_);
-                }
-            }
+/* Logical router ingress table 3: IP Input for IPv4. */
+static void
+build_lrouter_ipv4_ip_input(struct ovn_port *op,
+                            struct hmap *lflows,
+                            struct ds *match, struct ds *actions)
+{
+    /* No ingress packets are accepted on a chassisredirect
+     * port, so no need to program flows for that port. */
+    if (op->nbrp && (!op->derived)) {
+        if (op->lrp_networks.n_ipv4_addrs) {
+            /* L3 admission control: drop packets that originate from an
+             * IPv4 address owned by the router or a broadcast address
+             * known to the router (priority 100). */
+            ds_clear(match);
+            ds_put_cstr(match, "ip4.src == ");
+            op_put_v4_networks(match, op, true);
+            ds_put_cstr(match, " && "REGBIT_EGRESS_LOOPBACK" == 0");
+            ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
+                                    ds_cstr(match), "drop;",
+                                    &op->nbrp->header_);
+
+            /* ICMP echo reply.  These flows reply to ICMP echo requests
+             * received for the router's IP address. Since packets only
+             * get here as part of the logical router datapath, the inport
+             * (i.e. the incoming locally attached net) does not matter.
+             * The ip.ttl also does not matter (RFC1812 section 4.2.2.9). */
+            ds_clear(match);
+            ds_put_cstr(match, "ip4.dst == ");
+            op_put_v4_networks(match, op, false);
+            ds_put_cstr(match, " && icmp4.type == 8 && icmp4.code == 0");
+
+            const char * icmp_actions = "ip4.dst <-> ip4.src; "
+                          "ip.ttl = 255; "
+                          "icmp4.type = 0; "
+                          "flags.loopback = 1; "
+                          "next; ";
+            ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
+                                    ds_cstr(match), icmp_actions,
+                                    &op->nbrp->header_);
         }
-    } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp)
-               && !strcmp(op->nbsp->type, "virtual")) {
-        /* This is a virtual port. Add ARP replies for the virtual ip with
-         * the mac of the present active virtual parent.
-         * If the logical port doesn't have virtual parent set in
-         * Port_Binding table, then add the flow to set eth.dst to
-         * 00:00:00:00:00:00 and advance to next table so that ARP is
-         * resolved by router pipeline using the arp{} action.
-         * The MAC_Binding entry for the virtual ip might be invalid. */
-        ovs_be32 ip;
 
-        const char *vip = smap_get(&op->nbsp->options,
-                                   "virtual-ip");
-        const char *virtual_parents = smap_get(&op->nbsp->options,
-                                               "virtual-parents");
-        if (!vip || !virtual_parents ||
-            !ip_parse(vip, &ip) || !op->sb) {
-            return;
+        /* ICMP time exceeded */
+        for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+            ds_clear(match);
+            ds_clear(actions);
+
+            ds_put_format(match,
+                          "inport == %s && ip4 && "
+                          "ip.ttl == {0, 1} && !ip.later_frag", op->json_key);
+            ds_put_format(actions,
+                          "icmp4 {"
+                          "eth.dst <-> eth.src; "
+                          "icmp4.type = 11; /* Time exceeded */ "
+                          "icmp4.code = 0; /* TTL exceeded in transit */ "
+                          "ip4.dst = ip4.src; "
+                          "ip4.src = %s; "
+                          "ip.ttl = 255; "
+                          "next; };",
+                          op->lrp_networks.ipv4_addrs[i].addr_s);
+            ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
+                                    ds_cstr(match), ds_cstr(actions),
+                                    &op->nbrp->header_);
         }
 
-        if (!op->sb->virtual_parent || !op->sb->virtual_parent[0] ||
-            !op->sb->chassis) {
-            /* The virtual port is not claimed yet. */
-            for (size_t i = 0; i < op->od->n_router_ports; i++) {
-                const char *peer_name = smap_get(
-                    &op->od->router_ports[i]->nbsp->options,
-                    "router-port");
-                if (!peer_name) {
-                    continue;
-                }
+        /* ARP reply.  These flows reply to ARP requests for the router's own
+         * IP address. */
+        for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+            ds_clear(match);
+            ds_put_format(match, "arp.spa == %s/%u",
+                          op->lrp_networks.ipv4_addrs[i].network_s,
+                          op->lrp_networks.ipv4_addrs[i].plen);
 
-                struct ovn_port *peer = ovn_port_find(ports, peer_name);
-                if (!peer || !peer->nbrp) {
-                    continue;
+            if (op->od->l3dgw_port && op->od->l3redirect_port && op->peer
+                && op->peer->od->n_localnet_ports) {
+                bool add_chassis_resident_check = false;
+                if (op == op->od->l3dgw_port) {
+                    /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
+                     * should only be sent from the gateway chassis, so that
+                     * upstream MAC learning points to the gateway chassis.
+                     * Also need to avoid generation of multiple ARP responses
+                     * from different chassis. */
+                    add_chassis_resident_check = true;
+                } else {
+                    /* Check if the option 'reside-on-redirect-chassis'
+                     * is set to true on the router port. If set to true
+                     * and if peer's logical switch has a localnet port, it
+                     * means the router pipeline for the packets from
+                     * peer's logical switch is to be run on the chassis
+                     * hosting the gateway port and it should reply to the
+                     * ARP requests for the router port IPs.
+                     */
+                    add_chassis_resident_check = smap_get_bool(
+                        &op->nbrp->options,
+                        "reside-on-redirect-chassis", false);
                 }
 
-                if (find_lrp_member_ip(peer, vip)) {
-                    ds_clear(match);
-                    ds_put_format(match, "outport == %s && "
-                                  REG_NEXT_HOP_IPV4 " == %s",
-                                  peer->json_key, vip);
-
-                    const char *arp_actions =
-                                  "eth.dst = 00:00:00:00:00:00; next;";
-                    ovn_lflow_add_with_hint(lflows, peer->od,
-                                            S_ROUTER_IN_ARP_RESOLVE, 100,
-                                            ds_cstr(match),
-                                            arp_actions,
-                                            &op->nbsp->header_);
-                    break;
+                if (add_chassis_resident_check) {
+                    ds_put_format(match, " && is_chassis_resident(%s)",
+                                  op->od->l3redirect_port->json_key);
                 }
             }
-        } else {
-            struct ovn_port *vp =
-                ovn_port_find(ports, op->sb->virtual_parent);
-            if (!vp || !vp->nbsp) {
-                return;
-            }
 
-            for (size_t i = 0; i < vp->n_lsp_addrs; i++) {
-                bool found_vip_network = false;
-                const char *ea_s = vp->lsp_addrs[i].ea_s;
-                for (size_t j = 0; j < vp->od->n_router_ports; j++) {
-                    /* Get the Logical_Router_Port that the
-                    * Logical_Switch_Port is connected to, as
-                    * 'peer'. */
-                    const char *peer_name = smap_get(
-                        &vp->od->router_ports[j]->nbsp->options,
-                        "router-port");
-                    if (!peer_name) {
-                        continue;
-                    }
+            build_lrouter_arp_flow(op->od, op,
+                                   op->lrp_networks.ipv4_addrs[i].addr_s,
+                                   REG_INPORT_ETH_ADDR, match, false, 90,
+                                   &op->nbrp->header_, lflows);
+        }
 
-                    struct ovn_port *peer =
-                        ovn_port_find(ports, peer_name);
-                    if (!peer || !peer->nbrp) {
-                        continue;
-                    }
+        /* Sets to hold all load-balancer VIPs that need ARP/ND responses. */
+        struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4);
+        struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6);
+        get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6);
 
-                    if (!find_lrp_member_ip(peer, vip)) {
-                        continue;
-                    }
-
-                    ds_clear(match);
-                    ds_put_format(match, "outport == %s && "
-                                  REG_NEXT_HOP_IPV4 " == %s",
-                                  peer->json_key, vip);
+        const char *ip_address;
+        SSET_FOR_EACH (ip_address, &all_ips_v4) {
+            ds_clear(match);
+            if (op == op->od->l3dgw_port) {
+                ds_put_format(match, "is_chassis_resident(%s)",
+                              op->od->l3redirect_port->json_key);
+            }
 
-                    ds_clear(actions);
-                    ds_put_format(actions, "eth.dst = %s; next;", ea_s);
-                    ovn_lflow_add_with_hint(lflows, peer->od,
-                                            S_ROUTER_IN_ARP_RESOLVE, 100,
-                                            ds_cstr(match),
-                                            ds_cstr(actions),
-                                            &op->nbsp->header_);
-                    found_vip_network = true;
-                    break;
-                }
+            build_lrouter_arp_flow(op->od, op,
+                                   ip_address, REG_INPORT_ETH_ADDR,
+                                   match, false, 90, NULL, lflows);
+        }
 
-                if (found_vip_network) {
-                    break;
-                }
+        SSET_FOR_EACH (ip_address, &all_ips_v6) {
+            ds_clear(match);
+            if (op == op->od->l3dgw_port) {
+                ds_put_format(match, "is_chassis_resident(%s)",
+                              op->od->l3redirect_port->json_key);
             }
+
+            build_lrouter_nd_flow(op->od, op, "nd_na",
+                                  ip_address, NULL, REG_INPORT_ETH_ADDR,
+                                  match, false, 90, NULL, lflows);
         }
-    } else if (lsp_is_router(op->nbsp)) {
-        /* This is a logical switch port that connects to a router. */
 
-        /* The peer of this switch port is the router port for which
-         * we need to add logical flows such that it can resolve
-         * ARP entries for all the other router ports connected to
-         * the switch in question. */
+        sset_destroy(&all_ips_v4);
+        sset_destroy(&all_ips_v6);
 
-        const char *peer_name = smap_get(&op->nbsp->options,
-                                         "router-port");
-        if (!peer_name) {
-            return;
-        }
+        if (!smap_get(&op->od->nbr->options, "chassis")
+            && !op->od->l3dgw_port) {
+            /* UDP/TCP port unreachable. */
+            for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+                ds_clear(match);
+                ds_put_format(match,
+                              "ip4 && ip4.dst == %s && !ip.later_frag && udp",
+                              op->lrp_networks.ipv4_addrs[i].addr_s);
+                const char *action = "icmp4 {"
+                                     "eth.dst <-> eth.src; "
+                                     "ip4.dst <-> ip4.src; "
+                                     "ip.ttl = 255; "
+                                     "icmp4.type = 3; "
+                                     "icmp4.code = 3; "
+                                     "next; };";
+                ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+                                        80, ds_cstr(match), action,
+                                        &op->nbrp->header_);
 
-        struct ovn_port *peer = ovn_port_find(ports, peer_name);
-        if (!peer || !peer->nbrp) {
-            return;
+                ds_clear(match);
+                ds_put_format(match,
+                              "ip4 && ip4.dst == %s && !ip.later_frag && tcp",
+                              op->lrp_networks.ipv4_addrs[i].addr_s);
+                action = "tcp_reset {"
+                         "eth.dst <-> eth.src; "
+                         "ip4.dst <-> ip4.src; "
+                         "next; };";
+                ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+                                        80, ds_cstr(match), action,
+                                        &op->nbrp->header_);
+
+                ds_clear(match);
+                ds_put_format(match,
+                              "ip4 && ip4.dst == %s && !ip.later_frag",
+                              op->lrp_networks.ipv4_addrs[i].addr_s);
+                action = "icmp4 {"
+                         "eth.dst <-> eth.src; "
+                         "ip4.dst <-> ip4.src; "
+                         "ip.ttl = 255; "
+                         "icmp4.type = 3; "
+                         "icmp4.code = 2; "
+                         "next; };";
+                ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
+                                        70, ds_cstr(match), action,
+                                        &op->nbrp->header_);
+            }
         }
 
-        if (peer->od->nbr &&
-            smap_get_bool(&peer->od->nbr->options,
-                          "dynamic_neigh_routers", false)) {
+        /* Drop IP traffic destined to router owned IPs except if the IP is
+         * also a SNAT IP. Those are dropped later, in stage
+         * "lr_in_arp_resolve", if unSNAT was unsuccessful.
+         *
+         * Priority 60.
+         */
+        build_lrouter_drop_own_dest(op, S_ROUTER_IN_IP_INPUT, 60, false,
+                                    lflows);
+
+        /* ARP / ND handling for external IP addresses.
+         *
+         * DNAT and SNAT IP addresses are external IP addresses that need ARP
+         * handling.
+         *
+         * These are already taken care of globally, per router. The only
+         * exception is on the l3dgw_port where we might need to use a
+         * different ETH address.
+         */
+        if (op != op->od->l3dgw_port) {
             return;
         }
 
-        for (size_t i = 0; i < op->od->n_router_ports; i++) {
-            const char *router_port_name = smap_get(
-                                &op->od->router_ports[i]->nbsp->options,
-                                "router-port");
-            struct ovn_port *router_port = ovn_port_find(ports,
-                                                         router_port_name);
-            if (!router_port || !router_port->nbrp) {
+        for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
+            struct ovn_nat *nat_entry = &op->od->nat_entries[i];
+
+            /* Skip entries we failed to parse. */
+            if (!nat_entry_is_valid(nat_entry)) {
                 continue;
             }
 
-            /* Skip the router port under consideration. */
-            if (router_port == peer) {
-               continue;
+            /* Skip SNAT entries for now, we handle unique SNAT IPs separately
+             * below.
+             */
+            if (!strcmp(nat_entry->nb->type, "snat")) {
+                continue;
             }
+            build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows);
+        }
 
-            if (router_port->lrp_networks.n_ipv4_addrs) {
-                ds_clear(match);
-                ds_put_format(match, "outport == %s && "
-                              REG_NEXT_HOP_IPV4 " == ",
-                              peer->json_key);
-                op_put_v4_networks(match, router_port, false);
+        /* Now handle SNAT entries too, one per unique SNAT IP. */
+        struct shash_node *snat_snode;
+        SHASH_FOR_EACH (snat_snode, &op->od->snat_ips) {
+            struct ovn_snat_ip *snat_ip = snat_snode->data;
 
-                ds_clear(actions);
-                ds_put_format(actions, "eth.dst = %s; next;",
-                                          router_port->lrp_networks.ea_s);
-                ovn_lflow_add_with_hint(lflows, peer->od,
-                                        S_ROUTER_IN_ARP_RESOLVE, 100,
-                                        ds_cstr(match), ds_cstr(actions),
-                                        &op->nbsp->header_);
+            if (ovs_list_is_empty(&snat_ip->snat_entries)) {
+                continue;
             }
 
-            if (router_port->lrp_networks.n_ipv6_addrs) {
-                ds_clear(match);
-                ds_put_format(match, "outport == %s && "
-                              REG_NEXT_HOP_IPV6 " == ",
-                              peer->json_key);
-                op_put_v6_networks(match, router_port);
-
-                ds_clear(actions);
-                ds_put_format(actions, "eth.dst = %s; next;",
-                              router_port->lrp_networks.ea_s);
-                ovn_lflow_add_with_hint(lflows, peer->od,
-                                        S_ROUTER_IN_ARP_RESOLVE, 100,
-                                        ds_cstr(match), ds_cstr(actions),
-                                        &op->nbsp->header_);
-            }
+            struct ovn_nat *nat_entry =
+                CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries),
+                             struct ovn_nat, ext_addr_list_node);
+            build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows);
         }
     }
-
 }
 
-/* Local router ingress table CHK_PKT_LEN: Check packet length.
- *
- * Any IPv4 packet with outport set to the distributed gateway
- * router port, check the packet length and store the result in the
- * 'REGBIT_PKT_LARGER' register bit.
- *
- * Local router ingress table LARGER_PKTS: Handle larger packets.
- *
- * Any IPv4 packet with outport set to the distributed gateway
- * router port and the 'REGBIT_PKT_LARGER' register bit is set,
- * generate ICMPv4 packet with type 3 (Destination Unreachable) and
- * code 4 (Fragmentation needed).
- * */
+/* NAT, Defrag and load balancing. */
 static void
-build_check_pkt_len_flows_for_lrouter(
-        struct ovn_datapath *od, struct hmap *lflows,
-        struct hmap *ports,
-        struct ds *match, struct ds *actions)
+build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od,
+                            struct hmap *lflows,
+                            struct shash *meter_groups,
+                            struct hmap *lbs,
+                            struct ds *match, struct ds *actions)
 {
     if (od->nbr) {
 
         /* Packets are allowed by default. */
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 0, "1",
-                      "next;");
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_LARGER_PKTS, 0, "1",
-                      "next;");
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;");
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
+        ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
+        ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 0, "1", "next;");
+        ovn_lflow_add(lflows, od, S_ROUTER_OUT_EGR_LOOP, 0, "1", "next;");
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_ECMP_STATEFUL, 0, "1", "next;");
 
-        if (od->l3dgw_port && od->l3redirect_port) {
-            int gw_mtu = 0;
-            if (od->l3dgw_port->nbrp) {
-                 gw_mtu = smap_get_int(&od->l3dgw_port->nbrp->options,
-                                       "gateway_mtu", 0);
-            }
-            /* Add the flows only if gateway_mtu is configured. */
-            if (gw_mtu <= 0) {
-                return;
-            }
+        /* Send the IPv6 NS packets to next table. When ovn-controller
+         * generates IPv6 NS (for the action - nd_ns{}), the injected
+         * packet would go through conntrack - which is not required. */
+        ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 120, "nd_ns", "next;");
 
-            ds_clear(match);
-            ds_put_format(match, "outport == %s", od->l3dgw_port->json_key);
+        /* NAT rules are only valid on Gateway routers and routers with
+         * l3dgw_port (router has a port with gateway chassis
+         * specified). */
+        if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
+            return;
+        }
 
-            ds_clear(actions);
-            ds_put_format(actions,
-                          REGBIT_PKT_LARGER" = check_pkt_larger(%d);"
-                          " next;", gw_mtu + VLAN_ETH_HEADER_LEN);
-            ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_CHK_PKT_LEN, 50,
-                                    ds_cstr(match), ds_cstr(actions),
-                                    &od->l3dgw_port->nbrp->header_);
+        struct sset nat_entries = SSET_INITIALIZER(&nat_entries);
 
-            for (size_t i = 0; i < od->nbr->n_ports; i++) {
-                struct ovn_port *rp = ovn_port_find(ports,
-                                                    od->nbr->ports[i]->name);
-                if (!rp || rp == od->l3dgw_port) {
-                    continue;
-                }
+        bool dnat_force_snat_ip =
+            !lport_addresses_is_empty(&od->dnat_force_snat_addrs);
+        bool lb_force_snat_ip =
+            !lport_addresses_is_empty(&od->lb_force_snat_addrs);
 
-                if (rp->lrp_networks.ipv4_addrs) {
-                    ds_clear(match);
-                    ds_put_format(match, "inport == %s && outport == %s"
-                                  " && ip4 && "REGBIT_PKT_LARGER,
-                                  rp->json_key, od->l3dgw_port->json_key);
+        for (int i = 0; i < od->nbr->n_nat; i++) {
+            const struct nbrec_nat *nat;
 
-                    ds_clear(actions);
-                    /* Set icmp4.frag_mtu to gw_mtu */
-                    ds_put_format(actions,
-                        "icmp4_error {"
-                        REGBIT_EGRESS_LOOPBACK" = 1; "
-                        "eth.dst = %s; "
-                        "ip4.dst = ip4.src; "
-                        "ip4.src = %s; "
-                        "ip.ttl = 255; "
-                        "icmp4.type = 3; /* Destination Unreachable. */ "
-                        "icmp4.code = 4; /* Frag Needed and DF was Set. */ "
-                        "icmp4.frag_mtu = %d; "
-                        "next(pipeline=ingress, table=%d); };",
-                        rp->lrp_networks.ea_s,
-                        rp->lrp_networks.ipv4_addrs[0].addr_s,
-                        gw_mtu,
-                        ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
-                    ovn_lflow_add_with_hint(lflows, od,
-                                            S_ROUTER_IN_LARGER_PKTS, 50,
-                                            ds_cstr(match), ds_cstr(actions),
-                                            &rp->nbrp->header_);
-                }
+            nat = od->nbr->nat[i];
 
-                if (rp->lrp_networks.ipv6_addrs) {
-                    ds_clear(match);
-                    ds_put_format(match, "inport == %s && outport == %s"
-                                  " && ip6 && "REGBIT_PKT_LARGER,
-                                  rp->json_key, od->l3dgw_port->json_key);
+            ovs_be32 ip, mask;
+            struct in6_addr ipv6, mask_v6, v6_exact = IN6ADDR_EXACT_INIT;
+            bool is_v6 = false;
+            bool stateless = lrouter_nat_is_stateless(nat);
+            struct nbrec_address_set *allowed_ext_ips =
+                                      nat->allowed_ext_ips;
+            struct nbrec_address_set *exempted_ext_ips =
+                                      nat->exempted_ext_ips;
 
-                    ds_clear(actions);
-                    /* Set icmp6.frag_mtu to gw_mtu */
-                    ds_put_format(actions,
-                        "icmp6_error {"
-                        REGBIT_EGRESS_LOOPBACK" = 1; "
-                        "eth.dst = %s; "
-                        "ip6.dst = ip6.src; "
-                        "ip6.src = %s; "
-                        "ip.ttl = 255; "
-                        "icmp6.type = 2; /* Packet Too Big. */ "
-                        "icmp6.code = 0; "
-                        "icmp6.frag_mtu = %d; "
-                        "next(pipeline=ingress, table=%d); };",
-                        rp->lrp_networks.ea_s,
-                        rp->lrp_networks.ipv6_addrs[0].addr_s,
-                        gw_mtu,
-                        ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
-                    ovn_lflow_add_with_hint(lflows, od,
-                                            S_ROUTER_IN_LARGER_PKTS, 50,
-                                            ds_cstr(match), ds_cstr(actions),
-                                            &rp->nbrp->header_);
-                }
+            if (allowed_ext_ips && exempted_ext_ips) {
+                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+                VLOG_WARN_RL(&rl, "NAT rule: "UUID_FMT" not applied, since "
+                             "both allowed and exempt external ips set",
+                             UUID_ARGS(&(nat->header_.uuid)));
+                continue;
             }
-        }
-    }
-}
 
-/* Logical router ingress table GW_REDIRECT: Gateway redirect.
- *
- * For traffic with outport equal to the l3dgw_port
- * on a distributed router, this table redirects a subset
- * of the traffic to the l3redirect_port which represents
- * the central instance of the l3dgw_port.
- */
-static void
-build_gateway_redirect_flows_for_lrouter(
-        struct ovn_datapath *od, struct hmap *lflows,
-        struct ds *match, struct ds *actions)
-{
-    if (od->nbr) {
-        if (od->l3dgw_port && od->l3redirect_port) {
-            const struct ovsdb_idl_row *stage_hint = NULL;
+            char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
+            if (error || mask != OVS_BE32_MAX) {
+                free(error);
+                error = ipv6_parse_masked(nat->external_ip, &ipv6, &mask_v6);
+                if (error || memcmp(&mask_v6, &v6_exact, sizeof(mask_v6))) {
+                    /* Invalid for both IPv4 and IPv6 */
+                    static struct vlog_rate_limit rl =
+                        VLOG_RATE_LIMIT_INIT(5, 1);
+                    VLOG_WARN_RL(&rl, "bad external ip %s for nat",
+                                 nat->external_ip);
+                    free(error);
+                    continue;
+                }
+                /* It was an invalid IPv4 address, but valid IPv6.
+                 * Treat the rest of the handling of this NAT rule
+                 * as IPv6. */
+                is_v6 = true;
+            }
 
-            if (od->l3dgw_port->nbrp) {
-                stage_hint = &od->l3dgw_port->nbrp->header_;
+            /* Check the validity of nat->logical_ip. 'logical_ip' can
+             * be a subnet when the type is "snat". */
+            int cidr_bits;
+            if (is_v6) {
+                error = ipv6_parse_masked(nat->logical_ip, &ipv6, &mask_v6);
+                cidr_bits = ipv6_count_cidr_bits(&mask_v6);
+            } else {
+                error = ip_parse_masked(nat->logical_ip, &ip, &mask);
+                cidr_bits = ip_count_cidr_bits(mask);
+            }
+            if (!strcmp(nat->type, "snat")) {
+                if (error) {
+                    /* Invalid for both IPv4 and IPv6 */
+                    static struct vlog_rate_limit rl =
+                        VLOG_RATE_LIMIT_INIT(5, 1);
+                    VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
+                                 "in router "UUID_FMT"",
+                                 nat->logical_ip, UUID_ARGS(&od->key));
+                    free(error);
+                    continue;
+                }
+            } else {
+                if (error || (!is_v6 && mask != OVS_BE32_MAX)
+                    || (is_v6 && memcmp(&mask_v6, &v6_exact,
+                                        sizeof mask_v6))) {
+                    /* Invalid for both IPv4 and IPv6 */
+                    static struct vlog_rate_limit rl =
+                        VLOG_RATE_LIMIT_INIT(5, 1);
+                    VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
+                        ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
+                    free(error);
+                    continue;
+                }
             }
 
-            /* For traffic with outport == l3dgw_port, if the
-             * packet did not match any higher priority redirect
-             * rule, then the traffic is redirected to the central
-             * instance of the l3dgw_port. */
-            ds_clear(match);
-            ds_put_format(match, "outport == %s",
-                          od->l3dgw_port->json_key);
-            ds_clear(actions);
-            ds_put_format(actions, "outport = %s; next;",
-                          od->l3redirect_port->json_key);
-            ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT, 50,
-                                    ds_cstr(match), ds_cstr(actions),
-                                    stage_hint);
-        }
+            /* For distributed router NAT, determine whether this NAT rule
+             * satisfies the conditions for distributed NAT processing. */
+            bool distributed = false;
+            struct eth_addr mac;
+            if (od->l3dgw_port && !strcmp(nat->type, "dnat_and_snat") &&
+                nat->logical_port && nat->external_mac) {
+                if (eth_addr_from_string(nat->external_mac, &mac)) {
+                    distributed = true;
+                } else {
+                    static struct vlog_rate_limit rl =
+                        VLOG_RATE_LIMIT_INIT(5, 1);
+                    VLOG_WARN_RL(&rl, "bad mac %s for dnat in router "
+                        ""UUID_FMT"", nat->external_mac, UUID_ARGS(&od->key));
+                    continue;
+                }
+            }
 
-        /* Packets are allowed by default. */
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_GW_REDIRECT, 0, "1", "next;");
-    }
-}
+            /* Ingress UNSNAT table: It is for already established connections'
+             * reverse traffic. i.e., SNAT has already been done in egress
+             * pipeline and now the packet has entered the ingress pipeline as
+             * part of a reply. We undo the SNAT here.
+             *
+             * Undoing SNAT has to happen before DNAT processing.  This is
+             * because when the packet was DNATed in ingress pipeline, it did
+             * not know about the possibility of eventual additional SNAT in
+             * egress pipeline. */
+            if (!strcmp(nat->type, "snat")
+                || !strcmp(nat->type, "dnat_and_snat")) {
+                if (!od->l3dgw_port) {
+                    /* Gateway router. */
+                    ds_clear(match);
+                    ds_clear(actions);
+                    ds_put_format(match, "ip && ip%s.dst == %s",
+                                  is_v6 ? "6" : "4",
+                                  nat->external_ip);
+                    if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+                       ds_put_format(actions, "ip%s.dst=%s; next;",
+                                     is_v6 ? "6" : "4", nat->logical_ip);
+                    } else {
+                       ds_put_cstr(actions, "ct_snat;");
+                    }
 
-/* Local router ingress table ARP_REQUEST: ARP request.
- *
- * In the common case where the Ethernet destination has been resolved,
- * this table outputs the packet (priority 0).  Otherwise, it composes
- * and sends an ARP/IPv6 NA request (priority 100). */
-static void
-build_arp_request_flows_for_lrouter(
-        struct ovn_datapath *od, struct hmap *lflows,
-        struct ds *match, struct ds *actions)
-{
-    if (od->nbr) {
-        for (int i = 0; i < od->nbr->n_static_routes; i++) {
-            const struct nbrec_logical_router_static_route *route;
+                    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT,
+                                            90, ds_cstr(match),
+                                            ds_cstr(actions),
+                                            &nat->header_);
+                } else {
+                    /* Distributed router. */
 
-            route = od->nbr->static_routes[i];
-            struct in6_addr gw_ip6;
-            unsigned int plen;
-            char *error = ipv6_parse_cidr(route->nexthop, &gw_ip6, &plen);
-            if (error || plen != 128) {
-                free(error);
-                continue;
-            }
+                    /* Traffic received on l3dgw_port is subject to NAT. */
+                    ds_clear(match);
+                    ds_clear(actions);
+                    ds_put_format(match, "ip && ip%s.dst == %s"
+                                          " && inport == %s",
+                                  is_v6 ? "6" : "4",
+                                  nat->external_ip,
+                                  od->l3dgw_port->json_key);
+                    if (!distributed && od->l3redirect_port) {
+                        /* Flows for NAT rules that are centralized are only
+                         * programmed on the gateway chassis. */
+                        ds_put_format(match, " && is_chassis_resident(%s)",
+                                      od->l3redirect_port->json_key);
+                    }
 
-            ds_clear(match);
-            ds_put_format(match, "eth.dst == 00:00:00:00:00:00 && "
-                          "ip6 && " REG_NEXT_HOP_IPV6 " == %s",
-                          route->nexthop);
-            struct in6_addr sn_addr;
-            struct eth_addr eth_dst;
-            in6_addr_solicited_node(&sn_addr, &gw_ip6);
-            ipv6_multicast_to_ethernet(&eth_dst, &sn_addr);
+                    if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+                        ds_put_format(actions, "ip%s.dst=%s; next;",
+                                      is_v6 ? "6" : "4", nat->logical_ip);
+                    } else {
+                        ds_put_cstr(actions, "ct_snat;");
+                    }
 
-            char sn_addr_s[INET6_ADDRSTRLEN + 1];
-            ipv6_string_mapped(sn_addr_s, &sn_addr);
+                    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_UNSNAT,
+                                            100,
+                                            ds_cstr(match), ds_cstr(actions),
+                                            &nat->header_);
+                }
+            }
 
-            ds_clear(actions);
-            ds_put_format(actions,
-                          "nd_ns { "
-                          "eth.dst = "ETH_ADDR_FMT"; "
-                          "ip6.dst = %s; "
-                          "nd.target = %s; "
-                          "output; "
-                          "};", ETH_ADDR_ARGS(eth_dst), sn_addr_s,
-                          route->nexthop);
-
-            ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ARP_REQUEST, 200,
-                                    ds_cstr(match), ds_cstr(actions),
-                                    &route->header_);
-        }
-
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
-                      "eth.dst == 00:00:00:00:00:00 && ip4",
-                      "arp { "
-                      "eth.dst = ff:ff:ff:ff:ff:ff; "
-                      "arp.spa = " REG_SRC_IPV4 "; "
-                      "arp.tpa = " REG_NEXT_HOP_IPV4 "; "
-                      "arp.op = 1; " /* ARP request */
-                      "output; "
-                      "};");
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
-                      "eth.dst == 00:00:00:00:00:00 && ip6",
-                      "nd_ns { "
-                      "nd.target = " REG_NEXT_HOP_IPV6 "; "
-                      "output; "
-                      "};");
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
-    }
-}
-
-/* Logical router egress table DELIVERY: Delivery (priority 100-110).
- *
- * Priority 100 rules deliver packets to enabled logical ports.
- * Priority 110 rules match multicast packets and update the source
- * mac before delivering to enabled logical ports. IP multicast traffic
- * bypasses S_ROUTER_IN_IP_ROUTING route lookups.
- */
-static void
-build_egress_delivery_flows_for_lrouter_port(
-        struct ovn_port *op, struct hmap *lflows,
-        struct ds *match, struct ds *actions)
-{
-    if (op->nbrp) {
-        if (!lrport_is_enabled(op->nbrp)) {
-            /* Drop packets to disabled logical ports (since logical flow
-             * tables are default-drop). */
-            return;
-        }
-
-        if (op->derived) {
-            /* No egress packets should be processed in the context of
-             * a chassisredirect port.  The chassisredirect port should
-             * be replaced by the l3dgw port in the local output
-             * pipeline stage before egress processing. */
-            return;
-        }
-
-        /* If multicast relay is enabled then also adjust source mac for IP
-         * multicast traffic.
-         */
-        if (op->od->mcast_info.rtr.relay) {
-            ds_clear(match);
-            ds_clear(actions);
-            ds_put_format(match, "(ip4.mcast || ip6.mcast) && outport == %s",
-                          op->json_key);
-            ds_put_format(actions, "eth.src = %s; output;",
-                          op->lrp_networks.ea_s);
-            ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 110,
-                          ds_cstr(match), ds_cstr(actions));
-        }
-
-        ds_clear(match);
-        ds_put_format(match, "outport == %s", op->json_key);
-        ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
-                      ds_cstr(match), "output;");
-    }
-
-}
-
-static void
-build_misc_local_traffic_drop_flows_for_lrouter(
-        struct ovn_datapath *od, struct hmap *lflows)
-{
-    if (od->nbr) {
-        /* L3 admission control: drop multicast and broadcast source, localhost
-         * source or destination, and zero network source or destination
-         * (priority 100). */
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
-                      "ip4.src_mcast ||"
-                      "ip4.src == 255.255.255.255 || "
-                      "ip4.src == 127.0.0.0/8 || "
-                      "ip4.dst == 127.0.0.0/8 || "
-                      "ip4.src == 0.0.0.0/8 || "
-                      "ip4.dst == 0.0.0.0/8",
-                      "drop;");
-
-        /* Drop ARP packets (priority 85). ARP request packets for router's own
-         * IPs are handled with priority-90 flows.
-         * Drop IPv6 ND packets (priority 85). ND NA packets for router's own
-         * IPs are handled with priority-90 flows.
-         */
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 85,
-                      "arp || nd", "drop;");
-
-        /* Allow IPv6 multicast traffic that's supposed to reach the
-         * router pipeline (e.g., router solicitations).
-         */
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 84, "nd_rs || nd_ra",
-                      "next;");
-
-        /* Drop other reserved multicast. */
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 83,
-                      "ip6.mcast_rsvd", "drop;");
-
-        /* Allow other multicast if relay enabled (priority 82). */
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 82,
-                      "ip4.mcast || ip6.mcast",
-                      od->mcast_info.rtr.relay ? "next;" : "drop;");
-
-        /* Drop Ethernet local broadcast.  By definition this traffic should
-         * not be forwarded.*/
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
-                      "eth.bcast", "drop;");
+            /* Ingress DNAT table: Packets enter the pipeline with destination
+             * IP address that needs to be DNATted from an external IP address
+             * to a logical IP address. */
+            if (!strcmp(nat->type, "dnat")
+                || !strcmp(nat->type, "dnat_and_snat")) {
+                if (!od->l3dgw_port) {
+                    /* Gateway router. */
+                    /* Packet when it goes from the initiator to destination.
+                     * We need to set flags.loopback because the router can
+                     * send the packet back through the same interface. */
+                    ds_clear(match);
+                    ds_put_format(match, "ip && ip%s.dst == %s",
+                                  is_v6 ? "6" : "4",
+                                  nat->external_ip);
+                    ds_clear(actions);
+                    if (allowed_ext_ips || exempted_ext_ips) {
+                        lrouter_nat_add_ext_ip_match(od, lflows, match, nat,
+                                                     is_v6, true, mask);
+                    }
 
-        /* TTL discard */
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
-                      "ip4 && ip.ttl == {0, 1}", "drop;");
+                    if (dnat_force_snat_ip) {
+                        /* Indicate to the future tables that a DNAT has taken
+                         * place and a force SNAT needs to be done in the
+                         * Egress SNAT table. */
+                        ds_put_format(actions,
+                                      "flags.force_snat_for_dnat = 1; ");
+                    }
 
-        /* Pass other traffic not already handled to the next table for
-         * routing. */
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
-    }
-}
+                    if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+                        ds_put_format(actions, "flags.loopback = 1; "
+                                      "ip%s.dst=%s; next;",
+                                      is_v6 ? "6" : "4", nat->logical_ip);
+                    } else {
+                        ds_put_format(actions, "flags.loopback = 1; "
+                                      "ct_dnat(%s", nat->logical_ip);
 
-static void
-build_dhcpv6_reply_flows_for_lrouter_port(
-        struct ovn_port *op, struct hmap *lflows,
-        struct ds *match)
-{
-    if (op->nbrp && (!op->derived)) {
-        for (size_t i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
-            ds_clear(match);
-            ds_put_format(match, "ip6.dst == %s && udp.src == 547 &&"
-                          " udp.dst == 546",
-                          op->lrp_networks.ipv6_addrs[i].addr_s);
-            ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
-                          ds_cstr(match),
-                          "reg0 = 0; handle_dhcpv6_reply;");
-        }
-    }
+                        if (nat->external_port_range[0]) {
+                            ds_put_format(actions, ",%s",
+                                          nat->external_port_range);
+                        }
+                        ds_put_format(actions, ");");
+                    }
 
-}
+                    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100,
+                                            ds_cstr(match), ds_cstr(actions),
+                                            &nat->header_);
+                } else {
+                    /* Distributed router. */
 
-static void
-build_ipv6_input_flows_for_lrouter_port(
-        struct ovn_port *op, struct hmap *lflows,
-        struct ds *match, struct ds *actions)
-{
-    if (op->nbrp && (!op->derived)) {
-        /* No ingress packets are accepted on a chassisredirect
-         * port, so no need to program flows for that port. */
-        if (op->lrp_networks.n_ipv6_addrs) {
-            /* ICMPv6 echo reply.  These flows reply to echo requests
-             * received for the router's IP address. */
-            ds_clear(match);
-            ds_put_cstr(match, "ip6.dst == ");
-            op_put_v6_networks(match, op);
-            ds_put_cstr(match, " && icmp6.type == 128 && icmp6.code == 0");
+                    /* Traffic received on l3dgw_port is subject to NAT. */
+                    ds_clear(match);
+                    ds_put_format(match, "ip && ip%s.dst == %s"
+                                          " && inport == %s",
+                                  is_v6 ? "6" : "4",
+                                  nat->external_ip,
+                                  od->l3dgw_port->json_key);
+                    if (!distributed && od->l3redirect_port) {
+                        /* Flows for NAT rules that are centralized are only
+                         * programmed on the gateway chassis. */
+                        ds_put_format(match, " && is_chassis_resident(%s)",
+                                      od->l3redirect_port->json_key);
+                    }
+                    ds_clear(actions);
+                    if (allowed_ext_ips || exempted_ext_ips) {
+                        lrouter_nat_add_ext_ip_match(od, lflows, match, nat,
+                                                     is_v6, true, mask);
+                    }
 
-            const char *lrp_actions =
-                        "ip6.dst <-> ip6.src; "
-                        "ip.ttl = 255; "
-                        "icmp6.type = 129; "
-                        "flags.loopback = 1; "
-                        "next; ";
-            ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
-                                    ds_cstr(match), lrp_actions,
-                                    &op->nbrp->header_);
-        }
+                    if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+                        ds_put_format(actions, "ip%s.dst=%s; next;",
+                                      is_v6 ? "6" : "4", nat->logical_ip);
+                    } else {
+                        ds_put_format(actions, "ct_dnat(%s", nat->logical_ip);
+                        if (nat->external_port_range[0]) {
+                            ds_put_format(actions, ",%s",
+                                          nat->external_port_range);
+                        }
+                        ds_put_format(actions, ");");
+                    }
 
-        /* ND reply.  These flows reply to ND solicitations for the
-         * router's own IP address. */
-        for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
-            ds_clear(match);
-            if (op->od->l3dgw_port && op == op->od->l3dgw_port
-                && op->od->l3redirect_port) {
-                /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
-                 * should only be sent from the gateway chassi, so that
-                 * upstream MAC learning points to the gateway chassis.
-                 * Also need to avoid generation of multiple ND replies
-                 * from different chassis. */
-                ds_put_format(match, "is_chassis_resident(%s)",
-                              op->od->l3redirect_port->json_key);
+                    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DNAT, 100,
+                                            ds_cstr(match), ds_cstr(actions),
+                                            &nat->header_);
+                }
             }
 
-            build_lrouter_nd_flow(op->od, op, "nd_na_router",
-                                  op->lrp_networks.ipv6_addrs[i].addr_s,
-                                  op->lrp_networks.ipv6_addrs[i].sn_addr_s,
-                                  REG_INPORT_ETH_ADDR, match, false, 90,
-                                  &op->nbrp->header_, lflows);
-        }
-
-        /* UDP/TCP port unreachable */
-        if (!smap_get(&op->od->nbr->options, "chassis")
-            && !op->od->l3dgw_port) {
-            for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
-                ds_clear(match);
-                ds_put_format(match,
-                              "ip6 && ip6.dst == %s && !ip.later_frag && tcp",
-                              op->lrp_networks.ipv6_addrs[i].addr_s);
-                const char *action = "tcp_reset {"
-                                     "eth.dst <-> eth.src; "
-                                     "ip6.dst <-> ip6.src; "
-                                     "next; };";
-                ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
-                                        80, ds_cstr(match), action,
-                                        &op->nbrp->header_);
-
-                ds_clear(match);
-                ds_put_format(match,
-                              "ip6 && ip6.dst == %s && !ip.later_frag && udp",
-                              op->lrp_networks.ipv6_addrs[i].addr_s);
-                action = "icmp6 {"
-                         "eth.dst <-> eth.src; "
-                         "ip6.dst <-> ip6.src; "
-                         "ip.ttl = 255; "
-                         "icmp6.type = 1; "
-                         "icmp6.code = 4; "
-                         "next; };";
-                ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
-                                        80, ds_cstr(match), action,
-                                        &op->nbrp->header_);
-
-                ds_clear(match);
-                ds_put_format(match,
-                              "ip6 && ip6.dst == %s && !ip.later_frag",
-                              op->lrp_networks.ipv6_addrs[i].addr_s);
-                action = "icmp6 {"
-                         "eth.dst <-> eth.src; "
-                         "ip6.dst <-> ip6.src; "
-                         "ip.ttl = 255; "
-                         "icmp6.type = 1; "
-                         "icmp6.code = 3; "
-                         "next; };";
-                ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
-                                        70, ds_cstr(match), action,
-                                        &op->nbrp->header_);
-            }
-        }
+            /* ARP resolve for NAT IPs. */
+            if (od->l3dgw_port) {
+                if (!strcmp(nat->type, "snat")) {
+                    ds_clear(match);
+                    ds_put_format(
+                        match, "inport == %s && %s == %s",
+                        od->l3dgw_port->json_key,
+                        is_v6 ? "ip6.src" : "ip4.src", nat->external_ip);
+                    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_INPUT,
+                                            120, ds_cstr(match), "next;",
+                                            &nat->header_);
+                }
 
-        /* ICMPv6 time exceeded */
-        for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
-            /* skip link-local address */
-            if (in6_is_lla(&op->lrp_networks.ipv6_addrs[i].network)) {
-                continue;
+                if (!sset_contains(&nat_entries, nat->external_ip)) {
+                    ds_clear(match);
+                    ds_put_format(
+                        match, "outport == %s && %s == %s",
+                        od->l3dgw_port->json_key,
+                        is_v6 ? REG_NEXT_HOP_IPV6 : REG_NEXT_HOP_IPV4,
+                        nat->external_ip);
+                    ds_clear(actions);
+                    ds_put_format(
+                        actions, "eth.dst = %s; next;",
+                        distributed ? nat->external_mac :
+                        od->l3dgw_port->lrp_networks.ea_s);
+                    ovn_lflow_add_with_hint(lflows, od,
+                                            S_ROUTER_IN_ARP_RESOLVE,
+                                            100, ds_cstr(match),
+                                            ds_cstr(actions),
+                                            &nat->header_);
+                    sset_add(&nat_entries, nat->external_ip);
+                }
+            } else {
+                /* Add the NAT external_ip to the nat_entries even for
+                 * gateway routers. This is required for adding load balancer
+                 * flows. */
+                sset_add(&nat_entries, nat->external_ip);
             }
 
-            ds_clear(match);
-            ds_clear(actions);
-
-            ds_put_format(match,
-                          "inport == %s && ip6 && "
-                          "ip6.src == %s/%d && "
-                          "ip.ttl == {0, 1} && !ip.later_frag",
-                          op->json_key,
-                          op->lrp_networks.ipv6_addrs[i].network_s,
-                          op->lrp_networks.ipv6_addrs[i].plen);
-            ds_put_format(actions,
-                          "icmp6 {"
-                          "eth.dst <-> eth.src; "
-                          "ip6.dst = ip6.src; "
-                          "ip6.src = %s; "
-                          "ip.ttl = 255; "
-                          "icmp6.type = 3; /* Time exceeded */ "
-                          "icmp6.code = 0; /* TTL exceeded in transit */ "
-                          "next; };",
-                          op->lrp_networks.ipv6_addrs[i].addr_s);
-            ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
-                                    ds_cstr(match), ds_cstr(actions),
-                                    &op->nbrp->header_);
-        }
-    }
+            /* Egress UNDNAT table: It is for already established connections'
+             * reverse traffic. i.e., DNAT has already been done in ingress
+             * pipeline and now the packet has entered the egress pipeline as
+             * part of a reply. We undo the DNAT here.
+             *
+             * Note that this only applies for NAT on a distributed router.
+             * Undoing DNAT on a gateway router is done in the ingress DNAT
+             * pipeline stage. */
+            if (od->l3dgw_port && (!strcmp(nat->type, "dnat")
+                || !strcmp(nat->type, "dnat_and_snat"))) {
+                ds_clear(match);
+                ds_put_format(match, "ip && ip%s.src == %s"
+                                      " && outport == %s",
+                              is_v6 ? "6" : "4",
+                              nat->logical_ip,
+                              od->l3dgw_port->json_key);
+                if (!distributed && od->l3redirect_port) {
+                    /* Flows for NAT rules that are centralized are only
+                     * programmed on the gateway chassis. */
+                    ds_put_format(match, " && is_chassis_resident(%s)",
+                                  od->l3redirect_port->json_key);
+                }
+                ds_clear(actions);
+                if (distributed) {
+                    ds_put_format(actions, "eth.src = "ETH_ADDR_FMT"; ",
+                                  ETH_ADDR_ARGS(mac));
+                }
 
-}
+                if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+                    ds_put_format(actions, "ip%s.src=%s; next;",
+                                  is_v6 ? "6" : "4", nat->external_ip);
+                } else {
+                    ds_put_format(actions, "ct_dnat;");
+                }
 
-static void
-build_lrouter_arp_nd_for_datapath(struct ovn_datapath *od,
-                                  struct hmap *lflows)
-{
-    if (od->nbr) {
+                ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_UNDNAT, 100,
+                                        ds_cstr(match), ds_cstr(actions),
+                                        &nat->header_);
+            }
 
-        /* Priority-90-92 flows handle ARP requests and ND packets. Most are
-         * per logical port but DNAT addresses can be handled per datapath
-         * for non gateway router ports.
-         *
-         * Priority 91 and 92 flows are added for each gateway router
-         * port to handle the special cases. In case we get the packet
-         * on a regular port, just reply with the port's ETH address.
-         */
-        for (int i = 0; i < od->nbr->n_nat; i++) {
-            struct ovn_nat *nat_entry = &od->nat_entries[i];
+            /* Egress SNAT table: Packets enter the egress pipeline with
+             * source ip address that needs to be SNATted to an external ip
+             * address. */
+            if (!strcmp(nat->type, "snat")
+                || !strcmp(nat->type, "dnat_and_snat")) {
+                if (!od->l3dgw_port) {
+                    /* Gateway router. */
+                    ds_clear(match);
+                    ds_put_format(match, "ip && ip%s.src == %s",
+                                  is_v6 ? "6" : "4",
+                                  nat->logical_ip);
+                    ds_clear(actions);
 
-            /* Skip entries we failed to parse. */
-            if (!nat_entry_is_valid(nat_entry)) {
-                continue;
-            }
+                    if (allowed_ext_ips || exempted_ext_ips) {
+                        lrouter_nat_add_ext_ip_match(od, lflows, match, nat,
+                                                     is_v6, false, mask);
+                    }
 
-            /* Skip SNAT entries for now, we handle unique SNAT IPs separately
-             * below.
-             */
-            if (!strcmp(nat_entry->nb->type, "snat")) {
-                continue;
-            }
-            build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows);
-        }
+                    if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+                        ds_put_format(actions, "ip%s.src=%s; next;",
+                                      is_v6 ? "6" : "4", nat->external_ip);
+                    } else {
+                        ds_put_format(actions, "ct_snat(%s",
+                                      nat->external_ip);
 
-        /* Now handle SNAT entries too, one per unique SNAT IP. */
-        struct shash_node *snat_snode;
-        SHASH_FOR_EACH (snat_snode, &od->snat_ips) {
-            struct ovn_snat_ip *snat_ip = snat_snode->data;
+                        if (nat->external_port_range[0]) {
+                            ds_put_format(actions, ",%s",
+                                          nat->external_port_range);
+                        }
+                        ds_put_format(actions, ");");
+                    }
 
-            if (ovs_list_is_empty(&snat_ip->snat_entries)) {
-                continue;
-            }
+                    /* The priority here is calculated such that the
+                     * nat->logical_ip with the longest mask gets a higher
+                     * priority. */
+                    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT,
+                                            cidr_bits + 1,
+                                            ds_cstr(match), ds_cstr(actions),
+                                            &nat->header_);
+                } else {
+                    uint16_t priority = cidr_bits + 1;
 
-            struct ovn_nat *nat_entry =
-                CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries),
-                             struct ovn_nat, ext_addr_list_node);
-            build_lrouter_nat_arp_nd_flow(od, nat_entry, lflows);
-        }
-    }
-}
+                    /* Distributed router. */
+                    ds_clear(match);
+                    ds_put_format(match, "ip && ip%s.src == %s"
+                                          " && outport == %s",
+                                  is_v6 ? "6" : "4",
+                                  nat->logical_ip,
+                                  od->l3dgw_port->json_key);
+                    if (!distributed && od->l3redirect_port) {
+                        /* Flows for NAT rules that are centralized are only
+                         * programmed on the gateway chassis. */
+                        priority += 128;
+                        ds_put_format(match, " && is_chassis_resident(%s)",
+                                      od->l3redirect_port->json_key);
+                    }
+                    ds_clear(actions);
 
-/* Logical router ingress table 3: IP Input for IPv4. */
-static void
-build_lrouter_ipv4_ip_input(struct ovn_port *op,
-                            struct hmap *lflows,
-                            struct ds *match, struct ds *actions)
-{
-    /* No ingress packets are accepted on a chassisredirect
-     * port, so no need to program flows for that port. */
-    if (op->nbrp && (!op->derived)) {
-        if (op->lrp_networks.n_ipv4_addrs) {
-            /* L3 admission control: drop packets that originate from an
-             * IPv4 address owned by the router or a broadcast address
-             * known to the router (priority 100). */
-            ds_clear(match);
-            ds_put_cstr(match, "ip4.src == ");
-            op_put_v4_networks(match, op, true);
-            ds_put_cstr(match, " && "REGBIT_EGRESS_LOOPBACK" == 0");
-            ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
-                                    ds_cstr(match), "drop;",
-                                    &op->nbrp->header_);
+                    if (allowed_ext_ips || exempted_ext_ips) {
+                        lrouter_nat_add_ext_ip_match(od, lflows, match, nat,
+                                                     is_v6, false, mask);
+                    }
 
-            /* ICMP echo reply.  These flows reply to ICMP echo requests
-             * received for the router's IP address. Since packets only
-             * get here as part of the logical router datapath, the inport
-             * (i.e. the incoming locally attached net) does not matter.
-             * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
-            ds_clear(match);
-            ds_put_cstr(match, "ip4.dst == ");
-            op_put_v4_networks(match, op, false);
-            ds_put_cstr(match, " && icmp4.type == 8 && icmp4.code == 0");
+                    if (distributed) {
+                        ds_put_format(actions, "eth.src = "ETH_ADDR_FMT"; ",
+                                      ETH_ADDR_ARGS(mac));
+                    }
 
-            const char * icmp_actions = "ip4.dst <-> ip4.src; "
-                          "ip.ttl = 255; "
-                          "icmp4.type = 0; "
-                          "flags.loopback = 1; "
-                          "next; ";
-            ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
-                                    ds_cstr(match), icmp_actions,
-                                    &op->nbrp->header_);
-        }
+                    if (!strcmp(nat->type, "dnat_and_snat") && stateless) {
+                        ds_put_format(actions, "ip%s.src=%s; next;",
+                                      is_v6 ? "6" : "4", nat->external_ip);
+                    } else {
+                        ds_put_format(actions, "ct_snat(%s",
+                                      nat->external_ip);
+                        if (nat->external_port_range[0]) {
+                            ds_put_format(actions, ",%s",
+                                          nat->external_port_range);
+                        }
+                        ds_put_format(actions, ");");
+                    }
 
-        /* ICMP time exceeded */
-        for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
-            ds_clear(match);
-            ds_clear(actions);
+                    /* The priority here is calculated such that the
+                     * nat->logical_ip with the longest mask gets a higher
+                     * priority. */
+                    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_SNAT,
+                                            priority, ds_cstr(match),
+                                            ds_cstr(actions),
+                                            &nat->header_);
+                }
+            }
 
-            ds_put_format(match,
-                          "inport == %s && ip4 && "
-                          "ip.ttl == {0, 1} && !ip.later_frag", op->json_key);
-            ds_put_format(actions,
-                          "icmp4 {"
-                          "eth.dst <-> eth.src; "
-                          "icmp4.type = 11; /* Time exceeded */ "
-                          "icmp4.code = 0; /* TTL exceeded in transit */ "
-                          "ip4.dst = ip4.src; "
-                          "ip4.src = %s; "
-                          "ip.ttl = 255; "
-                          "next; };",
-                          op->lrp_networks.ipv4_addrs[i].addr_s);
-            ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT, 40,
-                                    ds_cstr(match), ds_cstr(actions),
-                                    &op->nbrp->header_);
-        }
+            /* Logical router ingress table 0:
+             * For NAT on a distributed router, add rules allowing
+             * ingress traffic with eth.dst matching nat->external_mac
+             * on the l3dgw_port instance where nat->logical_port is
+             * resident. */
+            if (distributed) {
+                /* Store the ethernet address of the port receiving the packet.
+                 * This will save us from having to match on inport further
+                 * down in the pipeline.
+                 */
+                ds_clear(actions);
+                ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;",
+                              od->l3dgw_port->lrp_networks.ea_s);
 
-        /* ARP reply.  These flows reply to ARP requests for the router's own
-         * IP address. */
-        for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
-            ds_clear(match);
-            ds_put_format(match, "arp.spa == %s/%u",
-                          op->lrp_networks.ipv4_addrs[i].network_s,
-                          op->lrp_networks.ipv4_addrs[i].plen);
+                ds_clear(match);
+                ds_put_format(match,
+                              "eth.dst == "ETH_ADDR_FMT" && inport == %s"
+                              " && is_chassis_resident(\"%s\")",
+                              ETH_ADDR_ARGS(mac),
+                              od->l3dgw_port->json_key,
+                              nat->logical_port);
+                ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ADMISSION, 50,
+                                        ds_cstr(match), ds_cstr(actions),
+                                        &nat->header_);
+            }
 
-            if (op->od->l3dgw_port && op->od->l3redirect_port && op->peer
-                && op->peer->od->n_localnet_ports) {
-                bool add_chassis_resident_check = false;
-                if (op == op->od->l3dgw_port) {
-                    /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
-                     * should only be sent from the gateway chassis, so that
-                     * upstream MAC learning points to the gateway chassis.
-                     * Also need to avoid generation of multiple ARP responses
-                     * from different chassis. */
-                    add_chassis_resident_check = true;
+            /* Ingress Gateway Redirect Table: For NAT on a distributed
+             * router, add flows that are specific to a NAT rule.  These
+             * flows indicate the presence of an applicable NAT rule that
+             * can be applied in a distributed manner.
+             * In particular REG_SRC_IPV4/REG_SRC_IPV6 and eth.src are set to
+             * NAT external IP and NAT external mac so the ARP request
+             * generated in the following stage is sent out with proper IP/MAC
+             * src addresses.
+             */
+            if (distributed) {
+                ds_clear(match);
+                ds_clear(actions);
+                ds_put_format(match,
+                              "ip%s.src == %s && outport == %s && "
+                              "is_chassis_resident(\"%s\")",
+                              is_v6 ? "6" : "4", nat->logical_ip,
+                              od->l3dgw_port->json_key, nat->logical_port);
+                ds_put_format(actions, "eth.src = %s; %s = %s; next;",
+                              nat->external_mac,
+                              is_v6 ? REG_SRC_IPV6 : REG_SRC_IPV4,
+                              nat->external_ip);
+                ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_GW_REDIRECT,
+                                        100, ds_cstr(match),
+                                        ds_cstr(actions), &nat->header_);
+            }
+
+            /* Egress Loopback table: For NAT on a distributed router.
+             * If packets in the egress pipeline on the distributed
+             * gateway port have ip.dst matching a NAT external IP, then
+             * loop a clone of the packet back to the beginning of the
+             * ingress pipeline with inport = outport. */
+            if (od->l3dgw_port) {
+                /* Distributed router. */
+                ds_clear(match);
+                ds_put_format(match, "ip%s.dst == %s && outport == %s",
+                              is_v6 ? "6" : "4",
+                              nat->external_ip,
+                              od->l3dgw_port->json_key);
+                if (!distributed) {
+                    ds_put_format(match, " && is_chassis_resident(%s)",
+                                  od->l3redirect_port->json_key);
                 } else {
-                    /* Check if the option 'reside-on-redirect-chassis'
-                     * is set to true on the router port. If set to true
-                     * and if peer's logical switch has a localnet port, it
-                     * means the router pipeline for the packets from
-                     * peer's logical switch is be run on the chassis
-                     * hosting the gateway port and it should reply to the
-                     * ARP requests for the router port IPs.
-                     */
-                    add_chassis_resident_check = smap_get_bool(
-                        &op->nbrp->options,
-                        "reside-on-redirect-chassis", false);
+                    ds_put_format(match, " && is_chassis_resident(\"%s\")",
+                                  nat->logical_port);
                 }
 
-                if (add_chassis_resident_check) {
-                    ds_put_format(match, " && is_chassis_resident(%s)",
-                                  op->od->l3redirect_port->json_key);
+                ds_clear(actions);
+                ds_put_format(actions,
+                              "clone { ct_clear; "
+                              "inport = outport; outport = \"\"; "
+                              "flags = 0; flags.loopback = 1; ");
+                for (int j = 0; j < MFF_N_LOG_REGS; j++) {
+                    ds_put_format(actions, "reg%d = 0; ", j);
                 }
+                ds_put_format(actions, REGBIT_EGRESS_LOOPBACK" = 1; "
+                              "next(pipeline=ingress, table=%d); };",
+                              ovn_stage_get_table(S_ROUTER_IN_ADMISSION));
+                ovn_lflow_add_with_hint(lflows, od, S_ROUTER_OUT_EGR_LOOP, 100,
+                                        ds_cstr(match), ds_cstr(actions),
+                                        &nat->header_);
             }
-
-            build_lrouter_arp_flow(op->od, op,
-                                   op->lrp_networks.ipv4_addrs[i].addr_s,
-                                   REG_INPORT_ETH_ADDR, match, false, 90,
-                                   &op->nbrp->header_, lflows);
         }
 
-        /* A set to hold all load-balancer vips that need ARP responses. */
-        struct sset all_ips_v4 = SSET_INITIALIZER(&all_ips_v4);
-        struct sset all_ips_v6 = SSET_INITIALIZER(&all_ips_v6);
-        get_router_load_balancer_ips(op->od, &all_ips_v4, &all_ips_v6);
-
-        const char *ip_address;
-        SSET_FOR_EACH (ip_address, &all_ips_v4) {
-            ds_clear(match);
-            if (op == op->od->l3dgw_port) {
-                ds_put_format(match, "is_chassis_resident(%s)",
-                              op->od->l3redirect_port->json_key);
+        /* Handle force SNAT options set in the gateway router. */
+        if (!od->l3dgw_port) {
+            if (dnat_force_snat_ip) {
+                if (od->dnat_force_snat_addrs.n_ipv4_addrs) {
+                    build_lrouter_force_snat_flows(lflows, od, "4",
+                        od->dnat_force_snat_addrs.ipv4_addrs[0].addr_s,
+                        "dnat");
+                }
+                if (od->dnat_force_snat_addrs.n_ipv6_addrs) {
+                    build_lrouter_force_snat_flows(lflows, od, "6",
+                        od->dnat_force_snat_addrs.ipv6_addrs[0].addr_s,
+                        "dnat");
+                }
             }
-
-            build_lrouter_arp_flow(op->od, op,
-                                   ip_address, REG_INPORT_ETH_ADDR,
-                                   match, false, 90, NULL, lflows);
-        }
-
-        SSET_FOR_EACH (ip_address, &all_ips_v6) {
-            ds_clear(match);
-            if (op == op->od->l3dgw_port) {
-                ds_put_format(match, "is_chassis_resident(%s)",
-                              op->od->l3redirect_port->json_key);
+            if (lb_force_snat_ip) {
+                if (od->lb_force_snat_addrs.n_ipv4_addrs) {
+                    build_lrouter_force_snat_flows(lflows, od, "4",
+                        od->lb_force_snat_addrs.ipv4_addrs[0].addr_s, "lb");
+                }
+                if (od->lb_force_snat_addrs.n_ipv6_addrs) {
+                    build_lrouter_force_snat_flows(lflows, od, "6",
+                        od->lb_force_snat_addrs.ipv6_addrs[0].addr_s, "lb");
+                }
             }
 
-            build_lrouter_nd_flow(op->od, op, "nd_na",
-                                  ip_address, NULL, REG_INPORT_ETH_ADDR,
-                                  match, false, 90, NULL, lflows);
+            /* For gateway router, re-circulate every packet through
+             * the DNAT zone.  This helps with the following.
+             *
+             * Any packet that needs to be unDNATed in the reverse
+             * direction gets unDNATed. Ideally this could be done in
+             * the egress pipeline. But since the gateway router
+             * does not have any feature that depends on the source
+             * ip address being external IP address for IP routing,
+             * we can do it here, saving a future re-circulation. */
+            ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
+                          "ip", "flags.loopback = 1; ct_dnat;");
         }
 
-        sset_destroy(&all_ips_v4);
-        sset_destroy(&all_ips_v6);
-
-        if (!smap_get(&op->od->nbr->options, "chassis")
-            && !op->od->l3dgw_port) {
-            /* UDP/TCP port unreachable. */
-            for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
-                ds_clear(match);
-                ds_put_format(match,
-                              "ip4 && ip4.dst == %s && !ip.later_frag && udp",
-                              op->lrp_networks.ipv4_addrs[i].addr_s);
-                const char *action = "icmp4 {"
-                                     "eth.dst <-> eth.src; "
-                                     "ip4.dst <-> ip4.src; "
-                                     "ip.ttl = 255; "
-                                     "icmp4.type = 3; "
-                                     "icmp4.code = 3; "
-                                     "next; };";
-                ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
-                                        80, ds_cstr(match), action,
-                                        &op->nbrp->header_);
-
-                ds_clear(match);
-                ds_put_format(match,
-                              "ip4 && ip4.dst == %s && !ip.later_frag && tcp",
-                              op->lrp_networks.ipv4_addrs[i].addr_s);
-                action = "tcp_reset {"
-                         "eth.dst <-> eth.src; "
-                         "ip4.dst <-> ip4.src; "
-                         "next; };";
-                ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
-                                        80, ds_cstr(match), action,
-                                        &op->nbrp->header_);
-
-                ds_clear(match);
-                ds_put_format(match,
-                              "ip4 && ip4.dst == %s && !ip.later_frag",
-                              op->lrp_networks.ipv4_addrs[i].addr_s);
-                action = "icmp4 {"
-                         "eth.dst <-> eth.src; "
-                         "ip4.dst <-> ip4.src; "
-                         "ip.ttl = 255; "
-                         "icmp4.type = 3; "
-                         "icmp4.code = 2; "
-                         "next; };";
-                ovn_lflow_add_with_hint(lflows, op->od, S_ROUTER_IN_IP_INPUT,
-                                        70, ds_cstr(match), action,
-                                        &op->nbrp->header_);
-            }
+        /* Load balancing and packet defrag are only valid on
+         * Gateway routers or router with gateway port. */
+        if (!smap_get(&od->nbr->options, "chassis") && !od->l3dgw_port) {
+            sset_destroy(&nat_entries);
+            return;
         }
 
-        /* Drop IP traffic destined to router owned IPs except if the IP is
-         * also a SNAT IP. Those are dropped later, in stage
-         * "lr_in_arp_resolve", if unSNAT was unsuccessful.
-         *
-         * Priority 60.
-         */
-        build_lrouter_drop_own_dest(op, S_ROUTER_IN_IP_INPUT, 60, false,
-                                    lflows);
+        /* A set to hold all ips that need defragmentation and tracking. */
+        struct sset all_ips = SSET_INITIALIZER(&all_ips);
 
-        /* ARP / ND handling for external IP addresses.
-         *
-         * DNAT and SNAT IP addresses are external IP addresses that need ARP
-         * handling.
-         *
-         * These are already taken care globally, per router. The only
-         * exception is on the l3dgw_port where we might need to use a
-         * different ETH address.
-         */
-        if (op != op->od->l3dgw_port) {
-            return;
-        }
+        for (int i = 0; i < od->nbr->n_load_balancer; i++) {
+            struct nbrec_load_balancer *nb_lb = od->nbr->load_balancer[i];
+            struct ovn_northd_lb *lb =
+                ovn_northd_lb_find(lbs, &nb_lb->header_.uuid);
+            ovs_assert(lb);
 
-        for (size_t i = 0; i < op->od->nbr->n_nat; i++) {
-            struct ovn_nat *nat_entry = &op->od->nat_entries[i];
+            for (size_t j = 0; j < lb->n_vips; j++) {
+                struct ovn_lb_vip *lb_vip = &lb->vips[j];
+                struct ovn_northd_lb_vip *lb_vip_nb = &lb->vips_nb[j];
+                ds_clear(actions);
+                build_lb_vip_actions(lb_vip, lb_vip_nb, actions,
+                                     lb->selection_fields, false);
 
-            /* Skip entries we failed to parse. */
-            if (!nat_entry_is_valid(nat_entry)) {
-                continue;
-            }
+                if (!sset_contains(&all_ips, lb_vip->vip_str)) {
+                    sset_add(&all_ips, lb_vip->vip_str);
+                    /* If there are any load balancing rules, we should send
+                     * the packet to conntrack for defragmentation and
+                     * tracking.  This helps with two things.
+                     *
+                     * 1. With tracking, we can send only new connections to
+                     *    pick a DNAT ip address from a group.
+                     * 2. If there are L4 ports in load balancing rules, we
+                     *    need the defragmentation to match on L4 ports. */
+                    ds_clear(match);
+                    if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) {
+                        ds_put_format(match, "ip && ip4.dst == %s",
+                                      lb_vip->vip_str);
+                    } else {
+                        ds_put_format(match, "ip && ip6.dst == %s",
+                                      lb_vip->vip_str);
+                    }
+                    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG,
+                                            100, ds_cstr(match), "ct_next;",
+                                            &nb_lb->header_);
+                }
 
-            /* Skip SNAT entries for now, we handle unique SNAT IPs separately
-             * below.
-             */
-            if (!strcmp(nat_entry->nb->type, "snat")) {
-                continue;
-            }
-            build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows);
-        }
+                /* Higher priority rules are added for load-balancing in DNAT
+                 * table.  For every match (on a VIP[:port]), we add two flows
+                 * via add_router_lb_flow().  One flow is for specific matching
+                 * on ct.new with an action of "ct_lb($targets);".  The other
+                 * flow is for ct.est with an action of "ct_dnat;". */
+                ds_clear(match);
+                if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) {
+                    ds_put_format(match, "ip && ip4.dst == %s",
+                                  lb_vip->vip_str);
+                } else {
+                    ds_put_format(match, "ip && ip6.dst == %s",
+                                  lb_vip->vip_str);
+                }
 
-        /* Now handle SNAT entries too, one per unique SNAT IP. */
-        struct shash_node *snat_snode;
-        SHASH_FOR_EACH (snat_snode, &op->od->snat_ips) {
-            struct ovn_snat_ip *snat_ip = snat_snode->data;
+                int prio = 110;
+                bool is_udp = nullable_string_is_equal(nb_lb->protocol, "udp");
+                bool is_sctp = nullable_string_is_equal(nb_lb->protocol,
+                                                        "sctp");
+                const char *proto = is_udp ? "udp" : is_sctp ? "sctp" : "tcp";
 
-            if (ovs_list_is_empty(&snat_ip->snat_entries)) {
-                continue;
-            }
+                if (lb_vip->vip_port) {
+                    ds_put_format(match, " && %s && %s.dst == %d", proto,
+                                  proto, lb_vip->vip_port);
+                    prio = 120;
+                }
 
-            struct ovn_nat *nat_entry =
-                CONTAINER_OF(ovs_list_front(&snat_ip->snat_entries),
-                             struct ovn_nat, ext_addr_list_node);
-            build_lrouter_port_nat_arp_nd_flow(op, nat_entry, lflows);
+                if (od->l3redirect_port &&
+                    (lb_vip->n_backends || !lb_vip->empty_backend_rej)) {
+                    ds_put_format(match, " && is_chassis_resident(%s)",
+                                  od->l3redirect_port->json_key);
+                }
+                add_router_lb_flow(lflows, od, match, actions, prio,
+                                   lb_force_snat_ip, lb_vip, proto,
+                                   nb_lb, meter_groups, &nat_entries);
+            }
         }
+        sset_destroy(&all_ips);
+        sset_destroy(&nat_entries);
     }
 }
 
 
+
 struct lswitch_flow_build_info {
     struct hmap *datapaths;
     struct hmap *ports;
@@ -11365,6 +11354,8 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od,
                                         &lsi->actions);
     build_misc_local_traffic_drop_flows_for_lrouter(od, lsi->lflows);
     build_lrouter_arp_nd_for_datapath(od, lsi->lflows);
+    build_lrouter_nat_defrag_and_lb(od, lsi->lflows, lsi->meter_groups,
+                                    lsi->lbs, &lsi->match, &lsi->actions);
 }
 
 /* Helper function to combine all lflow generation which is iterated by port.
@@ -11463,9 +11454,6 @@ build_lswitch_and_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
     ds_destroy(&lsi.actions);
 
     build_lswitch_flows(datapaths, lflows);
-
-    /* Legacy lrouter build - to be migrated. */
-    build_lrouter_flows(datapaths, lflows, meter_groups, lbs);
 }
 
 struct ovn_dp_group {
-- 
2.20.1



More information about the dev mailing list