[ovs-dev] [PATCH ovn 2/2] ovn-ic: Route advertisement.

Han Zhou hzhou at ovn.org
Mon Feb 10 07:00:37 UTC 2020


Support automatic route advertisement and learning for OVN
interconnection.  Static routes and directly connected subnets
can be automatically advertised to avoid manual configuration
across AZs.  This feature is disabled by default, and can be
enabled at each AZ level by:

    ovn-nbctl set NB_Global . options:ic-route-ad=true \
                              options:ic-route-learn=true

More options are available. See ovn-nb(5).

Signed-off-by: Han Zhou <hzhou at ovn.org>
---
 Documentation/tutorials/ovn-interconnection.rst |  28 +-
 TODO.rst                                        |   2 -
 ic/ovn-ic.c                                     | 686 ++++++++++++++++++++++++
 ovn-architecture.7.xml                          |  11 +
 ovn-ic-sb.ovsschema                             |  13 +-
 ovn-ic-sb.xml                                   |  32 ++
 ovn-nb.xml                                      |  74 +++
 tests/ovn-ic.at                                 | 117 ++++
 tests/ovn.at                                    |  11 +-
 utilities/ovn-nbctl.c                           |   4 +
 10 files changed, 967 insertions(+), 11 deletions(-)

diff --git a/Documentation/tutorials/ovn-interconnection.rst b/Documentation/tutorials/ovn-interconnection.rst
index 2f9d6d7..bb08006 100644
--- a/Documentation/tutorials/ovn-interconnection.rst
+++ b/Documentation/tutorials/ovn-interconnection.rst
@@ -180,9 +180,35 @@ In ovn-east, add below route ::
 
     $ ovn-nbctl lr-route-add lr1 10.0.2.0/24 169.254.100.2
 
-In ovs-west, add below route ::
+In ovn-west, add below route ::
 
     $ ovn-nbctl lr-route-add lr2 10.0.1.0/24 169.254.100.1
 
 Now the traffic should be able to go through between the workloads through
 tunnels crossing gateway nodes of ovn-east and ovn-west.
+
+Route Advertisement
+-------------------
+
+Alternatively, you can avoid the above manual static route configuration by
+enabling route advertisement and learning on each OVN deployment ::
+
+    $ ovn-nbctl set NB_Global . options:ic-route-ad=true options:ic-route-learn=true
+
+With this setting, the above routes will be automatically learned and
+configured in Northbound DB in each deployment.  For example, in ovn-east, you
+will see the route ::
+
+    $ ovn-nbctl lr-route-list lr1
+    IPv4 Routes
+                 10.0.2.0/24             169.254.100.2 dst-ip (learned)
+
+In ovn-west you will see ::
+
+    $ ovn-nbctl lr-route-list lr2
+    IPv4 Routes
+                 10.0.1.0/24             169.254.100.1 dst-ip (learned)
+
+Static routes configured in the routers can be advertised and learned as well.
+For more details of route advertisement and its configuration options, please
+see ``ovn-nb(5)``.
diff --git a/TODO.rst b/TODO.rst
index fbab508..809d1c9 100644
--- a/TODO.rst
+++ b/TODO.rst
@@ -149,5 +149,3 @@ OVN To-do List
 * OVN Interconnection
 
   * Packaging for RHEL, Debian, etc.
-
-  * Route advertisement between edge routers.
diff --git a/ic/ovn-ic.c b/ic/ovn-ic.c
index 25ca3f7..1166956 100644
--- a/ic/ovn-ic.c
+++ b/ic/ovn-ic.c
@@ -61,6 +61,7 @@ struct ic_context {
     struct ovsdb_idl_txn *ovninb_txn;
     struct ovsdb_idl_txn *ovnisb_txn;
     struct ovsdb_idl_index *nbrec_ls_by_name;
+    struct ovsdb_idl_index *nbrec_port_by_name;
     struct ovsdb_idl_index *sbrec_chassis_by_name;
     struct ovsdb_idl_index *sbrec_port_binding_by_name;
     struct ovsdb_idl_index *icsbrec_port_binding_by_ts;
@@ -517,6 +518,45 @@ sync_lsp_tnl_key(const struct nbrec_logical_switch_port *lsp,
 
 }
 
+/* Looks up the logical router that the transit switch port 'sb_pb' is
+ * connected to.
+ *
+ * The peer port of 'sb_pb' is located with find_peer_port(), and the
+ * "logical-router" key is read from the external_ids of the peer's datapath.
+ *
+ * Returns true and stores the router UUID in '*router_uuid' on success.
+ * Returns false if there is no peer port, the peer has no datapath, or
+ * smap_get_uuid() cannot obtain a UUID from the key. */
+static bool
+get_router_uuid_by_sb_pb(struct ic_context *ctx,
+                         const struct sbrec_port_binding *sb_pb,
+                         struct uuid *router_uuid)
+{
+    const struct sbrec_port_binding *router_pb = find_peer_port(ctx, sb_pb);
+    if (!router_pb || !router_pb->datapath) {
+        /* The function returns bool, so report failure with 'false' rather
+         * than a null pointer constant. */
+        return false;
+    }
+
+    return smap_get_uuid(&router_pb->datapath->external_ids, "logical-router",
+                         router_uuid);
+}
+
+/* Makes external_ids:router-id of the IC-SB port binding 'isb_pb' equal to
+ * the UUID of the logical router that 'sb_pb' connects to.
+ *
+ * Logs a rate-limited warning and leaves 'isb_pb' untouched when the router
+ * cannot be determined.  Does not write anything when the recorded value is
+ * already correct. */
+static void
+update_isb_pb_external_ids(struct ic_context *ctx,
+                           const struct sbrec_port_binding *sb_pb,
+                           const struct icsbrec_port_binding *isb_pb)
+{
+    struct uuid wanted;
+    if (!get_router_uuid_by_sb_pb(ctx, sb_pb, &wanted)) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+        VLOG_WARN_RL(&rl, "Can't get router uuid for transit switch port %s.",
+                     isb_pb->logical_port);
+        return;
+    }
+
+    struct uuid recorded;
+    bool up_to_date =
+        smap_get_uuid(&isb_pb->external_ids, "router-id", &recorded)
+        && uuid_equals(&wanted, &recorded);
+
+    if (!up_to_date) {
+        char *wanted_s = xasprintf(UUID_FMT, UUID_ARGS(&wanted));
+        icsbrec_port_binding_update_external_ids_setkey(isb_pb, "router-id",
+                                                        wanted_s);
+        free(wanted_s);
+    }
+}
+
 /* For each local port:
  *   - Sync from NB to ISB.
  *   - Sync gateway from SB to ISB.
@@ -554,6 +594,9 @@ sync_local_port(struct ic_context *ctx,
         }
     }
 
+    /* Sync external_ids:router-id to ISB */
+    update_isb_pb_external_ids(ctx, sb_pb, isb_pb);
+
     /* Sync back tunnel key from ISB to NB */
     sync_lsp_tnl_key(lsp, isb_pb->tunnel_key);
 }
@@ -649,6 +692,8 @@ create_isb_pb(struct ic_context *ctx,
         icsbrec_port_binding_set_gateway(isb_pb, crp->chassis->name);
     }
 
+    update_isb_pb_external_ids(ctx, sb_pb, isb_pb);
+
     /* XXX: Sync encap so that multiple encaps can be used for the same
      * gateway.  However, it is not needed for now, since we don't yet
      * support specifying encap type/ip for gateway chassis or ha-chassis
@@ -758,6 +803,642 @@ port_binding_run(struct ic_context *ctx,
     }
 }
 
+/* Per-router state that route_run() collects while processing one transit
+ * switch.  Entries live in an hmap hashed by the UUID of 'lr'. */
+struct ic_router_info {
+    struct hmap_node node;
+    const struct nbrec_logical_router *lr; /* key of hmap */
+    /* IC-SB port binding through which 'lr' is attached to the transit
+     * switch. */
+    const struct icsbrec_port_binding *isb_pb;
+    /* Contains "struct ic_route_info" for the static routes of 'lr' that
+     * carry external_ids:ic-learned-route. */
+    struct hmap routes_learned;
+};
+
+/* Represents an interconnection route entry.
+ *
+ * Entries are stored in an hmap keyed by ic_route_hash() over
+ * (prefix, plen, nexthop). */
+struct ic_route_info {
+    struct hmap_node node;
+    struct v46_ip prefix;       /* Destination network address. */
+    unsigned int plen;          /* Prefix length of 'prefix'. */
+    struct v46_ip nexthop;      /* Next hop address. */
+
+    /* Either nb_route or nb_lrp is set and the other one must be NULL.
+     * - For a route that is learned from IC-SB, or a static route that is
+     *   generated from a route that is configured in NB, the "nb_route"
+     *   is set.
+     * - For a route that is generated from a direct-connect subnet of
+     *   a logical router port, the "nb_lrp" is set. */
+    const struct nbrec_logical_router_static_route *nb_route;
+    const struct nbrec_logical_router_port *nb_lrp;
+};
+
+/* Returns the hash of the route key (prefix, plen, nexthop).
+ *
+ * All bytes of '*prefix' and '*nexthop' are hashed, with 'plen' used as the
+ * basis of the prefix hash, whose result in turn seeds the nexthop hash. */
+static uint32_t
+ic_route_hash(const struct v46_ip *prefix, unsigned int plen,
+              const struct v46_ip *nexthop)
+{
+    uint32_t prefix_hash = hash_bytes(prefix, sizeof *prefix,
+                                      (uint32_t) plen);
+
+    return hash_bytes(nexthop, sizeof *nexthop, prefix_hash);
+}
+
+/* Searches 'routes' for the entry whose prefix, prefix length and nexthop
+ * all equal the given ones.  Returns the entry, or NULL if there is none. */
+static struct ic_route_info *
+ic_route_find(struct hmap *routes, const struct v46_ip *prefix,
+              unsigned int plen, const struct v46_ip *nexthop)
+{
+    uint32_t key_hash = ic_route_hash(prefix, plen, nexthop);
+    struct ic_route_info *candidate;
+
+    HMAP_FOR_EACH_WITH_HASH (candidate, node, key_hash, routes) {
+        if (!ip46_equals(&candidate->prefix, prefix)) {
+            continue;
+        }
+        if (candidate->plen != plen) {
+            continue;
+        }
+        if (!ip46_equals(&candidate->nexthop, nexthop)) {
+            continue;
+        }
+        return candidate;
+    }
+    return NULL;
+}
+
+/* Returns the entry of 'ic_lrs' that refers to the logical router 'lr', or
+ * NULL if 'lr' has no entry.  Entries are hashed by the router's UUID and
+ * compared by row pointer. */
+static struct ic_router_info *
+ic_router_find(struct hmap *ic_lrs, const struct nbrec_logical_router *lr)
+{
+    struct ic_router_info *entry;
+
+    HMAP_FOR_EACH_WITH_HASH (entry, node, uuid_hash(&lr->header_.uuid),
+                             ic_lrs) {
+        if (entry->lr != lr) {
+            continue;
+        }
+        return entry;
+    }
+    return NULL;
+}
+
+/* Parses the textual route 's_prefix' -> 's_nexthop'.
+ *
+ * Both strings go through ip46_parse_cidr().  The prefix is stored in
+ * '*prefix' and '*plen', the next hop address in '*nexthop'; the length
+ * parsed for the next hop is discarded.  's_nexthop' is only parsed when
+ * 's_prefix' parsed successfully.
+ *
+ * Returns true if both strings parsed, otherwise false. */
+static bool
+parse_route(const char *s_prefix, const char *s_nexthop,
+            struct v46_ip *prefix, unsigned int *plen,
+            struct v46_ip *nexthop)
+{
+    unsigned int nexthop_plen;
+
+    return ip46_parse_cidr(s_prefix, prefix, plen)
+           && ip46_parse_cidr(s_nexthop, nexthop, &nexthop_plen);
+}
+
+/* Records the NB static route 'nb_route' in 'routes_learned', keyed by its
+ * parsed prefix, prefix length and nexthop.
+ *
+ * Returns true on success.  Returns false, without touching the map, if the
+ * route's ip_prefix or nexthop cannot be parsed. */
+static bool
+add_to_routes_learned(struct hmap *routes_learned,
+                      const struct nbrec_logical_router_static_route *nb_route)
+{
+    struct v46_ip dst, via;
+    unsigned int dst_len;
+
+    if (!parse_route(nb_route->ip_prefix, nb_route->nexthop,
+                     &dst, &dst_len, &via)) {
+        return false;
+    }
+
+    struct ic_route_info *entry = xzalloc(sizeof *entry);
+    entry->nb_route = nb_route;
+    entry->nexthop = via;
+    entry->plen = dst_len;
+    entry->prefix = dst;
+
+    uint32_t key_hash = ic_route_hash(&dst, dst_len, &via);
+    hmap_insert(routes_learned, &entry->node, key_hash);
+    return true;
+}
+
+/* Picks the next hop address of the given address 'family' from 'laddr' and
+ * stores it in '*nexthop'.
+ *
+ * - AF_INET: uses the first IPv4 address of 'laddr'.  Returns false if
+ *   'laddr' has no IPv4 address.
+ * - Any other family is treated as IPv6: uses the first IPv6 address of
+ *   'laddr' if there is one, otherwise the link-local address generated
+ *   from the MAC 'laddr->ea'.  Always returns true.
+ *
+ * '*nexthop' is fully overwritten, including on failure. */
+static bool
+get_nexthop_from_lport_addresses(int family,
+                                 const struct lport_addresses *laddr,
+                                 struct v46_ip *nexthop)
+{
+    /* The result is hashed byte-by-byte by ic_route_hash(), and callers may
+     * pass an uninitialized struct.  Zero it first so that the bytes not
+     * covered by the address written below are deterministic; otherwise the
+     * same route could hash differently from one run to the next. */
+    memset(nexthop, 0, sizeof *nexthop);
+    nexthop->family = family;
+    if (family == AF_INET) {
+        if (!laddr->n_ipv4_addrs) {
+            return false;
+        }
+        nexthop->ipv4 = laddr->ipv4_addrs[0].addr;
+        return true;
+    }
+
+    /* ipv6 */
+    if (laddr->n_ipv6_addrs) {
+        nexthop->ipv6 = laddr->ipv6_addrs[0].addr;
+        return true;
+    }
+
+    /* ipv6 link local */
+    in6_generate_lla(laddr->ea, &nexthop->ipv6);
+    return true;
+}
+
+/* Returns true if the network 'prefix'/'plen' lies within the link-local
+ * range of its address family: 169.254.0.0/16 for AF_INET, fe80::/10 for
+ * anything else (treated as IPv6).  A prefix shorter than the range itself
+ * is never considered link-local. */
+static bool
+prefix_is_link_local(struct v46_ip *prefix, unsigned int plen)
+{
+    if (prefix->family != AF_INET) {
+        /* ipv6, link local range is "fe80::/10". */
+        return plen >= 10
+               && prefix->ipv6.s6_addr[0] == 0xfe
+               && (prefix->ipv6.s6_addr[1] & 0xc0) == 0x80;
+    }
+
+    /* ipv4, link local range is "169.254.0.0/16". */
+    const ovs_be32 lla_net = htonl(0xa9fe0000);     /* 169.254.0.0 */
+    const ovs_be32 lla_mask = htonl(0xffff0000);
+
+    return plen >= 16 && (prefix->ipv4 & lla_mask) == lla_net;
+}
+
+/* Returns true if the network 'prefix'/'plen' falls inside any CIDR listed
+ * in the comma-separated option "ic-route-blacklist" of 'nb_options'.
+ *
+ * A blacklist entry matches when it has the same address family as 'prefix',
+ * its prefix length does not exceed 'plen', and it agrees with 'prefix' on
+ * all bits covered by its own mask.  Entries that fail to parse are logged
+ * (rate-limited) and skipped; empty entries, e.g. from a doubled or trailing
+ * comma, are skipped silently.
+ *
+ * Returns false if the option is unset or empty. */
+static bool
+prefix_is_black_listed(const struct smap *nb_options,
+                       struct v46_ip *prefix,
+                       unsigned int plen)
+{
+    const char *blacklist = smap_get(nb_options, "ic-route-blacklist");
+    if (!blacklist || !blacklist[0]) {
+        return false;
+    }
+    struct v46_ip bl_prefix;
+    unsigned int bl_plen;
+    char *cur, *next, *start;
+    /* strsep() modifies the string it walks, so work on a private copy. */
+    next = start = xstrdup(blacklist);
+    bool matched = false;
+    while ((cur = strsep(&next, ","))) {
+        if (!*cur) {
+            /* An empty token must not hide the entries that follow it. */
+            continue;
+        }
+
+        if (!ip46_parse_cidr(cur, &bl_prefix, &bl_plen)) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+            VLOG_WARN_RL(&rl, "Bad format in nb_global options:"
+                         "ic-route-blacklist: %s. CIDR expected.", cur);
+            continue;
+        }
+
+        if (bl_prefix.family != prefix->family) {
+            continue;
+        }
+
+        /* 192.168.0.0/16 does not belong to 192.168.0.0/17 */
+        if (plen < bl_plen) {
+            continue;
+        }
+
+        if (prefix->family == AF_INET) {
+            ovs_be32 mask = be32_prefix_mask(bl_plen);
+            if ((prefix->ipv4 & mask) != (bl_prefix.ipv4 & mask)) {
+                continue;
+            }
+        } else {
+            struct in6_addr mask = ipv6_create_mask(bl_plen);
+            bool same_network = true;
+            /* Compare byte by byte under the mask.  A 'continue' in this
+             * inner loop would only advance 'i', so the outcome is recorded
+             * in a flag and acted on after the loop. */
+            for (int i = 0; i < 16 && mask.s6_addr[i] != 0; i++) {
+                if ((prefix->ipv6.s6_addr[i] & mask.s6_addr[i])
+                    != (bl_prefix.ipv6.s6_addr[i] & mask.s6_addr[i])) {
+                    same_network = false;
+                    break;
+                }
+            }
+            if (!same_network) {
+                continue;
+            }
+        }
+        matched = true;
+        break;
+    }
+    free(start);
+    return matched;
+}
+
+/* Decides whether the route 'prefix'/'plen' with routing 'policy' (may be
+ * NULL) is to be advertised to IC-SB, according to 'nb_options'.
+ *
+ * The route is advertised only if all of the following hold, checked in this
+ * order:
+ *   - option "ic-route-ad" is true;
+ *   - it is not a default route (plen 0), unless "ic-route-ad-default" is
+ *     true;
+ *   - 'policy' is not "src-ip";
+ *   - the prefix is not link-local;
+ *   - the prefix is not blacklisted. */
+static bool
+route_need_advertise(const char *policy,
+                     struct v46_ip *prefix,
+                     unsigned int plen,
+                     const struct smap *nb_options)
+{
+    return smap_get_bool(nb_options, "ic-route-ad", false)
+           && (plen != 0
+               || smap_get_bool(nb_options, "ic-route-ad-default", false))
+           && !(policy && !strcmp(policy, "src-ip"))
+           && !prefix_is_link_local(prefix, plen)
+           && !prefix_is_black_listed(nb_options, prefix, plen);
+}
+
+/* Adds the NB static route 'nb_route' to 'routes_ad' if it is eligible for
+ * advertisement.
+ *
+ * The route is silently ignored if it cannot be parsed, if
+ * route_need_advertise() rejects it, or if 'nexthop_addresses' has no usable
+ * address of the prefix's family.  The advertised next hop is taken from
+ * 'nexthop_addresses', replacing the route's own next hop. */
+static void
+add_to_routes_ad(struct hmap *routes_ad,
+                 const struct nbrec_logical_router_static_route *nb_route,
+                 const struct lport_addresses *nexthop_addresses,
+                 const struct smap *nb_options)
+{
+    struct v46_ip dst, via;
+    unsigned int dst_len;
+
+    if (!parse_route(nb_route->ip_prefix, nb_route->nexthop,
+                     &dst, &dst_len, &via)
+        || !route_need_advertise(nb_route->policy, &dst, dst_len,
+                                 nb_options)
+        || !get_nexthop_from_lport_addresses(dst.family, nexthop_addresses,
+                                             &via)) {
+        return;
+    }
+
+    struct ic_route_info *entry = xzalloc(sizeof *entry);
+    entry->nb_route = nb_route;
+    entry->nexthop = via;
+    entry->plen = dst_len;
+    entry->prefix = dst;
+
+    uint32_t key_hash = ic_route_hash(&dst, dst_len, &via);
+    hmap_insert(routes_ad, &entry->node, key_hash);
+}
+
+/* Adds the directly connected subnet 'network' (a CIDR string configured on
+ * router port 'nb_lrp') to 'routes_ad' if it is eligible for advertisement.
+ *
+ * The network is ignored if it cannot be parsed, if route_need_advertise()
+ * rejects it (logged at debug level), or if 'nexthop_addresses' has no
+ * usable address of the network's family.  The advertised next hop is taken
+ * from 'nexthop_addresses'. */
+static void
+add_network_to_routes_ad(struct hmap *routes_ad, const char *network,
+                         const struct nbrec_logical_router_port *nb_lrp,
+                         const struct lport_addresses *nexthop_addresses,
+                         const struct smap *nb_options)
+{
+    struct v46_ip prefix, nexthop;
+    unsigned int plen;
+    if (!ip46_parse_cidr(network, &prefix, &plen)) {
+        return;
+    }
+
+    if (!route_need_advertise(NULL, &prefix, plen, nb_options)) {
+        VLOG_DBG("Route ad: skip network %s of lrp %s.",
+                 network, nb_lrp->name);
+        return;
+    }
+
+    if (!get_nexthop_from_lport_addresses(prefix.family,
+                                          nexthop_addresses,
+                                          &nexthop)) {
+        return;
+    }
+
+    /* Format the next hop according to its family; printing the 'ipv4'
+     * member of an IPv6 next hop would log a meaningless address. */
+    char nexthop_s[INET6_ADDRSTRLEN];
+    const char *formatted = nexthop.family == AF_INET
+        ? inet_ntop(AF_INET, &nexthop.ipv4, nexthop_s, sizeof nexthop_s)
+        : inet_ntop(AF_INET6, &nexthop.ipv6, nexthop_s, sizeof nexthop_s);
+    VLOG_DBG("Route ad: direct network %s of lrp %s, nexthop %s",
+             network, nb_lrp->name, formatted ? formatted : "<invalid>");
+
+    struct ic_route_info *ic_route = xzalloc(sizeof *ic_route);
+    ic_route->prefix = prefix;
+    ic_route->plen = plen;
+    ic_route->nexthop = nexthop;
+    ic_route->nb_lrp = nb_lrp;
+    hmap_insert(routes_ad, &ic_route->node,
+                ic_route_hash(&prefix, plen, &nexthop));
+}
+
+/* Decides whether the IC-SB route 'prefix'/'plen' is to be learned into NB,
+ * according to 'nb_options'.
+ *
+ * The route is learned only if all of the following hold, checked in this
+ * order:
+ *   - option "ic-route-learn" is true;
+ *   - it is not a default route (plen 0), unless "ic-route-learn-default"
+ *     is true;
+ *   - the prefix is not link-local;
+ *   - the prefix is not blacklisted. */
+static bool
+route_need_learn(struct v46_ip *prefix,
+                 unsigned int plen,
+                 const struct smap *nb_options)
+{
+    return smap_get_bool(nb_options, "ic-route-learn", false)
+           && (plen != 0
+               || smap_get_bool(nb_options, "ic-route-learn-default", false))
+           && !prefix_is_link_local(prefix, plen)
+           && !prefix_is_black_listed(nb_options, prefix, plen);
+}
+
+/* Reconciles the learned static routes of router 'ic_lr' in NB with the
+ * routes that availability zones other than 'az' have published in IC-SB.
+ *
+ * On entry 'ic_lr->routes_learned' holds the router's existing learned
+ * routes.  Every IC-SB route accepted by route_need_learn() either matches
+ * one of them, which is then taken out of the map, or is inserted into NB
+ * as a new static route of the router.  Whatever remains in the map
+ * afterwards has no IC-SB counterpart and is removed from the router.  The
+ * map is empty on return.
+ *
+ * Requires an open NB transaction in 'ctx'. */
+static void
+sync_learned_route(struct ic_context *ctx,
+                   const struct icsbrec_availability_zone *az,
+                   struct ic_router_info *ic_lr)
+{
+    ovs_assert(ctx->ovnnb_txn);
+    const struct icsbrec_route *isb_route;
+    ICSBREC_ROUTE_FOR_EACH (isb_route, ctx->ovnisb_idl) {
+        /* Routes advertised by the local AZ are never learned back. */
+        if (isb_route->availability_zone == az) {
+            continue;
+        }
+        struct v46_ip prefix, nexthop;
+        unsigned int plen;
+        if (!parse_route(isb_route->ip_prefix, isb_route->nexthop,
+                         &prefix, &plen, &nexthop)) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+            VLOG_WARN_RL(&rl, "Bad route format in IC-SB: %s -> %s. Ignored.",
+                         isb_route->ip_prefix, isb_route->nexthop);
+            continue;
+        }
+        const struct nbrec_nb_global *nb_global =
+            nbrec_nb_global_first(ctx->ovnnb_idl);
+        ovs_assert(nb_global);
+        if (!route_need_learn(&prefix, plen, &nb_global->options)) {
+            continue;
+        }
+        struct ic_route_info *route_learned
+            = ic_route_find(&ic_lr->routes_learned, &prefix, plen, &nexthop);
+        if (route_learned) {
+            /* Sync external-ids */
+            /* The return value is not checked: route_run() only places
+             * routes into routes_learned after smap_get_uuid() succeeded
+             * on their "ic-learned-route" key. */
+            struct uuid ext_id;
+            smap_get_uuid(&route_learned->nb_route->external_ids,
+                          "ic-learned-route", &ext_id);
+            if (!uuid_equals(&ext_id, &isb_route->header_.uuid)) {
+                char *uuid_s = xasprintf(UUID_FMT,
+                                         UUID_ARGS(&isb_route->header_.uuid));
+                nbrec_logical_router_static_route_update_external_ids_setkey(
+                    route_learned->nb_route, "ic-learned-route", uuid_s);
+                free(uuid_s);
+            }
+            /* Matched: drop it from the map so that it is not deleted by
+             * the cleanup loop below. */
+            hmap_remove(&ic_lr->routes_learned, &route_learned->node);
+            free(route_learned);
+        } else {
+            /* Create the missing route in NB. */
+            const struct nbrec_logical_router_static_route *nb_route =
+                nbrec_logical_router_static_route_insert(ctx->ovnnb_txn);
+            nbrec_logical_router_static_route_set_ip_prefix(
+                nb_route, isb_route->ip_prefix);
+            nbrec_logical_router_static_route_set_nexthop(
+                nb_route, isb_route->nexthop);
+            /* Tag the route with the UUID of its IC-SB origin; this tag is
+             * what marks an NB static route as learned. */
+            char *uuid_s = xasprintf(UUID_FMT,
+                                     UUID_ARGS(&isb_route->header_.uuid));
+            nbrec_logical_router_static_route_update_external_ids_setkey(
+                nb_route, "ic-learned-route", uuid_s);
+            free(uuid_s);
+            nbrec_logical_router_update_static_routes_addvalue(
+                ic_lr->lr, nb_route);
+        }
+    }
+    /* Delete extra learned routes. */
+    struct ic_route_info *route_learned, *next;
+    HMAP_FOR_EACH_SAFE (route_learned, next, node, &ic_lr->routes_learned) {
+        VLOG_DBG("Delete route %s -> %s that is not in IC-SB from NB.",
+                 route_learned->nb_route->ip_prefix,
+                 route_learned->nb_route->nexthop);
+        nbrec_logical_router_update_static_routes_delvalue(
+            ic_lr->lr, route_learned->nb_route);
+        hmap_remove(&ic_lr->routes_learned, &route_learned->node);
+        free(route_learned);
+    }
+}
+
+/* Makes external_ids:nb-id of the IC-SB route 'isb_route' equal to the UUID
+ * of the NB record that 'route_ad' was generated from: the static route if
+ * 'route_ad->nb_route' is set, otherwise the router port
+ * 'route_ad->nb_lrp'.
+ *
+ * Nothing is written when the recorded value is already correct. */
+static void
+ad_route_sync_external_ids(const struct ic_route_info *route_ad,
+                           const struct icsbrec_route *isb_route)
+{
+    struct uuid isb_ext_id, nb_id;
+    nb_id = route_ad->nb_route ? route_ad->nb_route->header_.uuid
+                               : route_ad->nb_lrp->header_.uuid;
+
+    /* "nb-id" is absent on a freshly inserted route (and could be
+     * malformed), in which case 'isb_ext_id' is not filled in.  Only compare
+     * it when smap_get_uuid() reports success, so that an uninitialized
+     * value is never read. */
+    if (smap_get_uuid(&isb_route->external_ids, "nb-id", &isb_ext_id)
+        && uuid_equals(&isb_ext_id, &nb_id)) {
+        return;
+    }
+
+    char *uuid_s = xasprintf(UUID_FMT, UUID_ARGS(&nb_id));
+    icsbrec_route_update_external_ids_setkey(isb_route, "nb-id",
+                                             uuid_s);
+    free(uuid_s);
+}
+
+/* Sync routes from routes_ad to IC-SB.
+ *
+ * Only IC-SB routes that belong to transit switch 'ts_name' and to the
+ * local availability zone 'az' are considered.  Of those, routes that
+ * cannot be parsed or that have no match in 'routes_ad' are deleted;
+ * matched routes get their external_ids refreshed.  Entries of 'routes_ad'
+ * without an IC-SB counterpart are then inserted as new IC-SB routes.
+ *
+ * All entries of 'routes_ad' are freed; the map is empty on return.
+ * Requires an open IC-SB transaction in 'ctx'. */
+static void
+advertise_route(struct ic_context *ctx,
+                const struct icsbrec_availability_zone *az,
+                const char *ts_name,
+                struct hmap *routes_ad)
+{
+    ovs_assert(ctx->ovnisb_txn);
+    const struct icsbrec_route *isb_route;
+    ICSBREC_ROUTE_FOR_EACH (isb_route, ctx->ovnisb_idl) {
+        if (strcmp(isb_route->transit_switch, ts_name)) {
+            continue;
+        }
+
+        /* Routes owned by other AZs are not ours to modify. */
+        if (isb_route->availability_zone != az) {
+            continue;
+        }
+
+        struct v46_ip prefix, nexthop;
+        unsigned int plen;
+
+        if (!parse_route(isb_route->ip_prefix, isb_route->nexthop,
+                         &prefix, &plen, &nexthop)) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+            VLOG_WARN_RL(&rl, "Bad route format in IC-SB: %s -> %s. "
+                         "Delete it.",
+                         isb_route->ip_prefix, isb_route->nexthop);
+            icsbrec_route_delete(isb_route);
+            continue;
+        }
+        struct ic_route_info *route_ad =
+            ic_route_find(routes_ad, &prefix, plen, &nexthop);
+        if (!route_ad) {
+            /* Delete the extra route from IC-SB. */
+            VLOG_DBG("Delete route %s -> %s from IC-SB, which is not found"
+                     " in local routes to be advertised.",
+                     isb_route->ip_prefix, isb_route->nexthop);
+            icsbrec_route_delete(isb_route);
+        } else {
+            ad_route_sync_external_ids(route_ad, isb_route);
+
+            /* Already present in IC-SB: drop it from the map so that it is
+             * not inserted again below. */
+            hmap_remove(routes_ad, &route_ad->node);
+            free(route_ad);
+        }
+    }
+
+    /* Create the missing routes in IC-SB */
+    struct ic_route_info *route_ad, *next;
+    HMAP_FOR_EACH_SAFE (route_ad, next, node, routes_ad) {
+        isb_route = icsbrec_route_insert(ctx->ovnisb_txn);
+        icsbrec_route_set_transit_switch(isb_route, ts_name);
+        icsbrec_route_set_availability_zone(isb_route, az);
+
+        /* Render prefix and nexthop as text in the notation of the route's
+         * address family. */
+        char *prefix_s, *nexthop_s;
+        if (route_ad->prefix.family == AF_INET) {
+            prefix_s = xasprintf(IP_FMT"/%d",
+                                 IP_ARGS(route_ad->prefix.ipv4),
+                                 route_ad->plen);
+            nexthop_s = xasprintf(IP_FMT, IP_ARGS(route_ad->nexthop.ipv4));
+        } else {
+            /* 'network_s' is reused: first for the prefix, then for the
+             * nexthop. */
+            char network_s[INET6_ADDRSTRLEN];
+            inet_ntop(AF_INET6, &route_ad->prefix.ipv6, network_s,
+                      INET6_ADDRSTRLEN);
+            prefix_s = xasprintf("%s/%d", network_s, route_ad->plen);
+            inet_ntop(AF_INET6, &route_ad->nexthop.ipv6, network_s,
+                      INET6_ADDRSTRLEN);
+            nexthop_s = xstrdup(network_s);
+        }
+        icsbrec_route_set_ip_prefix(isb_route, prefix_s);
+        icsbrec_route_set_nexthop(isb_route, nexthop_s);
+        free(prefix_s);
+        free(nexthop_s);
+
+        ad_route_sync_external_ids(route_ad, isb_route);
+
+        hmap_remove(routes_ad, &route_ad->node);
+        free(route_ad);
+    }
+}
+
+/* Returns the name of the logical router port that the transit switch port
+ * named 'ts_port_name' is connected to, i.e. the value of
+ * options:router-port of the NB logical switch port with that name.
+ *
+ * Returns NULL if NB has no such logical switch port or the option is not
+ * set.  The returned string belongs to the NB row; the caller must not free
+ * it. */
+static const char *
+get_lrp_name_by_ts_port_name(struct ic_context *ctx,
+                           const char *ts_port_name)
+{
+    const struct nbrec_logical_switch_port *nb_lsp;
+    const struct nbrec_logical_switch_port *nb_lsp_key =
+        nbrec_logical_switch_port_index_init_row(ctx->nbrec_port_by_name);
+    nbrec_logical_switch_port_index_set_name(nb_lsp_key, ts_port_name);
+    nb_lsp = nbrec_logical_switch_port_index_find(ctx->nbrec_port_by_name,
+                                                  nb_lsp_key);
+    /* The key row is only needed for the lookup.  Release it on every path,
+     * as route_run() does for its port binding index key; otherwise each
+     * call leaks one row. */
+    nbrec_logical_switch_port_index_destroy_row(nb_lsp_key);
+
+    if (!nb_lsp) {
+        return NULL;
+    }
+
+    return smap_get(&nb_lsp->options, "router-port");
+}
+
+/* Runs route advertisement and learning for the local availability zone
+ * 'az'.
+ *
+ * For every transit switch in IC-NB:
+ *   1. Walk the IC-SB port bindings of the switch that belong to 'az' and
+ *      resolve each to its NB logical router.  At most one port per router
+ *      is used; further ports of the same router on the same switch are
+ *      ignored.
+ *   2. For each such router, sort its NB static routes into routes already
+ *      learned from IC-SB and candidates for advertisement, and add its
+ *      directly connected networks (except those of the port facing the
+ *      transit switch) as further candidates.
+ *   3. Push the candidates to IC-SB with advertise_route().
+ *   4. Reconcile each router's learned routes with sync_learned_route().
+ *
+ * Does nothing unless both the IC-SB and the NB transaction are open. */
+static void
+route_run(struct ic_context *ctx,
+          const struct icsbrec_availability_zone *az)
+{
+    if (!ctx->ovnisb_txn || !ctx->ovnnb_txn) {
+        return;
+    }
+
+    const struct nbrec_nb_global *nb_global =
+        nbrec_nb_global_first(ctx->ovnnb_idl);
+    ovs_assert(nb_global);
+
+    const struct icnbrec_transit_switch *ts;
+    ICNBREC_TRANSIT_SWITCH_FOR_EACH (ts, ctx->ovninb_idl) {
+        /* 'ic_lrs' holds one ic_router_info per router attached to 'ts';
+         * 'routes_ad' collects the routes to advertise on 'ts'. */
+        struct hmap ic_lrs = HMAP_INITIALIZER(&ic_lrs);
+        struct hmap routes_ad = HMAP_INITIALIZER(&routes_ad);
+
+        const struct icsbrec_port_binding *isb_pb;
+        const struct icsbrec_port_binding *isb_pb_key =
+            icsbrec_port_binding_index_init_row(
+                ctx->icsbrec_port_binding_by_ts);
+        icsbrec_port_binding_index_set_transit_switch(isb_pb_key, ts->name);
+
+        /* Each port on TS maps to a logical router, which is stored in the
+         * external_ids:router-id of the IC SB port_binding record. */
+        ICSBREC_PORT_BINDING_FOR_EACH_EQUAL (isb_pb, isb_pb_key,
+                                             ctx->icsbrec_port_binding_by_ts) {
+            /* Only ports of the local AZ have a router in the local NB. */
+            if (isb_pb->availability_zone != az) {
+                continue;
+            }
+
+            const char *ts_lrp_name =
+                get_lrp_name_by_ts_port_name(ctx, isb_pb->logical_port);
+            if (!ts_lrp_name) {
+                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+                VLOG_WARN_RL(&rl, "Route sync ignores port %s on ts %s "
+                             "because logical router port is not found in NB.",
+                             isb_pb->logical_port, ts->name);
+                continue;
+            }
+
+            struct uuid lr_uuid;
+            if (!smap_get_uuid(&isb_pb->external_ids, "router-id", &lr_uuid)) {
+                VLOG_DBG("IC-SB Port_Binding %s doesn't have "
+                         "external_ids:router-id set.", isb_pb->logical_port);
+                continue;
+            }
+            const struct nbrec_logical_router *lr
+                = nbrec_logical_router_get_for_uuid(ctx->ovnnb_idl, &lr_uuid);
+            if (!lr) {
+                continue;
+            }
+
+            if (ic_router_find(&ic_lrs, lr)) {
+                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+                VLOG_INFO_RL(&rl, "Route sync ignores port %s on ts %s for "
+                             "router %s because the router has another port "
+                             "connected to same ts.", isb_pb->logical_port,
+                             ts->name, lr->name);
+                continue;
+            }
+
+            /* The addresses of the TS port serve as the next hop of every
+             * route advertised for this router. */
+            struct lport_addresses ts_port_addrs;
+            if (!extract_lsp_addresses(isb_pb->address, &ts_port_addrs)) {
+                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+                VLOG_INFO_RL(&rl, "Route sync ignores port %s on ts %s for "
+                             "router %s because the addresses are invalid.",
+                             isb_pb->logical_port, ts->name, lr->name);
+                continue;
+            }
+
+            struct ic_router_info *ic_lr = xzalloc(sizeof *ic_lr);
+            ic_lr->lr = lr;
+            ic_lr->isb_pb = isb_pb;
+            hmap_init(&ic_lr->routes_learned);
+            hmap_insert(&ic_lrs, &ic_lr->node, uuid_hash(&lr->header_.uuid));
+
+            /* Check static routes of the LR */
+            for (int i = 0; i < lr->n_static_routes; i++) {
+                const struct nbrec_logical_router_static_route *nb_route
+                    = lr->static_routes[i];
+                /* A route is treated as learned when its external_ids carry
+                 * an "ic-learned-route" UUID. */
+                struct uuid isb_uuid;
+                if (smap_get_uuid(&nb_route->external_ids,
+                                  "ic-learned-route", &isb_uuid)) {
+                    /* It is a learned route */
+                    if (!add_to_routes_learned(&ic_lr->routes_learned,
+                                               nb_route)) {
+                        static struct vlog_rate_limit rl =
+                            VLOG_RATE_LIMIT_INIT(5, 1);
+                        VLOG_WARN_RL(&rl, "Bad format of learned route in NB:"
+                                     " %s -> %s. Delete it.",
+                                     nb_route->ip_prefix, nb_route->nexthop);
+                        nbrec_logical_router_update_static_routes_delvalue(
+                            lr, nb_route);
+                    }
+                } else {
+                    /* It may be a route to be advertised */
+                    add_to_routes_ad(&routes_ad, nb_route, &ts_port_addrs,
+                                     &nb_global->options);
+                }
+            }
+
+            /* Check direct-connected subnets of the LR */
+            for (int i = 0; i < lr->n_ports; i++) {
+                const struct nbrec_logical_router_port *lrp = lr->ports[i];
+                if (!strcmp(lrp->name, ts_lrp_name)) {
+                    /* The router port of the TS port is ignored. */
+                    VLOG_DBG("Route ad: skip lrp %s (TS port: %s)",
+                             lrp->name, isb_pb->logical_port);
+                    continue;
+                }
+
+                for (int j = 0; j < lrp->n_networks; j++) {
+                    add_network_to_routes_ad(&routes_ad, lrp->networks[j],
+                                             lrp, &ts_port_addrs,
+                                             &nb_global->options);
+                }
+            }
+
+            destroy_lport_addresses(&ts_port_addrs);
+        }
+        icsbrec_port_binding_index_destroy_row(isb_pb_key);
+
+        /* advertise_route() frees every entry of 'routes_ad', so only the
+         * map itself is left to destroy. */
+        advertise_route(ctx, az, ts->name, &routes_ad);
+        hmap_destroy(&routes_ad);
+
+        /* sync_learned_route() likewise empties 'routes_learned' of each
+         * router. */
+        struct ic_router_info *ic_lr, *next;
+        HMAP_FOR_EACH_SAFE (ic_lr, next, node, &ic_lrs) {
+            sync_learned_route(ctx, az, ic_lr);
+            hmap_destroy(&ic_lr->routes_learned);
+            hmap_remove(&ic_lrs, &ic_lr->node);
+            free(ic_lr);
+        }
+        hmap_destroy(&ic_lrs);
+    }
+}
+
 static void
 ovn_db_run(struct ic_context *ctx)
 {
@@ -771,6 +1452,7 @@ ovn_db_run(struct ic_context *ctx)
     ts_run(ctx);
     gateway_run(ctx, az);
     port_binding_run(ctx, az);
+    route_run(ctx, az);
 }
 
 static void
@@ -930,6 +1612,9 @@ main(int argc, char *argv[])
     struct ovsdb_idl_index *nbrec_ls_by_name
         = ovsdb_idl_index_create1(ovnnb_idl_loop.idl,
                                   &nbrec_logical_switch_col_name);
+    struct ovsdb_idl_index *nbrec_port_by_name
+        = ovsdb_idl_index_create1(ovnnb_idl_loop.idl,
+                                  &nbrec_logical_switch_port_col_name);
     struct ovsdb_idl_index *sbrec_port_binding_by_name
         = ovsdb_idl_index_create1(ovnsb_idl_loop.idl,
                                   &sbrec_port_binding_col_logical_port);
@@ -966,6 +1651,7 @@ main(int argc, char *argv[])
                 .ovnisb_idl = ovnisb_idl_loop.idl,
                 .ovnisb_txn = ovsdb_idl_loop_run(&ovnisb_idl_loop),
                 .nbrec_ls_by_name = nbrec_ls_by_name,
+                .nbrec_port_by_name = nbrec_port_by_name,
                 .sbrec_port_binding_by_name = sbrec_port_binding_by_name,
                 .sbrec_chassis_by_name = sbrec_chassis_by_name,
                 .icsbrec_port_binding_by_ts = icsbrec_port_binding_by_ts,
diff --git a/ovn-architecture.7.xml b/ovn-architecture.7.xml
index 71efa41..909f037 100644
--- a/ovn-architecture.7.xml
+++ b/ovn-architecture.7.xml
@@ -1909,6 +1909,17 @@
       can be produced and then translated to OVS flows locally, which finally
       enables data plane communication.
     </li>
+    <li>
+      Routes that are advertised between different AZs.  If enabled,
+      routes are automatically exchanged by <code>ovn-ic</code>.
+      Both static routes and directly connected subnets are advertised.
+      Options in <ref column="options" table="NB_Global" db="OVN_NB"/>
+      column of the <ref table="NB_Global" db="OVN_NB"/> table of
+      <ref db="OVN_NB"/> database control the behavior of route
+      advertisement, such as whether advertising/learning of routes
+      is enabled, whether default routes are advertised/learned, and
+      which CIDRs are blacklisted.  See <code>ovn-nb</code>(5) for more details.
+    </li>
   </ul>
 
   <p>
diff --git a/ovn-ic-sb.ovsschema b/ovn-ic-sb.ovsschema
index 819c28a..a382993 100644
--- a/ovn-ic-sb.ovsschema
+++ b/ovn-ic-sb.ovsschema
@@ -1,7 +1,7 @@
 {
     "name": "OVN_IC_Southbound",
     "version": "1.0.0",
-    "cksum": "1702378250 6056",
+    "cksum": "1358749947 6592",
     "tables": {
         "IC_SB_Global": {
             "columns": {
@@ -87,6 +87,17 @@
                                  "max": "unlimited"}}},
             "indexes": [["transit_switch", "tunnel_key"], ["logical_port"]],
             "isRoot": true},
+        "Route": {
+            "columns": {
+                "transit_switch": {"type": "string"},
+                "availability_zone": {"type": {"key": {"type": "uuid",
+                                             "refTable": "Availability_Zone"}}},
+                "ip_prefix": {"type": "string"},
+                "nexthop": {"type": "string"},
+                "external_ids": {
+                    "type": {"key": "string", "value": "string",
+                             "min": 0, "max": "unlimited"}}},
+            "isRoot": true},
         "Connection": {
             "columns": {
                 "target": {"type": "string"},
diff --git a/ovn-ic-sb.xml b/ovn-ic-sb.xml
index dad6405..3582cff 100644
--- a/ovn-ic-sb.xml
+++ b/ovn-ic-sb.xml
@@ -292,6 +292,38 @@
     </group>
   </table>
 
+  <table name="Route" title="Route">
+    <p>
+      Each row in this table represents a route advertised.
+    </p>
+
+    <group title="Core Features">
+      <column name="transit_switch">
+        The name of the transit switch, upon which the route is advertised.
+      </column>
+
+      <column name="availability_zone">
+        The availability zone that has advertised the route.
+      </column>
+
+      <column name="ip_prefix">
+        IP prefix of this route (e.g. 192.168.100.0/24).
+      </column>
+
+      <column name="nexthop">
+        Nexthop IP address for this route.
+      </column>
+    </group>
+
+    <group title="Common Columns">
+      <column name="external_ids">
+        <p>
+          See <em>External IDs</em> at the beginning of this document.
+        </p>
+      </column>
+    </group>
+  </table>
+
   <table name="Connection" title="OVSDB client connections.">
     <p>
       Configuration for a database connection to an Open vSwitch database
diff --git a/ovn-nb.xml b/ovn-nb.xml
index 7bae14c..46a870e 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -133,6 +133,73 @@
         </ul>
 
       </column>
+
+      <group title="Options for configuring interconnection route advertisement">
+        <p>
+          These options control how routes are advertised between OVN
+          deployments for interconnection.  If enabled, <code>ovn-ic</code>
+          from different OVN deployments exchanges routes between each other
+          through the global <ref db="OVN_IC_Southbound"/> database.  Only
+          routers with ports connected to interconnection transit switches
+          participate in route advertisement.  For each of these routers, there
+          are two types of routes to be advertised:
+        </p>
+
+        <p>
+          Firstly, the static routes configured in the router are advertised.
+        </p>
+
+        <p>
+          Secondly, the <code>networks</code> configured in the logical router
+          ports that are not on the transit switches are advertised.  These
+          are considered as directly connected subnets on the router.
+        </p>
+
+        <p>
+          Link local prefixes (IPv4 169.254.0.0/16 and IPv6 FE80::/10)
+          are never advertised.
+        </p>
+
+        <p>
+          The learned routes are added to the
+          <ref column="static_routes" table="Logical_Router"/> column of the
+          <ref table="Logical_Router"/> table, with
+          <code>external_ids:ic-learned-route</code> set to the uuid
+          of the row in <ref table="Route" db="OVN_IC_Southbound"/>
+          table of the <ref db="OVN_IC_Southbound"/> database.
+        </p>
+
+        <column name="options" key="ic-route-ad">
+          A boolean value that enables route advertisement to the global
+          <ref db="OVN_IC_Southbound"/> database.  Default is <code>false</code>.
+        </column>
+
+        <column name="options" key="ic-route-learn">
+          A boolean value that enables route learning from the global
+          <ref db="OVN_IC_Southbound"/> database.  Default is <code>false</code>.
+        </column>
+
+        <column name="options" key="ic-route-ad-default">
+          A boolean value that enables advertising default route to the global
+          <ref db="OVN_IC_Southbound"/> database.  Default is <code>false</code>.
+          This option takes effect only when option <code>ic-route-ad</code> is
+          <code>true</code>.
+        </column>
+
+        <column name="options" key="ic-route-learn-default">
+          A boolean value that enables learning default route from the global
+          <ref db="OVN_IC_Southbound"/> database.  Default is <code>false</code>.
+          This option takes effect only when option <code>ic-route-learn</code>
+          is <code>true</code>.
+        </column>
+
+        <column name="options" key="ic-route-blacklist">
+          A string value that contains a list of CIDRs delimited by ",".  A route
+          will not be advertised or learned if the route's prefix belongs to any of
+          the CIDRs listed.
+        </column>
+      </group>
+
     </group>
 
     <group title="Connection Options">
@@ -2355,6 +2422,13 @@
       </p>
     </column>
 
+    <column name="external_ids" key="ic-learned-route">
+      <code>ovn-ic</code> populates this key if the route is learned from the
+      global <ref db="OVN_IC_Southbound"/> database.  In this case the value
+      will be set to the uuid of the row in <ref table="Route" db="OVN_IC_Southbound"/>
+      table of the <ref db="OVN_IC_Southbound"/> database.
+    </column>
+
     <group title="Common Columns">
       <column name="external_ids">
         See <em>External IDs</em> at the beginning of this document.
diff --git a/tests/ovn-ic.at b/tests/ovn-ic.at
index d506442..1cfdc49 100644
--- a/tests/ovn-ic.at
+++ b/tests/ovn-ic.at
@@ -186,3 +186,120 @@ OVN_CLEANUP_SBOX(gw1)
 OVN_CLEANUP_IC([az1], [az2])
 
 AT_CLEANUP
+
+AT_SETUP([ovn-ic -- route sync])
+
+ovn_init_ic_db
+ovn-ic-nbctl ts-add ts1
+
+for i in 1 2; do
+    ovn_start az$i
+    ovn_as az$i
+
+    # Enable route learning at AZ level
+    ovn-nbctl set nb_global . options:ic-route-learn=true
+    # Enable route advertising at AZ level
+    ovn-nbctl set nb_global . options:ic-route-ad=true
+
+    # Create LRP and connect to TS
+    ovn-nbctl lr-add lr$i
+    ovn-nbctl lrp-add lr$i lrp-lr$i-ts1 aa:aa:aa:aa:aa:0$i 169.254.100.$i/24
+    ovn-nbctl lsp-add ts1 lsp-ts1-lr$i \
+            -- lsp-set-addresses lsp-ts1-lr$i router \
+            -- lsp-set-type lsp-ts1-lr$i router \
+            -- lsp-set-options lsp-ts1-lr$i router-port=lrp-lr$i-ts1
+
+    # Create static routes
+    ovn-nbctl lr-route-add lr$i 10.11.$i.0/24 169.254.0.1
+
+    # Create a src-ip route, which shouldn't be synced
+    ovn-nbctl --policy=src-ip lr-route-add lr$i 10.22.$i.0/24 169.254.0.2
+done
+
+for i in 1 2; do
+    OVS_WAIT_UNTIL([ovn_as az$i ovn-nbctl lr-route-list lr$i | grep learned])
+done
+
+AT_CHECK([ovn_as az1 ovn-nbctl lr-route-list lr1], [0], [dnl
+IPv4 Routes
+             10.11.1.0/24               169.254.0.1 dst-ip
+             10.11.2.0/24             169.254.100.2 dst-ip (learned)
+             10.22.1.0/24               169.254.0.2 src-ip
+])
+
+# Disable route-learning for AZ1
+ovn_as az1 ovn-nbctl set nb_global . options:ic-route-learn=false
+OVS_WAIT_WHILE([ovn_as az1 ovn-nbctl lr-route-list lr1 | grep learned])
+AT_CHECK([ovn_as az1 ovn-nbctl lr-route-list lr1], [0], [dnl
+IPv4 Routes
+             10.11.1.0/24               169.254.0.1 dst-ip
+             10.22.1.0/24               169.254.0.2 src-ip
+])
+
+# AZ1 should still advertise and AZ2 should still learn the route
+AT_CHECK([ovn_as az2 ovn-nbctl lr-route-list lr2 | grep learned], [0], [ignore])
+
+# Disable route-advertising for AZ1
+ovn_as az1 ovn-nbctl set nb_global . options:ic-route-ad=false
+
+# AZ2 shouldn't have the route learned, because AZ1 should have stopped
+# advertising.
+OVS_WAIT_WHILE([ovn_as az2 ovn-nbctl lr-route-list lr2 | grep learned])
+AT_CHECK([ovn_as az2 ovn-nbctl lr-route-list lr2], [0], [dnl
+IPv4 Routes
+             10.11.2.0/24               169.254.0.1 dst-ip
+             10.22.2.0/24               169.254.0.2 src-ip
+])
+
+# Add default route in AZ1
+ovn_as az1 ovn-nbctl lr-route-add lr1 0.0.0.0/0 169.254.0.3
+
+# Re-enable route advertising & learning for AZ1
+ovn_as az1 ovn-nbctl set nb_global . options:ic-route-ad=true
+ovn_as az1 ovn-nbctl set nb_global . options:ic-route-learn=true
+
+for i in 1 2; do
+    OVS_WAIT_UNTIL([ovn_as az$i ovn-nbctl lr-route-list lr$i | grep learned])
+done
+
+# Default route should NOT get advertised or learned, by default.
+AT_CHECK([ovn_as az2 ovn-nbctl lr-route-list lr2], [0], [dnl
+IPv4 Routes
+             10.11.1.0/24             169.254.100.1 dst-ip (learned)
+             10.11.2.0/24               169.254.0.1 dst-ip
+             10.22.2.0/24               169.254.0.2 src-ip
+])
+
+# Enable default route advertising in AZ1
+ovn_as az1 ovn-nbctl set nb_global . options:ic-route-ad-default=true
+OVS_WAIT_UNTIL([ovn-ic-sbctl list route | grep 0.0.0.0])
+
+# Enable default route learning in AZ2
+ovn_as az2 ovn-nbctl set nb_global . options:ic-route-learn-default=true
+OVS_WAIT_UNTIL([ovn_as az2 ovn-nbctl lr-route-list lr2 | grep learned | grep 0.0.0.0])
+
+# Test directly connected subnet route advertising.
+ovn_as az1 ovn-nbctl lrp-add lr1 lrp-lr1-ls1 aa:aa:aa:aa:bb:01 "192.168.0.1/24"
+OVS_WAIT_UNTIL([ovn_as az2 ovn-nbctl lr-route-list lr2 | grep learned | grep 192.168])
+
+# Delete the directly connected subnet from AZ1, learned route should be
+# removed from AZ2.
+ovn_as az1 ovn-nbctl lrp-del lrp-lr1-ls1
+OVS_WAIT_WHILE([ovn_as az2 ovn-nbctl lr-route-list lr2 | grep learned | grep 192.168])
+
+# Test blacklist routes
+# Add back the directly connected 192.168 route.
+ovn_as az1 ovn-nbctl lrp-add lr1 lrp-lr1-ls1 aa:aa:aa:aa:bb:01 "192.168.0.1/24"
+OVS_WAIT_UNTIL([ovn_as az2 ovn-nbctl lr-route-list lr2 | grep learned | grep 192.168])
+# Ensure AZ1 learned AZ2's 10.11.2.0 route as well.
+OVS_WAIT_UNTIL([ovn_as az1 ovn-nbctl lr-route-list lr1 | grep learned | grep 10.11])
+# Now blacklist 10.11.0.0/16 and 192.168.0.0/16 in AZ2.
+ovn_as az2 ovn-nbctl set nb_global . options:ic-route-blacklist="10.11.0.0/16,192.168.0.0/16"
+# AZ2 shouldn't learn 192.168 route any more.
+OVS_WAIT_WHILE([ovn_as az2 ovn-nbctl lr-route-list lr2 | grep learned | grep 192.168])
+# AZ1 shouldn't learn 10.11 any more.
+OVS_WAIT_WHILE([ovn_as az1 ovn-nbctl lr-route-list lr1 | grep learned | grep 10.11])
+
+OVN_CLEANUP_IC([az1], [az2])
+
+AT_CLEANUP
diff --git a/tests/ovn.at b/tests/ovn.at
index 091351c..f793c89 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -18061,6 +18061,8 @@ done
 
 for az in `seq 1 $n_az`; do
     ovn_as az$az
+    ovn-nbctl set nb_global . options:ic-route-learn=true
+    ovn-nbctl set nb_global . options:ic-route-ad=true
 
     # Each AZ has n_ts LSPi->LSi->LRi connecting to each TSi
     for i in `seq 1 $n_ts`; do
@@ -18089,13 +18091,6 @@ for az in `seq 1 $n_az`; do
         ovn-nbctl lsp-set-type lsp-ts$i-lr$az-$i router
         ovn-nbctl lsp-set-options lsp-ts$i-lr$az-$i router-port=lrp-lr$az-$i-ts$i
         ovn-nbctl lrp-set-gateway-chassis lrp-lr$az-$i-ts$i gw$az
-
-        for remote in `seq 1 $n_az`; do
-            if test $az = $remote; then
-                continue
-            fi
-            ovn-nbctl lr-route-add lr$az-$i 10.$remote.$i.0/24 169.254.$i.$remote
-        done
     done
 done
 
@@ -18155,6 +18150,8 @@ ovn-ic-sbctl list datapath_binding
 echo "---------------------"
 ovn-ic-sbctl list port_binding
 echo "---------------------"
+ovn-ic-sbctl list route
+echo "---------------------"
 
 for az in `seq 1 $n_az`; do
     for i in `seq 1 $n_ts`; do
diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c
index f5d58cc..d8d01c8 100644
--- a/utilities/ovn-nbctl.c
+++ b/utilities/ovn-nbctl.c
@@ -5149,6 +5149,10 @@ print_route(const struct nbrec_logical_router_static_route *route, struct ds *s)
     if (route->output_port) {
         ds_put_format(s, " %s", route->output_port);
     }
+
+    if (smap_get(&route->external_ids, "ic-learned-route")) {
+        ds_put_format(s, " (learned)");
+    }
     ds_put_char(s, '\n');
 }
 
-- 
2.1.0



More information about the dev mailing list