[ovs-dev] [PATCH 23/23] ovn: Implement basic logical L3 routing.

Ben Pfaff blp at nicira.com
Sat Oct 10 04:21:42 UTC 2015


This implements basic logical L3 routing.  It has a lot of caveats,
including the following regarding testing:

   * Only single-router hops have been tested.  Chains or trees of
     logical routers may work but definitely need testing and may
     need a little extra code.

   * No testing of logical router ARP replies.

   * Not enough testing in general.

ovn/TODO describes a lot of other caveats in terms of the work needed
to fix them.

Signed-off-by: Ben Pfaff <blp at nicira.com>
---
 ovn/TODO                |   6 -
 ovn/northd/ovn-northd.c | 641 +++++++++++++++++++++++++++++++++++++++++-------
 ovn/ovn-sb.xml          |  28 ++-
 tests/ovn.at            | 168 ++++++++++++-
 4 files changed, 738 insertions(+), 105 deletions(-)

diff --git a/ovn/TODO b/ovn/TODO
index 8ea0d17..1c3a0dc 100644
--- a/ovn/TODO
+++ b/ovn/TODO
@@ -47,12 +47,6 @@ various ways to ensure it could be implemented, e.g. the same as for
 OpenFlow by allowing the logical inport to be zeroed, or by
 introducing a new action that ignores the inport.
 
-** ovn-northd
-
-*** What flows should it generate?
-
-See description in ovn-northd(8).
-
 ** New OVN logical actions
 
 *** arp
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index ac3b39e..8825b52 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -222,13 +222,20 @@ allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
 /* The 'key' comes from nb->header_.uuid or sb->external_ids:logical-switch. */
 struct ovn_datapath {
     struct hmap_node key_node;  /* Index on 'key'. */
-    struct uuid key;            /* nb->header_.uuid. */
+    struct uuid key;            /* (nbs/nbr)->header_.uuid. */
 
-    const struct nbrec_logical_switch *nb;   /* May be NULL. */
+    const struct nbrec_logical_switch *nbs;  /* May be NULL. */
+    const struct nbrec_logical_router *nbr;  /* May be NULL. */
     const struct sbrec_datapath_binding *sb; /* May be NULL. */
 
     struct ovs_list list;       /* In list of similar records. */
 
+    /* Logical router data (digested from nbr). */
+    ovs_be32 gateway;
+
+    /* Logical switch data. */
+    struct ovn_port *router_port;
+
     struct hmap port_tnlids;
     uint32_t port_key_hint;
 
@@ -237,13 +244,15 @@ struct ovn_datapath {
 
 static struct ovn_datapath *
 ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
-                    const struct nbrec_logical_switch *nb,
+                    const struct nbrec_logical_switch *nbs,
+                    const struct nbrec_logical_router *nbr,
                     const struct sbrec_datapath_binding *sb)
 {
     struct ovn_datapath *od = xzalloc(sizeof *od);
     od->key = *key;
     od->sb = sb;
-    od->nb = nb;
+    od->nbs = nbs;
+    od->nbr = nbr;
     hmap_init(&od->port_tnlids);
     od->port_key_hint = 0;
     hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
@@ -301,7 +310,8 @@ join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
     const struct sbrec_datapath_binding *sb, *sb_next;
     SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
         struct uuid key;
-        if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key)) {
+        if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
+            !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
             ovsdb_idl_txn_add_comment(ctx->ovnsb_txn,
                                       "deleting Datapath_Binding "UUID_FMT" that "
                                       "lacks external-ids:logical-switch",
@@ -320,23 +330,62 @@ join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
         }
 
         struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
-                                                      NULL, sb);
+                                                      NULL, NULL, sb);
         list_push_back(sb_only, &od->list);
     }
 
-    const struct nbrec_logical_switch *nb;
-    NBREC_LOGICAL_SWITCH_FOR_EACH (nb, ctx->ovnnb_idl) {
+    const struct nbrec_logical_switch *nbs;
+    NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
         struct ovn_datapath *od = ovn_datapath_find(datapaths,
-                                                    &nb->header_.uuid);
+                                                    &nbs->header_.uuid);
         if (od) {
-            od->nb = nb;
+            od->nbs = nbs;
             list_remove(&od->list);
             list_push_back(both, &od->list);
         } else {
-            od = ovn_datapath_create(datapaths, &nb->header_.uuid, nb, NULL);
+            od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
+                                     nbs, NULL, NULL);
             list_push_back(nb_only, &od->list);
         }
     }
+
+    const struct nbrec_logical_router *nbr;
+    NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
+        struct ovn_datapath *od = ovn_datapath_find(datapaths,
+                                                    &nbr->header_.uuid);
+        if (od) {
+            if (!od->nbs) {
+                od->nbr = nbr;
+                list_remove(&od->list);
+                list_push_back(both, &od->list);
+            } else {
+                /* Can't happen! */
+                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+                VLOG_WARN_RL(&rl,
+                             "duplicate UUID "UUID_FMT" in OVN_Northbound",
+                             UUID_ARGS(&nbr->header_.uuid));
+                continue;
+            }
+        } else {
+            od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
+                                     NULL, nbr, NULL);
+            list_push_back(nb_only, &od->list);
+        }
+
+        od->gateway = 0;
+        if (nbr->default_gw) {
+            ovs_be32 ip, mask;
+            char *error = ip_parse_masked(nbr->default_gw, &ip, &mask);
+            if (error || !ip || mask != OVS_BE32_MAX) {
+                static struct vlog_rate_limit rl
+                    = VLOG_RATE_LIMIT_INIT(5, 1);
+                VLOG_WARN_RL(&rl, "bad 'gateway' %s", nbr->default_gw);
+                free(error);
+            } else {
+                od->gateway = ip;
+            }
+        }
+    }
 }
 
 static uint32_t
@@ -371,8 +420,9 @@ build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
             od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
 
             char uuid_s[UUID_LEN + 1];
-            sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->nb->header_.uuid));
-            const struct smap id = SMAP_CONST1(&id, "logical-switch", uuid_s);
+            sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
+            const char *key = od->nbs ? "logical-switch" : "logical-router";
+            const struct smap id = SMAP_CONST1(&id, key, uuid_s);
             sbrec_datapath_binding_set_external_ids(od->sb, &id);
 
             sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
@@ -391,10 +441,19 @@ build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
 
 struct ovn_port {
     struct hmap_node key_node;  /* Index on 'key'. */
-    const char *key;            /* nb->name and sb->logical_port */
+    char *key;                  /* nb->name and sb->logical_port */
+    char *json_key;             /* 'key', quoted for use in JSON. */
+
+    const struct nbrec_logical_port *nbs;        /* May be NULL. */
+    const struct nbrec_logical_router_port *nbr; /* May be NULL. */
+    const struct sbrec_port_binding *sb;         /* May be NULL. */
 
-    const struct nbrec_logical_port *nb; /* May be NULL. */
-    const struct sbrec_port_binding *sb; /* May be NULL. */
+    /* Logical router port data. */
+    ovs_be32 ip, mask;          /* 192.168.10.123/24. */
+    ovs_be32 network;           /* 192.168.10.0. */
+    ovs_be32 bcast;             /* 192.168.10.255. */
+    struct eth_addr mac;
+    struct ovn_port *peer;
 
     struct ovn_datapath *od;
 
@@ -403,13 +462,20 @@ struct ovn_port {
 
 static struct ovn_port *
 ovn_port_create(struct hmap *ports, const char *key,
-                const struct nbrec_logical_port *nb,
+                const struct nbrec_logical_port *nbs,
+                const struct nbrec_logical_router_port *nbr,
                 const struct sbrec_port_binding *sb)
 {
     struct ovn_port *op = xzalloc(sizeof *op);
-    op->key = key;
+
+    struct ds json_key = DS_EMPTY_INITIALIZER;
+    json_string_escape(key, &json_key);
+    op->json_key = ds_steal_cstr(&json_key);
+
+    op->key = xstrdup(key);
     op->sb = sb;
-    op->nb = nb;
+    op->nbs = nbs;
+    op->nbr = nbr;
     hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
     return op;
 }
@@ -422,6 +488,8 @@ ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
          * private list and once we've exited that function it is not safe to
          * use it. */
         hmap_remove(ports, &port->key_node);
+        free(port->json_key);
+        free(port->key);
         free(port);
     }
 }
@@ -460,24 +528,111 @@ join_logical_ports(struct northd_context *ctx,
     const struct sbrec_port_binding *sb;
     SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
         struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
-                                              NULL, sb);
+                                              NULL, NULL, sb);
         list_push_back(sb_only, &op->list);
     }
 
     struct ovn_datapath *od;
     HMAP_FOR_EACH (od, key_node, datapaths) {
-        for (size_t i = 0; i < od->nb->n_ports; i++) {
-            const struct nbrec_logical_port *nb = od->nb->ports[i];
-            struct ovn_port *op = ovn_port_find(ports, nb->name);
-            if (op) {
-                op->nb = nb;
-                list_remove(&op->list);
-                list_push_back(both, &op->list);
-            } else {
-                op = ovn_port_create(ports, nb->name, nb, NULL);
-                list_push_back(nb_only, &op->list);
+        if (od->nbs) {
+            for (size_t i = 0; i < od->nbs->n_ports; i++) {
+                const struct nbrec_logical_port *nbs = od->nbs->ports[i];
+                struct ovn_port *op = ovn_port_find(ports, nbs->name);
+                if (op) {
+                    if (op->nbs || op->nbr) {
+                        static struct vlog_rate_limit rl
+                            = VLOG_RATE_LIMIT_INIT(5, 1);
+                        VLOG_WARN_RL(&rl, "duplicate logical port %s",
+                                     nbs->name);
+                        continue;
+                    }
+                    op->nbs = nbs;
+                    list_remove(&op->list);
+                    list_push_back(both, &op->list);
+                } else {
+                    op = ovn_port_create(ports, nbs->name, nbs, NULL, NULL);
+                    list_push_back(nb_only, &op->list);
+                }
+
+                op->od = od;
+            }
+        } else {
+            for (size_t i = 0; i < od->nbr->n_ports; i++) {
+                const struct nbrec_logical_router_port *nbr
+                    = od->nbr->ports[i];
+
+                struct eth_addr mac;
+                if (!eth_addr_from_string(nbr->mac, &mac)) {
+                    static struct vlog_rate_limit rl
+                        = VLOG_RATE_LIMIT_INIT(5, 1);
+                    VLOG_WARN_RL(&rl, "bad 'mac' %s", nbr->mac);
+                    continue;
+                }
+
+                ovs_be32 ip, mask;
+                char *error = ip_parse_masked(nbr->network, &ip, &mask);
+                if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
+                    static struct vlog_rate_limit rl
+                        = VLOG_RATE_LIMIT_INIT(5, 1);
+                    VLOG_WARN_RL(&rl, "bad 'network' %s", nbr->network);
+                    free(error);
+                    continue;
+                }
+
+                char name[UUID_LEN + 1];
+                snprintf(name, sizeof name, UUID_FMT,
+                         UUID_ARGS(&nbr->header_.uuid));
+                struct ovn_port *op = ovn_port_find(ports, name);
+                if (op) {
+                    if (op->nbs || op->nbr) {
+                        static struct vlog_rate_limit rl
+                            = VLOG_RATE_LIMIT_INIT(5, 1);
+                        VLOG_WARN_RL(&rl, "duplicate logical router port %s",
+                                     name);
+                        continue;
+                    }
+                    op->nbr = nbr;
+                    list_remove(&op->list);
+                    list_push_back(both, &op->list);
+                } else {
+                    op = ovn_port_create(ports, name, NULL, nbr, NULL);
+                    list_push_back(nb_only, &op->list);
+                }
+
+                op->ip = ip;
+                op->mask = mask;
+                op->network = ip & mask;
+                op->bcast = ip | ~mask;
+                op->mac = mac;
+
+                op->od = od;
             }
-            op->od = od;
+        }
+    }
+
+    /* Connect logical router ports, and logical switch ports of type "router",
+     * to their peers. */
+    struct ovn_port *op;
+    HMAP_FOR_EACH (op, key_node, ports) {
+        if (op->nbs && !strcmp(op->nbs->type, "router")) {
+            const char *peer_name = smap_get(&op->nbs->options, "router-port");
+            if (!peer_name) {
+                continue;
+            }
+
+            struct ovn_port *peer = ovn_port_find(ports, peer_name);
+            if (!peer || !peer->nbr) {
+                continue;
+            }
+
+            peer->peer = op;
+            op->peer = peer;
+            op->od->router_port = op;
+        } else if (op->nbr && op->nbr->peer) {
+            char peer_name[UUID_LEN + 1];
+            snprintf(peer_name, sizeof peer_name, UUID_FMT,
+                     UUID_ARGS(&op->nbr->peer->header_.uuid));
+            op->peer = ovn_port_find(ports, peer_name);
         }
     }
 }
@@ -485,13 +640,37 @@ join_logical_ports(struct northd_context *ctx,
 static void
 ovn_port_update_sbrec(const struct ovn_port *op)
 {
-    sbrec_port_binding_set_type(op->sb, op->nb->type);
-    sbrec_port_binding_set_options(op->sb, &op->nb->options);
     sbrec_port_binding_set_datapath(op->sb, op->od->sb);
-    sbrec_port_binding_set_parent_port(op->sb, op->nb->parent_name);
-    sbrec_port_binding_set_tag(op->sb, op->nb->tag, op->nb->n_tag);
-    sbrec_port_binding_set_mac(op->sb, (const char **) op->nb->addresses,
-                               op->nb->n_addresses);
+    if (op->nbr) {
+        sbrec_port_binding_set_type(op->sb, "patch");
+
+        const char *peer = op->peer ? op->peer->key : "<error>";
+        const struct smap ids = SMAP_CONST1(&ids, "peer", peer);
+        sbrec_port_binding_set_options(op->sb, &ids);
+
+        sbrec_port_binding_set_parent_port(op->sb, NULL);
+        sbrec_port_binding_set_tag(op->sb, NULL, 0);
+        sbrec_port_binding_set_mac(op->sb, NULL, 0);
+    } else {
+        if (strcmp(op->nbs->type, "router")) {
+            sbrec_port_binding_set_type(op->sb, op->nbs->type);
+            sbrec_port_binding_set_options(op->sb, &op->nbs->options);
+        } else {
+            sbrec_port_binding_set_type(op->sb, "patch");
+
+            const char *router_port = smap_get(&op->nbs->options,
+                                               "router-port");
+            if (!router_port) {
+                router_port = "<error>";
+            }
+            const struct smap ids = SMAP_CONST1(&ids, "peer", router_port);
+            sbrec_port_binding_set_options(op->sb, &ids);
+        }
+        sbrec_port_binding_set_parent_port(op->sb, op->nbs->parent_name);
+        sbrec_port_binding_set_tag(op->sb, op->nbs->tag, op->nbs->n_tag);
+        sbrec_port_binding_set_mac(op->sb, (const char **) op->nbs->addresses,
+                                   op->nbs->n_addresses);
+    }
 }
 
 static void
@@ -759,57 +938,63 @@ lport_is_enabled(const struct nbrec_logical_port *lport)
     return !lport->enabled || *lport->enabled;
 }
 
-/* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
- * constructing their contents based on the OVN_NB database. */
 static void
-build_lflows(struct northd_context *ctx, struct hmap *datapaths,
-             struct hmap *ports)
+build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+                    struct hmap *lflows, struct hmap *mcgroups)
 {
     /* This flow table structure is documented in ovn-northd(8), so please
      * update ovn-northd.8.xml if you change anything. */
 
-    struct hmap lflows = HMAP_INITIALIZER(&lflows);
-    struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
-
-    /* Ingress table 0: Admission control framework (priorities 0 and 100). */
+    /* Logical switch ingress table 0: Admission control framework (priority
+     * 100). */
     struct ovn_datapath *od;
     HMAP_FOR_EACH (od, key_node, datapaths) {
+        if (!od->nbs) {
+            continue;
+        }
+
         /* Logical VLANs not supported. */
-        ovn_lflow_add(&lflows, od, S_SWITCH_IN_PORT_SEC, 100, "vlan.present",
+        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "vlan.present",
                       "drop;");
 
         /* Broadcast/multicast source address is invalid. */
-        ovn_lflow_add(&lflows, od, S_SWITCH_IN_PORT_SEC, 100, "eth.src[40]",
+        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "eth.src[40]",
                       "drop;");
 
         /* Port security flows have priority 50 (see below) and will continue
          * to the next table if packet source is acceptable. */
     }
 
-    /* Ingress table 0: Ingress port security (priority 50). */
+    /* Logical switch ingress table 0: Ingress port security (priority 50). */
     struct ovn_port *op;
     HMAP_FOR_EACH (op, key_node, ports) {
-        if (!lport_is_enabled(op->nb)) {
+        if (!op->nbs) {
+            continue;
+        }
+
+        if (!lport_is_enabled(op->nbs)) {
             /* Drop packets from disabled logical ports (since logical flow
              * tables are default-drop). */
             continue;
         }
 
         struct ds match = DS_EMPTY_INITIALIZER;
-        ds_put_cstr(&match, "inport == ");
-        json_string_escape(op->key, &match);
+        ds_put_format(&match, "inport == %s", op->json_key);
         build_port_security("eth.src",
-                            op->nb->port_security, op->nb->n_port_security,
+                            op->nbs->port_security, op->nbs->n_port_security,
                             &match);
-        ovn_lflow_add(&lflows, op->od, S_SWITCH_IN_PORT_SEC, 50,
+        ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC, 50,
                       ds_cstr(&match), "next;");
         ds_destroy(&match);
     }
 
     /* Ingress table 1: ACLs (any priority). */
     HMAP_FOR_EACH (od, key_node, datapaths) {
-        for (size_t i = 0; i < od->nb->n_acls; i++) {
-            const struct nbrec_acl *acl = od->nb->acls[i];
+        if (!od->nbs) {
+            continue;
+        }
+        for (size_t i = 0; i < od->nbs->n_acls; i++) {
+            const struct nbrec_acl *acl = od->nbs->acls[i];
             const char *action;
 
             if (strcmp(acl->direction, "from-lport")) {
@@ -819,48 +1004,55 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
             action = (!strcmp(acl->action, "allow") ||
                       !strcmp(acl->action, "allow-related"))
                 ? "next;" : "drop;";
-            ovn_lflow_add(&lflows, od, S_SWITCH_IN_ACL, acl->priority,
+            ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, acl->priority,
                           acl->match, action);
         }
     }
     HMAP_FOR_EACH (od, key_node, datapaths) {
-        ovn_lflow_add(&lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
+        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
     }
 
     /* Ingress table 2: Destination lookup, broadcast and multicast handling
      * (priority 100). */
     HMAP_FOR_EACH (op, key_node, ports) {
-        if (lport_is_enabled(op->nb)) {
-            ovn_multicast_add(&mcgroups, &mc_flood, op);
+        if (!op->nbs) {
+            continue;
+        }
+
+        if (lport_is_enabled(op->nbs)) {
+            ovn_multicast_add(mcgroups, &mc_flood, op);
         }
     }
     HMAP_FOR_EACH (od, key_node, datapaths) {
-        ovn_lflow_add(&lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
+        ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
                       "outport = \""MC_FLOOD"\"; output;");
     }
 
     /* Ingress table 2: Destination lookup, unicast handling (priority 50), */
     HMAP_FOR_EACH (op, key_node, ports) {
-        for (size_t i = 0; i < op->nb->n_addresses; i++) {
+        if (!op->nbs) {
+            continue;
+        }
+
+        for (size_t i = 0; i < op->nbs->n_addresses; i++) {
             struct eth_addr mac;
 
-            if (eth_addr_from_string(op->nb->addresses[i], &mac)) {
+            if (eth_addr_from_string(op->nbs->addresses[i], &mac)) {
                 struct ds match, actions;
 
                 ds_init(&match);
-                ds_put_format(&match, "eth.dst == %s", op->nb->addresses[i]);
+                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
+                              ETH_ADDR_ARGS(mac));
 
                 ds_init(&actions);
-                ds_put_cstr(&actions, "outport = ");
-                json_string_escape(op->nb->name, &actions);
-                ds_put_cstr(&actions, "; output;");
-                ovn_lflow_add(&lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
+                ds_put_format(&actions, "outport = %s; output;", op->json_key);
+                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                               ds_cstr(&match), ds_cstr(&actions));
                 ds_destroy(&actions);
                 ds_destroy(&match);
-            } else if (!strcmp(op->nb->addresses[i], "unknown")) {
-                if (lport_is_enabled(op->nb)) {
-                    ovn_multicast_add(&mcgroups, &mc_unknown, op);
+            } else if (!strcmp(op->nbs->addresses[i], "unknown")) {
+                if (lport_is_enabled(op->nbs)) {
+                    ovn_multicast_add(mcgroups, &mc_unknown, op);
                     op->od->has_unknown = true;
                 }
             } else {
@@ -868,23 +1060,30 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
 
                 VLOG_INFO_RL(&rl,
                              "%s: invalid syntax '%s' in addresses column",
-                             op->nb->name, op->nb->addresses[i]);
+                             op->nbs->name, op->nbs->addresses[i]);
             }
         }
     }
 
     /* Ingress table 2: Destination lookup for unknown MACs (priority 0). */
     HMAP_FOR_EACH (od, key_node, datapaths) {
+        if (!od->nbs) {
+            continue;
+        }
+
         if (od->has_unknown) {
-            ovn_lflow_add(&lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
+            ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
                           "outport = \""MC_UNKNOWN"\"; output;");
         }
     }
 
     /* Egress table 0: ACLs (any priority). */
     HMAP_FOR_EACH (od, key_node, datapaths) {
-        for (size_t i = 0; i < od->nb->n_acls; i++) {
-            const struct nbrec_acl *acl = od->nb->acls[i];
+        if (!od->nbs) {
+            continue;
+        }
+        for (size_t i = 0; i < od->nbs->n_acls; i++) {
+            const struct nbrec_acl *acl = od->nbs->acls[i];
             const char *action;
 
             if (strcmp(acl->direction, "to-lport")) {
@@ -894,18 +1093,26 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
             action = (!strcmp(acl->action, "allow") ||
                       !strcmp(acl->action, "allow-related"))
                 ? "next;" : "drop;";
-            ovn_lflow_add(&lflows, od, S_SWITCH_OUT_ACL, acl->priority,
+            ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, acl->priority,
                           acl->match, action);
         }
     }
     HMAP_FOR_EACH (od, key_node, datapaths) {
-        ovn_lflow_add(&lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
+        if (!od->nbs) {
+            continue;
+        }
+
+        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
     }
 
     /* Egress table 1: Egress port security multicast/broadcast (priority
      * 100). */
     HMAP_FOR_EACH (od, key_node, datapaths) {
-        ovn_lflow_add(&lflows, od, S_SWITCH_OUT_PORT_SEC, 100, "eth.mcast",
+        if (!od->nbs) {
+            continue;
+        }
+
+        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC, 100, "eth.mcast",
                       "output;");
     }
 
@@ -916,24 +1123,276 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
      * Priority 150 rules drop packets to disabled logical ports, so that they
      * don't even receive multicast or broadcast packets. */
     HMAP_FOR_EACH (op, key_node, ports) {
-        struct ds match;
-
-        ds_init(&match);
-        ds_put_cstr(&match, "outport == ");
-        json_string_escape(op->key, &match);
-        if (lport_is_enabled(op->nb)) {
-            build_port_security("eth.dst",
-                                op->nb->port_security, op->nb->n_port_security,
-                                &match);
-            ovn_lflow_add(&lflows, op->od, S_SWITCH_OUT_PORT_SEC, 50,
+        if (!op->nbs) {
+            continue;
+        }
+
+        struct ds match = DS_EMPTY_INITIALIZER;
+        ds_put_format(&match, "outport == %s", op->json_key);
+        if (lport_is_enabled(op->nbs)) {
+            build_port_security("eth.dst", op->nbs->port_security,
+                                op->nbs->n_port_security, &match);
+            ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 50,
                           ds_cstr(&match), "output;");
         } else {
-            ovn_lflow_add(&lflows, op->od, S_SWITCH_OUT_PORT_SEC, 150,
+            ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 150,
                           ds_cstr(&match), "drop;");
         }
 
         ds_destroy(&match);
     }
+}
+
+static bool
+lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
+{
+    return !lrport->enabled || *lrport->enabled;
+}
+
+static void
+add_route(struct hmap *lflows, struct ovn_datapath *od,
+          ovs_be32 network, ovs_be32 mask, ovs_be32 gateway)
+{
+    char *match = xasprintf("ip4.dst == "IP_FMT"/"IP_FMT,
+                            IP_ARGS(network), IP_ARGS(mask));
+
+    struct ds actions = DS_EMPTY_INITIALIZER;
+    ds_put_cstr(&actions, "ip4.ttl--; reg0 = ");
+    if (gateway) {
+        ds_put_format(&actions, IP_FMT, IP_ARGS(gateway));
+    } else {
+        ds_put_cstr(&actions, "ip4.dst");
+    }
+    ds_put_cstr(&actions, "; next;");
+
+    ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
+                  count_1bits(ntohl(mask)), match, ds_cstr(&actions));
+    ds_destroy(&actions);
+    free(match);
+}
+
+static void
+build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
+                    struct hmap *lflows)
+{
+    /* This flow table structure is documented in ovn-northd(8), so please
+     * update ovn-northd.8.xml if you change anything. */
+
+    /* XXX ICMP echo reply */
+
+    /* Logical router ingress table 0: Admission control framework. */
+    struct ovn_datapath *od;
+    HMAP_FOR_EACH (od, key_node, datapaths) {
+        if (!od->nbr) {
+            continue;
+        }
+
+        /* Logical VLANs not supported.
+         * Broadcast/multicast source address is invalid. */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
+                      "vlan.present || eth.src[40]", "drop;");
+    }
+
+    /* Logical router ingress table 0: match (priority 50). */
+    struct ovn_port *op;
+    HMAP_FOR_EACH (op, key_node, ports) {
+        if (!op->nbr) {
+            continue;
+        }
+
+        if (!lrport_is_enabled(op->nbr)) {
+            /* Drop packets from disabled logical ports (since logical flow
+             * tables are default-drop). */
+            continue;
+        }
+
+        char *match = xasprintf(
+            "(eth.mcast || eth.dst == "ETH_ADDR_FMT") && inport == %s",
+            ETH_ADDR_ARGS(op->mac), op->json_key);
+        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
+                      match, "next;");
+    }
+
+    /* Logical router ingress table 1: IP Input. */
+    HMAP_FOR_EACH (od, key_node, datapaths) {
+        if (!od->nbr) {
+            continue;
+        }
+
+        /* L3 admission control: drop multicast and broadcast source, localhost
+         * source or destination, and zero network source or destination
+         * (priority 220). */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 220,
+                      "ip4.mcast || "
+                      "ip4.src == 255.255.255.255 || "
+                      "ip4.src == 127.0.0.0/8 || "
+                      "ip4.dst == 127.0.0.0/8 || "
+                      "ip4.src == 0.0.0.0/8 || "
+                      "ip4.dst == 0.0.0.0/8",
+                      "drop;");
+
+        /* Drop Ethernet local broadcast.  By definition this traffic should
+         * not be forwarded. */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 190,
+                      "eth.bcast", "drop;");
+
+        /* Drop IP multicast. */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 190,
+                      "ip4.mcast", "drop;");
+
+        /* TTL discard.
+         *
+         * XXX Need to send ICMP time exceeded if !ip.later_frag. */
+        char *match = xasprintf("ip4 && ip.ttl == {0, 1}");
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 170, match, "drop;");
+        free(match);
+    }
+
+    HMAP_FOR_EACH (op, key_node, ports) {
+        if (!op->nbr) {
+            continue;
+        }
+
+        /* L3 admission control: drop packets that originate from an IP address
+         * owned by the router or a broadcast address known to the router
+         * (priority 220). */
+        char *match = xasprintf("ip4.src == {"IP_FMT", "IP_FMT"}",
+                                IP_ARGS(op->ip), IP_ARGS(op->bcast));
+        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 220,
+                      match, "drop;");
+        free(match);
+
+        /* ARP reply.  These flows reply to ARP requests for the router's own
+         * IP address. */
+        match = xasprintf(
+            "inport == %s && arp.tha == "ETH_ADDR_FMT" && arp.op == 1",
+            op->json_key, ETH_ADDR_ARGS(op->mac));
+        char *actions = xasprintf(
+            "eth.dst = eth.src; "
+            "eth.src = "ETH_ADDR_FMT"; "
+            "arp.op = 2; /* ARP reply */ "
+            "arp.tha = arp.sha; "
+            "arp.sha = "ETH_ADDR_FMT"; "
+            "arp.tpa = arp.spa; "
+            "arp.spa = "IP_FMT"; "
+            "outport = %s; "
+            "inport = \"\"; /* Allow sending out inport. */ "
+            "output;",
+            ETH_ADDR_ARGS(op->mac),
+            ETH_ADDR_ARGS(op->mac),
+            IP_ARGS(op->ip),
+            op->json_key);
+        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 210,
+                      match, actions);
+
+        /* Drop IP traffic to this router. */
+        match = xasprintf("ip4.dst == "IP_FMT, IP_ARGS(op->ip));
+        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 200,
+                      match, "drop;");
+        free(match);
+    }
+
+    /* Logical router ingress table 2: IP Routing.
+     *
+     * A packet that arrives at this table is an IP packet that should be
+     * routed to the address in ip4.dst. This table sets reg0 to the next-hop
+     * IP address (leaving ip4.dst, the packet’s final destination, unchanged)
+     * and advances to the next table for ARP resolution. */
+    HMAP_FOR_EACH (op, key_node, ports) {
+        if (!op->nbr) {
+            continue;
+        }
+
+        add_route(lflows, op->od, op->network, op->mask, 0);
+    }
+    HMAP_FOR_EACH (od, key_node, datapaths) {
+        if (!od->nbr) {
+            continue;
+        }
+
+        if (od->gateway) {
+            add_route(lflows, od, 0, 0, od->gateway);
+        }
+    }
+    /* XXX destination unreachable */
+
+    /* Logical router ingress table 3: ARP Resolution.
+     *
+     * Any packet that reaches this table is an IP packet whose next-hop IP
+     * address is in reg0. (ip4.dst is the final destination.) This table
+     * resolves the IP address in reg0 into an output port in outport and an
+     * Ethernet address in eth.dst. */
+    HMAP_FOR_EACH (op, key_node, ports) {
+        if (op->nbr) {
+            /* XXX ARP for neighboring router */
+        } else if (op->od->router_port) {
+            const char *peer_name = smap_get(
+                &op->od->router_port->nbs->options, "router-port");
+            if (!peer_name) {
+                continue;
+            }
+
+            struct ovn_port *peer = ovn_port_find(ports, peer_name);
+            if (!peer || !peer->nbr) {
+                continue;
+            }
+
+            for (size_t i = 0; i < op->nbs->n_addresses; i++) {
+                struct eth_addr ea;
+                ovs_be32 ip;
+
+                if (ovs_scan(op->nbs->addresses[i],
+                             ETH_ADDR_SCAN_FMT" "IP_SCAN_FMT,
+                             ETH_ADDR_SCAN_ARGS(ea), IP_SCAN_ARGS(&ip))) {
+                    char *match = xasprintf("reg0 == "IP_FMT, IP_ARGS(ip));
+                    char *actions = xasprintf("eth.src = "ETH_ADDR_FMT"; "
+                                              "eth.dst = "ETH_ADDR_FMT"; "
+                                              "outport = %s; "
+                                              "output;",
+                                              ETH_ADDR_ARGS(peer->mac),
+                                              ETH_ADDR_ARGS(ea),
+                                              peer->json_key);
+                    ovn_lflow_add(lflows, peer->od,
+                                  S_ROUTER_IN_ARP, 200, match, actions);
+                    free(actions);
+                    free(match);
+                }
+            }
+        }
+    }
+
+    /* Logical router egress table 0: Delivery (priority 100).
+     *
+     * Priority 100 rules deliver packets to enabled logical ports. */
+    HMAP_FOR_EACH (op, key_node, ports) {
+        if (!op->nbr) {
+            continue;
+        }
+
+        if (!lrport_is_enabled(op->nbr)) {
+            /* Drop packets to disabled logical ports (since logical flow
+             * tables are default-drop). */
+            continue;
+        }
+
+        char *match = xasprintf("outport == %s", op->json_key);
+        ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
+                      match, "output;");
+        free(match);
+    }
+}
+
+/* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
+ * constructing their contents based on the OVN_NB database. */
+static void
+build_lflows(struct northd_context *ctx, struct hmap *datapaths,
+             struct hmap *ports)
+{
+    struct hmap lflows = HMAP_INITIALIZER(&lflows);
+    struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
+
+    build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
+    build_lrouter_flows(datapaths, ports, &lflows);
 
     /* Push changes to the Logical_Flow table to database. */
     const struct sbrec_logical_flow *sbflow, *next_sbflow;
@@ -945,7 +1404,7 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
             continue;
         }
 
-        enum ovn_datapath_type dp_type = DP_SWITCH; /* XXX no routers yet. */
+        enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
         enum ovn_pipeline pipeline
             = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
         struct ovn_lflow *lflow = ovn_lflow_find(
@@ -964,8 +1423,8 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
 
         sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
         sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
-        sbrec_logical_flow_set_pipeline(sbflow,
-                                        pipeline ? "ingress" : "egress");
+        sbrec_logical_flow_set_pipeline(
+            sbflow, pipeline == P_IN ? "ingress" : "egress");
         sbrec_logical_flow_set_table_id(sbflow, table);
         sbrec_logical_flow_set_priority(sbflow, lflow->priority);
         sbrec_logical_flow_set_match(sbflow, lflow->match);
diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
index f898f97..7d9d22f 100644
--- a/ovn/ovn-sb.xml
+++ b/ovn/ovn-sb.xml
@@ -1025,12 +1025,28 @@ tcp.flags = RST;
       constructed for each supported encapsulation.
     </column>
 
-    <column name="external_ids" key="logical-switch" type='{"type": "uuid"}'>
-      Each row in <ref table="Datapath_Binding"/> is associated with some
-      logical datapath.  <code>ovn-northd</code> uses this key to store the
-      UUID of the logical datapath <ref table="Logical_Switch"
-      db="OVN_Northbound"/> row in the <ref db="OVN_Northbound"/> database.
-    </column>
+    <group title="OVN_Northbound Relationship">
+      <p>
+        Each row in <ref table="Datapath_Binding"/> is associated with some
+        logical datapath.  <code>ovn-northd</code> uses these keys to track the
+        association of a logical datapath with concepts in the <ref
+        db="OVN_Northbound"/> database.
+      </p>
+
+      <column name="external_ids" key="logical-switch" type='{"type": "uuid"}'>
+        For a logical datapath that represents a logical switch,
+        <code>ovn-northd</code> stores in this key the UUID of the
+        corresponding <ref table="Logical_Switch" db="OVN_Northbound"/> row in
+        the <ref db="OVN_Northbound"/> database.
+      </column>
+
+      <column name="external_ids" key="logical-router" type='{"type": "uuid"}'>
+        For a logical datapath that represents a logical router,
+        <code>ovn-northd</code> stores in this key the UUID of the
+        corresponding <ref table="Logical_Router" db="OVN_Northbound"/> row in
+        the <ref db="OVN_Northbound"/> database.
+      </column>
+    </group>
 
     <group title="Common Columns">
       The overall purpose of these columns is described under <code>Common
diff --git a/tests/ovn.at b/tests/ovn.at
index a42a319..319ab4b 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -510,12 +510,13 @@ AT_CLEANUP
 
 AT_BANNER([OVN end-to-end tests])
 
-AT_SETUP([ovn -- 3 HVs, 3 VIFs/HV, 1 logical switch])
+# 3 hypervisors, one logical switch, 3 logical ports per hypervisor
+AT_SETUP([ovn -- 3 HVs, 1 LS, 3 lports/HV])
 AT_SKIP_IF([test $HAVE_PYTHON = no])
 ovn_start
 
 # Create hypervisors hv[123].
-# Add vif1[123] to hv1, vif2[123] to hv2, vif3[123].
+# Add vif1[123] to hv1, vif2[123] to hv2, vif3[123] to hv3.
 # Add all of the vifs to a single logical switch lsw0.
 # Turn on port security on all the vifs except vif[123]1.
 # Make vif13, vif2[23], vif3[123] destinations for unknown MACs.
@@ -676,3 +677,166 @@ for i in 1 2 3; do
     done
 done
 AT_CLEANUP
+
+# 3 hypervisors, 3 logical switches with 3 logical ports each, 1 logical router
+AT_SETUP([ovn -- 3 HVs, 3 LS, 3 lports/LS, 1 LR])
+AT_SKIP_IF([test $HAVE_PYTHON = no])
+ovn_start
+
+# Logical network:
+#
+# Three logical switches ls1, ls2, ls3.
+# Three VIFs on each: lp1[123], lp2[123], lp3[123].
+# One logical router lr0 connected to ls[123].
+ovn-nbctl \
+    -- create Logical_Router name=lr0 ports=@lrp1,@lrp2,@lrp3 \
+    -- --id=@lrp1 create Logical_Router_Port name=lrp1 \
+       network=192.168.1.254/24 mac='"00:00:00:00:ff:01"' \
+    -- --id=@lrp2 create Logical_Router_Port name=lrp2 \
+       network=192.168.2.254/24 mac='"00:00:00:00:ff:02"' \
+    -- --id=@lrp3 create Logical_Router_Port name=lrp3 \
+       network=192.168.3.254/24 mac='"00:00:00:00:ff:03"'
+for i in 1 2 3; do
+    lrp_uuid=`ovn-nbctl get Logical_Router_Port lrp$i _uuid`
+    ovn-nbctl \
+        -- lswitch-add ls$i \
+	-- lport-add ls$i lrp$i-attachment \
+	-- set Logical_Port lrp$i-attachment type=router \
+	                                     options:router-port=$lrp_uuid \
+					     addresses='"00:00:00:00:ff:0'$i'"'
+    for j in 1 2 3; do
+        ovn-nbctl \
+	    -- lport-add ls$i lp$i$j \
+	    -- lport-set-addresses lp$i$j "f0:00:00:00:00:$i$j 192.168.$i.$j"
+    done
+done
+
+# Physical network:
+#
+# Three hypervisors hv[123].
+# lp1[123] spread across hv[123]: lp11 on hv1, lp12 on hv2, lp13 on hv3.
+# lp2[123] spread across hv[23]: lp21 and lp22 on hv2, lp23 on hv3.
+# lp3[123] all on hv3.
+
+# Given the number of a logical port (e.g. 11 for lp11), prints the number
+# of the hypervisor on which it is located (e.g. 1 for hv1).
+vif_to_hv() {
+    case $1 in dnl (
+	11) echo 1 ;; dnl (
+	12 | 21 | 22) echo 2 ;; dnl (
+	13 | 23 | 3?) echo 3 ;;
+    esac
+}
+
+net_add n1
+for i in 1 2 3; do
+    sim_add hv$i
+    as hv$i
+    ovs-vsctl add-br br-phys
+    ovn_attach n1 br-phys 192.168.0.$i
+done
+for i in 1 2 3; do
+    for j in 1 2 3; do
+        hv=`vif_to_hv $i$j`
+        as hv$hv ovs-vsctl \
+	    -- add-port br-int vif$i$j \
+	    -- set Interface vif$i$j external-ids:iface-id=lp$i$j \
+	                             options:tx_pcap=hv$hv/vif$i$j-tx.pcap \
+				     options:rxq_pcap=hv$hv/vif$i$j-rx.pcap \
+				     ofport-request=$i$j
+    done
+done
+
+# Pre-populate the hypervisors' ARP tables so that we don't lose any
+# packets for ARP resolution (native tunneling doesn't queue packets
+# for ARP resolution).
+ovn_populate_arp
+
+# Allow some time for ovn-northd and ovn-controller to catch up.
+# XXX This should be more systematic.
+sleep 1
+
+# test_packet INPORT SRC_MAC DST_MAC SRC_IP DST_IP OUTPORT...
+#
+# This shell function causes an IPv4 UDP packet to be received on INPORT, with
+# Ethernet source SRC_MAC and destination DST_MAC (each exactly 12 hex digits)
+# and IP source SRC_IP and destination DST_IP (each exactly 8 hex digits).  The
+# OUTPORTs (zero or more) list the VIFs on which the packet should be received.
+# INPORT and the OUTPORTs are specified as lport numbers, e.g. 11 for vif11.
+trim_zeros() {
+    sed 's/\(00\)\{1,\}$//'
+}
+for i in 1 2 3; do
+    for j in 1 2 3; do
+        : > $i$j.expected
+    done
+done
+test_packet() {
+    # This packet has bad checksums but logical L3 routing doesn't check.
+    local inport=$1 src_mac=$2 dst_mac=$3 src_ip=$4 dst_ip=$5
+    local packet=$3$208004500001c0000000040110000$4$50035111100080000
+    shift; shift; shift; shift; shift
+    hv=hv`vif_to_hv $inport`
+    as $hv ovs-appctl netdev-dummy/receive vif$inport $packet
+    #as $hv ovs-appctl ofproto/trace br-int in_port=$inport $packet
+    for outport; do
+        ins=`echo $inport | sed 's/^\(.\).*/\1/'`
+	outs=`echo $outport | sed 's/^\(.\).*/\1/'`
+	if test $ins = $outs; then
+	    # Ports on the same logical switch receive exactly the same packet.
+            echo $packet
+	else
+	    # Routing decrements TTL and updates source and dest MAC
+	    # (and checksum).
+	    echo f000000000${outport}00000000ff0${outs}08004500001c00000000"3f1101"00${src_ip}${dst_ip}0035111100080000
+	fi | trim_zeros >> $outport.expected
+    done
+}
+
+as hv1 ovn-sbctl dump-flows
+as hv1 ovs-ofctl dump-flows br-int
+
+# Send packets between all pairs of source and destination ports:
+#
+# 1. Unicast IP packets are delivered to exactly one lport (except
+#    that packets destined to their input ports are dropped).
+#
+# 2. Broadcast IP packets go to all other lports on the input port's switch.
+for is in 1 2 3; do
+    for js in 1 2 3; do
+        bcast=
+        s=$is$js
+	smac=f000000000$s
+	sip=c0a80${is}0${js}
+        for id in 1 2 3; do
+            for jd in 1 2 3; do
+                d=$id$jd
+		dip=c0a80${id}0${jd}
+		if test $is = $id; then dmac=f000000000$d; else dmac=00000000ff0$is; fi
+                if test $d != $s; then unicast=$d; else unicast=; fi
+
+                test_packet $s $smac $dmac $sip $dip $unicast #1
+
+                if test $id = $is && test $jd != $js; then bcast="$bcast $d"; fi
+            done
+        done
+	test_packet $s $smac ffffffffffff $sip ffffffff $bcast #2
+    done
+done
+
+# Allow some time for packet forwarding.
+# XXX This can be improved.
+sleep 1
+
+# Now check the packets actually received against the ones expected.
+for i in 1 2 3; do
+    for j in 1 2 3; do
+        file=hv`vif_to_hv $i$j`/vif$i$j-tx.pcap
+        echo $file
+        $PYTHON "$top_srcdir/utilities/ovs-pcap.in" $file | trim_zeros > $i$j.packets
+        cp $i$j.expected expout
+        AT_CHECK([cat $i$j.packets], [0], [expout])
+	echo
+    done
+done
+AT_CLEANUP
-- 
2.1.3




More information about the dev mailing list