[ovs-dev] [PATCH 23/23] ovn: Implement basic logical L3 routing.
Ben Pfaff
blp at nicira.com
Sat Oct 10 04:21:42 UTC 2015
This implements basic logical L3 routing. It has a lot of caveats,
including the following regarding testing:
* Only single-router hops have been tested. Chains or trees of
logical routers may work but definitely need testing and may
need a little extra code.
* No testing of logical router ARP replies.
* Not enough testing in general.
ovn/TODO describes a lot of other caveats in terms of the work needed
to fix them.
Signed-off-by: Ben Pfaff <blp at nicira.com>
---
ovn/TODO | 6 -
ovn/northd/ovn-northd.c | 641 +++++++++++++++++++++++++++++++++++++++++-------
ovn/ovn-sb.xml | 28 ++-
tests/ovn.at | 168 ++++++++++++-
4 files changed, 738 insertions(+), 105 deletions(-)
diff --git a/ovn/TODO b/ovn/TODO
index 8ea0d17..1c3a0dc 100644
--- a/ovn/TODO
+++ b/ovn/TODO
@@ -47,12 +47,6 @@ various ways to ensure it could be implemented, e.g. the same as for
OpenFlow by allowing the logical inport to be zeroed, or by
introducing a new action that ignores the inport.
-** ovn-northd
-
-*** What flows should it generate?
-
-See description in ovn-northd(8).
-
** New OVN logical actions
*** arp
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index ac3b39e..8825b52 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -222,13 +222,20 @@ allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
/* The 'key' comes from nb->header_.uuid or sb->external_ids:logical-switch. */
struct ovn_datapath {
struct hmap_node key_node; /* Index on 'key'. */
- struct uuid key; /* nb->header_.uuid. */
+ struct uuid key; /* (nbs/nbr)->header_.uuid. */
- const struct nbrec_logical_switch *nb; /* May be NULL. */
+ const struct nbrec_logical_switch *nbs; /* May be NULL. */
+ const struct nbrec_logical_router *nbr; /* May be NULL. */
const struct sbrec_datapath_binding *sb; /* May be NULL. */
struct ovs_list list; /* In list of similar records. */
+ /* Logical router data (digested from nbr). */
+ ovs_be32 gateway;
+
+ /* Logical switch data. */
+ struct ovn_port *router_port;
+
struct hmap port_tnlids;
uint32_t port_key_hint;
@@ -237,13 +244,15 @@ struct ovn_datapath {
static struct ovn_datapath *
ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
- const struct nbrec_logical_switch *nb,
+ const struct nbrec_logical_switch *nbs,
+ const struct nbrec_logical_router *nbr,
const struct sbrec_datapath_binding *sb)
{
struct ovn_datapath *od = xzalloc(sizeof *od);
od->key = *key;
od->sb = sb;
- od->nb = nb;
+ od->nbs = nbs;
+ od->nbr = nbr;
hmap_init(&od->port_tnlids);
od->port_key_hint = 0;
hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
@@ -301,7 +310,8 @@ join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
const struct sbrec_datapath_binding *sb, *sb_next;
SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
struct uuid key;
- if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key)) {
+ if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
+ !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
ovsdb_idl_txn_add_comment(ctx->ovnsb_txn,
"deleting Datapath_Binding "UUID_FMT" that "
"lacks external-ids:logical-switch",
@@ -320,23 +330,62 @@ join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
}
struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
- NULL, sb);
+ NULL, NULL, sb);
list_push_back(sb_only, &od->list);
}
- const struct nbrec_logical_switch *nb;
- NBREC_LOGICAL_SWITCH_FOR_EACH (nb, ctx->ovnnb_idl) {
+ const struct nbrec_logical_switch *nbs;
+ NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
struct ovn_datapath *od = ovn_datapath_find(datapaths,
- &nb->header_.uuid);
+ &nbs->header_.uuid);
if (od) {
- od->nb = nb;
+ od->nbs = nbs;
list_remove(&od->list);
list_push_back(both, &od->list);
} else {
- od = ovn_datapath_create(datapaths, &nb->header_.uuid, nb, NULL);
+ od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
+ nbs, NULL, NULL);
list_push_back(nb_only, &od->list);
}
}
+
+ const struct nbrec_logical_router *nbr;
+ NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
+ struct ovn_datapath *od = ovn_datapath_find(datapaths,
+ &nbr->header_.uuid);
+ if (od) {
+ if (!od->nbs) {
+ od->nbr = nbr;
+ list_remove(&od->list);
+ list_push_back(both, &od->list);
+ } else {
+ /* Can't happen! */
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl,
+ "duplicate UUID "UUID_FMT" in OVN_Northbound",
+ UUID_ARGS(&nbr->header_.uuid));
+ continue;
+ }
+ } else {
+ od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
+ NULL, nbr, NULL);
+ list_push_back(nb_only, &od->list);
+ }
+
+ od->gateway = 0;
+ if (nbr->default_gw) {
+ ovs_be32 ip, mask;
+ char *error = ip_parse_masked(nbr->default_gw, &ip, &mask);
+ if (error || !ip || mask != OVS_BE32_MAX) {
+ static struct vlog_rate_limit rl
+ = VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl, "bad 'gateway' %s", nbr->default_gw);
+ free(error);
+ } else {
+ od->gateway = ip;
+ }
+ }
+ }
}
static uint32_t
@@ -371,8 +420,9 @@ build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
char uuid_s[UUID_LEN + 1];
- sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->nb->header_.uuid));
- const struct smap id = SMAP_CONST1(&id, "logical-switch", uuid_s);
+ sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
+ const char *key = od->nbs ? "logical-switch" : "logical-router";
+ const struct smap id = SMAP_CONST1(&id, key, uuid_s);
sbrec_datapath_binding_set_external_ids(od->sb, &id);
sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
@@ -391,10 +441,19 @@ build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
struct ovn_port {
struct hmap_node key_node; /* Index on 'key'. */
- const char *key; /* nb->name and sb->logical_port */
+ char *key; /* nb->name and sb->logical_port */
+ char *json_key; /* 'key', quoted for use in JSON. */
+
+ const struct nbrec_logical_port *nbs; /* May be NULL. */
+ const struct nbrec_logical_router_port *nbr; /* May be NULL. */
+ const struct sbrec_port_binding *sb; /* May be NULL. */
- const struct nbrec_logical_port *nb; /* May be NULL. */
- const struct sbrec_port_binding *sb; /* May be NULL. */
+ /* Logical router port data. */
+ ovs_be32 ip, mask; /* 192.168.10.123/24. */
+ ovs_be32 network; /* 192.168.10.0. */
+ ovs_be32 bcast; /* 192.168.10.255. */
+ struct eth_addr mac;
+ struct ovn_port *peer;
struct ovn_datapath *od;
@@ -403,13 +462,20 @@ struct ovn_port {
static struct ovn_port *
ovn_port_create(struct hmap *ports, const char *key,
- const struct nbrec_logical_port *nb,
+ const struct nbrec_logical_port *nbs,
+ const struct nbrec_logical_router_port *nbr,
const struct sbrec_port_binding *sb)
{
struct ovn_port *op = xzalloc(sizeof *op);
- op->key = key;
+
+ struct ds json_key = DS_EMPTY_INITIALIZER;
+ json_string_escape(key, &json_key);
+ op->json_key = ds_steal_cstr(&json_key);
+
+ op->key = xstrdup(key);
op->sb = sb;
- op->nb = nb;
+ op->nbs = nbs;
+ op->nbr = nbr;
hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
return op;
}
@@ -422,6 +488,8 @@ ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
* private list and once we've exited that function it is not safe to
* use it. */
hmap_remove(ports, &port->key_node);
+ free(port->json_key);
+ free(port->key);
free(port);
}
}
@@ -460,24 +528,111 @@ join_logical_ports(struct northd_context *ctx,
const struct sbrec_port_binding *sb;
SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
- NULL, sb);
+ NULL, NULL, sb);
list_push_back(sb_only, &op->list);
}
struct ovn_datapath *od;
HMAP_FOR_EACH (od, key_node, datapaths) {
- for (size_t i = 0; i < od->nb->n_ports; i++) {
- const struct nbrec_logical_port *nb = od->nb->ports[i];
- struct ovn_port *op = ovn_port_find(ports, nb->name);
- if (op) {
- op->nb = nb;
- list_remove(&op->list);
- list_push_back(both, &op->list);
- } else {
- op = ovn_port_create(ports, nb->name, nb, NULL);
- list_push_back(nb_only, &op->list);
+ if (od->nbs) {
+ for (size_t i = 0; i < od->nbs->n_ports; i++) {
+ const struct nbrec_logical_port *nbs = od->nbs->ports[i];
+ struct ovn_port *op = ovn_port_find(ports, nbs->name);
+ if (op) {
+ if (op->nbs || op->nbr) {
+ static struct vlog_rate_limit rl
+ = VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl, "duplicate logical port %s",
+ nbs->name);
+ continue;
+ }
+ op->nbs = nbs;
+ list_remove(&op->list);
+ list_push_back(both, &op->list);
+ } else {
+ op = ovn_port_create(ports, nbs->name, nbs, NULL, NULL);
+ list_push_back(nb_only, &op->list);
+ }
+
+ op->od = od;
+ }
+ } else {
+ for (size_t i = 0; i < od->nbr->n_ports; i++) {
+ const struct nbrec_logical_router_port *nbr
+ = od->nbr->ports[i];
+
+ struct eth_addr mac;
+ if (!eth_addr_from_string(nbr->mac, &mac)) {
+ static struct vlog_rate_limit rl
+ = VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl, "bad 'mac' %s", nbr->mac);
+ continue;
+ }
+
+ ovs_be32 ip, mask;
+ char *error = ip_parse_masked(nbr->network, &ip, &mask);
+ if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
+ static struct vlog_rate_limit rl
+ = VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl, "bad 'network' %s", nbr->network);
+ free(error);
+ continue;
+ }
+
+ char name[UUID_LEN + 1];
+ snprintf(name, sizeof name, UUID_FMT,
+ UUID_ARGS(&nbr->header_.uuid));
+ struct ovn_port *op = ovn_port_find(ports, name);
+ if (op) {
+ if (op->nbs || op->nbr) {
+ static struct vlog_rate_limit rl
+ = VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl, "duplicate logical router port %s",
+ name);
+ continue;
+ }
+ op->nbr = nbr;
+ list_remove(&op->list);
+ list_push_back(both, &op->list);
+ } else {
+ op = ovn_port_create(ports, name, NULL, nbr, NULL);
+ list_push_back(nb_only, &op->list);
+ }
+
+ op->ip = ip;
+ op->mask = mask;
+ op->network = ip & mask;
+ op->bcast = ip | ~mask;
+ op->mac = mac;
+
+ op->od = od;
}
- op->od = od;
+ }
+ }
+
+ /* Connect logical router ports, and logical switch ports of type "router",
+ * to their peers. */
+ struct ovn_port *op;
+ HMAP_FOR_EACH (op, key_node, ports) {
+ if (op->nbs && !strcmp(op->nbs->type, "router")) {
+ const char *peer_name = smap_get(&op->nbs->options, "router-port");
+ if (!peer_name) {
+ continue;
+ }
+
+ struct ovn_port *peer = ovn_port_find(ports, peer_name);
+ if (!peer || !peer->nbr) {
+ continue;
+ }
+
+ peer->peer = op;
+ op->peer = peer;
+ op->od->router_port = op;
+ } else if (op->nbr && op->nbr->peer) {
+ char peer_name[UUID_LEN + 1];
+ snprintf(peer_name, sizeof peer_name, UUID_FMT,
+ UUID_ARGS(&op->nbr->peer->header_.uuid));
+ op->peer = ovn_port_find(ports, peer_name);
}
}
}
@@ -485,13 +640,37 @@ join_logical_ports(struct northd_context *ctx,
static void
ovn_port_update_sbrec(const struct ovn_port *op)
{
- sbrec_port_binding_set_type(op->sb, op->nb->type);
- sbrec_port_binding_set_options(op->sb, &op->nb->options);
sbrec_port_binding_set_datapath(op->sb, op->od->sb);
- sbrec_port_binding_set_parent_port(op->sb, op->nb->parent_name);
- sbrec_port_binding_set_tag(op->sb, op->nb->tag, op->nb->n_tag);
- sbrec_port_binding_set_mac(op->sb, (const char **) op->nb->addresses,
- op->nb->n_addresses);
+ if (op->nbr) {
+ sbrec_port_binding_set_type(op->sb, "patch");
+
+ const char *peer = op->peer ? op->peer->key : "<error>";
+ const struct smap ids = SMAP_CONST1(&ids, "peer", peer);
+ sbrec_port_binding_set_options(op->sb, &ids);
+
+ sbrec_port_binding_set_parent_port(op->sb, NULL);
+ sbrec_port_binding_set_tag(op->sb, NULL, 0);
+ sbrec_port_binding_set_mac(op->sb, NULL, 0);
+ } else {
+ if (strcmp(op->nbs->type, "router")) {
+ sbrec_port_binding_set_type(op->sb, op->nbs->type);
+ sbrec_port_binding_set_options(op->sb, &op->nbs->options);
+ } else {
+ sbrec_port_binding_set_type(op->sb, "patch");
+
+ const char *router_port = smap_get(&op->nbs->options,
+ "router-port");
+ if (!router_port) {
+ router_port = "<error>";
+ }
+ const struct smap ids = SMAP_CONST1(&ids, "peer", router_port);
+ sbrec_port_binding_set_options(op->sb, &ids);
+ }
+ sbrec_port_binding_set_parent_port(op->sb, op->nbs->parent_name);
+ sbrec_port_binding_set_tag(op->sb, op->nbs->tag, op->nbs->n_tag);
+ sbrec_port_binding_set_mac(op->sb, (const char **) op->nbs->addresses,
+ op->nbs->n_addresses);
+ }
}
static void
@@ -759,57 +938,63 @@ lport_is_enabled(const struct nbrec_logical_port *lport)
return !lport->enabled || *lport->enabled;
}
-/* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
- * constructing their contents based on the OVN_NB database. */
static void
-build_lflows(struct northd_context *ctx, struct hmap *datapaths,
- struct hmap *ports)
+build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
+ struct hmap *lflows, struct hmap *mcgroups)
{
/* This flow table structure is documented in ovn-northd(8), so please
* update ovn-northd.8.xml if you change anything. */
- struct hmap lflows = HMAP_INITIALIZER(&lflows);
- struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
-
- /* Ingress table 0: Admission control framework (priorities 0 and 100). */
+ /* Logical switch ingress table 0: Admission control framework (priority
+ * 100). */
struct ovn_datapath *od;
HMAP_FOR_EACH (od, key_node, datapaths) {
+ if (!od->nbs) {
+ continue;
+ }
+
/* Logical VLANs not supported. */
- ovn_lflow_add(&lflows, od, S_SWITCH_IN_PORT_SEC, 100, "vlan.present",
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "vlan.present",
"drop;");
/* Broadcast/multicast source address is invalid. */
- ovn_lflow_add(&lflows, od, S_SWITCH_IN_PORT_SEC, 100, "eth.src[40]",
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "eth.src[40]",
"drop;");
/* Port security flows have priority 50 (see below) and will continue
* to the next table if packet source is acceptable. */
}
- /* Ingress table 0: Ingress port security (priority 50). */
+ /* Logical switch ingress table 0: Ingress port security (priority 50). */
struct ovn_port *op;
HMAP_FOR_EACH (op, key_node, ports) {
- if (!lport_is_enabled(op->nb)) {
+ if (!op->nbs) {
+ continue;
+ }
+
+ if (!lport_is_enabled(op->nbs)) {
/* Drop packets from disabled logical ports (since logical flow
* tables are default-drop). */
continue;
}
struct ds match = DS_EMPTY_INITIALIZER;
- ds_put_cstr(&match, "inport == ");
- json_string_escape(op->key, &match);
+ ds_put_format(&match, "inport == %s", op->json_key);
build_port_security("eth.src",
- op->nb->port_security, op->nb->n_port_security,
+ op->nbs->port_security, op->nbs->n_port_security,
&match);
- ovn_lflow_add(&lflows, op->od, S_SWITCH_IN_PORT_SEC, 50,
+ ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC, 50,
ds_cstr(&match), "next;");
ds_destroy(&match);
}
/* Ingress table 1: ACLs (any priority). */
HMAP_FOR_EACH (od, key_node, datapaths) {
- for (size_t i = 0; i < od->nb->n_acls; i++) {
- const struct nbrec_acl *acl = od->nb->acls[i];
+ if (!od->nbs) {
+ continue;
+ }
+ for (size_t i = 0; i < od->nbs->n_acls; i++) {
+ const struct nbrec_acl *acl = od->nbs->acls[i];
const char *action;
if (strcmp(acl->direction, "from-lport")) {
@@ -819,48 +1004,55 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
action = (!strcmp(acl->action, "allow") ||
!strcmp(acl->action, "allow-related"))
? "next;" : "drop;";
- ovn_lflow_add(&lflows, od, S_SWITCH_IN_ACL, acl->priority,
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, acl->priority,
acl->match, action);
}
}
HMAP_FOR_EACH (od, key_node, datapaths) {
- ovn_lflow_add(&lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
}
/* Ingress table 2: Destination lookup, broadcast and multicast handling
* (priority 100). */
HMAP_FOR_EACH (op, key_node, ports) {
- if (lport_is_enabled(op->nb)) {
- ovn_multicast_add(&mcgroups, &mc_flood, op);
+ if (!op->nbs) {
+ continue;
+ }
+
+ if (lport_is_enabled(op->nbs)) {
+ ovn_multicast_add(mcgroups, &mc_flood, op);
}
}
HMAP_FOR_EACH (od, key_node, datapaths) {
- ovn_lflow_add(&lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
"outport = \""MC_FLOOD"\"; output;");
}
/* Ingress table 2: Destination lookup, unicast handling (priority 50), */
HMAP_FOR_EACH (op, key_node, ports) {
- for (size_t i = 0; i < op->nb->n_addresses; i++) {
+ if (!op->nbs) {
+ continue;
+ }
+
+ for (size_t i = 0; i < op->nbs->n_addresses; i++) {
struct eth_addr mac;
- if (eth_addr_from_string(op->nb->addresses[i], &mac)) {
+ if (eth_addr_from_string(op->nbs->addresses[i], &mac)) {
struct ds match, actions;
ds_init(&match);
- ds_put_format(&match, "eth.dst == %s", op->nb->addresses[i]);
+ ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
+ ETH_ADDR_ARGS(mac));
ds_init(&actions);
- ds_put_cstr(&actions, "outport = ");
- json_string_escape(op->nb->name, &actions);
- ds_put_cstr(&actions, "; output;");
- ovn_lflow_add(&lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
+ ds_put_format(&actions, "outport = %s; output;", op->json_key);
+ ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
ds_cstr(&match), ds_cstr(&actions));
ds_destroy(&actions);
ds_destroy(&match);
- } else if (!strcmp(op->nb->addresses[i], "unknown")) {
- if (lport_is_enabled(op->nb)) {
- ovn_multicast_add(&mcgroups, &mc_unknown, op);
+ } else if (!strcmp(op->nbs->addresses[i], "unknown")) {
+ if (lport_is_enabled(op->nbs)) {
+ ovn_multicast_add(mcgroups, &mc_unknown, op);
op->od->has_unknown = true;
}
} else {
@@ -868,23 +1060,30 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
VLOG_INFO_RL(&rl,
"%s: invalid syntax '%s' in addresses column",
- op->nb->name, op->nb->addresses[i]);
+ op->nbs->name, op->nbs->addresses[i]);
}
}
}
/* Ingress table 2: Destination lookup for unknown MACs (priority 0). */
HMAP_FOR_EACH (od, key_node, datapaths) {
+ if (!od->nbs) {
+ continue;
+ }
+
if (od->has_unknown) {
- ovn_lflow_add(&lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
"outport = \""MC_UNKNOWN"\"; output;");
}
}
/* Egress table 0: ACLs (any priority). */
HMAP_FOR_EACH (od, key_node, datapaths) {
- for (size_t i = 0; i < od->nb->n_acls; i++) {
- const struct nbrec_acl *acl = od->nb->acls[i];
+ if (!od->nbs) {
+ continue;
+ }
+ for (size_t i = 0; i < od->nbs->n_acls; i++) {
+ const struct nbrec_acl *acl = od->nbs->acls[i];
const char *action;
if (strcmp(acl->direction, "to-lport")) {
@@ -894,18 +1093,26 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
action = (!strcmp(acl->action, "allow") ||
!strcmp(acl->action, "allow-related"))
? "next;" : "drop;";
- ovn_lflow_add(&lflows, od, S_SWITCH_OUT_ACL, acl->priority,
+ ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, acl->priority,
acl->match, action);
}
}
HMAP_FOR_EACH (od, key_node, datapaths) {
- ovn_lflow_add(&lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
+ if (!od->nbs) {
+ continue;
+ }
+
+ ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
}
/* Egress table 1: Egress port security multicast/broadcast (priority
* 100). */
HMAP_FOR_EACH (od, key_node, datapaths) {
- ovn_lflow_add(&lflows, od, S_SWITCH_OUT_PORT_SEC, 100, "eth.mcast",
+ if (!od->nbs) {
+ continue;
+ }
+
+ ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC, 100, "eth.mcast",
"output;");
}
@@ -916,24 +1123,276 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
* Priority 150 rules drop packets to disabled logical ports, so that they
* don't even receive multicast or broadcast packets. */
HMAP_FOR_EACH (op, key_node, ports) {
- struct ds match;
-
- ds_init(&match);
- ds_put_cstr(&match, "outport == ");
- json_string_escape(op->key, &match);
- if (lport_is_enabled(op->nb)) {
- build_port_security("eth.dst",
- op->nb->port_security, op->nb->n_port_security,
- &match);
- ovn_lflow_add(&lflows, op->od, S_SWITCH_OUT_PORT_SEC, 50,
+ if (!op->nbs) {
+ continue;
+ }
+
+ struct ds match = DS_EMPTY_INITIALIZER;
+ ds_put_format(&match, "outport == %s", op->json_key);
+ if (lport_is_enabled(op->nbs)) {
+ build_port_security("eth.dst", op->nbs->port_security,
+ op->nbs->n_port_security, &match);
+ ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 50,
ds_cstr(&match), "output;");
} else {
- ovn_lflow_add(&lflows, op->od, S_SWITCH_OUT_PORT_SEC, 150,
+ ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 150,
ds_cstr(&match), "drop;");
}
ds_destroy(&match);
}
+}
+
+/* Returns true if logical router port 'lrport' is enabled.  A port whose
+ * 'enabled' pointer is null is treated as enabled. */
+static bool
+lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
+{
+    if (!lrport->enabled) {
+        return true;
+    }
+    return *lrport->enabled;
+}
+
+/* Adds to 'lflows' a flow in stage S_ROUTER_IN_IP_ROUTING of datapath 'od'
+ * that matches IPv4 destinations in 'network'/'mask'.
+ *
+ * The flow decrements the TTL, stores the next-hop address in reg0 and
+ * advances to the next table.  The next hop is 'gateway' if it is nonzero,
+ * otherwise the packet's own ip4.dst.
+ *
+ * The flow's priority is the number of 1-bits in 'mask'. */
+static void
+add_route(struct hmap *lflows, struct ovn_datapath *od,
+          ovs_be32 network, ovs_be32 mask, ovs_be32 gateway)
+{
+    struct ds match = DS_EMPTY_INITIALIZER;
+    ds_put_format(&match, "ip4.dst == "IP_FMT"/"IP_FMT,
+                  IP_ARGS(network), IP_ARGS(mask));
+
+    /* Zero 'gateway' means "no explicit next hop": route straight to the
+     * destination address. */
+    char *actions = (gateway
+                     ? xasprintf("ip4.ttl--; reg0 = "IP_FMT"; next;",
+                                 IP_ARGS(gateway))
+                     : xstrdup("ip4.ttl--; reg0 = ip4.dst; next;"));
+
+    ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
+                  count_1bits(ntohl(mask)), ds_cstr(&match), actions);
+
+    free(actions);
+    ds_destroy(&match);
+}
+
+/* Adds to 'lflows' the logical flows for logical routers.
+ *
+ * Only datapaths in 'datapaths' with a nonnull 'nbr' and ports in 'ports'
+ * with a nonnull 'nbr' produce router flows; logical switch ports are
+ * consulted only to populate the router's ARP resolution table.
+ *
+ * Every dynamically built match or action string is freed immediately after
+ * it has been passed to ovn_lflow_add(). */
+static void
+build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
+                    struct hmap *lflows)
+{
+    /* This flow table structure is documented in ovn-northd(8), so please
+     * update ovn-northd.8.xml if you change anything. */
+
+    /* XXX ICMP echo reply */
+
+    /* Logical router ingress table 0: Admission control framework. */
+    struct ovn_datapath *od;
+    HMAP_FOR_EACH (od, key_node, datapaths) {
+        if (!od->nbr) {
+            continue;
+        }
+
+        /* Logical VLANs not supported.
+         * Broadcast/multicast source address is invalid. */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
+                      "vlan.present || eth.src[40]", "drop;");
+    }
+
+    /* Logical router ingress table 0: match (priority 50). */
+    struct ovn_port *op;
+    HMAP_FOR_EACH (op, key_node, ports) {
+        if (!op->nbr) {
+            continue;
+        }
+
+        if (!lrport_is_enabled(op->nbr)) {
+            /* Drop packets from disabled logical ports (since logical flow
+             * tables are default-drop). */
+            continue;
+        }
+
+        char *match = xasprintf(
+            "(eth.mcast || eth.dst == "ETH_ADDR_FMT") && inport == %s",
+            ETH_ADDR_ARGS(op->mac), op->json_key);
+        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
+                      match, "next;");
+        free(match);
+    }
+
+    /* Logical router ingress table 1: IP Input. */
+    HMAP_FOR_EACH (od, key_node, datapaths) {
+        if (!od->nbr) {
+            continue;
+        }
+
+        /* L3 admission control: drop multicast and broadcast source, localhost
+         * source or destination, and zero network source or destination
+         * (priority 220). */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 220,
+                      "ip4.mcast || "
+                      "ip4.src == 255.255.255.255 || "
+                      "ip4.src == 127.0.0.0/8 || "
+                      "ip4.dst == 127.0.0.0/8 || "
+                      "ip4.src == 0.0.0.0/8 || "
+                      "ip4.dst == 0.0.0.0/8",
+                      "drop;");
+
+        /* Drop Ethernet local broadcast.  By definition this traffic should
+         * not be forwarded. */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 190,
+                      "eth.bcast", "drop;");
+
+        /* Drop IP multicast. */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 190,
+                      "ip4.mcast", "drop;");
+
+        /* TTL discard.
+         *
+         * XXX Need to send ICMP time exceeded if !ip.later_frag. */
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 170,
+                      "ip4 && ip.ttl == {0, 1}", "drop;");
+    }
+
+    HMAP_FOR_EACH (op, key_node, ports) {
+        if (!op->nbr) {
+            continue;
+        }
+
+        /* L3 admission control: drop packets that originate from an IP address
+         * owned by the router or a broadcast address known to the router
+         * (priority 220). */
+        char *match = xasprintf("ip4.src == {"IP_FMT", "IP_FMT"}",
+                                IP_ARGS(op->ip), IP_ARGS(op->bcast));
+        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 220,
+                      match, "drop;");
+        free(match);
+
+        /* ARP reply.  These flows reply to ARP requests for the router's own
+         * IP address.
+         *
+         * The request is recognized by its target *protocol* address: the
+         * sender of an ARP request does not know the router's MAC yet, so
+         * arp.tha cannot be expected to hold it. */
+        match = xasprintf(
+            "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
+            op->json_key, IP_ARGS(op->ip));
+        char *actions = xasprintf(
+            "eth.dst = eth.src; "
+            "eth.src = "ETH_ADDR_FMT"; "
+            "arp.op = 2; /* ARP reply */ "
+            "arp.tha = arp.sha; "
+            "arp.sha = "ETH_ADDR_FMT"; "
+            "arp.tpa = arp.spa; "
+            "arp.spa = "IP_FMT"; "
+            "outport = %s; "
+            "inport = \"\"; /* Allow sending out inport. */ "
+            "output;",
+            ETH_ADDR_ARGS(op->mac),
+            ETH_ADDR_ARGS(op->mac),
+            IP_ARGS(op->ip),
+            op->json_key);
+        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 210,
+                      match, actions);
+        free(actions);
+        free(match);
+
+        /* Drop IP traffic to this router. */
+        match = xasprintf("ip4.dst == "IP_FMT, IP_ARGS(op->ip));
+        ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 200,
+                      match, "drop;");
+        free(match);
+    }
+
+    /* Logical router ingress table 2: IP Routing.
+     *
+     * A packet that arrives at this table is an IP packet that should be
+     * routed to the address in ip4.dst.  This table sets reg0 to the next-hop
+     * IP address (leaving ip4.dst, the packet's final destination, unchanged)
+     * and advances to the next table for ARP resolution. */
+    HMAP_FOR_EACH (op, key_node, ports) {
+        if (!op->nbr) {
+            continue;
+        }
+
+        /* Directly connected network: next hop is the destination itself. */
+        add_route(lflows, op->od, op->network, op->mask, 0);
+    }
+    HMAP_FOR_EACH (od, key_node, datapaths) {
+        if (!od->nbr) {
+            continue;
+        }
+
+        if (od->gateway) {
+            /* Default route (0.0.0.0/0) via the router's default gateway. */
+            add_route(lflows, od, 0, 0, od->gateway);
+        }
+    }
+    /* XXX destination unreachable */
+
+    /* Logical router ingress table 3: ARP Resolution.
+     *
+     * Any packet that reaches this table is an IP packet whose next-hop IP
+     * address is in reg0.  (ip4.dst is the final destination.)  This table
+     * resolves the IP address in reg0 into an output port in outport and an
+     * Ethernet address in eth.dst. */
+    HMAP_FOR_EACH (op, key_node, ports) {
+        if (op->nbr) {
+            /* XXX ARP for neighboring router */
+            continue;
+        }
+        if (!op->nbs || !op->od->router_port) {
+            /* Not a logical switch port, or its switch is not attached to a
+             * router: nothing to resolve. */
+            continue;
+        }
+
+        const char *peer_name = smap_get(
+            &op->od->router_port->nbs->options, "router-port");
+        if (!peer_name) {
+            continue;
+        }
+
+        struct ovn_port *peer = ovn_port_find(ports, peer_name);
+        if (!peer || !peer->nbr) {
+            continue;
+        }
+
+        /* Each "MAC IP" entry in the switch port's addresses becomes a static
+         * binding in the attached router's datapath. */
+        for (size_t i = 0; i < op->nbs->n_addresses; i++) {
+            struct eth_addr ea;
+            ovs_be32 ip;
+
+            if (ovs_scan(op->nbs->addresses[i],
+                         ETH_ADDR_SCAN_FMT" "IP_SCAN_FMT,
+                         ETH_ADDR_SCAN_ARGS(ea), IP_SCAN_ARGS(&ip))) {
+                char *match = xasprintf("reg0 == "IP_FMT, IP_ARGS(ip));
+                char *actions = xasprintf("eth.src = "ETH_ADDR_FMT"; "
+                                          "eth.dst = "ETH_ADDR_FMT"; "
+                                          "outport = %s; "
+                                          "output;",
+                                          ETH_ADDR_ARGS(peer->mac),
+                                          ETH_ADDR_ARGS(ea),
+                                          peer->json_key);
+                ovn_lflow_add(lflows, peer->od,
+                              S_ROUTER_IN_ARP, 200, match, actions);
+                free(actions);
+                free(match);
+            }
+        }
+    }
+
+    /* Logical router egress table 0: Delivery (priority 100).
+     *
+     * Priority 100 rules deliver packets to enabled logical ports. */
+    HMAP_FOR_EACH (op, key_node, ports) {
+        if (!op->nbr) {
+            continue;
+        }
+
+        if (!lrport_is_enabled(op->nbr)) {
+            /* Drop packets to disabled logical ports (since logical flow
+             * tables are default-drop). */
+            continue;
+        }
+
+        char *match = xasprintf("outport == %s", op->json_key);
+        ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
+                      match, "output;");
+        free(match);
+    }
+}
+
+/* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
+ * constructing their contents based on the OVN_NB database. */
+static void
+build_lflows(struct northd_context *ctx, struct hmap *datapaths,
+ struct hmap *ports)
+{
+ struct hmap lflows = HMAP_INITIALIZER(&lflows);
+ struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
+
+ build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
+ build_lrouter_flows(datapaths, ports, &lflows);
/* Push changes to the Logical_Flow table to database. */
const struct sbrec_logical_flow *sbflow, *next_sbflow;
@@ -945,7 +1404,7 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
continue;
}
- enum ovn_datapath_type dp_type = DP_SWITCH; /* XXX no routers yet. */
+ enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
enum ovn_pipeline pipeline
= !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
struct ovn_lflow *lflow = ovn_lflow_find(
@@ -964,8 +1423,8 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths,
sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
- sbrec_logical_flow_set_pipeline(sbflow,
- pipeline ? "ingress" : "egress");
+ sbrec_logical_flow_set_pipeline(
+ sbflow, pipeline == P_IN ? "ingress" : "egress");
sbrec_logical_flow_set_table_id(sbflow, table);
sbrec_logical_flow_set_priority(sbflow, lflow->priority);
sbrec_logical_flow_set_match(sbflow, lflow->match);
diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
index f898f97..7d9d22f 100644
--- a/ovn/ovn-sb.xml
+++ b/ovn/ovn-sb.xml
@@ -1025,12 +1025,28 @@ tcp.flags = RST;
constructed for each supported encapsulation.
</column>
- <column name="external_ids" key="logical-switch" type='{"type": "uuid"}'>
- Each row in <ref table="Datapath_Binding"/> is associated with some
- logical datapath. <code>ovn-northd</code> uses this key to store the
- UUID of the logical datapath <ref table="Logical_Switch"
- db="OVN_Northbound"/> row in the <ref db="OVN_Northbound"/> database.
- </column>
+ <group title="OVN_Northbound Relationship">
+ <p>
+ Each row in <ref table="Datapath_Binding"/> is associated with some
+ logical datapath. <code>ovn-northd</code> uses these keys to track the
+ association of a logical datapath with concepts in the <ref
+ db="OVN_Northbound"/> database.
+ </p>
+
+ <column name="external_ids" key="logical-switch" type='{"type": "uuid"}'>
+ For a logical datapath that represents a logical switch,
+ <code>ovn-northd</code> stores in this key the UUID of the
+ corresponding <ref table="Logical_Switch" db="OVN_Northbound"/> row in
+ the <ref db="OVN_Northbound"/> database.
+ </column>
+
+ <column name="external_ids" key="logical-router" type='{"type": "uuid"}'>
+ For a logical datapath that represents a logical router,
+ <code>ovn-northd</code> stores in this key the UUID of the
+ corresponding <ref table="Logical_Router" db="OVN_Northbound"/> row in
+ the <ref db="OVN_Northbound"/> database.
+ </column>
+ </group>
<group title="Common Columns">
The overall purpose of these columns is described under <code>Common
diff --git a/tests/ovn.at b/tests/ovn.at
index a42a319..319ab4b 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -510,12 +510,13 @@ AT_CLEANUP
AT_BANNER([OVN end-to-end tests])
-AT_SETUP([ovn -- 3 HVs, 3 VIFs/HV, 1 logical switch])
+# 3 hypervisors, one logical switch, 3 logical ports per hypervisor
+AT_SETUP([ovn -- 3 HVs, 1 LS, 3 lports/HV])
AT_SKIP_IF([test $HAVE_PYTHON = no])
ovn_start
# Create hypervisors hv[123].
-# Add vif1[123] to hv1, vif2[123] to hv2, vif3[123].
+# Add vif1[123] to hv1, vif2[123] to hv2, vif3[123] to hv3.
# Add all of the vifs to a single logical switch lsw0.
# Turn on port security on all the vifs except vif[123]1.
# Make vif13, vif2[23], vif3[123] destinations for unknown MACs.
@@ -676,3 +677,166 @@ for i in 1 2 3; do
done
done
AT_CLEANUP
+
+# 3 hypervisors, 3 logical switches with 3 logical ports each, 1 logical router
+AT_SETUP([ovn -- 3 HVs, 3 LS, 3 lports/LS, 1 LR])
+AT_SKIP_IF([test $HAVE_PYTHON = no])
+ovn_start
+
+# Logical network:
+#
+# Three logical switches ls1, ls2, ls3.
+# Three VIFs on each: lp1[123], lp2[123], lp3[123].
+# One logical router lr0 connected to ls[123].
+ovn-nbctl \
+ -- create Logical_Router name=lr0 ports=@lrp1,@lrp2,@lrp3 \
+ -- --id=@lrp1 create Logical_Router_Port name=lrp1 \
+ network=192.168.1.254/24 mac='"00:00:00:00:ff:01"' \
+ -- --id=@lrp2 create Logical_Router_Port name=lrp2 \
+ network=192.168.2.254/24 mac='"00:00:00:00:ff:02"' \
+ -- --id=@lrp3 create Logical_Router_Port name=lrp3 \
+ network=192.168.3.254/24 mac='"00:00:00:00:ff:03"'
+for i in 1 2 3; do
+ lrp_uuid=`ovn-nbctl get Logical_Router_Port lrp$i _uuid`
+ ovn-nbctl \
+ -- lswitch-add ls$i \
+ -- lport-add ls$i lrp$i-attachment \
+ -- set Logical_Port lrp$i-attachment type=router \
+ options:router-port=$lrp_uuid \
+ addresses='"00:00:00:00:ff:0'$i'"'
+ for j in 1 2 3; do
+ ovn-nbctl \
+ -- lport-add ls$i lp$i$j \
+ -- lport-set-addresses lp$i$j "f0:00:00:00:00:$i$j 192.168.$i.$j"
+ done
+done
+
+# Physical network:
+#
+# Three hypervisors hv[123].
+# lp1[123] spread across hv[123]: lp11 on hv1, lp12 on hv2, lp13 on hv3.
+# lp2[123] spread across hv[23]: lp21 and lp22 on hv2, lp23 on hv3.
+# lp3[123] all on hv3.
+
+# Given the number of a logical port (e.g. 11 for lp11), prints the number
+# of the hypervisor on which it is located.
+vif_to_hv() {
+ case $1 in dnl (
+ 11) echo 1 ;; dnl (
+ 12 | 21 | 22) echo 2 ;; dnl (
+ 13 | 23 | 3?) echo 3 ;;
+ esac
+}
+
+net_add n1
+for i in 1 2 3; do
+ sim_add hv$i
+ as hv$i
+ ovs-vsctl add-br br-phys
+ ovn_attach n1 br-phys 192.168.0.$i
+done
+for i in 1 2 3; do
+ for j in 1 2 3; do
+ hv=`vif_to_hv $i$j`
+ as hv$hv ovs-vsctl \
+ -- add-port br-int vif$i$j \
+ -- set Interface vif$i$j external-ids:iface-id=lp$i$j \
+ options:tx_pcap=hv$hv/vif$i$j-tx.pcap \
+ options:rxq_pcap=hv$hv/vif$i$j-rx.pcap \
+ ofport-request=$i$j
+ done
+done
+
+# Pre-populate the hypervisors' ARP tables so that we don't lose any
+# packets for ARP resolution (native tunneling doesn't queue packets
+# for ARP resolution).
+ovn_populate_arp
+
+# Allow some time for ovn-northd and ovn-controller to catch up.
+# XXX This should be more systematic.
+sleep 1
+
+# test_packet INPORT SRC_MAC DST_MAC SRC_IP DST_IP OUTPORT...
+#
+# This shell function causes a packet to be received on INPORT. The packet
+# has Ethernet destination DST_MAC and source SRC_MAC (each exactly 12 hex
+# digits) and IP source SRC_IP and destination DST_IP (each exactly 8 hex
+# digits). The OUTPORTs (zero or more) list the VIFs on which the packet
+# should be received. INPORT and the OUTPORTs are lport numbers, e.g. 11.
+trim_zeros() {
+ sed 's/\(00\)\{1,\}$//'
+}
+for i in 1 2 3; do
+ for j in 1 2 3; do
+ : > $i$j.expected
+ done
+done
+test_packet() {
+ # This packet has bad checksums but logical L3 routing doesn't check.
+ local inport=$1 src_mac=$2 dst_mac=$3 src_ip=$4 dst_ip=$5
+ local packet=$3$208004500001c0000000040110000$4$50035111100080000
+ shift; shift; shift; shift; shift
+ hv=hv`vif_to_hv $inport`
+ as $hv ovs-appctl netdev-dummy/receive vif$inport $packet
+ #as $hv ovs-appctl ofproto/trace br-int in_port=$inport $packet
+ for outport; do
+ ins=`echo $inport | sed 's/^\(.\).*/\1/'`
+ outs=`echo $outport | sed 's/^\(.\).*/\1/'`
+ if test $ins = $outs; then
+ # Ports on the same logical switch receive exactly the same packet.
+ echo $packet
+ else
+ # Routing decrements TTL and updates source and dest MAC
+ # (and checksum).
+ echo f000000000${outport}00000000ff0${outs}08004500001c00000000"3f1101"00${src_ip}${dst_ip}0035111100080000
+ fi | trim_zeros >> $outport.expected
+ done
+}
+
+as hv1 ovn-sbctl dump-flows
+as hv1 ovs-ofctl dump-flows br-int
+
+# Send packets between all pairs of source and destination ports:
+#
+# 1. Unicast IP packets are delivered to exactly one lport (except
+# that packets destined to their input ports are dropped).
+#
+# 2. Broadcast IP packets are delivered to all other lports on the same LS.
+for is in 1 2 3; do
+ for js in 1 2 3; do
+ bcast=
+ s=$is$js
+ smac=f000000000$s
+ sip=c0a80${is}0${js}
+ for id in 1 2 3; do
+ for jd in 1 2 3; do
+ d=$id$jd
+ dip=c0a80${id}0${jd}
+ if test $is = $id; then dmac=f000000000$d; else dmac=00000000ff0$is; fi
+ if test $d != $s; then unicast=$d; else unicast=; fi
+
+ test_packet $s $smac $dmac $sip $dip $unicast #1
+
+ if test $id = $is && test $jd != $js; then bcast="$bcast $d"; fi
+ done
+ done
+ test_packet $s $smac ffffffffffff $sip ffffffff $bcast #2
+ done
+done
+
+# Allow some time for packet forwarding.
+# XXX This can be improved.
+sleep 1
+
+# Now check the packets actually received against the ones expected.
+for i in 1 2 3; do
+ for j in 1 2 3; do
+ file=hv`vif_to_hv $i$j`/vif$i$j-tx.pcap
+ echo $file
+ $PYTHON "$top_srcdir/utilities/ovs-pcap.in" $file | trim_zeros > $i$j.packets
+ cp $i$j.expected expout
+ AT_CHECK([cat $i$j.packets], [0], [expout])
+ echo
+ done
+done
+AT_CLEANUP
--
2.1.3
More information about the dev
mailing list