[ovs-dev] [PATCH v4 ovn 2/2] Forwarding group to load balance l2 traffic with liveness detection

Manoj Sharma manoj.sharma at nutanix.com
Wed Jan 22 21:31:25 UTC 2020


A forwarding group is an aggregation of logical switch ports of a logical
switch, used to load balance traffic across those ports. It can also detect
the liveness of the ports when they are realized as OVN tunnel ports in the
physical topology.

In the below logical topology diagram, the logical switch has two ports
connected to chassis / external routers R1 and R2. The logical router needs
to send traffic to an external network that is connected through R1 and R2.

                                                    +----+
                                         +----------+ R1 |    *****
                                        /           +----+  **     **
  +----------+        +--------------+ / lsp1              *         *
  | Logical  |        |   Logical    |/                   * External  *
  | Router   +--------+   switch     X                    *  Network  *
  |          |        |              |\                   *           *
  +----------+        +--------------+ \ lsp2              *         *
                             ^          \           +----+  **     **
                             |           +----------+ R2 |    *****
                             |                      +----+
                   fwd_group -> (lsp1, lsp2)

In the absence of a forwarding group, the logical router has a unicast
route pointing to either R1 or R2. If the router it points to goes down, the
control plane has to intervene and update the route to point to the proper
nexthop.

With forwarding group, a virtual IP (VIP) and virtual MAC (VMAC) address
are configured on the forwarding group. The logical router points to the
forwarding group's VIP as the nexthop for hosts behind R1 and R2.

[root at fwd-group]# ovn-nbctl fwd-group-add fwd ls1 VIP_1 VMAC_1 lsp1 lsp2

[root at fwd-group]# ovn-nbctl fwd-group-list
FWD_GROUP       LS            VIP             VMAC          CHILD_PORTS
fwd             ls1          VIP_1           VMAC_1         lsp1 lsp2

[root at fwd-group]# ovn-nbctl lr-route-list lr1
IPv4 Routes
external_host_prefix/prefix_len            VIP_1 dst-ip

The logical switch will install an ARP responder rule to reply with VMAC
as the MAC address for ARP requests for VIP. It will also install a MAC
lookup rule for VMAC with action to load balance across the logical switch
ports of the forwarding group.

Datapath: "ls1" Pipeline: ingress
table=10(ls_in_arp_rsp      ), priority=50   , match=(arp.tpa == VIP_1 &&
    arp.op == 1), action=(eth.dst = eth.src; eth.src = VMAC_1; arp.op = 2;
    /* ARP reply */ arp.tha = arp.sha; arp.sha = VMAC_1; arp.tpa = arp.spa;
    arp.spa = VIP_1; outport = inport; flags.loopback = 1; output;)

table=13(ls_in_l2_lkup      ), priority=50   , match=(eth.dst == VMAC_1),
    action=(fwd_group(childports="lsp1","lsp2");)

In the physical topology, OVN managed hypervisors are connected to R1 and
R2 through overlay tunnels. The logical flow's "fwd_group" action mentioned
above gets translated to an openflow group of type "select" with one bucket
for each logical switch port.

cookie=0x0, duration=16.869s, table=29, n_packets=4, n_bytes=392, idle_age=0,
priority=111,metadata=0x9,dl_dst=VMAC_1 actions=group:1

group_id=1,type=select,selection_method=dp_hash,
    bucket=actions=load:0x2->NXM_NX_REG15[0..15],resubmit(,32),
    bucket=actions=load:0x3->NXM_NX_REG15[0..15],resubmit(,32)

where 0x2 and 0x3 are port tunnel keys of lsp1 and lsp2.

The openflow group type "select" with selection method "dp_hash" load
balances traffic based on source and destination Ethernet address, VLAN ID,
Ethernet type, IPv4/v6 source and destination address and protocol, and for
TCP and SCTP only, the source and destination ports.

To detect path failure between OVN managed hypervisors and (R1, R2), BFD is
enabled on the tunnel interfaces. The openflow group is modified to include
watch_port for liveness detection of a port. The forwarding group must be
configured with --liveness to enable it. With liveness enabled, the logical
flow changes to:
table=13(ls_in_l2_lkup      ), priority=50   , match=(eth.dst == VMAC_1),
    action=(fwd_group(liveness="true",childports="lsp1","lsp2");)

While the openflow group is:
group_id=1,type=select,selection_method=dp_hash,
  bucket=watch_port:31,actions=load:0x2->NXM_NX_REG15[0..15],resubmit(,32),
  bucket=watch_port:32,actions=load:0x3->NXM_NX_REG15[0..15],resubmit(,32)

Where 31 and 32 are ovs port numbers for the tunnel interfaces connecting
to R1 and R2.

If the BFD forwarding status is down for any of the tunnels, the
corresponding bucket will not be selected for packet forwarding.

Signed-off-by: Manoj Sharma <manoj.sharma at nutanix.com>
---
 controller/lflow.c    |  20 +++++++
 controller/physical.c |  13 +++++
 controller/physical.h |   4 ++
 include/ovn/actions.h |  19 ++++++-
 lib/actions.c         | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++
 northd/ovn-northd.c   |  64 +++++++++++++++++++++++
 utilities/ovn-trace.c |   3 ++
 7 files changed, 264 insertions(+), 1 deletion(-)

diff --git a/controller/lflow.c b/controller/lflow.c
index 997c596..9a3c1eb 100644
--- a/controller/lflow.c
+++ b/controller/lflow.c
@@ -105,6 +105,25 @@ lookup_port_cb(const void *aux_, const char *port_name, unsigned int *portp)
     return false;
 }
 
+/* Maps the logical port 'port_name' to the OpenFlow port of the tunnel that
+ * leads to the chassis the port is bound to.  On success stores that port in
+ * '*ofport' and returns true.  Returns false if the port binding is missing,
+ * is on a datapath other than the one in 'aux_', has no chassis, or if
+ * get_tunnel_ofport() finds no tunnel for that chassis. */
+static bool
+tunnel_ofport_cb(const void *aux_, const char *port_name, ofp_port_t *ofport)
+{
+    const struct lookup_port_aux *lookup = aux_;
+    const struct sbrec_port_binding *binding = lport_lookup_by_name(
+        lookup->sbrec_port_binding_by_name, port_name);
+
+    if (!binding || !binding->chassis || binding->datapath != lookup->dp) {
+        return false;
+    }
+
+    return get_tunnel_ofport(binding->chassis->name, NULL, ofport);
+}
+
 static bool
 is_chassis_resident_cb(const void *c_aux_, const char *port_name)
 {
@@ -773,6 +792,7 @@ consider_logical_flow(
     struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
     struct ovnact_encode_params ep = {
         .lookup_port = lookup_port_cb,
+        .tunnel_ofport = tunnel_ofport_cb,
         .aux = &aux,
         .is_switch = is_switch(ldp),
         .group_table = group_table,
diff --git a/controller/physical.c b/controller/physical.c
index 500d419..af1d10f 100644
--- a/controller/physical.c
+++ b/controller/physical.c
@@ -1794,3 +1794,16 @@ physical_run(struct ovsdb_idl_index *sbrec_port_binding_by_name,
 
     simap_destroy(&new_tunnel_to_ofport);
 }
+
+/* Asks chassis_tunnel_find() for the tunnel matching 'chassis_name' and
+ * 'encap_ip'.  If there is one, stores its OpenFlow port in '*ofport' and
+ * returns true; otherwise returns false without touching '*ofport'. */
+bool
+get_tunnel_ofport(const char *chassis_name, char *encap_ip, ofp_port_t *ofport)
+{
+    struct chassis_tunnel *found = chassis_tunnel_find(chassis_name, encap_ip);
+    if (found) {
+        *ofport = found->ofport;
+    }
+    return found != NULL;
+}
diff --git a/controller/physical.h b/controller/physical.h
index c93f6b1..c0e17cd 100644
--- a/controller/physical.h
+++ b/controller/physical.h
@@ -72,4 +72,8 @@ void physical_handle_mc_group_changes(
         const struct simap *ct_zones,
         const struct hmap *local_datapaths,
         struct ovn_desired_flow_table *);
+bool get_tunnel_ofport(
+        const char *chassis_name,
+        char *encap_ip,
+        ofp_port_t *ofport);
 #endif /* controller/physical.h */
diff --git a/include/ovn/actions.h b/include/ovn/actions.h
index 2d4b05b..9b01492 100644
--- a/include/ovn/actions.h
+++ b/include/ovn/actions.h
@@ -90,7 +90,8 @@ struct ovn_extend_table;
     OVNACT(CHECK_PKT_LARGER,  ovnact_check_pkt_larger) \
     OVNACT(TRIGGER_EVENT,     ovnact_controller_event) \
     OVNACT(BIND_VPORT,        ovnact_bind_vport)       \
-    OVNACT(HANDLE_SVC_CHECK,  ovnact_handle_svc_check)
+    OVNACT(HANDLE_SVC_CHECK,  ovnact_handle_svc_check) \
+    OVNACT(FWD_GROUP,         ovnact_fwd_group)
 
 /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
 enum OVS_PACKED_ENUM ovnact_type {
@@ -374,6 +375,15 @@ struct ovnact_handle_svc_check {
     struct expr_field port;     /* Logical port name. */
 };
 
+/* OVNACT_FWD_GROUP. */
+struct ovnact_fwd_group {
+    struct ovnact ovnact;
+    bool liveness;            /* Add a watch_port to each bucket? */
+    char **child_ports;       /* Names of the child logical ports. */
+    size_t n_child_ports;     /* Number of elements in 'child_ports'. */
+    uint8_t ltable;           /* Logical table ID of next table. */
+};
+
 /* Internal use by the helpers below. */
 void ovnact_init(struct ovnact *, enum ovnact_type, size_t len);
 void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len);
@@ -635,6 +645,13 @@ struct ovnact_encode_params {
      * '*portp' and returns true; otherwise, returns false. */
     bool (*lookup_port)(const void *aux, const char *port_name,
                         unsigned int *portp);
+
+    /* Looks up tunnel port to a chassis by its port name.  If found, stores
+     * its openflow port number in '*ofport' and returns true;
+     * otherwise, returns false. */
+    bool (*tunnel_ofport)(const void *aux, const char *port_name,
+                          ofp_port_t *ofport);
+
     const void *aux;
 
     /* 'true' if the flow is for a switch. */
diff --git a/lib/actions.c b/lib/actions.c
index cd3f586..6cde9ea 100644
--- a/lib/actions.c
+++ b/lib/actions.c
@@ -2988,6 +2988,193 @@ ovnact_handle_svc_check_free(struct ovnact_handle_svc_check *sc OVS_UNUSED)
 {
 }
 
+/* Frees the 'n' port names stored in 'ports' and then the array itself. */
+static void
+free_child_ports(char **ports, size_t n)
+{
+    for (size_t i = 0; i < n; i++) {
+        free(ports[i]);
+    }
+    free(ports);
+}
+
+/* Parses the arguments of a "fwd_group" action, whose keyword parse_action()
+ * has already consumed:
+ *
+ *     fwd_group([liveness="true"|"false",] [childports="p1"[, "p2"]...]);
+ *
+ * On success appends an OVNACT_FWD_GROUP action, which takes ownership of the
+ * child port names, to 'ctx->ovnacts'.  Without a parenthesized argument list
+ * or without "childports" the action simply has no child ports.  On a syntax
+ * error, reports it through the lexer, frees what was allocated and appends
+ * nothing. */
+static void
+parse_fwd_group_action(struct action_context *ctx)
+{
+    char **child_port_list = NULL;
+    size_t allocated_ports = 0;
+    size_t n_child_ports = 0;
+    bool liveness = false;
+
+    if (lexer_match(ctx->lexer, LEX_T_LPAREN)) {
+        if (lexer_match_id(ctx->lexer, "liveness")) {
+            if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) {
+                return;
+            }
+            if (ctx->lexer->token.type != LEX_T_STRING) {
+                lexer_syntax_error(ctx->lexer, "expecting true/false");
+                return;
+            }
+            if (!strcmp(ctx->lexer->token.s, "true")) {
+                liveness = true;
+            } else if (strcmp(ctx->lexer->token.s, "false")) {
+                lexer_syntax_error(ctx->lexer, "expecting true/false");
+                return;
+            }
+            /* Consume the value whichever it is: a "false" left in the lexer
+             * would make the comma check below fail. */
+            lexer_get(ctx->lexer);
+            if (!lexer_force_match(ctx->lexer, LEX_T_COMMA)) {
+                return;
+            }
+        }
+
+        if (lexer_match_id(ctx->lexer, "childports")) {
+            if (!lexer_force_match(ctx->lexer, LEX_T_EQUALS)) {
+                return;
+            }
+            while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
+                if (ctx->lexer->token.type != LEX_T_STRING) {
+                    lexer_syntax_error(ctx->lexer,
+                                       "expecting logical switch port");
+                    /* The names were duplicated below, so freeing only the
+                     * array would leak them. */
+                    free_child_ports(child_port_list, n_child_ports);
+                    return;
+                }
+
+                if (n_child_ports >= allocated_ports) {
+                    child_port_list = x2nrealloc(child_port_list,
+                                                 &allocated_ports,
+                                                 sizeof *child_port_list);
+                }
+                child_port_list[n_child_ports++]
+                    = xstrdup(ctx->lexer->token.s);
+                lexer_get(ctx->lexer);
+
+                /* The comma between port names is optional. */
+                lexer_match(ctx->lexer, LEX_T_COMMA);
+            }
+        } else if (!lexer_force_match(ctx->lexer, LEX_T_RPAREN)) {
+            return;
+        }
+    }
+
+    struct ovnact_fwd_group *fwd_group = ovnact_put_FWD_GROUP(ctx->ovnacts);
+    fwd_group->ltable = ctx->pp->cur_ltable + 1;
+    fwd_group->liveness = liveness;
+    fwd_group->child_ports = child_port_list;
+    fwd_group->n_child_ports = n_child_ports;
+}
+
+/* Appends to 's' the text form of 'fwd_group' in the syntax accepted by
+ * parse_fwd_group_action(): quoted values and a single "childports=" key
+ * followed by all port names, so that the output can be parsed back. */
+static void
+format_FWD_GROUP(const struct ovnact_fwd_group *fwd_group, struct ds *s)
+{
+    ds_put_cstr(s, "fwd_group(");
+    if (fwd_group->liveness) {
+        ds_put_cstr(s, "liveness=\"true\", ");
+    }
+    if (fwd_group->n_child_ports) {
+        ds_put_cstr(s, "childports=");
+        for (size_t i = 0; i < fwd_group->n_child_ports; i++) {
+            if (i) {
+                ds_put_cstr(s, ", ");
+            }
+            ds_put_format(s, "\"%s\"", fwd_group->child_ports[i]);
+        }
+    }
+    ds_put_cstr(s, ");");
+}
+
+/* Encodes 'fwd_group' as an OpenFlow group of type "select" with one bucket
+ * per child port, and appends to 'ofpacts' a group action that refers to it.
+ * Each bucket loads the child port's tunnel key into the MFF_LOG_OUTPORT
+ * register and resubmits to table 'ep->output_ptable'.  With liveness
+ * enabled, each bucket also gets a watch_port: the OpenFlow port that
+ * 'ep->tunnel_ofport' reports for the child port.
+ *
+ * Emits nothing if there are no child ports, if a child port or (with
+ * liveness) its tunnel port cannot be resolved, or if no group ID could be
+ * assigned. */
+static void
+encode_FWD_GROUP(const struct ovnact_fwd_group *fwd_group,
+                 const struct ovnact_encode_params *ep,
+                 struct ofpbuf *ofpacts)
+{
+    if (!fwd_group->n_child_ports) {
+        /* Nothing to do without child ports */
+        return;
+    }
+
+    uint32_t reg_index = MFF_LOG_OUTPORT - MFF_REG0;
+    uint32_t table_id;
+    struct ds ds = DS_EMPTY_INITIALIZER;
+
+    ds_put_format(&ds, "type=select,selection_method=dp_hash");
+
+    for (size_t i = 0; i < fwd_group->n_child_ports; i++) {
+        const char *port_name = fwd_group->child_ports[i];
+        unsigned int port_tunnel_key;
+        ofp_port_t ofport;
+
+        /* Find the tunnel key of the logical port */
+        if (!ep->lookup_port(ep->aux, port_name, &port_tunnel_key)) {
+            goto out;
+        }
+        ds_put_format(&ds, ",bucket=");
+
+        if (fwd_group->liveness) {
+            /* Find the openflow port number of the tunnel port.  An encoder
+             * set up without the callback cannot resolve it. */
+            if (!ep->tunnel_ofport
+                || !ep->tunnel_ofport(ep->aux, port_name, &ofport)) {
+                goto out;
+            }
+
+            /* Watch port for failure, used with BFD */
+            ds_put_format(&ds, "watch_port:%d,", ofport);
+        }
+
+        /* The key follows a "0x" prefix, so it must be printed in hex. */
+        ds_put_format(&ds, "load=0x%x->NXM_NX_REG%d[0..15]",
+                      port_tunnel_key, reg_index);
+        ds_put_format(&ds, ",resubmit(,%d)", ep->output_ptable);
+    }
+
+    table_id = ovn_extend_table_assign_id(ep->group_table, ds_cstr(&ds),
+                                          ep->lflow_uuid);
+    if (table_id != EXT_TABLE_ID_INVALID) {
+        /* Create an action to set the group */
+        struct ofpact_group *og = ofpact_put_GROUP(ofpacts);
+        og->group_id = table_id;
+    }
+
+out:
+    /* Reached on the failure paths as well, so that 'ds' is never leaked. */
+    ds_destroy(&ds);
+}
+
+/* Frees the child port list owned by 'fwd_group': each port name and the
+ * array holding them. */
+static void
+ovnact_fwd_group_free(struct ovnact_fwd_group *fwd_group)
+{
+    free_child_ports(fwd_group->child_ports, fwd_group->n_child_ports);
+}
+
 /* Parses an assignment or exchange or put_dhcp_opts action. */
 static void
 parse_set_action(struct action_context *ctx)
@@ -3110,6 +3250,8 @@ parse_action(struct action_context *ctx)
         parse_bind_vport(ctx);
     } else if (lexer_match_id(ctx->lexer, "handle_svc_check")) {
         parse_handle_svc_check(ctx);
+    } else if (lexer_match_id(ctx->lexer, "fwd_group")) {
+        parse_fwd_group_action(ctx);
     } else {
         lexer_syntax_error(ctx->lexer, "expecting action");
     }
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index b1e782e..14a615b 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -5428,6 +5428,80 @@ build_stateful(struct ovn_datapath *od, struct hmap *lflows, struct hmap *lbs)
 }
 
+/* Adds to 'lflows' two logical flows for each forwarding group of logical
+ * switch 'od' that has at least one child port:
+ *
+ *   - In S_SWITCH_IN_ARP_ND_RSP, an ARP responder that answers requests for
+ *     the group's virtual IP with the group's virtual MAC.
+ *
+ *   - In S_SWITCH_IN_L2_LKUP, a flow that passes packets destined to the
+ *     virtual MAC to the "fwd_group" action with the group's child ports. */
 static void
+build_fwd_group_lflows(struct ovn_datapath *od, struct hmap *lflows)
+{
+    struct ds match = DS_EMPTY_INITIALIZER;
+    struct ds actions = DS_EMPTY_INITIALIZER;
+    struct ds group_ports = DS_EMPTY_INITIALIZER;
+
+    for (size_t i = 0; i < od->nbs->n_forwarding_groups; i++) {
+        const struct nbrec_forwarding_group *fwd_group
+            = od->nbs->forwarding_groups[i];
+        if (!fwd_group || !fwd_group->n_child_port) {
+            continue;
+        }
+
+        /* The buffers are reused for every group, so drop whatever the
+         * previous group left in them. */
+        ds_clear(&match);
+        ds_clear(&actions);
+        ds_clear(&group_ports);
+
+        /* ARP responder for the forwarding group's virtual IP */
+        ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
+                      fwd_group->vip);
+        ds_put_format(&actions,
+            "eth.dst = eth.src; "
+            "eth.src = %s; "
+            "arp.op = 2; /* ARP reply */ "
+            "arp.tha = arp.sha; "
+            "arp.sha = %s; "
+            "arp.tpa = arp.spa; "
+            "arp.spa = %s; "
+            "outport = inport; "
+            "flags.loopback = 1; "
+            "output;",
+            fwd_group->vmac, fwd_group->vmac, fwd_group->vip);
+
+        ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 50,
+                      ds_cstr(&match), ds_cstr(&actions));
+
+        /* L2 lookup for the forwarding group's virtual MAC */
+        ds_clear(&match);
+        ds_put_format(&match, "eth.dst == %s", fwd_group->vmac);
+
+        /* Create a comma separated string of child ports.  This loop needs
+         * its own index: reusing 'i' would derail the loop over the
+         * forwarding groups. */
+        if (fwd_group->liveness) {
+            ds_put_cstr(&group_ports, "liveness=\"true\",");
+        }
+        ds_put_cstr(&group_ports, "childports=");
+        for (size_t j = 0; j < fwd_group->n_child_port; j++) {
+            ds_put_format(&group_ports, "%s\"%s\"", j ? "," : "",
+                          fwd_group->child_port[j]);
+        }
+
+        ds_clear(&actions);
+        ds_put_format(&actions, "fwd_group(%s);", ds_cstr(&group_ports));
+        ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 50,
+                      ds_cstr(&match), ds_cstr(&actions));
+    }
+
+    ds_destroy(&match);
+    ds_destroy(&actions);
+    ds_destroy(&group_ports);
+}
+
+static void
 build_lrouter_groups__(struct hmap *ports, struct ovn_datapath *od)
 {
     ovs_assert((od && od->nbr && od->lr_group));
@@ -5727,6 +5782,15 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
         build_stateful(od, lflows, lbs);
     }
 
+    /* Build logical flows for the forwarding groups */
+    HMAP_FOR_EACH (od, key_node, datapaths) {
+        if (!od->nbs || !od->nbs->n_forwarding_groups) {
+            continue;
+        }
+
+        build_fwd_group_lflows(od, lflows);
+    }
+
     /* Logical switch ingress table 0: Admission control framework (priority
      * 100). */
     HMAP_FOR_EACH (od, key_node, datapaths) {
diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c
index c5c40a8..89f1a87 100644
--- a/utilities/ovn-trace.c
+++ b/utilities/ovn-trace.c
@@ -2291,6 +2291,9 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,
 
         case OVNACT_HANDLE_SVC_CHECK:
             break;
+
+        case OVNACT_FWD_GROUP:
+            break;
         }
     }
     ds_destroy(&s);
-- 
1.8.3.1



More information about the dev mailing list