[ovs-dev] [PATCH v2 ovn 2/3] Forwarding group to load balance l2 traffic with liveness detection

Numan Siddique numans at ovn.org
Mon Jan 20 15:34:50 UTC 2020


On Mon, Jan 20, 2020 at 8:16 PM Manoj Sharma <manoj.sharma at nutanix.com> wrote:
>
> Hi Numan,
>
> Please see my answers inline.
>
> From: Numan Siddique <numans at ovn.org>
> Date: Thursday, January 16, 2020 at 7:53 AM
> To: Manoj Sharma <manoj.sharma at nutanix.com>
> Cc: "ovs-dev at openvswitch.org" <ovs-dev at openvswitch.org>
> Subject: Re: [ovs-dev] [PATCH v2 ovn 2/3] Forwarding group to load balance l2 traffic with liveness detection
>
>
>
> On Sat, Jan 11, 2020 at 6:11 AM Manoj Sharma <manoj.sharma at nutanix.com<mailto:manoj.sharma at nutanix.com>> wrote:
> Add forwarding group support for a logical switch. It will add a new OVN
> action "fwd_group" with a virtual IP and virtual MAC. Any number of logical
> switch ports of this switch can be added to this forwarding group.
> If traffic has to be load balanced across these logical switch ports then
> traffic should be sent to forwarding group's virtual IP.
>
> If the logical switch ports correspond to tunnel interfaces and BFD
> can be enabled on these tunnel interfaces, then liveness can be enabled
> for the forwarding group so that if any of the tunnel is down then
> the corresponding logical switch port will not be selected during
> load balancing.
>
> Signed-off-by: Manoj Sharma <manoj.sharma at nutanix.com<mailto:manoj.sharma at nutanix.com>>
>
> Hi Manoj,
>
> As I mentioned in the earlier email, please add the detailed description in patch 0 to this patch.
>
> How about defining the action - fwd_group as - fwd_group(liveliness=true, childports=p1,p2,...) ?
>
> Normally in the OVN action, we use ";" for inner OpenFlow actions, but these are more like arguments
> to the action - fwd_group.
>
> Thank you for the suggestion. I am sending the new changes in v3.
>
> In your setup, you would manually set the child port's chassis yourself ?
> i.e ovn-sbctl lsp-bind <child port> <external router chassis>  right ?
>
> Yes, if the chassis is not running ovn-controller.
>
> You need to add the relevant documentation in ovn-northd.8.xml for the logical flows added
>
> You need to add documentation for the action - fwd_group in ovn-sb.xml.
>
> Also please update the NEWS file about this feature.
>
> Done
>
> Please see below for few comments.
>
> ---
>  controller/lflow.c    |  20 +++++++++
>  controller/physical.c |  13 ++++++
>  controller/physical.h |   4 ++
>  include/ovn/actions.h |  19 +++++++-
>  lib/actions.c         | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  northd/ovn-northd.c   |  63 ++++++++++++++++++++++++++
>  utilities/ovn-trace.c |   3 ++
>  7 files changed, 243 insertions(+), 1 deletion(-)
>
> diff --git a/controller/lflow.c b/controller/lflow.c
> index a689320..49dfa06 100644
> --- a/controller/lflow.c
> +++ b/controller/lflow.c
> @@ -103,6 +103,25 @@ lookup_port_cb(const void *aux_, const char *port_name, unsigned int *portp)
>      return false;
>  }
>
> +/* Given the OVN port name, get its openflow port */
> +static bool
> +tunnel_ofport_cb(const void *aux_, const char *port_name, ofp_port_t *ofport)
> +{
> +    const struct lookup_port_aux *aux = aux_;
> +
> +    const struct sbrec_port_binding *pb
> +        = lport_lookup_by_name(aux->sbrec_port_binding_by_name, port_name);
> +    if (!pb || (pb->datapath != aux->dp) || !pb->chassis) {
> +        return false;
> +    }
> +
> +    if (!get_tunnel_ofport(pb->chassis->name, NULL, ofport)) {
> +        return false;
> +    }
> +
> +    return true;
> +}
> +
>  static bool
>  is_chassis_resident_cb(const void *c_aux_, const char *port_name)
>  {
> @@ -681,6 +700,7 @@ consider_logical_flow(
>      struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
>      struct ovnact_encode_params ep = {
>          .lookup_port = lookup_port_cb,
> +        .tunnel_ofport = tunnel_ofport_cb,
>          .aux = &aux,
>          .is_switch = is_switch(ldp),
>          .group_table = group_table,
> diff --git a/controller/physical.c b/controller/physical.c
> index 500d419..af1d10f 100644
> --- a/controller/physical.c
> +++ b/controller/physical.c
> @@ -1794,3 +1794,16 @@ physical_run(struct ovsdb_idl_index *sbrec_port_binding_by_name,
>
>      simap_destroy(&new_tunnel_to_ofport);
>  }
> +
> +bool
> +get_tunnel_ofport(const char *chassis_name, char *encap_ip, ofp_port_t *ofport)
> +{
> +    struct chassis_tunnel *tun = NULL;
> +    tun = chassis_tunnel_find(chassis_name, encap_ip);
> +    if (!tun) {
> +        return false;
> +    }
> +
> +    *ofport = tun->ofport;
> +    return true;
> +}
> diff --git a/controller/physical.h b/controller/physical.h
> index c93f6b1..c0e17cd 100644
> --- a/controller/physical.h
> +++ b/controller/physical.h
> @@ -72,4 +72,8 @@ void physical_handle_mc_group_changes(
>          const struct simap *ct_zones,
>          const struct hmap *local_datapaths,
>          struct ovn_desired_flow_table *);
> +bool get_tunnel_ofport(
> +        const char *chassis_name,
> +        char *encap_ip,
> +        ofp_port_t *ofport);
>  #endif /* controller/physical.h */
> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> index 047a8d7..2d39239 100644
> --- a/include/ovn/actions.h
> +++ b/include/ovn/actions.h
> @@ -89,7 +89,8 @@ struct ovn_extend_table;
>      OVNACT(CHECK_PKT_LARGER,  ovnact_check_pkt_larger) \
>      OVNACT(TRIGGER_EVENT,     ovnact_controller_event) \
>      OVNACT(BIND_VPORT,        ovnact_bind_vport)       \
> -    OVNACT(HANDLE_SVC_CHECK,  ovnact_handle_svc_check)
> +    OVNACT(HANDLE_SVC_CHECK,  ovnact_handle_svc_check) \
> +    OVNACT(FWD_GROUP,         ovnact_fwd_group)
>
>  /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
>  enum OVS_PACKED_ENUM ovnact_type {
> @@ -359,6 +360,15 @@ struct ovnact_handle_svc_check {
>      struct expr_field port;     /* Logical port name. */
>  };
>
> +/* OVNACT_FWD_GROUP. */
> +struct ovnact_fwd_group {
> +    struct ovnact ovnact;
> +    bool liveness;
> +    char **child_ports;       /* Logical ports */
> +    size_t n_child_ports;
> +    uint8_t ltable;           /* Logical table ID of next table. */
> +};
> +
>  /* Internal use by the helpers below. */
>  void ovnact_init(struct ovnact *, enum ovnact_type, size_t len);
>  void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len);
> @@ -620,6 +630,13 @@ struct ovnact_encode_params {
>       * '*portp' and returns true; otherwise, returns false. */
>      bool (*lookup_port)(const void *aux, const char *port_name,
>                          unsigned int *portp);
> +
> +    /* Looks up tunnel port to a chassis by its port name.  If found, stores
> +     * its openflow port number in '*ofport' and returns true;
> +     * otherwise, returns false. */
> +    bool (*tunnel_ofport)(const void *aux, const char *port_name,
> +                          ofp_port_t *ofport);
> +
>      const void *aux;
>
>      /* 'true' if the flow is for a switch. */
> diff --git a/lib/actions.c b/lib/actions.c
> index 051e6c8..73dade9 100644
> --- a/lib/actions.c
> +++ b/lib/actions.c
> @@ -2854,6 +2854,126 @@ ovnact_handle_svc_check_free(struct ovnact_handle_svc_check *sc OVS_UNUSED)
>  {
>  }
>
> +static void
> +parse_fwd_group_action(struct action_context *ctx)
> +{
> +    char *child_port, **child_port_list = NULL;
> +    size_t allocated_ports = 0;
> +    size_t n_child_ports = 0;
> +    bool liveness = false;
> +
> +    if (lexer_match(ctx->lexer, LEX_T_LPAREN)) {
> +        if (lexer_match_id(ctx->lexer, "liveness")) {
> +            liveness = true;
> +            lexer_force_match(ctx->lexer, LEX_T_SEMICOLON);
> +        }
> +        while (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
> +            if (ctx->lexer->token.type != LEX_T_STRING) {
> +                lexer_syntax_error(ctx->lexer,
> +                                   "expecting logical switch port");
> +                return;
>
> I think there is a memory leak here, if it returns in the middle of parsing.
>
> It should get freed at the end of the command. I have not seen any explicit free in case of lexer error.

I mean the memory allocated via x2nrealloc() below for the
child_port_list and which gets assigned to fwd_group->child_ports.

I know that the function ovnact_fwd_group_free() would free this
memory at the end.
If in case parse_fwd_group_action() sets error via
lexer_syntax_error(), I don't think ovnact_fwd_group_free()
is called, since the parsed actions are not added via
ovnact_put_FWD_GROUP() to ovnacts buffer.

Can you please verify once.

Thanks
Numan

>
> +            }
> +            /* Parse child's logical ports */
> +            child_port = xstrdup(ctx->lexer->token.s);
> +            lexer_get(ctx->lexer);
> +            lexer_match(ctx->lexer, LEX_T_COMMA);
> +
> +            if (n_child_ports >= allocated_ports) {
> +                child_port_list = x2nrealloc(child_port_list, &allocated_ports,
> +                                             sizeof *child_port_list);
> +            }
> +            child_port_list[n_child_ports++] = child_port;
> +        }
> +    }
> +
> +    struct ovnact_fwd_group *fwd_group = ovnact_put_FWD_GROUP(ctx->ovnacts);
> +    fwd_group->ltable = ctx->pp->cur_ltable + 1;
> +    fwd_group->liveness = liveness;
> +    fwd_group->child_ports = child_port_list;
> +    fwd_group->n_child_ports = n_child_ports;
> +}
> +
> +static void
> +format_FWD_GROUP(const struct ovnact_fwd_group *fwd_group, struct ds *s)
> +{
> +    ds_put_cstr(s, "fwd_group(");
> +    if (fwd_group->liveness) {
> +        ds_put_cstr(s, "liveness;");
> +    }
> +    if (fwd_group->n_child_ports) {
> +        for (size_t i = 0; i < fwd_group->n_child_ports; i++) {
> +            if (i) {
> +                ds_put_cstr(s, ", ");
> +            }
> +
> +            ds_put_format(s, "%s", fwd_group->child_ports[i]);
> +        }
> +    }
> +    ds_put_cstr(s, ");");
> +}
> +
> +static void
> +encode_FWD_GROUP(const struct ovnact_fwd_group *fwd_group,
> +                 const struct ovnact_encode_params *ep,
> +                 struct ofpbuf *ofpacts)
> +{
> +    if (!fwd_group->n_child_ports) {
> +        /* Nothing to do without child ports */
> +        return;
> +    }
> +
> +    uint32_t reg_index = MFF_LOG_OUTPORT - MFF_REG0;
> +    struct ds ds = DS_EMPTY_INITIALIZER;
> +
> +    ds_put_format(&ds, "type=select,selection_method=dp_hash");
> +
> +    for (size_t i = 0; i < fwd_group->n_child_ports; i++) {
> +        uint32_t  port_tunnel_key;
> +        ofp_port_t ofport;
> +
> +        const char *port_name = fwd_group->child_ports[i];
> +
> +        /* Find the tunnel key of the logical port */
> +        if (!ep->lookup_port(ep->aux, port_name, &port_tunnel_key)) {
> +            return;
> +        }
> +        ds_put_format(&ds, ",bucket=");
> +
> +        if (fwd_group->liveness) {
> +            /* Find the openflow port number of the tunnel port */
> +            if (!ep->tunnel_ofport(ep->aux, port_name, &ofport)) {
> +                return;
> +            }
> +
> +            /* Watch port for failure, used with BFD */
> +            ds_put_format(&ds, "watch_port:%d,", ofport);
> +        }
> +
> +        ds_put_format(&ds, "load=0x%d->NXM_NX_REG%d[0..15]",
> +                      port_tunnel_key, reg_index);
> +        ds_put_format(&ds, ",resubmit(,%d)", ep->output_ptable);
> +    }
> +
> +    uint32_t table_id = 0;
> +    struct ofpact_group *og;
> +    table_id = ovn_extend_table_assign_id(ep->group_table, ds_cstr(&ds),
> +                                          ep->lflow_uuid);
> +    ds_destroy(&ds);
> +    if (table_id == EXT_TABLE_ID_INVALID) {
> +        return;
> +    }
> +
> +    /* Create an action to set the group */
> +    og = ofpact_put_GROUP(ofpacts);
> +    og->group_id = table_id;
> +}
> +
> +static void
> +ovnact_fwd_group_free(struct ovnact_fwd_group *fwd_group)
> +{
> +    free(fwd_group->child_ports);
> +}
> +
>  /* Parses an assignment or exchange or put_dhcp_opts action. */
>  static void
>  parse_set_action(struct action_context *ctx)
> @@ -2973,6 +3093,8 @@ parse_action(struct action_context *ctx)
>          parse_bind_vport(ctx);
>      } else if (lexer_match_id(ctx->lexer, "handle_svc_check")) {
>          parse_handle_svc_check(ctx);
> +    } else if (lexer_match_id(ctx->lexer, "fwd_group")) {
> +        parse_fwd_group_action(ctx);
>      } else {
>          lexer_syntax_error(ctx->lexer, "expecting action");
>      }
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index b6dc809..29bb27a 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -5419,6 +5419,60 @@ build_stateful(struct ovn_datapath *od, struct hmap *lflows, struct hmap *lbs)
>  }
>
>  static void
> +build_fwd_group_lflows(struct ovn_datapath *od, struct hmap *lflows)
> +{
> +    struct ds match = DS_EMPTY_INITIALIZER;
> +    struct ds actions = DS_EMPTY_INITIALIZER;
> +
> +    for (int i = 0; i < od->nbs->n_forwarding_groups; ++i) {
> +        const struct nbrec_forwarding_group *fwd_group = NULL;
> +        fwd_group = od->nbs->forwarding_groups[i];
> +        if (!fwd_group || (fwd_group->n_child_port == 0)) {
> +            continue;
> +        }
> +
> +        /* ARP responder for the forwarding group's virtual IP */
> +        ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
> +                      fwd_group->vip);
> +        ds_put_format(&actions,
> +            "eth.dst = eth.src; "
> +            "eth.src = %s; "
> +            "arp.op = 2; /* ARP reply */ "
> +            "arp.tha = arp.sha; "
> +            "arp.sha = %s; "
> +            "arp.tpa = arp.spa; "
> +            "arp.spa = %s; "
> +            "outport = inport; "
> +            "flags.loopback = 1; "
> +            "output;",
> +            fwd_group->vmac, fwd_group->vmac, fwd_group->vip);
> +
> +        ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 50,
> +                      ds_cstr(&match), ds_cstr(&actions));
> +
> +        /* L2 lookup for the forwarding group's virtual MAC */
> +        ds_clear(&match);
> +        ds_put_format(&match, "eth.dst == %s", fwd_group->vmac);
> +
> +        /* Create a comma separated string of child ports */
> +        struct ds group_ports = DS_EMPTY_INITIALIZER;
> +        if (fwd_group->liveness) {
> +            ds_put_cstr(&group_ports, "liveness;");
> +        }
> +        for (i = 0; i < (fwd_group->n_child_port - 1); ++i) {
> +            ds_put_format(&group_ports, "\"%s\",", fwd_group->child_port[i]);
> +        }
> +        ds_put_format(&group_ports, "\"%s\"",
> +                      fwd_group->child_port[fwd_group->n_child_port - 1]);
> +
> +        ds_clear(&actions);
> +        ds_put_format(&actions, "fwd_group(%s);", group_ports.string);
>
> Can you please use "ds_cstr()" instead of directly accessing the string.
> Done
>
> +        ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 50,
> +                      ds_cstr(&match), ds_cstr(&actions));
> +    }
> +}
> +
> +static void
>  build_lrouter_groups__(struct hmap *ports, struct ovn_datapath *od)
>  {
>      ovs_assert((od && od->nbr && od->lr_group));
> @@ -5718,6 +5772,15 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
>          build_stateful(od, lflows, lbs);
>      }
>
> +    /* Build logical flows for the forwarding groups */
> +    HMAP_FOR_EACH (od, key_node, datapaths) {
> +        if (!od->nbs) {
> +            continue;
> +        }
> How about
>            if(!ods->nbs || !ods->n_forwarding_groups)  {
>                  continue;
>            }
> Right, done.
>
> +
> +        build_fwd_group_lflows(od, lflows);
> +    }
> +
>      /* Logical switch ingress table 0: Admission control framework (priority
>       * 100). */
>      HMAP_FOR_EACH (od, key_node, datapaths) {
> diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c
> index 2645438..a40700a 100644
> --- a/utilities/ovn-trace.c
> +++ b/utilities/ovn-trace.c
> @@ -2224,6 +2224,9 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,
>
>          case OVNACT_HANDLE_SVC_CHECK:
>              break;
> +
> +        case OVNACT_FWD_GROUP:
> +            break;
>
> It would be nice if you could add ovn-trace support for this if its possible.
> A follow up patch would do too. Otherwise this would become a technical debt  :).
>
> Sure, I will add it in separate patch.
>
>          }
>      }
>      ds_destroy(&s);
> --
> 1.8.3.1
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org<mailto:dev at openvswitch.org>
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev [mail.openvswitch.org]<https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwMFaQ&c=s883GpUCOChKOHiocYtGcg&r=9SN4tlEcxQzh-CvuC81YSi9I6ERNBOWQbg-05pnXlNw&m=qHMqdUI30B2OQvMDCELNR-4Qrn9KZl-ur1GFQQ7WQCU&s=P7MufuELa-MXYQMnFzNAITSeDY1Qyhmn37vnqoSQ0QE&e=>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>


More information about the dev mailing list