[ovs-dev] [PATCH ovs V2 12/21] dpif-netlink: Use netdev flow put api to insert a flow

Paul Blakey paulb at mellanox.com
Tue Jan 10 15:50:41 UTC 2017



On 06/01/2017 01:25, Joe Stringer wrote:
> On 25 December 2016 at 03:39, Paul Blakey <paulb at mellanox.com> wrote:
>> Using the new netdev flow api operate will now try and
>> offload flows to the relevant netdev of the input port.
>> Other operate methods flows will come in later patches.
>>
>> Signed-off-by: Paul Blakey <paulb at mellanox.com>
>> Reviewed-by: Roi Dayan <roid at mellanox.com>
>> ---
>>   lib/dpif-netlink.c | 232 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>>   1 file changed, 228 insertions(+), 4 deletions(-)
>>
>> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>> index 3d8940e..717af90 100644
>> --- a/lib/dpif-netlink.c
>> +++ b/lib/dpif-netlink.c
>> @@ -1908,15 +1908,239 @@ dpif_netlink_operate__(struct dpif_netlink *dpif,
>>       return n_ops;
>>   }
>>
>> +static int
>> +parse_key_and_mask_to_match(const struct nlattr *key, size_t key_len,
>> +                            const struct nlattr *mask, size_t mask_len,
>> +                            struct match *match)
>> +{
>> +    enum odp_key_fitness fitness;
>> +
>> +    fitness = odp_flow_key_to_flow(key, key_len, &match->flow);
>> +    if (fitness) {
>> +        /* This should not happen: it indicates that odp_flow_key_from_flow()
>> +         * and odp_flow_key_to_flow() disagree on the acceptable form of a
>> +         * flow.  Log the problem as an error, with enough details to enable
>> +         * debugging. */
>> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>> +
>> +        if (!VLOG_DROP_ERR(&rl)) {
>> +            struct ds s;
>> +
>> +            ds_init(&s);
>> +            odp_flow_format(key, key_len, NULL, 0, NULL, &s, true);
>> +            VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s));
>> +            ds_destroy(&s);
>> +        }
>> +
>> +        return EINVAL;
>> +    }
>> +
>> +    fitness = odp_flow_key_to_mask(mask, mask_len, &match->wc, &match->flow);
>> +    if (fitness) {
>> +        /* This should not happen: it indicates that
>> +         * odp_flow_key_from_mask() and odp_flow_key_to_mask()
>> +         * disagree on the acceptable form of a mask.  Log the problem
>> +         * as an error, with enough details to enable debugging. */
>> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>> +
>> +        if (!VLOG_DROP_ERR(&rl)) {
>> +            struct ds s;
>> +
>> +            VLOG_ERR("internal error parsing flow mask %s (%s)",
>> +                     ds_cstr(&s), odp_key_fitness_to_string(fitness));
>> +            ds_destroy(&s);
>> +        }
>> +
>> +        return EINVAL;
>> +    }
>> +
>> +    return 0;
>> +}
> Duplicated code. Please refactor and reuse. (Separate patch for
> refactoring to keep the patches clear).
>
>> +
>> +static bool
>> +parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
>> +{
>> +    struct match match;
>> +    odp_port_t in_port;
>> +    const struct nlattr *nla;
>> +    size_t left;
>> +    int outputs = 0;
>> +    struct ofpbuf buf;
>> +    uint64_t act_stub[1024 / 8];
>> +    size_t offset;
>> +    struct nlattr *act;
>> +    struct netdev *dev;
>> +    int err;
>> +
>> +    /* 0x1234 - fake eth type sent to probe feature */
>> +    if (put->flags & DPIF_FP_PROBE || match.flow.dl_type == htons(0x1234)) {
>> +        return false;
>> +    }
> Shouldn't DPIF_FP_PROBE be sufficient?
I guess, I think we added it because we still got some probe features. I've
checked it now and I couldn't find any.
>> +
>> +    if (parse_key_and_mask_to_match(put->key, put->key_len, put->mask,
>> +                                put->mask_len, &match)) {
>> +        return false;
>> +    }
>> +
>> +    in_port = match.flow.in_port.odp_port;
>> +    ofpbuf_use_stub(&buf, act_stub, sizeof act_stub);
>> +    offset = nl_msg_start_nested(&buf, OVS_FLOW_ATTR_ACTIONS);
>> +    NL_ATTR_FOR_EACH(nla, left, put->actions, put->actions_len) {
>> +        if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
>> +            struct netdev *outdev;
>> +            int ifindex_out;
>> +            const struct netdev_tunnel_config *tnl_cfg;
>> +            size_t out_off;
>> +            odp_port_t out_port;
>> +
>> +            outputs++;
>> +            if (outputs > 1) {
>> +                break;
>> +            }
>> +
>> +            out_port = nl_attr_get_u32(nla);
>> +            outdev = netdev_hmap_port_get(out_port, dpif->dpif.dpif_class);
>> +            tnl_cfg = netdev_get_tunnel_config(outdev);
>> +
>> +            out_off = nl_msg_start_nested(&buf, OVS_ACTION_ATTR_OUTPUT);
>> +            ifindex_out = netdev_get_ifindex(outdev);
> Can this fail? (port not backed by device with ifindex?)
>
>> +            nl_msg_put_u32(&buf, OVS_ACTION_ATTR_OUTPUT, ifindex_out);
>> +            if (tnl_cfg && tnl_cfg->dst_port != 0) {
>> +                nl_msg_put_u32(&buf, OVS_TUNNEL_KEY_ATTR_TP_DST, tnl_cfg->dst_port);
>> +            }
>> +            nl_msg_end_nested(&buf, out_off);
>> +
>> +            if (outdev) {
>> +                netdev_close(outdev);
>> +            }
>> +        } else {
>> +            nl_msg_put_unspec(&buf, nl_attr_type(nla), nl_attr_get(nla),
>> +                              nl_attr_get_size(nla));
>> +        }
>> +    }
>> +    nl_msg_end_nested(&buf, offset);
> Hmm. I'm a bit uneasy about this whole copying/rewriting of the
> actions. Firstly, the tunnel stuff just looks wrong. A quick "git
> grep" shows that OVS_TUNNEL_KEY_ATTR_TP_DST is using the wrong type,
> and looking closer at the other places it is used, it looks like this
> function is shortcutting a whole bunch of the tunnel config.
Besides the wrong type, I'm not sure what you mean, dpif_netlink_port_add__
seems to access the tunnel dst port the same way.
>
> I recognise that the ODP port number needs to be translated into an
> ifindex. Do we need to do anything else?
tunnel dst port for set action was missing in ACTION_SET ,  TUNNEL pair.
We needed that for offloading.
> Also, these actions are only going to end up getting translated again,
> into the actual tc format so is there a way we can get rid of this
> extra piece?
>
> 'buf' is used with ofpbuf_use_stub(), and the comment says that the
> caller must call ofpbuf_uninit() on the buffer in case it was
> reallocated with malloc'd memory.
>
>> +
>> +    if (outputs > 1) {
>> +        return false;
>> +    }
>> +
>> +    act = ofpbuf_at_assert(&buf, offset, sizeof(struct nlattr));
>> +    dev = netdev_hmap_port_get(in_port, dpif->dpif.dpif_class);
>> +    err = netdev_flow_put(dev, &match, CONST_CAST(struct nlattr *,
>> +                                                  nl_attr_get(act)),
>> +                          nl_attr_get_size(act), put->stats,
>> +                          CONST_CAST(ovs_u128 *, put->ufid));
>> +    netdev_close(dev);
>> +
>> +    if (!err) {
>> +        if (put->flags & DPIF_FP_MODIFY) {
>> +            struct dpif_op *opp;
>> +            struct dpif_op op;
>> +
>> +            op.type = DPIF_OP_FLOW_DEL;
>> +            op.u.flow_del.key = put->key;
>> +            op.u.flow_del.key_len = put->key_len;
>> +            op.u.flow_del.ufid = put->ufid;
>> +            op.u.flow_del.pmd_id = put->pmd_id;
>> +            op.u.flow_del.stats = NULL;
>> +            op.u.flow_del.terse = false;
>> +
>> +            opp = &op;
>> +            dpif_netlink_operate__(dpif, &opp, 1);
> Won't this just delete the flow that was just added?
The above FLOW_DEL calls the original operate (without offload) to delete it
if it from netlink kernel datapath. This will happen if we got a modify 
request and we now
can offload the flow and previously couldn't (so its in kernel datapath).
> Is there a modify at the tc layer?
Yes there is, and implemented by given a already used handle.
So if we find the ufid in the map (ufid to handle/prio) already, we 
modify it by using
the same handle.

>> +        }
>> +        VLOG_DBG("added flow");
>> +        return true;
>> +    }
>> +    VLOG_DBG("failed adding flow: %s", ovs_strerror(err));
>> +
>> +    return false;
>> +}
>> +
>> +static void
>> +dbg_print_flow(const struct nlattr *key, size_t key_len,
>> +               const struct nlattr *mask, size_t mask_len,
>> +               const struct nlattr *actions, size_t actions_len,
>> +               const ovs_u128 *ufid,
>> +               const char *op)
>> +{
>> +        struct ds s;
>> +
>> +        ds_init(&s);
>> +        ds_put_cstr(&s, op);
>> +        ds_put_cstr(&s, " (");
>> +        odp_format_ufid(ufid, &s);
>> +        ds_put_cstr(&s, ")");
>> +        if (key_len) {
>> +            ds_put_cstr(&s, "\nflow (verbose): ");
>> +            odp_flow_format(key, key_len, mask, mask_len, NULL, &s, true);
>> +            ds_put_cstr(&s, "\nflow: ");
>> +            odp_flow_format(key, key_len, mask, mask_len, NULL, &s, false);
>> +        }
>> +        if (actions_len) {
>> +            ds_put_cstr(&s, "\nactions: ");
>> +            format_odp_actions(&s, actions, actions_len);
>> +        }
>> +        VLOG_DBG("\n%s", ds_cstr(&s));
>> +        ds_destroy(&s);
>> +}
>> +
>> +static bool
>> +try_send_to_netdev(struct dpif_netlink *dpif, struct dpif_op *op)
>> +{
>> +    switch (op->type) {
>> +    case DPIF_OP_FLOW_PUT: {
>> +        struct dpif_flow_put *put = &op->u.flow_put;
>> +
>> +        if (!put->ufid) {
>> +            return false;
>> +        }
>> +        dbg_print_flow(put->key, put->key_len, put->mask, put->mask_len,
>> +                       put->actions, put->actions_len, put->ufid, "PUT");
>> +        return parse_flow_put(dpif, put);
>> +    }
>> +    case DPIF_OP_FLOW_DEL:
>> +    case DPIF_OP_FLOW_GET:
>> +    case DPIF_OP_EXECUTE:
>> +    default:
> Do you never delete from hardware?
Yes its in later patch in the patch set. we split those
logically.

>
>> +        break;
>> +    }
>> +    return false;
>> +}
>> +
>>   static void
>>   dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops)
>>   {
>>       struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
>> +    struct dpif_op **new_ops;
>> +    int n_new_ops = 0;
>> +    int i = 0;
>> +
>> +    if (!netdev_flow_api_enabled) {
> In general, it is easier to read if (foo) {...} else {...} rather than
> if(!foo) {...} else {...}.
>
>> +        new_ops = ops;
>> +        n_new_ops = n_ops;
>> +    } else {
>> +        new_ops = xmalloc((sizeof *new_ops) * n_ops);
>> +        n_new_ops = 0;
>> +
>> +        for (i = 0; i < n_ops; i++) {
>> +            if (try_send_to_netdev(dpif, ops[i])) {
>> +                ops[i]->error = 0;
>> +            } else {
>> +                new_ops[n_new_ops++] = ops[i];
>> +            }
>> +        }
>> +        ops = new_ops;
> Please don't overwrite function parameters.
>
>> +    }
>> +
>> +    while (n_new_ops > 0) {
>> +        size_t chunk = dpif_netlink_operate__(dpif, new_ops, n_new_ops);
>>
>> -    while (n_ops > 0) {
>> -        size_t chunk = dpif_netlink_operate__(dpif, ops, n_ops);
>> -        ops += chunk;
>> -        n_ops -= chunk;
>> +        new_ops += chunk;
>> +        n_new_ops -= chunk;
>> +    }
>> +
>> +    if (netdev_flow_api_enabled) {
>> +       free(ops);
>>       }
>>   }
> I think you can get rid of the malloc/free by using a style closer to
> the one at the beginning of push_dp_ops() in ofproto-dpif-upcall.c.



More information about the dev mailing list