[ovs-dev] [PATCH ovs V2 12/21] dpif-netlink: Use netdev flow put api to insert a flow
Paul Blakey
paulb at mellanox.com
Tue Jan 10 15:50:41 UTC 2017
On 06/01/2017 01:25, Joe Stringer wrote:
> On 25 December 2016 at 03:39, Paul Blakey <paulb at mellanox.com> wrote:
>> Using the new netdev flow api operate will now try and
>> offload flows to the relevant netdev of the input port.
>> Other operate methods flows will come in later patches.
>>
>> Signed-off-by: Paul Blakey <paulb at mellanox.com>
>> Reviewed-by: Roi Dayan <roid at mellanox.com>
>> ---
>> lib/dpif-netlink.c | 232 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>> 1 file changed, 228 insertions(+), 4 deletions(-)
>>
>> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>> index 3d8940e..717af90 100644
>> --- a/lib/dpif-netlink.c
>> +++ b/lib/dpif-netlink.c
>> @@ -1908,15 +1908,239 @@ dpif_netlink_operate__(struct dpif_netlink *dpif,
>> return n_ops;
>> }
>>
>> +static int
>> +parse_key_and_mask_to_match(const struct nlattr *key, size_t key_len,
>> + const struct nlattr *mask, size_t mask_len,
>> + struct match *match)
>> +{
>> + enum odp_key_fitness fitness;
>> +
>> + fitness = odp_flow_key_to_flow(key, key_len, &match->flow);
>> + if (fitness) {
>> + /* This should not happen: it indicates that odp_flow_key_from_flow()
>> + * and odp_flow_key_to_flow() disagree on the acceptable form of a
>> + * flow. Log the problem as an error, with enough details to enable
>> + * debugging. */
>> + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>> +
>> + if (!VLOG_DROP_ERR(&rl)) {
>> + struct ds s;
>> +
>> + ds_init(&s);
>> + odp_flow_format(key, key_len, NULL, 0, NULL, &s, true);
>> + VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s));
>> + ds_destroy(&s);
>> + }
>> +
>> + return EINVAL;
>> + }
>> +
>> + fitness = odp_flow_key_to_mask(mask, mask_len, &match->wc, &match->flow);
>> + if (fitness) {
>> + /* This should not happen: it indicates that
>> + * odp_flow_key_from_mask() and odp_flow_key_to_mask()
>> + * disagree on the acceptable form of a mask. Log the problem
>> + * as an error, with enough details to enable debugging. */
>> + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>> +
>> + if (!VLOG_DROP_ERR(&rl)) {
>> + struct ds s;
>> +
>> + VLOG_ERR("internal error parsing flow mask %s (%s)",
>> + ds_cstr(&s), odp_key_fitness_to_string(fitness));
>> + ds_destroy(&s);
>> + }
>> +
>> + return EINVAL;
>> + }
>> +
>> + return 0;
>> +}
> Duplicated code. Please refactor and reuse. (Separate patch for
> refactoring to keep the patches clear).
>
>> +
>> +static bool
>> +parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
>> +{
>> + struct match match;
>> + odp_port_t in_port;
>> + const struct nlattr *nla;
>> + size_t left;
>> + int outputs = 0;
>> + struct ofpbuf buf;
>> + uint64_t act_stub[1024 / 8];
>> + size_t offset;
>> + struct nlattr *act;
>> + struct netdev *dev;
>> + int err;
>> +
>> + /* 0x1234 - fake eth type sent to probe feature */
>> + if (put->flags & DPIF_FP_PROBE || match.flow.dl_type == htons(0x1234)) {
>> + return false;
>> + }
> Shouldn't DPIF_FP_PROBE be sufficient?
I guess, I think we added it because we still got some probe features. I've
checked it now and I couldn't find any.
>> +
>> + if (parse_key_and_mask_to_match(put->key, put->key_len, put->mask,
>> + put->mask_len, &match)) {
>> + return false;
>> + }
>> +
>> + in_port = match.flow.in_port.odp_port;
>> + ofpbuf_use_stub(&buf, act_stub, sizeof act_stub);
>> + offset = nl_msg_start_nested(&buf, OVS_FLOW_ATTR_ACTIONS);
>> + NL_ATTR_FOR_EACH(nla, left, put->actions, put->actions_len) {
>> + if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
>> + struct netdev *outdev;
>> + int ifindex_out;
>> + const struct netdev_tunnel_config *tnl_cfg;
>> + size_t out_off;
>> + odp_port_t out_port;
>> +
>> + outputs++;
>> + if (outputs > 1) {
>> + break;
>> + }
>> +
>> + out_port = nl_attr_get_u32(nla);
>> + outdev = netdev_hmap_port_get(out_port, dpif->dpif.dpif_class);
>> + tnl_cfg = netdev_get_tunnel_config(outdev);
>> +
>> + out_off = nl_msg_start_nested(&buf, OVS_ACTION_ATTR_OUTPUT);
>> + ifindex_out = netdev_get_ifindex(outdev);
> Can this fail? (port not backed by device with ifindex?)
>
>> + nl_msg_put_u32(&buf, OVS_ACTION_ATTR_OUTPUT, ifindex_out);
>> + if (tnl_cfg && tnl_cfg->dst_port != 0) {
>> + nl_msg_put_u32(&buf, OVS_TUNNEL_KEY_ATTR_TP_DST, tnl_cfg->dst_port);
>> + }
>> + nl_msg_end_nested(&buf, out_off);
>> +
>> + if (outdev) {
>> + netdev_close(outdev);
>> + }
>> + } else {
>> + nl_msg_put_unspec(&buf, nl_attr_type(nla), nl_attr_get(nla),
>> + nl_attr_get_size(nla));
>> + }
>> + }
>> + nl_msg_end_nested(&buf, offset);
> Hmm. I'm a bit uneasy about this whole copying/rewriting of the
> actions. Firstly, the tunnel stuff just looks wrong. A quick "git
> grep" shows that OVS_TUNNEL_KEY_ATTR_TP_DST is using the wrong type,
> and looking closer at the other places it is used, it looks like this
> function is shortcutting a whole bunch of the tunnel config.
Besides the wrong type, I'm not sure what you mean, dpif_netlink_port_add__
seems to access the tunnel dst port the same way.
>
> I recognise that the ODP port number needs to be translated into an
> ifindex. Do we need to do anything else?
tunnel dst port for set action was missing in ACTION_SET , TUNNEL pair.
We needed that for offloading.
> Also, these actions are only going to end up getting translated again,
> into the actual tc format so is there a way we can get rid of this
> extra piece?
>
> 'buf' is used with ofpbuf_use_stub(), and the comment says that the
> caller must call ofpbuf_uninit() on the buffer in case it was
> reallocated with malloc'd memory.
>
>> +
>> + if (outputs > 1) {
>> + return false;
>> + }
>> +
>> + act = ofpbuf_at_assert(&buf, offset, sizeof(struct nlattr));
>> + dev = netdev_hmap_port_get(in_port, dpif->dpif.dpif_class);
>> + err = netdev_flow_put(dev, &match, CONST_CAST(struct nlattr *,
>> + nl_attr_get(act)),
>> + nl_attr_get_size(act), put->stats,
>> + CONST_CAST(ovs_u128 *, put->ufid));
>> + netdev_close(dev);
>> +
>> + if (!err) {
>> + if (put->flags & DPIF_FP_MODIFY) {
>> + struct dpif_op *opp;
>> + struct dpif_op op;
>> +
>> + op.type = DPIF_OP_FLOW_DEL;
>> + op.u.flow_del.key = put->key;
>> + op.u.flow_del.key_len = put->key_len;
>> + op.u.flow_del.ufid = put->ufid;
>> + op.u.flow_del.pmd_id = put->pmd_id;
>> + op.u.flow_del.stats = NULL;
>> + op.u.flow_del.terse = false;
>> +
>> + opp = &op;
>> + dpif_netlink_operate__(dpif, &opp, 1);
> Won't this just delete the flow that was just added?
The above FLOW_DEL calls the original operate (without offload) to delete it
if it from netlink kernel datapath. This will happen if we got a modify
request and we now
can offload the flow and previously couldn't (so its in kernel datapath).
> Is there a modify at the tc layer?
Yes there is, and implemented by given a already used handle.
So if we find the ufid in the map (ufid to handle/prio) already, we
modify it by using
the same handle.
>> + }
>> + VLOG_DBG("added flow");
>> + return true;
>> + }
>> + VLOG_DBG("failed adding flow: %s", ovs_strerror(err));
>> +
>> + return false;
>> +}
>> +
>> +static void
>> +dbg_print_flow(const struct nlattr *key, size_t key_len,
>> + const struct nlattr *mask, size_t mask_len,
>> + const struct nlattr *actions, size_t actions_len,
>> + const ovs_u128 *ufid,
>> + const char *op)
>> +{
>> + struct ds s;
>> +
>> + ds_init(&s);
>> + ds_put_cstr(&s, op);
>> + ds_put_cstr(&s, " (");
>> + odp_format_ufid(ufid, &s);
>> + ds_put_cstr(&s, ")");
>> + if (key_len) {
>> + ds_put_cstr(&s, "\nflow (verbose): ");
>> + odp_flow_format(key, key_len, mask, mask_len, NULL, &s, true);
>> + ds_put_cstr(&s, "\nflow: ");
>> + odp_flow_format(key, key_len, mask, mask_len, NULL, &s, false);
>> + }
>> + if (actions_len) {
>> + ds_put_cstr(&s, "\nactions: ");
>> + format_odp_actions(&s, actions, actions_len);
>> + }
>> + VLOG_DBG("\n%s", ds_cstr(&s));
>> + ds_destroy(&s);
>> +}
>> +
>> +static bool
>> +try_send_to_netdev(struct dpif_netlink *dpif, struct dpif_op *op)
>> +{
>> + switch (op->type) {
>> + case DPIF_OP_FLOW_PUT: {
>> + struct dpif_flow_put *put = &op->u.flow_put;
>> +
>> + if (!put->ufid) {
>> + return false;
>> + }
>> + dbg_print_flow(put->key, put->key_len, put->mask, put->mask_len,
>> + put->actions, put->actions_len, put->ufid, "PUT");
>> + return parse_flow_put(dpif, put);
>> + }
>> + case DPIF_OP_FLOW_DEL:
>> + case DPIF_OP_FLOW_GET:
>> + case DPIF_OP_EXECUTE:
>> + default:
> Do you never delete from hardware?
Yes its in later patch in the patch set. we split those
logically.
>
>> + break;
>> + }
>> + return false;
>> +}
>> +
>> static void
>> dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops)
>> {
>> struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
>> + struct dpif_op **new_ops;
>> + int n_new_ops = 0;
>> + int i = 0;
>> +
>> + if (!netdev_flow_api_enabled) {
> In general, it is easier to read if (foo) {...} else {...} rather than
> if(!foo) {...} else {...}.
>
>> + new_ops = ops;
>> + n_new_ops = n_ops;
>> + } else {
>> + new_ops = xmalloc((sizeof *new_ops) * n_ops);
>> + n_new_ops = 0;
>> +
>> + for (i = 0; i < n_ops; i++) {
>> + if (try_send_to_netdev(dpif, ops[i])) {
>> + ops[i]->error = 0;
>> + } else {
>> + new_ops[n_new_ops++] = ops[i];
>> + }
>> + }
>> + ops = new_ops;
> Please don't overwrite function parameters.
>
>> + }
>> +
>> + while (n_new_ops > 0) {
>> + size_t chunk = dpif_netlink_operate__(dpif, new_ops, n_new_ops);
>>
>> - while (n_ops > 0) {
>> - size_t chunk = dpif_netlink_operate__(dpif, ops, n_ops);
>> - ops += chunk;
>> - n_ops -= chunk;
>> + new_ops += chunk;
>> + n_new_ops -= chunk;
>> + }
>> +
>> + if (netdev_flow_api_enabled) {
>> + free(ops);
>> }
>> }
> I think you can get rid of the malloc/free by using a style closer to
> the one at the beginning of push_dp_ops() in ofproto-dpif-upcall.c.
More information about the dev
mailing list