[ovs-dev] [PATCH ovs V2 09/21] dpif-netlink: Dump netdevs flows on flow dump

Paul Blakey paulb at mellanox.com
Sun Jan 22 16:08:36 UTC 2017



On 05/01/2017 23:27, Joe Stringer wrote:
> On 25 December 2016 at 03:39, Paul Blakey <paulb at mellanox.com> wrote:
>> While dumping flows, dump flows that were offloaded to
>> netdev and parse them back to dpif flow.
>>
>> Signed-off-by: Paul Blakey <paulb at mellanox.com>
>> Reviewed-by: Roi Dayan <roid at mellanox.com>
>> ---
>>  lib/dpif-netlink.c | 179 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 179 insertions(+)
>>
>> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>> index 36f2888..3d8940e 100644
>> --- a/lib/dpif-netlink.c
>> +++ b/lib/dpif-netlink.c
>> @@ -38,6 +38,7 @@
>>  #include "flow.h"
>>  #include "fat-rwlock.h"
>>  #include "netdev.h"
>> +#include "netdev-provider.h"
>>  #include "netdev-linux.h"
>>  #include "netdev-vport.h"
>>  #include "netlink-conntrack.h"
>> @@ -55,6 +56,7 @@
>>  #include "unaligned.h"
>>  #include "util.h"
>>  #include "openvswitch/vlog.h"
>> +#include "openvswitch/match.h"
>>
>>  VLOG_DEFINE_THIS_MODULE(dpif_netlink);
>>  #ifdef _WIN32
>> @@ -68,6 +70,8 @@ enum { MAX_PORTS = USHRT_MAX };
>>   * missing if we have old headers. */
>>  #define ETH_FLAG_LRO      (1 << 15)    /* LRO is enabled */
>>
>> +#define FLOW_DUMP_MAX_BATCH 50
>> +
>>  struct dpif_netlink_dp {
>>      /* Generic Netlink header. */
>>      uint8_t cmd;
>> @@ -1355,6 +1359,10 @@ struct dpif_netlink_flow_dump {
>>      struct dpif_flow_dump up;
>>      struct nl_dump nl_dump;
>>      atomic_int status;
>> +    struct netdev_flow_dump **netdev_dumps;
>> +    int netdev_num;
>> +    int netdev_given;
>> +    struct ovs_mutex netdev_lock;
>
> Could you add a brief comment above these variables that describes
> their use? (It's also common in OVS code to mention that, eg,
> netdev_lock protects the following elements. )
>
> If there's a more descriptive name than "netdev_num", like
> netdev_max_dumps or something then please use that instead. At a
> glance, "given" and "num" don't provide particularly much context
> about how they relate to each other or to the dump.
>
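Sure, something along these lines, perhaps? (The field names below are
only suggestions, to show the intent, not necessarily what V3 will use.)

    struct dpif_netlink_flow_dump {
        struct dpif_flow_dump up;
        struct nl_dump nl_dump;
        atomic_int status;
        struct netdev_flow_dump **netdev_dumps; /* One dump per
                                                 * offload-capable netdev. */
        int netdev_max_dumps;         /* Number of entries in
                                       * 'netdev_dumps'. */
        struct ovs_mutex netdev_lock; /* Protects the following. */
        int netdev_given OVS_GUARDED; /* Index of next dump to hand out. */
    };
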
>>  };
>>
>>  static struct dpif_netlink_flow_dump *
>> @@ -1363,6 +1371,34 @@ dpif_netlink_flow_dump_cast(struct dpif_flow_dump *dump)
>>      return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up);
>>  }
>>
>> +static void start_netdev_dump(const struct dpif *dpif_,
>> +                              struct dpif_netlink_flow_dump *dump) {
>> +
>> +    if (!netdev_flow_api_enabled) {
>> +        dump->netdev_num = 0;
>> +        return;
>> +    }
>
> Typically for style we still place all variable declarations at the
> top of a function, in a christmas tree long lines to short lines,
> before functional code like this.
>
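Will fix, e.g. declarations first, longest line to shortest:

    static void
    start_netdev_dump(const struct dpif *dpif_,
                      struct dpif_netlink_flow_dump *dump)
    {
        struct netdev_list_element *element;
        struct ovs_list port_list;
        int ports;
        int i;

        if (!netdev_flow_api_enabled) {
            dump->netdev_num = 0;
            return;
        }
        ...
    }
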
>> +
>> +    struct netdev_list_element *element;
>> +    struct ovs_list port_list;
>> +    int ports = netdev_hmap_port_get_list(dpif_->dpif_class, &port_list);
>> +    int i = 0;
>> +
>> +    dump->netdev_dumps =
>> +        ports ? xzalloc(sizeof(struct netdev_flow_dump *) * ports) : 0;
>
> Can this be sizeof(dump->netdev_dumps)?
>
>> +    dump->netdev_num = ports;
>> +    dump->netdev_given = 0;
>> +
>> +    LIST_FOR_EACH(element, node, &port_list) {
>> +        dump->netdev_dumps[i] = netdev_flow_dump_create(element->netdev);
>> +        dump->netdev_dumps[i]->port = element->port_no;
>> +        i++;
>> +    }
>
> As a matter of style, it's easier to see that this loop is bounded by
> 'ports' (and that number is correct) if it's structured as
>
>     for (i = 0; i < ports; i++) {
>         element = get_next_node;
>         ...
>     }
>
> Also, it seems that even if the netdev doesn't support flow_dump, we
> allocate a netdev_flow_dump and add it to the netdev_dumps here..
> perhaps we could/should skip it for these netdevs instead?
>
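Will do. A rough sketch combining both suggestions (the
netdev_supports_flow_dump() predicate is hypothetical here, standing in
for whatever check the flow API ends up providing):

    i = 0;
    LIST_FOR_EACH (element, node, &port_list) {
        /* Hypothetical helper: skip netdevs that can't dump flows. */
        if (netdev_supports_flow_dump(element->netdev)) {
            dump->netdev_dumps[i] = netdev_flow_dump_create(element->netdev);
            dump->netdev_dumps[i]->port = element->port_no;
            i++;
        }
    }
    dump->netdev_num = i;
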
>> +    netdev_port_list_del(&port_list);
>> +
>> +    ovs_mutex_init(&dump->netdev_lock);
>
> I don't see a corresponding ovs_mutex_destroy() call for this.
>
>> +}
>> +
>>  static struct dpif_flow_dump *
>>  dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse)
>>  {
>> @@ -1387,6 +1423,8 @@ dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse)
>>      atomic_init(&dump->status, 0);
>>      dump->up.terse = terse;
>>
>> +    start_netdev_dump(dpif_, dump);
>> +
>>      return &dump->up;
>>  }
>>
>> @@ -1397,6 +1435,16 @@ dpif_netlink_flow_dump_destroy(struct dpif_flow_dump *dump_)
>>      unsigned int nl_status = nl_dump_done(&dump->nl_dump);
>>      int dump_status;
>>
>> +    if (netdev_flow_api_enabled) {
>> +        for (int i = 0; i < dump->netdev_num; i++) {
>> +            int err = netdev_flow_dump_destroy(dump->netdev_dumps[i]);
>> +            if (err != 0 && err != EOPNOTSUPP) {
>> +                VLOG_ERR("failed dumping netdev: %s", ovs_strerror(err));
>> +            }
>> +        }
>> +        free(dump->netdev_dumps);
>> +    }
>
> You don't really need to check for netdev_flow_api_enabled here;
> netdev_num will be 0 if it is disabled, so that for loop turns into a
> no-op; then you could initialize dump->netdev_dumps to NULL in that
> case and unconditionally free it. It's a bit simpler to read the code
> if you don't have to think about whether or not hardware offloads are
> enabled.
>
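Right, good point. With dump->netdev_dumps initialized to NULL and
netdev_num to 0 when offloads are off, the destroy path needs no feature
check at all, and can also destroy the mutex per the earlier comment
(assuming ovs_mutex_init() moves before the early return so it always
runs); something like:

    for (int i = 0; i < dump->netdev_num; i++) {
        int err = netdev_flow_dump_destroy(dump->netdev_dumps[i]);

        if (err != 0 && err != EOPNOTSUPP) {
            VLOG_ERR("failed dumping netdev: %s", ovs_strerror(err));
        }
    }
    free(dump->netdev_dumps);
    ovs_mutex_destroy(&dump->netdev_lock);
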
>> +
>>      /* No other thread has access to 'dump' at this point. */
>>      atomic_read_relaxed(&dump->status, &dump_status);
>>      free(dump);
>> @@ -1410,6 +1458,11 @@ struct dpif_netlink_flow_dump_thread {
>>      struct dpif_flow_stats stats;
>>      struct ofpbuf nl_flows;     /* Always used to store flows. */
>>      struct ofpbuf *nl_actions;  /* Used if kernel does not supply actions. */
>> +    struct odputil_keybuf keybuf[FLOW_DUMP_MAX_BATCH];
>> +    struct odputil_keybuf maskbuf[FLOW_DUMP_MAX_BATCH];
>> +    struct odputil_keybuf actbuf[FLOW_DUMP_MAX_BATCH];
>> +    int netdev_cur_dump;
>> +    bool netdev_done;
>
> I wonder if it's worthwhile to reuse 'nl_flows' to store all of these
> netlink-formatted key/mask/acts instead of having these keybufs? It
> seems that it is currently unused for the first half of the
> dpif_netlink_flow_dump while the flows are being dumped from the
> netdev.
>
> Regardless of the above question, I also question whether
> FLOW_DUMP_MAX_BATCH is too big for dumping from the kernel. How many
> tc flows will we really get from the kernel at once?

Hi,
Unlike dpif_netlink dumping nl_flows, which already receives the
correct netlink stream format and can just keep pointers into it, we
need temporary buffers to store the conversion from struct match to
dpif_flow key/mask/actions netlink attributes.
So that's KEY_SIZE * 2 * DUMP_MAX (size * (key + mask) * max) right
there, and the same again for actions (though that one can probably be
smaller). We can't reuse nl_flows for that, because nl_dump_next uses
it to keep the buffer filled with multiple replies.
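
To make the buffer use concrete, here is a rough sketch of that
conversion using the existing odp-util helpers (the function name is
made up for illustration; keybuf/maskbuf are the per-batch scratch
buffers from the patch):

    /* Sketch only: turn a dumped netdev flow's struct match back into
     * dpif_flow netlink key/mask attributes, one scratch buffer per
     * attribute set. */
    static void
    match_to_dpif_flow_sketch(const struct match *match,
                              struct odputil_keybuf *keybuf,
                              struct odputil_keybuf *maskbuf,
                              struct dpif_flow *flow)
    {
        struct odp_flow_key_parms parms = {
            .flow = &match->flow,
            .mask = &match->wc.masks,
        };
        struct ofpbuf key, mask;

        ofpbuf_use_stack(&key, keybuf, sizeof *keybuf);
        odp_flow_key_from_flow(&parms, &key);
        flow->key = key.data;
        flow->key_len = key.size;

        ofpbuf_use_stack(&mask, maskbuf, sizeof *maskbuf);
        odp_flow_key_from_mask(&parms, &mask);
        flow->mask = mask.data;
        flow->mask_len = mask.size;
    }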


A flow described in kernel datapath netlink format and in tc format
should be about the same size, but tc doesn't support dumping only
stats (terse). NL_DUMP_BUFSIZE should be enough for ~30 flows
(20 bytes for struct tcmsg, 32 for MACs & masks, 12 for eth_type and
ip_proto, 12 per port, plus actions and stats... not all are always
present). But doesn't nl_dump_next refill it anyway (i.e. it won't
just stop at the buffer size)?
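
(Rough arithmetic, assuming NL_DUMP_BUFSIZE is 4 kB: 20 + 32 + 12 +
2 * 12 is roughly 90 bytes of match data per flow, call it ~130 with
actions and stats, and 4096 / 130 is about 30 flows per buffer.)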


<snip>

