[ovs-dev] [PATCHv3] netdev-provider: Apply batch object to netdev provider.

Tue Jul 19 00:06:45 UTC 2016

Thanks for reviewing the patch. I've updated patchv4 at
https://patchwork.ozlabs.org/patch/649854/

At netdev_dpdk_send__(), we do need to introduce extra variables,
other than that, I've removed the rest of extra variables.

Regards,
William

On Thu, Jul 14, 2016 at 5:34 PM, Daniele Di Proietto
<diproiettod at ovn.org> wrote:
> Hi William,
>
> Thanks for the patch, it makes sense to me!
>
> In general I think it would be better to avoid introducing extra local
> variables like 'pkts' and 'c': I think it would be more readable and the
> compiler might not always be able to optimize them (I checked the assebly
> output in a couple of functions).
>
> Few comments inline, otherwise this looks good to me
>
> 2016-06-29 13:53 GMT-07:00 William Tu <u9012063 at gmail.com>:
>>
>> Commit 1895cc8dbb64 ("dpif-netdev: create batch object") introduces
>> batch process functions and 'struct dp_packet_batch' to associate with
>> batch-level metadata.  This patch applies the packet batch object to
>> the netdev provider interface (dummy, Linux, BSD, and DPDK) so that
>> batch APIs can be used in providers.  With batch metadata visible in
>> providers, optimizations can be introduced at per-batch level instead
>> of per-packet.
>>
>> Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/141178364
>> Signed-off-by: William Tu <u9012063 at gmail.com>
>> --
>> v2->v3:
>> - fix freebsd build issue
>> v1->v2:
>> - more descriptive commit message
>> ---
>>  lib/netdev-bsd.c      |  9 ++++--
>>  lib/netdev-dpdk.c     | 81
>> +++++++++++++++++++++++++--------------------------
>>  lib/netdev-dummy.c    |  9 ++++--
>>  lib/netdev-linux.c    |  9 ++++--
>>  lib/netdev-provider.h | 25 ++++++++--------
>>  lib/netdev.c          |  6 ++--
>>  6 files changed, 72 insertions(+), 67 deletions(-)
>>
>> diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
>> index 2e92d97..6ab5048 100644
>> --- a/lib/netdev-bsd.c
>> +++ b/lib/netdev-bsd.c
>> @@ -618,12 +618,13 @@ netdev_rxq_bsd_recv_tap(struct netdev_rxq_bsd *rxq,
>> struct dp_packet *buffer)
>>  }
>>
>>  static int
>> -netdev_bsd_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets,
>> -                    int *c)
>> +netdev_bsd_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch
>> *batch)
>>  {
>>      struct netdev_rxq_bsd *rxq = netdev_rxq_bsd_cast(rxq_);
>>      struct netdev *netdev = rxq->up.netdev;
>>      struct dp_packet *packet;
>> +    struct dp_packet **packets = batch->packets;
>> +    int *c = &batch->count;
>
>
> extra variable
>
>>
>>      ssize_t retval;
>>      int mtu;
>>
>> @@ -681,10 +682,12 @@ netdev_bsd_rxq_drain(struct netdev_rxq *rxq_)
>>   */
>>  static int
>>  netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
>> -                struct dp_packet **pkts, int cnt, bool may_steal)
>> +                struct dp_packet_batch *batch, bool may_steal)
>>  {
>>      struct netdev_bsd *dev = netdev_bsd_cast(netdev_);
>>      const char *name = netdev_get_name(netdev_);
>> +    int cnt = batch->count;
>> +    struct dp_packet **pkts = batch->packets;
>
>
> extra variable
>
>>
>>      int error;
>>      int i;
>>
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index 02e2c58..80ce233 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -1253,12 +1253,14 @@ netdev_dpdk_vhost_update_rx_counters(struct
>> netdev_stats *stats,
>>   */
>>  static int
>>  netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
>> -                           struct dp_packet **packets, int *c)
>> +                           struct dp_packet_batch *batch)
>>  {
>>      struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
>>      struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
>>      int qid = rxq->queue_id;
>>      struct ingress_policer *policer =
>> netdev_dpdk_get_ingress_policer(dev);
>> +    struct dp_packet **packets = batch->packets;
>> +    int *c = &batch->count;
>
>
> extra variable
>
>>
>>      uint16_t nb_rx = 0;
>>      uint16_t dropped = 0;
>>
>> @@ -1294,12 +1296,13 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
>>  }
>>
>>  static int
>> -netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet **packets,
>> -                     int *c)
>> +netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch
>> *batch)
>>  {
>>      struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq);
>>      struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
>>      struct ingress_policer *policer =
>> netdev_dpdk_get_ingress_policer(dev);
>> +    struct dp_packet **packets = batch->packets;
>> +    int *c = &batch->count;
>
>
> extra variable
>
>>
>>      int nb_rx;
>>      int dropped = 0;
>>
>> @@ -1373,11 +1376,13 @@ netdev_dpdk_vhost_update_tx_counters(struct
>> netdev_stats *stats,
>>
>>  static void
>>  __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
>> -                         struct dp_packet **pkts, int cnt,
>> +                         struct dp_packet_batch *batch,
>>                           bool may_steal)
>>  {
>>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>>      struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
>> +    struct dp_packet **pkts = batch->packets;
>> +    int cnt = batch->count;
>
>
> extra variable
>
>>
>>      struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;
>>      unsigned int total_pkts = cnt;
>>      unsigned int qos_pkts = cnt;
>> @@ -1425,11 +1430,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int
>> qid,
>>
>>  out:
>>      if (may_steal) {
>> -        int i;
>> -
>> -        for (i = 0; i < total_pkts; i++) {
>> -            dp_packet_delete(pkts[i]);
>> -        }
>> +        dp_packet_delete_batch(batch, may_steal);
>>      }
>
>
> As discussed offline the if becomes unnecessary.  Maybe it's more clear if
> we remove it?
>
>>
>>  }
>>
>> @@ -1464,18 +1465,19 @@ dpdk_queue_pkts(struct netdev_dpdk *dev, int qid,
>>
>>  /* Tx function. Transmit packets indefinitely */
>>  static void
>> -dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet **pkts,
>> -                int cnt)
>> -    OVS_NO_THREAD_SAFETY_ANALYSIS
>> +dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch
>> *batch)
>> +OVS_NO_THREAD_SAFETY_ANALYSIS
>>  {
>>  #if !defined(__CHECKER__) && !defined(_WIN32)
>> -    const size_t PKT_ARRAY_SIZE = cnt;
>> +    const size_t PKT_ARRAY_SIZE = batch->count;
>>  #else
>>      /* Sparse or MSVC doesn't like variable length array. */
>>      enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
>>  #endif
>>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>>      struct rte_mbuf *mbufs[PKT_ARRAY_SIZE];
>> +    struct dp_packet **pkts = batch->packets;
>> +    int cnt = batch->count;
>
>
> extra variables
>
>>
>>      int dropped = 0;
>>      int newcnt = 0;
>>      int i;
>> @@ -1519,7 +1521,12 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
>> struct dp_packet **pkts,
>>      }
>>
>>      if (dev->type == DPDK_DEV_VHOST) {
>> -        __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **)
>> mbufs, newcnt, true);
>> +        struct dp_packet_batch mbatch;
>> +
>> +        dp_packet_batch_init(&mbatch);
>> +        mbatch.count = newcnt;
>> +        memcpy(mbatch.packets, mbufs, newcnt * sizeof(struct rte_mbuf
>> *));
>> +        __netdev_dpdk_vhost_send(netdev, qid, &mbatch, true);
>
>
> Even though the performance here doesn't really matter tha much (we're on
> dpdk_do_tx_copy,
> meaning that the packet is coming from a non DPDK source or that we have
> multiple outputs),
> I'd still like to avoid the memcpy().
>
> Maybe we can keep the __netdev_dpdk_vhost_send signature as it is (keep the
> packet and count
> parameter separate) to avoid it?
>
>>
>>      } else {
>>          unsigned int qos_pkts = newcnt;
>>
>> @@ -1543,37 +1550,30 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
>> struct dp_packet **pkts,
>>  }
>>
>>  static int
>> -netdev_dpdk_vhost_send(struct netdev *netdev, int qid, struct dp_packet
>> **pkts,
>> -                 int cnt, bool may_steal)
>> +netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
>> +                       struct dp_packet_batch *batch,
>> +                       bool may_steal)
>>  {
>> -    if (OVS_UNLIKELY(pkts[0]->source != DPBUF_DPDK)) {
>> -        int i;
>>
>> -        dpdk_do_tx_copy(netdev, qid, pkts, cnt);
>> +    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
>> +        dpdk_do_tx_copy(netdev, qid, batch);
>>          if (may_steal) {
>> -            for (i = 0; i < cnt; i++) {
>> -                dp_packet_delete(pkts[i]);
>> -            }
>> +            dp_packet_delete_batch(batch, may_steal);
>>          }
>>      } else {
>> -        int i;
>> -
>> -        for (i = 0; i < cnt; i++) {
>> -            int cutlen = dp_packet_get_cutlen(pkts[i]);
>> -
>> -            dp_packet_set_size(pkts[i], dp_packet_size(pkts[i]) -
>> cutlen);
>> -            dp_packet_reset_cutlen(pkts[i]);
>> -        }
>> -        __netdev_dpdk_vhost_send(netdev, qid, pkts, cnt, may_steal);
>> +        dp_packet_batch_apply_cutlen(batch);
>> +        __netdev_dpdk_vhost_send(netdev, qid, batch, may_steal);
>>      }
>>      return 0;
>>  }
>>
>>  static inline void
>>  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>> -                   struct dp_packet **pkts, int cnt, bool may_steal)
>> +                   struct dp_packet_batch *batch, bool may_steal)
>>  {
>>      int i;
>> +    struct dp_packet **pkts = batch->packets;
>> +    int cnt = batch->count;
>
>
> extra variables
>
>>
>>
>>      if (OVS_UNLIKELY(dev->txq_needs_locking)) {
>>          qid = qid % dev->real_n_txq;
>> @@ -1584,12 +1584,10 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int
>> qid,
>>                       pkts[0]->source != DPBUF_DPDK)) {
>>          struct netdev *netdev = &dev->up;
>>
>> -        dpdk_do_tx_copy(netdev, qid, pkts, cnt);
>> +        dpdk_do_tx_copy(netdev, qid, batch);
>>
>>          if (may_steal) {
>> -            for (i = 0; i < cnt; i++) {
>> -                dp_packet_delete(pkts[i]);
>> -            }
>> +            dp_packet_delete_batch(batch, may_steal);
>>          }
>>      } else {
>>          int next_tx_idx = 0;
>> @@ -1649,11 +1647,11 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int
>> qid,
>>
>>  static int
>>  netdev_dpdk_eth_send(struct netdev *netdev, int qid,
>> -                     struct dp_packet **pkts, int cnt, bool may_steal)
>> +                     struct dp_packet_batch *batch, bool may_steal)
>>  {
>>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>>
>> -    netdev_dpdk_send__(dev, qid, pkts, cnt, may_steal);
>> +    netdev_dpdk_send__(dev, qid, batch, may_steal);
>>      return 0;
>>  }
>>
>> @@ -2648,20 +2646,21 @@ dpdk_ring_open(const char dev_name[], unsigned int
>> *eth_port_id) OVS_REQUIRES(dp
>>
>>  static int
>>  netdev_dpdk_ring_send(struct netdev *netdev, int qid,
>> -                      struct dp_packet **pkts, int cnt, bool may_steal)
>> +                      struct dp_packet_batch *batch, bool may_steal)
>>  {
>>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>> +    struct dp_packet **pkts = batch->packets;
>
>
> extra variable
>
>>
>>      unsigned i;
>>
>>      /* When using 'dpdkr' and sending to a DPDK ring, we want to ensure
>> that the
>>       * rss hash field is clear. This is because the same mbuf may be
>> modified by
>>       * the consumer of the ring and return into the datapath without
>> recalculating
>>       * the RSS hash. */
>> -    for (i = 0; i < cnt; i++) {
>> +    for (i = 0; i < batch->count; i++) {
>>          dp_packet_rss_invalidate(pkts[i]);
>>      }
>>
>> -    netdev_dpdk_send__(dev, qid, pkts, cnt, may_steal);
>> +    netdev_dpdk_send__(dev, qid, batch, may_steal);
>>      return 0;
>>  }
>>
>> diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
>> index 24c107e..6b1d3a3 100644
>> --- a/lib/netdev-dummy.c
>> +++ b/lib/netdev-dummy.c
>> @@ -964,12 +964,13 @@ netdev_dummy_rxq_dealloc(struct netdev_rxq *rxq_)
>>  }
>>
>>  static int
>> -netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **arr,
>> -                      int *c)
>> +netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch
>> *batch)
>>  {
>>      struct netdev_rxq_dummy *rx = netdev_rxq_dummy_cast(rxq_);
>>      struct netdev_dummy *netdev = netdev_dummy_cast(rx->up.netdev);
>>      struct dp_packet *packet;
>> +    struct dp_packet **arr = batch->packets;
>> +    int *c = &batch->count;
>
>
> extra variables
>
>>
>>
>>      ovs_mutex_lock(&netdev->mutex);
>>      if (!ovs_list_is_empty(&rx->recv_queue)) {
>> @@ -1047,10 +1048,12 @@ netdev_dummy_rxq_drain(struct netdev_rxq *rxq_)
>>
>>  static int
>>  netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED,
>> -                  struct dp_packet **pkts, int cnt, bool may_steal)
>> +                  struct dp_packet_batch *batch, bool may_steal)
>>  {
>>      struct netdev_dummy *dev = netdev_dummy_cast(netdev);
>>      int error = 0;
>> +    int cnt = batch->count;
>> +    struct dp_packet **pkts = batch->packets;
>
>
> extra variables
>
>>
>>      int i;
>>
>>      for (i = 0; i < cnt; i++) {
>> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
>> index ef11d12..594d437 100644
>> --- a/lib/netdev-linux.c
>> +++ b/lib/netdev-linux.c
>> @@ -1091,12 +1091,13 @@ netdev_linux_rxq_recv_tap(int fd, struct dp_packet
>> *buffer)
>>  }
>>
>>  static int
>> -netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet
>> **packets,
>> -                      int *c)
>> +netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch
>> *batch)
>>  {
>>      struct netdev_rxq_linux *rx = netdev_rxq_linux_cast(rxq_);
>>      struct netdev *netdev = rx->up.netdev;
>>      struct dp_packet *buffer;
>> +    struct dp_packet **packets = batch->packets;
>> +    int *c = &batch->count;
>
>
> extra variables
>
>>
>>      ssize_t retval;
>>      int mtu;
>>
>> @@ -1161,10 +1162,12 @@ netdev_linux_rxq_drain(struct netdev_rxq *rxq_)
>>   * expected to do additional queuing of packets. */
>>  static int
>>  netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
>> -                  struct dp_packet **pkts, int cnt, bool may_steal)
>> +                  struct dp_packet_batch *batch, bool may_steal)
>>  {
>>      int i;
>>      int error = 0;
>> +    int cnt = batch->count;
>> +    struct dp_packet **pkts = batch->packets;
>
>
> extra variables
>
>>
>>
>>      /* 'i' is incremented only if there's no error */
>>      for (i = 0; i < cnt;) {
>> diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
>> index 5da377f..8c7c8ea 100644
>> --- a/lib/netdev-provider.h
>> +++ b/lib/netdev-provider.h
>> @@ -340,8 +340,8 @@ struct netdev_class {
>>       * network device from being usefully used by the netdev-based
>> "userspace
>>       * datapath".  It will also prevent the OVS implementation of bonding
>> from
>>       * working properly over 'netdev'.) */
>> -    int (*send)(struct netdev *netdev, int qid, struct dp_packet
>> **buffers,
>> -                int cnt, bool may_steal);
>> +    int (*send)(struct netdev *netdev, int qid, struct dp_packet_batch
>> *batch,
>> +                bool may_steal);
>>
>>      /* Registers with the poll loop to wake up from the next call to
>>       * poll_block() when the packet transmission queue for 'netdev' has
>> @@ -727,25 +727,24 @@ struct netdev_class {
>>      void (*rxq_destruct)(struct netdev_rxq *);
>>      void (*rxq_dealloc)(struct netdev_rxq *);
>>
>> -    /* Attempts to receive a batch of packets from 'rx'.  The caller
>> supplies
>> -     * 'pkts' as the pointer to the beginning of an array of MAX_RX_BATCH
>> -     * pointers to dp_packet.  If successful, the implementation stores
>> -     * pointers to up to MAX_RX_BATCH dp_packets into the array,
>> transferring
>> -     * ownership of the packets to the caller, stores the number of
>> received
>> -     * packets into '*cnt', and returns 0.
>> +    /* Attempts to receive a batch of packets from 'batch'.  In batch,
>> the
>
>
> "packets from 'rx'."
>
>>
>> +     * caller supplies 'packets' as the pointer to the beginning of an
>> array
>> +     * of MAX_RX_BATCH pointers to dp_packet.  If successful, the
>> +     * implementation stores pointers to up to MAX_RX_BATCH dp_packets
>> into
>> +     * the array, transferring ownership of the packets to the caller,
>> stores
>> +     * the number of received packets into 'count', and returns 0.
>>       *
>>       * The implementation does not necessarily initialize any non-data
>> members
>> -     * of 'pkts'.  That is, the caller must initialize layer pointers and
>> -     * metadata itself, if desired, e.g. with pkt_metadata_init() and
>> -     * miniflow_extract().
>> +     * of 'packets' in 'batch'.  That is, the caller must initialize
>> layer
>> +     * pointers and metadata itself, if desired, e.g. with
>> pkt_metadata_init()
>> +     * and miniflow_extract().
>>       *
>>       * Implementations should allocate buffers with DP_NETDEV_HEADROOM
>> bytes of
>>       * headroom.
>>       *
>>       * Returns EAGAIN immediately if no packet is ready to be received or
>>       * another positive errno value if an error was encountered. */
>> -    int (*rxq_recv)(struct netdev_rxq *rx, struct dp_packet **pkts,
>> -                    int *cnt);
>> +    int (*rxq_recv)(struct netdev_rxq *rx, struct dp_packet_batch
>> *batch);
>>
>>      /* Registers with the poll loop to wake up from the next call to
>>       * poll_block() when a packet is ready to be received with
>> diff --git a/lib/netdev.c b/lib/netdev.c
>> index 6651173..405bf41 100644
>> --- a/lib/netdev.c
>> +++ b/lib/netdev.c
>> @@ -625,7 +625,7 @@ netdev_rxq_recv(struct netdev_rxq *rx, struct
>> dp_packet_batch *batch)
>>  {
>>      int retval;
>>
>> -    retval = rx->netdev->netdev_class->rxq_recv(rx, batch->packets,
>> &batch->count);
>> +    retval = rx->netdev->netdev_class->rxq_recv(rx,  batch);
>>      if (!retval) {
>>          COVERAGE_INC(netdev_received);
>>      } else {
>> @@ -711,9 +711,7 @@ netdev_send(struct netdev *netdev, int qid, struct
>> dp_packet_batch *batch,
>>          return EOPNOTSUPP;
>>      }
>>
>> -    int error = netdev->netdev_class->send(netdev, qid,
>> -                                           batch->packets, batch->count,
>> -                                           may_steal);
>> +    int error = netdev->netdev_class->send(netdev, qid, batch,
>> may_steal);
>>      if (!error) {
>>          COVERAGE_INC(netdev_sent);
>>          if (!may_steal) {
>> --
>> 2.5.0
>>
>> _______________________________________________
>> dev mailing list
>> dev at openvswitch.org
>> http://openvswitch.org/mailman/listinfo/dev
>
>