[ovs-dev] [PATCH v2] netdev-dpdk: Execute QoS Checking before copying to mbuf

Stokes, Ian ian.stokes at intel.com
Sun Sep 3 10:46:24 UTC 2017


OK,

In my own testing I’m seeing the same behavior with a tap interface pinging to a bridge with a DPDK device. As such I think this should be ok.

Acked-by: ian.stokes at intel.com<mailto:ian.stokes at intel.com>

Ian

From: Gao Zhenyu [mailto:sysugaozhenyu at gmail.com]
Sent: Thursday, August 31, 2017 2:56 AM
To: Stokes, Ian <ian.stokes at intel.com>
Cc: dev at openvswitch.org
Subject: Re: [PATCH v2] netdev-dpdk: Execute QoS Checking before copying to mbuf

Here is the the testing I just did:
1. Add log in function netdev_dpdk_policer_pkt_handle
static inline bool
netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm *meter,
                               struct rte_mbuf *pkt, uint64_t time)
{
    uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct ether_hdr);
    VLOG_WARN("GGGG, payload_len:%u, rte_pkt_len:%u",
              pkt_len, rte_pktmbuf_pkt_len(pkt)); <------------------------print out pkt_len

    return rte_meter_srtcm_color_blind_check(meter, time, pkt_len) ==
                                                e_RTE_METER_GREEN;
}
2. rebuild a rpm, install in machine, enable DPDK.
3. Create a container on the bridge(userspace bridge). (The container use veth device)
4. Config qos on eth dpdk eth by using : ovs-vsctl set port dpdk-uplink-eth3 qos=@newqos -- --id=@newqos create qos  type=egress-policer other-config:cir=1250000 other-config:cbs=2048
5. execute ping on the container and monitor ovs-vswitchd.log. Then I can see:

2017-08-31T01:39:28.036Z|00133|netdev_dpdk|WARN|GGGG, payload_len:88, rte_pkt_len:102
2017-08-31T01:39:29.036Z|00134|netdev_dpdk|WARN|GGGG, payload_len:88, rte_pkt_len:102
2017-08-31T01:39:30.036Z|00135|netdev_dpdk|WARN|GGGG, payload_len:88, rte_pkt_len:102
2017-08-31T01:39:31.036Z|00136|netdev_dpdk|WARN|GGGG, payload_len:88, rte_pkt_len:102
So the mbuf->pkt_len has a correct length.
On the code side, we have function dp_packet_set_size(), if ovs compiled with dpdk, this function set the mbuf.data_len and mbuf.pkt_len. Many functions consume dp_packet_set_size() like netdev_linux_rxq_recv_sock in Netdev-linux.c

#ifdef DPDK_NETDEV
......

static inline void
dp_packet_set_size(struct dp_packet *b, uint32_t v)
{
.....
    b->mbuf.data_len = (uint16_t)v;  /* Current seg length. */
    b->mbuf.pkt_len = v;             /* Total length of all segments linked to
                                      * this segment. */
}
Please let me know if you have any concern on it.

Thanks
Zhenyu Gao

2017-08-30 23:18 GMT+08:00 Stokes, Ian <ian.stokes at intel.com<mailto:ian.stokes at intel.com>>:
> In dpdk_do_tx_copy function, all packets were copied to mbuf first, but
> QoS checking may drop some of them.
> Move the QoS checking in front of copying data to mbuf, it helps to reduce
> useless copy.

Hi Zhenyu, thanks for the patch, I agree with the objective of the patch but have some comments below I'd like to see addressed/tested before signing off.

>
> Signed-off-by: Zhenyu Gao <sysugaozhenyu at gmail.com<mailto:sysugaozhenyu at gmail.com>>
> ---
>  lib/netdev-dpdk.c | 55 ++++++++++++++++++++++++++++++++++++--------------
> -----
>  1 file changed, 36 insertions(+), 19 deletions(-)
>
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 1aaf6f7..50874b8
> 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -279,7 +279,7 @@ struct dpdk_qos_ops {
>       * For all QoS implementations it should always be non-null.
>       */
>      int (*qos_run)(struct qos_conf *qos_conf, struct rte_mbuf **pkts,
> -                   int pkt_cnt);
> +                   int pkt_cnt, bool may_steal);
>  };
>
>  /* dpdk_qos_ops for each type of user space QoS implementation */ @@ -
> 1501,7 +1501,8 @@ netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm
> *meter,
>
>  static int
>  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
> -                        struct rte_mbuf **pkts, int pkt_cnt)
> +                        struct rte_mbuf **pkts, int pkt_cnt,
> +                        bool may_steal)
>  {
>      int i = 0;
>      int cnt = 0;
> @@ -1517,7 +1518,9 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
> *meter,
>              }
>              cnt++;
>          } else {
> -            rte_pktmbuf_free(pkt);
> +            if (may_steal) {
> +                rte_pktmbuf_free(pkt);
> +            }
>          }
>      }
>
> @@ -1526,12 +1529,13 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
> *meter,
>
>  static int
>  ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf
> **pkts,
> -                    int pkt_cnt)
> +                    int pkt_cnt, bool may_steal)
>  {
>      int cnt = 0;
>
>      rte_spinlock_lock(&policer->policer_lock);
> -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, pkt_cnt);
> +    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
> +                                  pkt_cnt, may_steal);
>      rte_spinlock_unlock(&policer->policer_lock);
>
>      return cnt;
> @@ -1635,7 +1639,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
>          dropped = nb_rx;
>          nb_rx = ingress_policer_run(policer,
>                                      (struct rte_mbuf **) batch->packets,
> -                                    nb_rx);
> +                                    nb_rx, true);
>          dropped -= nb_rx;
>      }
>
> @@ -1672,7 +1676,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct
> dp_packet_batch *batch)
>          dropped = nb_rx;
>          nb_rx = ingress_policer_run(policer,
>                                      (struct rte_mbuf **) batch->packets,
> -                                    nb_rx);
> +                                    nb_rx, true);
>          dropped -= nb_rx;
>      }
>
> @@ -1690,13 +1694,13 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq,
> struct dp_packet_batch *batch)
>
>  static inline int
>  netdev_dpdk_qos_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
> -                    int cnt)
> +                    int cnt, bool may_steal)
>  {
>      struct qos_conf *qos_conf = ovsrcu_get(struct qos_conf *, &dev-
> >qos_conf);
>
>      if (qos_conf) {
>          rte_spinlock_lock(&qos_conf->lock);
> -        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt);
> +        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt, may_steal);
>          rte_spinlock_unlock(&qos_conf->lock);
>      }
>
> @@ -1770,7 +1774,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int
> qid,
>
>      cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
>      /* Check has QoS has been configured for the netdev */
> -    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt);
> +    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);
>      dropped = total_pkts - cnt;
>
>      do {
> @@ -1816,10 +1820,22 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
> struct dp_packet_batch *batch)  #endif
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>      struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
> +    int txcnt_eth = batch->count;
>      int dropped = 0;
>      int newcnt = 0;
>      int i;
>
> +    if (dev->type != DPDK_DEV_VHOST) {
> +        /* Check if QoS has been configured for this netdev. */
> +        txcnt_eth = netdev_dpdk_qos_run(dev,
> +                                        (struct rte_mbuf **)batch-
> >packets,
> +                                        txcnt_eth, false);
So dpdk_do_tx_copy will be called as part of netdev_dpdk_eth_send__ if its triggered by the following conditions

    if (OVS_UNLIKELY(!may_steal ||
                     batch->packets[0]->source != DPBUF_DPDK)

What will happen in above if the source of the packet is not DPBUF_DPDK?

For the most part it will be ok until 'netdev_dpdk_policer_pkt_handle()' is called later on.

This has specific DPDK calls that expect an mbuf source for the packet.

Previously QoS took place after the packet had been copied so this was not an issue but now we have DPDK rte functions being called on a non mbuf source packet. I'm not sure of the behavior that could occur in this case. Could an incorrect length be returned for the call rte_pktmbuf_pkt_len(pkt) in 'netdev_dpdk_policer_pkt_handle()'?

It could be the case then that non DPDK specific call maybe required to attain the length of the packet in 'netdev_dpdk_policer_pkt_handle()' before calling the meter itself.

Have you tested this use case?

Ian

> +        if (txcnt_eth == 0) {
> +            dropped = batch->count;
> +            goto out;
> +        }
> +    }
> +
>      dp_packet_batch_apply_cutlen(batch);
>
>      for (i = 0; i < batch->count; i++) { @@ -1848,21 +1864,20 @@
> dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch
> *batch)
>          rte_pktmbuf_pkt_len(pkts[newcnt]) = size;
>
>          newcnt++;
> +        if (dev->type != DPDK_DEV_VHOST && newcnt >= txcnt_eth) {
> +            dropped += batch->count - i - 1;
> +            break;
> +        }
>      }
>
>      if (dev->type == DPDK_DEV_VHOST) {
>          __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,
>                                   newcnt);
>      } else {
> -        unsigned int qos_pkts = newcnt;
> -
> -        /* Check if QoS has been configured for this netdev. */
> -        newcnt = netdev_dpdk_qos_run(dev, pkts, newcnt);
> -
> -        dropped += qos_pkts - newcnt;
>          dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, newcnt);
>      }
>
> +out:
>      if (OVS_UNLIKELY(dropped)) {
>          rte_spinlock_lock(&dev->stats_lock);
>          dev->stats.tx_dropped += dropped; @@ -1915,7 +1930,7 @@
> netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>          dp_packet_batch_apply_cutlen(batch);
>
>          cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt);
> -        cnt = netdev_dpdk_qos_run(dev, pkts, cnt);
> +        cnt = netdev_dpdk_qos_run(dev, pkts, cnt, true);
>          dropped = batch->count - cnt;
>
>          dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt); @@ -
> 3132,13 +3147,15 @@ egress_policer_qos_is_equal(const struct qos_conf
> *conf,  }
>
>  static int
> -egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int
> pkt_cnt)
> +egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int
> pkt_cnt,
> +                   bool may_steal)
>  {
>      int cnt = 0;
>      struct egress_policer *policer =
>          CONTAINER_OF(conf, struct egress_policer, qos_conf);
>
> -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts, pkt_cnt);
> +    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
> +                                  pkt_cnt, may_steal);
>
>      return cnt;
>  }
> --
> 1.8.3.1



More information about the dev mailing list