[ovs-dev] [PATCH v2] netdev-dpdk: Execute QoS Checking before copying to mbuf

Gao Zhenyu sysugaozhenyu at gmail.com
Thu Aug 31 01:56:21 UTC 2017


Here is the the testing I just did:

1. Add log in function netdev_dpdk_policer_pkt_handle
static inline
bool
netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm
*meter,
                               struct rte_mbuf *pkt, uint64_t
time)
{
    uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct
ether_hdr);
    VLOG_WARN("GGGG, payload_len:%u,
rte_pkt_len:%u",
              pkt_len, rte_pktmbuf_pkt_len(pkt));
<------------------------print out pkt_len


    return rte_meter_srtcm_color_blind_check(meter, time, pkt_len)
==

e_RTE_METER_GREEN;
}

2. rebuild a rpm, install in machine, enable DPDK.
3. Create a container on the bridge(userspace bridge). (The container use
veth device)
4. Config qos on eth dpdk eth by using : ovs-vsctl set port
dpdk-uplink-eth3 qos=@newqos -- --id=@newqos create qos
type=egress-policer other-config:cir=1250000 other-config:cbs=2048
5. execute ping on the container and monitor ovs-vswitchd.log. Then I can
see:

2017-08-31T01:39:28.036Z|00133|netdev_dpdk|WARN|GGGG, payload_len:88,
rte_pkt_len:102
2017-08-31T01:39:29.036Z|00134|netdev_dpdk|WARN|GGGG, payload_len:88,
rte_pkt_len:102
2017-08-31T01:39:30.036Z|00135|netdev_dpdk|WARN|GGGG, payload_len:88,
rte_pkt_len:102
2017-08-31T01:39:31.036Z|00136|netdev_dpdk|WARN|GGGG, payload_len:88,
rte_pkt_len:102

So the mbuf->pkt_len has a correct length.

On the code side, we have function dp_packet_set_size(), if ovs compiled
with dpdk, this function set the mbuf.data_len and mbuf.pkt_len. Many
functions consume dp_packet_set_size() like netdev_linux_rxq_recv_sock in
Netdev-linux.c

#ifdef DPDK_NETDEV
......

static inline void
dp_packet_set_size(struct dp_packet *b, uint32_t v)
{
.....
    b->mbuf.data_len = (uint16_t)v;  /* Current seg length. */
    b->mbuf.pkt_len = v;             /* Total length of all segments linked
to
                                      * this segment. */
}

Please let me know if you have any concern on it.

Thanks
Zhenyu Gao

2017-08-30 23:18 GMT+08:00 Stokes, Ian <ian.stokes at intel.com>:

> > In dpdk_do_tx_copy function, all packets were copied to mbuf first, but
> > QoS checking may drop some of them.
> > Move the QoS checking in front of copying data to mbuf, it helps to
> reduce
> > useless copy.
>
> Hi Zhenyu, thanks for the patch, I agree with the objective of the patch
> but have some comments below I'd like to see addressed/tested before
> signing off.
>
> >
> > Signed-off-by: Zhenyu Gao <sysugaozhenyu at gmail.com>
> > ---
> >  lib/netdev-dpdk.c | 55 ++++++++++++++++++++++++++++++
> ++++++--------------
> > -----
> >  1 file changed, 36 insertions(+), 19 deletions(-)
> >
> > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 1aaf6f7..50874b8
> > 100644
> > --- a/lib/netdev-dpdk.c
> > +++ b/lib/netdev-dpdk.c
> > @@ -279,7 +279,7 @@ struct dpdk_qos_ops {
> >       * For all QoS implementations it should always be non-null.
> >       */
> >      int (*qos_run)(struct qos_conf *qos_conf, struct rte_mbuf **pkts,
> > -                   int pkt_cnt);
> > +                   int pkt_cnt, bool may_steal);
> >  };
> >
> >  /* dpdk_qos_ops for each type of user space QoS implementation */ @@ -
> > 1501,7 +1501,8 @@ netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm
> > *meter,
> >
> >  static int
> >  netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
> > -                        struct rte_mbuf **pkts, int pkt_cnt)
> > +                        struct rte_mbuf **pkts, int pkt_cnt,
> > +                        bool may_steal)
> >  {
> >      int i = 0;
> >      int cnt = 0;
> > @@ -1517,7 +1518,9 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
> > *meter,
> >              }
> >              cnt++;
> >          } else {
> > -            rte_pktmbuf_free(pkt);
> > +            if (may_steal) {
> > +                rte_pktmbuf_free(pkt);
> > +            }
> >          }
> >      }
> >
> > @@ -1526,12 +1529,13 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
> > *meter,
> >
> >  static int
> >  ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf
> > **pkts,
> > -                    int pkt_cnt)
> > +                    int pkt_cnt, bool may_steal)
> >  {
> >      int cnt = 0;
> >
> >      rte_spinlock_lock(&policer->policer_lock);
> > -    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, pkt_cnt);
> > +    cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
> > +                                  pkt_cnt, may_steal);
> >      rte_spinlock_unlock(&policer->policer_lock);
> >
> >      return cnt;
> > @@ -1635,7 +1639,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
> >          dropped = nb_rx;
> >          nb_rx = ingress_policer_run(policer,
> >                                      (struct rte_mbuf **) batch->packets,
> > -                                    nb_rx);
> > +                                    nb_rx, true);
> >          dropped -= nb_rx;
> >      }
> >
> > @@ -1672,7 +1676,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct
> > dp_packet_batch *batch)
> >          dropped = nb_rx;
> >          nb_rx = ingress_policer_run(policer,
> >                                      (struct rte_mbuf **) batch->packets,
> > -                                    nb_rx);
> > +                                    nb_rx, true);
> >          dropped -= nb_rx;
> >      }
> >
> > @@ -1690,13 +1694,13 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq,
> > struct dp_packet_batch *batch)
> >
> >  static inline int
> >  netdev_dpdk_qos_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
> > -                    int cnt)
> > +                    int cnt, bool may_steal)
> >  {
> >      struct qos_conf *qos_conf = ovsrcu_get(struct qos_conf *, &dev-
> > >qos_conf);
> >
> >      if (qos_conf) {
> >          rte_spinlock_lock(&qos_conf->lock);
> > -        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt);
> > +        cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt, may_steal);
> >          rte_spinlock_unlock(&qos_conf->lock);
> >      }
> >
> > @@ -1770,7 +1774,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev,
> int
> > qid,
> >
> >      cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
> >      /* Check has QoS has been configured for the netdev */
> > -    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt);
> > +    cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);
> >      dropped = total_pkts - cnt;
> >
> >      do {
> > @@ -1816,10 +1820,22 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
> > struct dp_packet_batch *batch)  #endif
> >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> >      struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
> > +    int txcnt_eth = batch->count;
> >      int dropped = 0;
> >      int newcnt = 0;
> >      int i;
> >
> > +    if (dev->type != DPDK_DEV_VHOST) {
> > +        /* Check if QoS has been configured for this netdev. */
> > +        txcnt_eth = netdev_dpdk_qos_run(dev,
> > +                                        (struct rte_mbuf **)batch-
> > >packets,
> > +                                        txcnt_eth, false);
>
> So dpdk_do_tx_copy will be called as part of netdev_dpdk_eth_send__ if its
> triggered by the following conditions
>
>     if (OVS_UNLIKELY(!may_steal ||
>                      batch->packets[0]->source != DPBUF_DPDK)
>
> What will happen in above if the source of the packet is not DPBUF_DPDK?
>
> For the most part it will be ok until 'netdev_dpdk_policer_pkt_handle()'
> is called later on.
>
> This has specific DPDK calls that expect an mbuf source for the packet.
>
> Previously QoS took place after the packet had been copied so this was not
> an issue but now we have DPDK rte functions being called on a non mbuf
> source packet. I'm not sure of the behavior that could occur in this case.
> Could an incorrect length be returned for the call rte_pktmbuf_pkt_len(pkt)
> in 'netdev_dpdk_policer_pkt_handle()'?
>
> It could be the case then that non DPDK specific call maybe required to
> attain the length of the packet in 'netdev_dpdk_policer_pkt_handle()'
> before calling the meter itself.
>
> Have you tested this use case?
>
> Ian
>
> > +        if (txcnt_eth == 0) {
> > +            dropped = batch->count;
> > +            goto out;
> > +        }
> > +    }
> > +
> >      dp_packet_batch_apply_cutlen(batch);
> >
> >      for (i = 0; i < batch->count; i++) { @@ -1848,21 +1864,20 @@
> > dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch
> > *batch)
> >          rte_pktmbuf_pkt_len(pkts[newcnt]) = size;
> >
> >          newcnt++;
> > +        if (dev->type != DPDK_DEV_VHOST && newcnt >= txcnt_eth) {
> > +            dropped += batch->count - i - 1;
> > +            break;
> > +        }
> >      }
> >
> >      if (dev->type == DPDK_DEV_VHOST) {
> >          __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **)
> pkts,
> >                                   newcnt);
> >      } else {
> > -        unsigned int qos_pkts = newcnt;
> > -
> > -        /* Check if QoS has been configured for this netdev. */
> > -        newcnt = netdev_dpdk_qos_run(dev, pkts, newcnt);
> > -
> > -        dropped += qos_pkts - newcnt;
> >          dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, newcnt);
> >      }
> >
> > +out:
> >      if (OVS_UNLIKELY(dropped)) {
> >          rte_spinlock_lock(&dev->stats_lock);
> >          dev->stats.tx_dropped += dropped; @@ -1915,7 +1930,7 @@
> > netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
> >          dp_packet_batch_apply_cutlen(batch);
> >
> >          cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt);
> > -        cnt = netdev_dpdk_qos_run(dev, pkts, cnt);
> > +        cnt = netdev_dpdk_qos_run(dev, pkts, cnt, true);
> >          dropped = batch->count - cnt;
> >
> >          dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt); @@ -
> > 3132,13 +3147,15 @@ egress_policer_qos_is_equal(const struct qos_conf
> > *conf,  }
> >
> >  static int
> > -egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int
> > pkt_cnt)
> > +egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int
> > pkt_cnt,
> > +                   bool may_steal)
> >  {
> >      int cnt = 0;
> >      struct egress_policer *policer =
> >          CONTAINER_OF(conf, struct egress_policer, qos_conf);
> >
> > -    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
> pkt_cnt);
> > +    cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
> > +                                  pkt_cnt, may_steal);
> >
> >      return cnt;
> >  }
> > --
> > 1.8.3.1
>
>


More information about the dev mailing list