[ovs-dev] [PATCH v2] netdev-dpdk: Execute QoS Checking before copying to mbuf
Gao Zhenyu
sysugaozhenyu at gmail.com
Thu Aug 31 01:56:21 UTC 2017
Here is the testing I just did:
1. Add log in function netdev_dpdk_policer_pkt_handle
static inline
bool
netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm
*meter,
struct rte_mbuf *pkt, uint64_t
time)
{
uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct
ether_hdr);
VLOG_WARN("GGGG, payload_len:%u,
rte_pkt_len:%u",
pkt_len, rte_pktmbuf_pkt_len(pkt));
<------------------------print out pkt_len
return rte_meter_srtcm_color_blind_check(meter, time, pkt_len)
==
e_RTE_METER_GREEN;
}
2. Rebuild an rpm, install it on the machine, enable DPDK.
3. Create a container on the bridge (userspace bridge). (The container uses a
veth device.)
4. Configure QoS on the DPDK eth port by using: ovs-vsctl set port
dpdk-uplink-eth3 qos=@newqos -- --id=@newqos create qos
type=egress-policer other-config:cir=1250000 other-config:cbs=2048
5. Execute ping in the container and monitor ovs-vswitchd.log. Then I can
see:
2017-08-31T01:39:28.036Z|00133|netdev_dpdk|WARN|GGGG, payload_len:88,
rte_pkt_len:102
2017-08-31T01:39:29.036Z|00134|netdev_dpdk|WARN|GGGG, payload_len:88,
rte_pkt_len:102
2017-08-31T01:39:30.036Z|00135|netdev_dpdk|WARN|GGGG, payload_len:88,
rte_pkt_len:102
2017-08-31T01:39:31.036Z|00136|netdev_dpdk|WARN|GGGG, payload_len:88,
rte_pkt_len:102
So the mbuf->pkt_len has a correct length.
On the code side, we have the function dp_packet_set_size(); if OVS is
compiled with DPDK, this function sets mbuf.data_len and mbuf.pkt_len. Many
functions consume dp_packet_set_size(), like netdev_linux_rxq_recv_sock in
netdev-linux.c.
#ifdef DPDK_NETDEV
......
static inline void
dp_packet_set_size(struct dp_packet *b, uint32_t v)
{
.....
b->mbuf.data_len = (uint16_t)v; /* Current seg length. */
b->mbuf.pkt_len = v; /* Total length of all segments linked
to
* this segment. */
}
Please let me know if you have any concern on it.
Thanks
Zhenyu Gao
2017-08-30 23:18 GMT+08:00 Stokes, Ian <ian.stokes at intel.com>:
> > In dpdk_do_tx_copy function, all packets were copied to mbuf first, but
> > QoS checking may drop some of them.
> > Move the QoS checking in front of copying data to mbuf, it helps to
> reduce
> > useless copy.
>
> Hi Zhenyu, thanks for the patch, I agree with the objective of the patch
> but have some comments below I'd like to see addressed/tested before
> signing off.
>
> >
> > Signed-off-by: Zhenyu Gao <sysugaozhenyu at gmail.com>
> > ---
> > lib/netdev-dpdk.c | 55 ++++++++++++++++++++++++++++++
> ++++++--------------
> > -----
> > 1 file changed, 36 insertions(+), 19 deletions(-)
> >
> > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 1aaf6f7..50874b8
> > 100644
> > --- a/lib/netdev-dpdk.c
> > +++ b/lib/netdev-dpdk.c
> > @@ -279,7 +279,7 @@ struct dpdk_qos_ops {
> > * For all QoS implementations it should always be non-null.
> > */
> > int (*qos_run)(struct qos_conf *qos_conf, struct rte_mbuf **pkts,
> > - int pkt_cnt);
> > + int pkt_cnt, bool may_steal);
> > };
> >
> > /* dpdk_qos_ops for each type of user space QoS implementation */ @@ -
> > 1501,7 +1501,8 @@ netdev_dpdk_policer_pkt_handle(struct rte_meter_srtcm
> > *meter,
> >
> > static int
> > netdev_dpdk_policer_run(struct rte_meter_srtcm *meter,
> > - struct rte_mbuf **pkts, int pkt_cnt)
> > + struct rte_mbuf **pkts, int pkt_cnt,
> > + bool may_steal)
> > {
> > int i = 0;
> > int cnt = 0;
> > @@ -1517,7 +1518,9 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
> > *meter,
> > }
> > cnt++;
> > } else {
> > - rte_pktmbuf_free(pkt);
> > + if (may_steal) {
> > + rte_pktmbuf_free(pkt);
> > + }
> > }
> > }
> >
> > @@ -1526,12 +1529,13 @@ netdev_dpdk_policer_run(struct rte_meter_srtcm
> > *meter,
> >
> > static int
> > ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf
> > **pkts,
> > - int pkt_cnt)
> > + int pkt_cnt, bool may_steal)
> > {
> > int cnt = 0;
> >
> > rte_spinlock_lock(&policer->policer_lock);
> > - cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts, pkt_cnt);
> > + cnt = netdev_dpdk_policer_run(&policer->in_policer, pkts,
> > + pkt_cnt, may_steal);
> > rte_spinlock_unlock(&policer->policer_lock);
> >
> > return cnt;
> > @@ -1635,7 +1639,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
> > dropped = nb_rx;
> > nb_rx = ingress_policer_run(policer,
> > (struct rte_mbuf **) batch->packets,
> > - nb_rx);
> > + nb_rx, true);
> > dropped -= nb_rx;
> > }
> >
> > @@ -1672,7 +1676,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct
> > dp_packet_batch *batch)
> > dropped = nb_rx;
> > nb_rx = ingress_policer_run(policer,
> > (struct rte_mbuf **) batch->packets,
> > - nb_rx);
> > + nb_rx, true);
> > dropped -= nb_rx;
> > }
> >
> > @@ -1690,13 +1694,13 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq,
> > struct dp_packet_batch *batch)
> >
> > static inline int
> > netdev_dpdk_qos_run(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
> > - int cnt)
> > + int cnt, bool may_steal)
> > {
> > struct qos_conf *qos_conf = ovsrcu_get(struct qos_conf *, &dev-
> > >qos_conf);
> >
> > if (qos_conf) {
> > rte_spinlock_lock(&qos_conf->lock);
> > - cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt);
> > + cnt = qos_conf->ops->qos_run(qos_conf, pkts, cnt, may_steal);
> > rte_spinlock_unlock(&qos_conf->lock);
> > }
> >
> > @@ -1770,7 +1774,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev,
> int
> > qid,
> >
> > cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
> > /* Check has QoS has been configured for the netdev */
> > - cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt);
> > + cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);
> > dropped = total_pkts - cnt;
> >
> > do {
> > @@ -1816,10 +1820,22 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
> > struct dp_packet_batch *batch) #endif
> > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> > struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
> > + int txcnt_eth = batch->count;
> > int dropped = 0;
> > int newcnt = 0;
> > int i;
> >
> > + if (dev->type != DPDK_DEV_VHOST) {
> > + /* Check if QoS has been configured for this netdev. */
> > + txcnt_eth = netdev_dpdk_qos_run(dev,
> > + (struct rte_mbuf **)batch-
> > >packets,
> > + txcnt_eth, false);
>
> So dpdk_do_tx_copy will be called as part of netdev_dpdk_eth_send__ if its
> triggered by the following conditions
>
> if (OVS_UNLIKELY(!may_steal ||
> batch->packets[0]->source != DPBUF_DPDK)
>
> What will happen in above if the source of the packet is not DPBUF_DPDK?
>
> For the most part it will be ok until 'netdev_dpdk_policer_pkt_handle()'
> is called later on.
>
> This has specific DPDK calls that expect an mbuf source for the packet.
>
> Previously QoS took place after the packet had been copied so this was not
> an issue but now we have DPDK rte functions being called on a non mbuf
> source packet. I'm not sure of the behavior that could occur in this case.
> Could an incorrect length be returned for the call rte_pktmbuf_pkt_len(pkt)
> in 'netdev_dpdk_policer_pkt_handle()'?
>
> It could be the case then that non DPDK specific call maybe required to
> attain the length of the packet in 'netdev_dpdk_policer_pkt_handle()'
> before calling the meter itself.
>
> Have you tested this use case?
>
> Ian
>
> > + if (txcnt_eth == 0) {
> > + dropped = batch->count;
> > + goto out;
> > + }
> > + }
> > +
> > dp_packet_batch_apply_cutlen(batch);
> >
> > for (i = 0; i < batch->count; i++) { @@ -1848,21 +1864,20 @@
> > dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch
> > *batch)
> > rte_pktmbuf_pkt_len(pkts[newcnt]) = size;
> >
> > newcnt++;
> > + if (dev->type != DPDK_DEV_VHOST && newcnt >= txcnt_eth) {
> > + dropped += batch->count - i - 1;
> > + break;
> > + }
> > }
> >
> > if (dev->type == DPDK_DEV_VHOST) {
> > __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **)
> pkts,
> > newcnt);
> > } else {
> > - unsigned int qos_pkts = newcnt;
> > -
> > - /* Check if QoS has been configured for this netdev. */
> > - newcnt = netdev_dpdk_qos_run(dev, pkts, newcnt);
> > -
> > - dropped += qos_pkts - newcnt;
> > dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, newcnt);
> > }
> >
> > +out:
> > if (OVS_UNLIKELY(dropped)) {
> > rte_spinlock_lock(&dev->stats_lock);
> > dev->stats.tx_dropped += dropped; @@ -1915,7 +1930,7 @@
> > netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
> > dp_packet_batch_apply_cutlen(batch);
> >
> > cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt);
> > - cnt = netdev_dpdk_qos_run(dev, pkts, cnt);
> > + cnt = netdev_dpdk_qos_run(dev, pkts, cnt, true);
> > dropped = batch->count - cnt;
> >
> > dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt); @@ -
> > 3132,13 +3147,15 @@ egress_policer_qos_is_equal(const struct qos_conf
> > *conf, }
> >
> > static int
> > -egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int
> > pkt_cnt)
> > +egress_policer_run(struct qos_conf *conf, struct rte_mbuf **pkts, int
> > pkt_cnt,
> > + bool may_steal)
> > {
> > int cnt = 0;
> > struct egress_policer *policer =
> > CONTAINER_OF(conf, struct egress_policer, qos_conf);
> >
> > - cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
> pkt_cnt);
> > + cnt = netdev_dpdk_policer_run(&policer->egress_meter, pkts,
> > + pkt_cnt, may_steal);
> >
> > return cnt;
> > }
> > --
> > 1.8.3.1
>
>
More information about the dev
mailing list