[ovs-dev] [PATCH v9 2/2] netdev-dpdk:Detailed packet drop statistics

Sriram Vatala sriram.v at altencalsoftlabs.com
Wed Oct 9 06:23:25 UTC 2019


Hi All,
Please consider this as a gentle reminder.

Thanks & Regards,
Sriram.

-----Original Message-----
From: Sriram Vatala <sriram.v at altencalsoftlabs.com> 
Sent: 04 October 2019 13:37
To: 'ovs-dev at openvswitch.org' <ovs-dev at openvswitch.org>
Cc: 'ktraynor at redhat.com' <ktraynor at redhat.com>; 'Ilya Maximets'
<i.maximets at samsung.com>
Subject: RE: [PATCH v9 2/2] netdev-dpdk:Detailed packet drop statistics

Hi,
Please consider this as a gentle reminder.

Thanks & Regards,
Sriram.

-----Original Message-----
From: Sriram Vatala <sriram.v at altencalsoftlabs.com> 
Sent: 25 September 2019 09:58
To: 'ovs-dev at openvswitch.org' <ovs-dev at openvswitch.org>
Cc: 'ktraynor at redhat.com' <ktraynor at redhat.com>; 'Ilya Maximets'
<i.maximets at samsung.com>
Subject: RE: [PATCH v9 2/2] netdev-dpdk:Detailed packet drop statistics

Hi,
I have addressed the comments given on patch v8. Kindly review patch v9.

Changes since patch v8:
Rebased my changes on top of Ilya's patch "Reuse vhost function for dpdk
ETH custom stats" (patch 1/2 of this set).
Addressed the comments given on patch v8.

Thanks & Regards,
Sriram.

-----Original Message-----
From: Sriram Vatala <sriram.v at altencalsoftlabs.com> 
Sent: 21 September 2019 08:10
To: ovs-dev at openvswitch.org; i.maximets at samsung.com
Cc: ktraynor at redhat.com; Sriram Vatala <sriram.v at altencalsoftlabs.com>
Subject: [PATCH v9 2/2] netdev-dpdk:Detailed packet drop statistics

OVS may be unable to transmit packets for multiple reasons, yet today there
is a single counter to track packets dropped for any of those reasons. The
most common reason is that a VM is unable to read packets fast enough,
causing the vhostuser port transmit queue on the OVS side to become full.
This manifests as a problem with VNFs not receiving all packets. A separate
drop counter for packets dropped because the transmit queue is full clearly
indicates that the problem is on the VM side and not in OVS. Similarly,
maintaining separate counters for all possible drop reasons helps to
identify the actual cause of packet drops.

This patch adds custom software stats counters to track packets dropped at
the port level; these counters are displayed along with other stats by the
"ovs-vsctl get interface <iface> statistics" command. The detailed stats are
available for both dpdk and vhostuser ports.
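
To illustrate the accounting scheme, here is a minimal standalone sketch of
how the total drop count decomposes by cause in the send path. The helper
below is hypothetical (not the patch code itself), but the counter names and
the arithmetic mirror what the patch does around
netdev_dpdk_filter_packet_len(), netdev_dpdk_qos_run() and the tx burst; in
the patch these counters are updated under the per-device stats_lock.

    /* Hypothetical sketch of the per-cause drop accounting; not the
     * patch code itself. */
    #include <stdint.h>

    struct sw_stats_sketch {
        uint64_t tx_failure_drops;      /* Tx queue full or device busy. */
        uint64_t tx_mtu_exceeded_drops; /* Packet bigger than device MTU. */
        uint64_t tx_qos_drops;          /* Dropped by the egress policer. */
    };

    static void
    account_tx_drops(struct sw_stats_sketch *s,
                     unsigned int batch_cnt, /* packets handed to send    */
                     unsigned int after_mtu, /* left after the MTU filter */
                     unsigned int after_qos, /* left after the QoS run    */
                     unsigned int sent)      /* accepted by the device    */
    {
        s->tx_mtu_exceeded_drops += batch_cnt - after_mtu;
        s->tx_qos_drops          += after_mtu - after_qos;
        s->tx_failure_drops      += after_qos - sent;
        /* Total dropped == batch_cnt - sent, now split by cause. */
    }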

--
Changes since v8:
Addressed comments given by Ilya.

Signed-off-by: Sriram Vatala <sriram.v at altencalsoftlabs.com>
---
 Documentation/topics/dpdk/vhost-user.rst      | 13 ++-
 lib/netdev-dpdk.c                             | 81 +++++++++++++++----
 utilities/bugtool/automake.mk                 |  3 +-
 utilities/bugtool/ovs-bugtool-get-port-stats  | 25 ++++++
 .../plugins/network-status/openvswitch.xml    |  1 +
 5 files changed, 105 insertions(+), 18 deletions(-)
 create mode 100755 utilities/bugtool/ovs-bugtool-get-port-stats

diff --git a/Documentation/topics/dpdk/vhost-user.rst b/Documentation/topics/dpdk/vhost-user.rst
index 724aa62f6..89388a2bf 100644
--- a/Documentation/topics/dpdk/vhost-user.rst
+++ b/Documentation/topics/dpdk/vhost-user.rst
@@ -551,7 +551,18 @@ processing packets at the required rate.
 The amount of Tx retries on a vhost-user or vhost-user-client interface can be
 shown with::
 
-  $ ovs-vsctl get Interface dpdkvhostclient0 statistics:tx_retries
+  $ ovs-vsctl get Interface dpdkvhostclient0 \
+                            statistics:netdev_dpdk_tx_retries
+
+When the guest is not able to consume packets fast enough, the transmit
+queue of the port fills up, i.e. the queue runs out of free descriptors.
+This is the most likely reason why the DPDK transmit API fails to send
+packets, besides other reasons.
+
+The amount of tx failure drops on a dpdk vhost/physical interface can be
+shown with::
+
+  $ ovs-vsctl get Interface dpdkvhostclient0 \
+                            statistics:netdev_dpdk_tx_failure_drops
 
 vhost-user Dequeue Zero Copy (experimental)
 -------------------------------------------
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 652b57e3b..fd8f9102e 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -174,6 +174,20 @@ static const struct vhost_device_ops virtio_net_device_ops =
     .destroy_connection = destroy_connection,
 };
 
+/* Custom software stats for dpdk ports */
+struct netdev_dpdk_sw_stats {
+    /* No. of retries when unable to transmit. */
+    uint64_t tx_retries;
+    /* Packet drops when unable to transmit; Probably Tx queue is full. */
+    uint64_t tx_failure_drops;
+    /* Packet length greater than device MTU. */
+    uint64_t tx_mtu_exceeded_drops;
+    /* Packet drops in egress policer processing. */
+    uint64_t tx_qos_drops;
+    /* Packet drops in ingress policer processing. */
+    uint64_t rx_qos_drops;
+};
+
 enum { DPDK_RING_SIZE = 256 };
 BUILD_ASSERT_DECL(IS_POW2(DPDK_RING_SIZE));
 enum { DRAIN_TSC = 200000ULL };
@@ -413,11 +427,10 @@ struct netdev_dpdk {
 
     PADDED_MEMBERS(CACHE_LINE_SIZE,
         struct netdev_stats stats;
-        /* Custom stat for retries when unable to transmit. */
-        uint64_t tx_retries;
+        struct netdev_dpdk_sw_stats *sw_stats;
         /* Protects stats */
         rte_spinlock_t stats_lock;
-        /* 4 pad bytes here. */
+        /* 36 pad bytes here. */
     );
 
     PADDED_MEMBERS(CACHE_LINE_SIZE,
@@ -1173,7 +1186,9 @@ common_construct(struct netdev *netdev, dpdk_port_t port_no,
     dev->rte_xstats_ids = NULL;
     dev->rte_xstats_ids_size = 0;
 
-    dev->tx_retries = (dev->type == DPDK_DEV_VHOST) ? 0 : UINT64_MAX;
+    dev->sw_stats = (struct netdev_dpdk_sw_stats *)
+                        xzalloc(sizeof *dev->sw_stats);
+    dev->sw_stats->tx_retries = (dev->type == DPDK_DEV_VHOST) ? 0 : UINT64_MAX;
 
     return 0;
 }
@@ -1359,6 +1374,7 @@ common_destruct(struct netdev_dpdk *dev)
     ovs_list_remove(&dev->list_node);
     free(ovsrcu_get_protected(struct ingress_policer *,
                               &dev->ingress_policer));
+    free(dev->sw_stats);
     ovs_mutex_destroy(&dev->mutex);
 }
 
@@ -2209,6 +2225,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
     rte_spinlock_lock(&dev->stats_lock);
     netdev_dpdk_vhost_update_rx_counters(&dev->stats, batch->packets,
                                          nb_rx, dropped);
+    dev->sw_stats->rx_qos_drops += dropped;
     rte_spinlock_unlock(&dev->stats_lock);
 
     batch->count = nb_rx;
@@ -2258,6 +2275,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch,
     if (OVS_UNLIKELY(dropped)) {
         rte_spinlock_lock(&dev->stats_lock);
         dev->stats.rx_dropped += dropped;
+        dev->sw_stats->rx_qos_drops += dropped;
         rte_spinlock_unlock(&dev->stats_lock);
     }
 
@@ -2341,6 +2359,10 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
     struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;
     unsigned int total_pkts = cnt;
     unsigned int dropped = 0;
+    unsigned int tx_failure;
+    unsigned int mtu_drops;
+    unsigned int qos_drops;
+    struct netdev_dpdk_sw_stats *sw_stats = dev->sw_stats;
     int i, retries = 0;
     int max_retries = VHOST_ENQ_RETRY_MIN;
     int vid = netdev_dpdk_get_vid(dev);
@@ -2358,9 +2380,12 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
     rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
 
     cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
+    mtu_drops = total_pkts - cnt;
+    qos_drops = cnt;
     /* Check has QoS has been configured for the netdev */
     cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true);
-    dropped = total_pkts - cnt;
+    qos_drops -= cnt;
+    dropped = qos_drops + mtu_drops;
 
     do {
        int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
@@ -2385,12 +2410,16 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
         }
     } while (cnt && (retries++ < max_retries));
 
+    tx_failure = cnt;
     rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
 
     rte_spinlock_lock(&dev->stats_lock);
     netdev_dpdk_vhost_update_tx_counters(&dev->stats, pkts, total_pkts,
                                          cnt + dropped);
-    dev->tx_retries += MIN(retries, max_retries);
+    sw_stats->tx_retries += MIN(retries, max_retries);
+    sw_stats->tx_failure_drops += tx_failure;
+    sw_stats->tx_mtu_exceeded_drops += mtu_drops;
+    sw_stats->tx_qos_drops += qos_drops;
     rte_spinlock_unlock(&dev->stats_lock);
 
 out:
@@ -2415,12 +2444,16 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
     struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
     uint32_t cnt = batch_cnt;
     uint32_t dropped = 0;
+    uint32_t tx_failure = 0;
+    uint32_t mtu_drops = 0;
+    uint32_t qos_drops = 0;
+    struct netdev_dpdk_sw_stats *sw_stats = dev->sw_stats;
 
     if (dev->type != DPDK_DEV_VHOST) {
         /* Check if QoS has been configured for this netdev. */
         cnt = netdev_dpdk_qos_run(dev, (struct rte_mbuf **) batch->packets,
                                   batch_cnt, false);
-        dropped += batch_cnt - cnt;
+        qos_drops = batch_cnt - cnt;
     }
 
     uint32_t txcnt = 0;
@@ -2433,7 +2466,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
             VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d",
                          size, dev->max_packet_len);
 
-            dropped++;
+            mtu_drops++;
             continue;
         }
 
@@ -2456,13 +2489,17 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
             __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) pkts,
                                      txcnt);
         } else {
-            dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, txcnt);
+            tx_failure = netdev_dpdk_eth_tx_burst(dev, qid, pkts, txcnt);
         }
     }
 
+    dropped += qos_drops + mtu_drops + tx_failure;
     if (OVS_UNLIKELY(dropped)) {
         rte_spinlock_lock(&dev->stats_lock);
         dev->stats.tx_dropped += dropped;
+        sw_stats->tx_failure_drops += tx_failure;
+        sw_stats->tx_mtu_exceeded_drops += mtu_drops;
+        sw_stats->tx_qos_drops += qos_drops;
         rte_spinlock_unlock(&dev->stats_lock);
     }
 }
@@ -2503,19 +2540,27 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
         dpdk_do_tx_copy(netdev, qid, batch);
         dp_packet_delete_batch(batch, true);
     } else {
+        struct netdev_dpdk_sw_stats *sw_stats = dev->sw_stats;
         int tx_cnt, dropped;
+        int tx_failure, mtu_drops, qos_drops;
         int batch_cnt = dp_packet_batch_size(batch);
         struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets;
 
         tx_cnt = netdev_dpdk_filter_packet_len(dev, pkts, batch_cnt);
+        mtu_drops = batch_cnt - tx_cnt;
+        qos_drops = tx_cnt;
         tx_cnt = netdev_dpdk_qos_run(dev, pkts, tx_cnt, true);
-        dropped = batch_cnt - tx_cnt;
+        qos_drops -= tx_cnt;
 
-        dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, tx_cnt);
+        tx_failure = netdev_dpdk_eth_tx_burst(dev, qid, pkts, tx_cnt);
 
+        dropped = tx_failure + mtu_drops + qos_drops;
         if (OVS_UNLIKELY(dropped)) {
             rte_spinlock_lock(&dev->stats_lock);
             dev->stats.tx_dropped += dropped;
+            sw_stats->tx_failure_drops += tx_failure;
+            sw_stats->tx_mtu_exceeded_drops += mtu_drops;
+            sw_stats->tx_qos_drops += qos_drops;
             rte_spinlock_unlock(&dev->stats_lock);
         }
     }
@@ -2826,8 +2871,12 @@ netdev_dpdk_get_sw_custom_stats(const struct netdev *netdev,
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     int i, n;
 
-#define SW_CSTATS \
-    SW_CSTAT(tx_retries)
+#define SW_CSTATS                    \
+    SW_CSTAT(tx_retries)             \
+    SW_CSTAT(tx_failure_drops)       \
+    SW_CSTAT(tx_mtu_exceeded_drops)  \
+    SW_CSTAT(tx_qos_drops)           \
+    SW_CSTAT(rx_qos_drops)
 
 #define SW_CSTAT(NAME) + 1
     custom_stats->size = SW_CSTATS;
@@ -2840,7 +2889,7 @@ netdev_dpdk_get_sw_custom_stats(const struct netdev *netdev,
     rte_spinlock_lock(&dev->stats_lock);
     i = 0;
 #define SW_CSTAT(NAME) \
-    custom_stats->counters[i++].value = dev->NAME;
+    custom_stats->counters[i++].value = dev->sw_stats->NAME;
     SW_CSTATS;
 #undef SW_CSTAT
     rte_spinlock_unlock(&dev->stats_lock);
@@ -2851,8 +2900,8 @@ netdev_dpdk_get_sw_custom_stats(const struct netdev *netdev,
     n = 0;
 #define SW_CSTAT(NAME) \
     if (custom_stats->counters[i].value != UINT64_MAX) {                   \
-        ovs_strlcpy(custom_stats->counters[n].name, #NAME,                 \
-                    NETDEV_CUSTOM_STATS_NAME_SIZE);                        \
+        ovs_strlcpy(custom_stats->counters[n].name,                        \
+                    "netdev_dpdk_"#NAME, NETDEV_CUSTOM_STATS_NAME_SIZE);   \
         custom_stats->counters[n].value = custom_stats->counters[i].value; \
         n++;                                                               \
     }                                                                      \
diff --git a/utilities/bugtool/automake.mk b/utilities/bugtool/automake.mk
index 40980b367..dda58e0a1 100644
--- a/utilities/bugtool/automake.mk
+++ b/utilities/bugtool/automake.mk
@@ -22,7 +22,8 @@ bugtool_scripts = \
 	utilities/bugtool/ovs-bugtool-ovs-bridge-datapath-type \
 	utilities/bugtool/ovs-bugtool-ovs-vswitchd-threads-affinity \
 	utilities/bugtool/ovs-bugtool-qos-configs \
-	utilities/bugtool/ovs-bugtool-get-dpdk-nic-numa
+	utilities/bugtool/ovs-bugtool-get-dpdk-nic-numa \
+	utilities/bugtool/ovs-bugtool-get-port-stats
 
 scripts_SCRIPTS += $(bugtool_scripts)
 
diff --git a/utilities/bugtool/ovs-bugtool-get-port-stats b/utilities/bugtool/ovs-bugtool-get-port-stats
new file mode 100755
index 000000000..0fe175e6b
--- /dev/null
+++ b/utilities/bugtool/ovs-bugtool-get-port-stats
@@ -0,0 +1,25 @@
+#! /bin/bash
+
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General
+# Public License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# Copyright (C) 2019 Ericsson AB
+
+for bridge in `ovs-vsctl -- --real list-br`
+do
+    echo -e "\nBridge : ${bridge}\n"
+    for iface in `ovs-vsctl list-ifaces ${bridge}`
+    do
+        echo -e "iface : ${iface}"
+        ovs-vsctl get interface ${iface} statistics
+        echo -e "\n"
+    done
+    echo -e "iface : ${bridge}"
+    ovs-vsctl get interface ${bridge} statistics
+done
diff --git a/utilities/bugtool/plugins/network-status/openvswitch.xml b/utilities/bugtool/plugins/network-status/openvswitch.xml
index d39867c6e..8c1ec643e 100644
--- a/utilities/bugtool/plugins/network-status/openvswitch.xml
+++ b/utilities/bugtool/plugins/network-status/openvswitch.xml
@@ -40,4 +40,5 @@
     <command label="ovs-ofctl-dump-groups" filters="ovs">/usr/share/openvswitch/scripts/ovs-bugtool-ovs-ofctl-loop-over-bridges "dump-groups"</command>
     <command label="ovs-ofctl-dump-group-stats" filters="ovs" repeat="2">/usr/share/openvswitch/scripts/ovs-bugtool-ovs-ofctl-loop-over-bridges "dump-group-stats"</command>
     <command label="get_dpdk_nic_numa" filters="ovs">/usr/share/openvswitch/scripts/ovs-bugtool-get-dpdk-nic-numa</command>
+    <command label="get_port_statistics" filters="ovs">/usr/share/openvswitch/scripts/ovs-bugtool-get-port-stats</command>
 </collect>
--
2.20.1

