[ovs-dev] [RFC PATCH v3 15/18] keepalive: Check the PMD cycle stats as part of PMD health checks.

Bhanuprakash Bodireddy bhanuprakash.bodireddy at intel.com
Sun Jun 18 19:24:28 UTC 2017


This commit adds support for checking the PMD cycle stats as part of the
PMD health checks. If the cycle counters do not change for a period of
time, this can be flagged as a possible PMD stall.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy at intel.com>
---
 lib/dpif-netdev.c | 16 +++++++++-------
 lib/dpif-netdev.h |  6 ++++++
 lib/keepalive.c   | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/keepalive.h   |  3 +++
 4 files changed, 69 insertions(+), 7 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index ad48ee5..b1a9fc4 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -328,12 +328,6 @@ enum dp_stat_type {
     DP_N_STATS
 };
 
-enum pmd_cycles_counter_type {
-    PMD_CYCLES_POLLING,         /* Cycles spent polling NICs. */
-    PMD_CYCLES_PROCESSING,      /* Cycles spent processing packets */
-    PMD_N_CYCLES
-};
-
 #define XPS_TIMEOUT_MS 500LL
 
 /* Contained by struct dp_netdev_port's 'rxqs' member.  */
@@ -978,6 +972,8 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd)
     struct rxq_poll *poll;
     int port_link_status = 0;
     int port_stats = 0;
+    int pmd_polling = 0;
+    uint64_t cycles[PMD_N_CYCLES];
 
     struct svec pmd_poll_list;
     svec_init(&pmd_poll_list);
@@ -1005,6 +1001,11 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd)
     }
     svec_destroy(&pmd_poll_list);
 
+    for (int idx = 0; idx < ARRAY_SIZE(cycles); idx++) {
+        atomic_read_relaxed(&pmd->cycles.n[idx], &cycles[idx]);
+    }
+    pmd_polling = ka_info_update_pmd_cycles(pmd->core_id, cycles);
+
     int pmd_hc_state = ka_get_pmd_health_check_state(pmd->core_id);
     switch (pmd_hc_state) {
     case PMD_HC_ENABLE:
@@ -1018,7 +1019,8 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd)
         port_stats = ka_get_polled_ports_stats(pmd->core_id);
 
         if (port_link_status == ACTIVE_RUN_STATE &&
-               port_stats == ACTIVE_RUN_STATE ) {
+              port_stats == ACTIVE_RUN_STATE &&
+                pmd_polling == ACTIVE_RUN_STATE) {
             ka_set_pmd_state_ts(pmd->core_id, KA_STATE_ALIVE, 0);
         }
         break;
diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h
index 6db6ed2..e7c2400 100644
--- a/lib/dpif-netdev.h
+++ b/lib/dpif-netdev.h
@@ -33,6 +33,12 @@ extern "C" {
  * headers to be aligned on a 4-byte boundary.  */
 enum { DP_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN };
 
+enum pmd_cycles_counter_type {
+    PMD_CYCLES_POLLING,         /* Cycles spent polling NICs. */
+    PMD_CYCLES_PROCESSING,      /* Cycles spent processing packets. */
+    PMD_N_CYCLES                /* Number of counter types (array size). */
+};
+
 bool dpif_is_netdev(const struct dpif *);
 
 #define NR_QUEUE   1
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 4234912..3690b70 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -577,6 +577,57 @@ ka_info_update_port_statistics(const struct netdev *netdev,
                                                         state;
 }
 
+/* Records or checks the cycle counters of the PMD running on 'core_id'.
+ *
+ * In PMD_HC_ENABLE state the counters in 'cycles' are saved as a baseline.
+ * In PMD_HC_PROGRESS state they are compared against that baseline.
+ *
+ * Returns FAILURE_STATE if 'ka_info' is unset or if neither counter moved
+ * since the baseline, -1 if the counters sum to zero, and ACTIVE_RUN_STATE
+ * otherwise. */
+int
+ka_info_update_pmd_cycles(int core_id, uint64_t cycles[PMD_N_CYCLES])
+{
+    if (!ka_info) {
+        return FAILURE_STATE;
+    }
+
+    /* Sum the counters; a zero total means nothing has been counted yet. */
+    uint64_t total_cycles = 0;
+    for (int i = 0; i < PMD_N_CYCLES; i++) {
+        total_cycles += cycles[i];
+    }
+    if (!total_cycles) {
+        return -1;
+    }
+
+    int pmd_hc_state = ka_get_pmd_health_check_state(core_id);
+    uint64_t *saved = ka_info->ext_stats[core_id].cycles;
+
+    if (pmd_hc_state == PMD_HC_ENABLE) {
+        /* Save the baseline for the PMD_HC_PROGRESS comparison. */
+        saved[PMD_CYCLES_POLLING] = cycles[PMD_CYCLES_POLLING];
+        saved[PMD_CYCLES_PROCESSING] = cycles[PMD_CYCLES_PROCESSING];
+    } else if (pmd_hc_state == PMD_HC_PROGRESS) {
+        VLOG_DBG_RL(&rl, "Keepalive: Going to check the PMD thresholds now.");
+
+        /* Unsigned subtraction keeps the deltas correct across wraparound. */
+        uint64_t poll_delta = cycles[PMD_CYCLES_POLLING]
+                              - saved[PMD_CYCLES_POLLING];
+        uint64_t proc_delta = cycles[PMD_CYCLES_PROCESSING]
+                              - saved[PMD_CYCLES_PROCESSING];
+
+        if (!poll_delta && !proc_delta) {
+            /* Rate-limited like the message above so that a stalled PMD
+             * cannot flood the log. */
+            VLOG_DBG_RL(&rl, "PMD FAILURE!");
+            return FAILURE_STATE;
+        }
+    }
+
+    return ACTIVE_RUN_STATE;
+}
+
 static void
 ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
                            const char *argv[] OVS_UNUSED, void *ka_info_)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index a132d74..1bd639b 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -27,6 +27,7 @@
 #define KA_DP_MAXCORES 128
 #endif /* DPDK_NETDEV */
 
+#include "dpif-netdev.h"
 #include "netdev.h"
 
 struct smap;
@@ -76,6 +77,7 @@ struct poll_port_stats {
 struct pmd_extended_stats {
     char *health_status;
     struct poll_port_stats *port_stats;
+    uint64_t cycles[PMD_N_CYCLES];
     int num_poll_ports;
 };
 
@@ -141,5 +143,6 @@ void ka_info_update_port_status(const char *,int,char *,int,int);
 enum pmdhealth_status ka_get_polled_ports_status(unsigned);
 void ka_info_update_port_statistics(const struct netdev *,int,int);
 enum pmdhealth_status ka_get_polled_ports_stats(unsigned);
+int ka_info_update_pmd_cycles(int, uint64_t cycles[PMD_N_CYCLES]);
 
 #endif /* keepalive.h */
-- 
2.4.11



More information about the dev mailing list