[ovs-dev] [RFC PATCH v2 15/19] keepalive: Check the packet statistics as part of PMD health checks.

Bhanuprakash Bodireddy bhanuprakash.bodireddy at intel.com
Mon Jun 12 16:49:43 UTC 2017


This commit adds support for checking the packet statistics of the ports
polled by a PMD thread. If packets aren't processed because the PMD thread
has stalled or deadlocked, the statistics won't update, and the monitoring
framework can use this to confirm the PMD failure.

This mechanism has a limitation when multiqueue (MQ) is enabled. In some
cases the queues of a DPDK port are polled by different PMD threads. Even
if one PMD thread stalls, the port statistics will still be incremented
because another queue is processed by a different PMD. In this case the
function can still return the active state, since packets are being
processed on the port.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy at intel.com>
---
 lib/dpif-netdev.c |  23 +++++++++++--
 lib/keepalive.c   | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/keepalive.h   |   6 ++++
 3 files changed, 126 insertions(+), 3 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 32cdb9f..06ca7fb 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -974,8 +974,9 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 static void
 pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 {
-    int port_link_status = 0;
     struct rxq_poll *poll;
+    int port_link_status = 0;
+    int port_stats = 0;
 
     struct svec pmd_poll_list;
     svec_init(&pmd_poll_list);
@@ -998,6 +999,12 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd)
                 ka_shm_update_port_status(netdev_rxq_get_name(poll->rxq->rx),
                                   netdev_rxq_get_queue_id(poll->rxq->rx),
                                   link_state, pmd->core_id, i);
+
+                if (!strcmp(link_state, "up")) {
+                    ka_shm_update_port_statistics(poll->rxq->port->netdev,
+                                                    pmd->core_id, i);
+                }
+
                 break;
             }
         }
@@ -1005,12 +1012,22 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd)
     svec_destroy(&pmd_poll_list);
 
     port_link_status = ka_get_polled_ports_status(pmd->core_id);
+    port_stats = ka_get_polled_ports_stats(pmd->core_id);
 
     int pmd_hc_state = ka_get_pmd_health_check_state(pmd->core_id);
-    if (PMD_HC_COMPLETE == pmd_hc_state) {
-        if (port_link_status == ACTIVE_RUN_STATE) {
+    switch (pmd_hc_state) {
+    case PMD_HC_ENABLE:
+        ka_set_pmd_health_check_state(pmd->core_id, PMD_HC_PROGRESS);
+        break;
+    case PMD_HC_PROGRESS:
+        ka_set_pmd_health_check_state(pmd->core_id, PMD_HC_COMPLETE);
+        break;
+    case PMD_HC_COMPLETE:
+        if (port_link_status == ACTIVE_RUN_STATE &&
+               port_stats == ACTIVE_RUN_STATE ) {
             ka_set_pmd_state_ts(pmd->core_id, KA_STATE_ALIVE, 0);
         }
+        break;
     }
 }
 
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 86d39db..b702ebc 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -24,6 +24,7 @@
 #include "dpdk.h"
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
+#include "netdev-dpdk.h"
 #include "openvswitch/dynamic-string.h"
 #include "openvswitch/vlog.h"
 #include "ovs-thread.h"
@@ -389,6 +390,33 @@ enum pmdhealth_status ka_get_polled_ports_status(unsigned core_id)
     }
 }
 
+enum pmdhealth_status ka_get_polled_ports_stats(unsigned core_id)
+{
+    struct keepalive_shm *ka_shm = get_ka_shm();
+    if (!ka_shm) {
+        VLOG_ERR_RL(&rl, "KeepAlive: Invalid shared memory block.");
+        return -1;
+    }
+
+    int failed = 0;
+    int n_ports = ka_shm->ext_stats[core_id].num_poll_ports;
+    for (int i = 0; i < n_ports; i++) {
+        int state;
+        state =
+          ka_shm->ext_stats[core_id].port_stats[i].state[PORT_STATS_CHECK];
+        if (state == FAILURE_STATE) {
+            failed = 1;
+            break;
+        }
+    }
+
+    if (!failed) {
+        return ACTIVE_RUN_STATE;
+    } else {
+        return FAILURE_STATE;
+    }
+}
+
 void
 ka_shm_update_port_status(const char *port, int qid, char *link_state,
                           int core_id, int idx)
@@ -427,6 +455,78 @@ ka_shm_update_port_status(const char *port, int qid, char *link_state,
                                                                state;
 }
 
+void
+ka_shm_update_port_statistics(const struct netdev *netdev,
+                              int core_id, int idx)
+{
+    int error;
+    int state = FAILURE_STATE;
+    struct keepalive_shm *ka_shm = get_ka_shm();
+    if (!ka_shm) {
+        VLOG_ERR_RL(&rl, "KeepAlive: Invalid shared memory block.");
+        return;
+    }
+
+    ka_shm->ext_stats[core_id].num_poll_ports = idx;
+
+    int pmd_hc_state = ka_get_pmd_health_check_state(core_id);
+    if (PMD_HC_ENABLE == pmd_hc_state) {
+        struct netdev_stats *stats;
+        stats = &ka_shm->ext_stats[core_id].port_stats[idx].stats;
+        error = netdev_get_stats(netdev, stats);
+        if (error) {
+            VLOG_ERR("Couldn't retrieve stats (%s)", ovs_strerror(error));
+        }
+        state = ACTIVE_RUN_STATE;
+    }
+
+    if (PMD_HC_PROGRESS == pmd_hc_state) {
+        struct netdev_stats temp_stats;
+        VLOG_DBG_RL(&rl, "KeepAlive: HEALTH CHECKS ENABLED.");
+
+        error = netdev_get_stats(netdev, &temp_stats);
+        if (!error) {
+            uint64_t tx_pkts_cnt = 0;
+            uint64_t rx_pkts_cnt = 0;
+            int skip_tx_check = 0, skip_rx_check = 0;
+
+            struct netdev_stats *prev_stats =
+                   &ka_shm->ext_stats[core_id].port_stats[idx].stats;
+
+            if (!temp_stats.tx_packets && !prev_stats->tx_packets) {
+                VLOG_DBG_RL(&rl, "KeepAlive: No packets transmitted");
+                skip_tx_check = 1;
+            } else {
+                tx_pkts_cnt = temp_stats.tx_packets -
+                                         prev_stats->tx_packets;
+            }
+
+            if (!temp_stats.rx_packets && !prev_stats->rx_packets) {
+                VLOG_DBG_RL(&rl, "KeepAlive: No packets received");
+                skip_rx_check = 1;
+            } else {
+                rx_pkts_cnt = temp_stats.rx_packets -
+                                         prev_stats->rx_packets;
+            }
+
+            if (skip_tx_check && skip_rx_check) {
+                VLOG_DBG_RL(&rl, "KeepAlive: No active traffic");
+                state = ACTIVE_RUN_STATE;
+            } else if ((!skip_tx_check && tx_pkts_cnt) ||
+                      (!skip_rx_check && rx_pkts_cnt)) {
+                VLOG_DBG_RL(&rl, "KeepAlive: Stats updated");
+                state = ACTIVE_RUN_STATE;
+            } else {
+                VLOG_DBG("PMD failure");
+                state = FAILURE_STATE;
+            }
+        }
+    }
+
+    ka_shm->ext_stats[core_id].port_stats[idx].state[PORT_STATS_CHECK] =
+                                                        state;
+}
+
 static void
 ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
                        const char *argv[] OVS_UNUSED, void *ka_shm_)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index e05e36c..1f1f1c1 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -18,6 +18,7 @@
 #define KEEPALIVE_H
 
 #include <stdint.h>
+
 #ifdef DPDK_NETDEV
 #include <rte_keepalive.h>
 #define KEEPALIVE_MAXCORES RTE_KEEPALIVE_MAXCORES
@@ -25,6 +26,8 @@
 #define KEEPALIVE_MAXCORES 128
 #endif /* DPDK_NETDEV */
 
+#include "netdev.h"
+
 #define MAX_POLL_PORTS 20
 
 struct smap;
@@ -51,6 +54,7 @@ struct poll_port_stats {
     char *link_state;
     int qid;
     int state[PORT_NUM_CHECKS];
+    struct netdev_stats stats;
 };
 
 struct pmd_extended_stats {
@@ -116,5 +120,7 @@ struct smap *ka_stats_run(void);
 
 void ka_shm_update_port_status(const char *,int,char *,int,int);
 enum pmdhealth_status ka_get_polled_ports_status(unsigned);
+void ka_shm_update_port_statistics(const struct netdev *,int,int);
+enum pmdhealth_status ka_get_polled_ports_stats(unsigned);
 
 #endif /* keepalive.h */
-- 
2.4.11



More information about the dev mailing list