[ovs-dev] [PATCH v3 17/19] netdev-dpdk: Enable PMD health checks on heartbeat failure.

Fri Aug 4 08:08:04 UTC 2017

The keepalive thread sends heartbeats to the PMD threads. When a PMD fails
to respond to successive heartbeats, it is potentially stalled. The PMD
state transitions are as below:

ALIVE -> MISSING -> DEAD -> GONE

This commit enables PMD health checks when a PMD doesn't respond to
heartbeats. This is needed to handle false negatives. With this commit
the new state transitions are as below:

ALIVE -> MISSING -> DEAD -> CHECK -> GONE

A PMD health checking state is introduced and kicks in immediately when
the PMD enters the DEAD state. As part of the check, the following are
considered:

  - Link status of the ports polled by PMD thread.
  - Statistics of the ports polled by PMD thread.
  - PMD polling and processing cycles.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy at intel.com>
---
 lib/keepalive.c   | 16 ++++++++++++++
 lib/keepalive.h   |  2 ++
 lib/netdev-dpdk.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index a6120e7..2176ada 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -117,6 +117,7 @@ ka_register_thread(int tid, bool thread_is_pmd)
         ka_pinfo->core_id = core_num;
         ovs_strlcpy(ka_pinfo->name, proc_name, sizeof ka_pinfo->name);
         ka_pinfo->healthcheck = PMD_HC_DISABLE;
+        ka_pinfo->failures = 0;
 
         hmap_insert(&ka_info->process_list, &ka_pinfo->node, hash);
 
@@ -278,6 +279,21 @@ ka_set_pmd_state_ts(unsigned core_id, enum keepalive_state state,
 }
 
 void
+ka_inc_pmd_failures(unsigned core_id)
+{
+    struct ka_process_info *pinfo;
+    int tid = ka_get_pmd_tid(core_id);  /* Thread id keys the hash lookup. */
+    ovs_mutex_lock(&ka_info->proclist_mutex);  /* Held while walking list. */
+    HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+                             &ka_info->process_list) {
+        if (pinfo->core_id == core_id) {
+            pinfo->failures++;  /* One more failure for this core's entry. */
+        }
+    }
+    ovs_mutex_unlock(&ka_info->proclist_mutex);
+}
+
+void
 ka_load_process_list(struct hmap **process_list)
 {
     if (ka_is_enabled()) {
diff --git a/lib/keepalive.h b/lib/keepalive.h
index df8768c..fb45e02 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -64,6 +64,7 @@ struct ka_process_info {
     enum pmdhealth_check healthcheck;
     enum keepalive_state core_state;
     uint64_t core_last_seen_times;
+    int failures;
     struct hmap_node node;
 };
 
@@ -127,6 +128,7 @@ void ka_disable_pmd_health_check(unsigned);
 bool ka_is_pmdhealth_check_enabled(unsigned);
 enum pmdhealth_check ka_get_pmd_health_check_state(unsigned);
 void ka_set_pmd_health_check_state(unsigned, enum pmdhealth_check);
+void ka_inc_pmd_failures(unsigned);
 
 void ka_store_pmd_id(unsigned core);
 uint32_t get_ka_interval(void);
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 5415544..c44999c 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -622,6 +622,52 @@ dpdk_failcore_cb(void *ptr_data OVS_UNUSED, const int core_id)
     }
 }
 
+static void
+dpdk_ka_handle_failure(enum keepalive_state fail_state, const int core_id,
+               const enum rte_keepalive_state core_state,
+               uint64_t last_alive)
+{
+    if (fail_state == KA_STATE_DEAD) {
+        /* If the process is in DEFUNC/UNINTERRUPTIBLE/TRACED state it is
+         * inactive and no additional health checks are needed. */
+        uint32_t tid = ka_get_pmd_tid(core_id);
+        if (process_is_active(tid)) {
+            /* Enable the PMD health check only when the PMD is in 'RUNNING'
+             * state and still doesn't respond to heartbeats.  Health checks
+             * analyze other stats because this is the penultimate state
+             * before declaring the PMD as failed. */
+            ka_enable_pmd_health_check(core_id);
+        }
+        ka_set_pmd_state_ts(core_id, KA_STATE_DEAD, last_alive);
+    }
+
+    if (fail_state == KA_STATE_GONE) {
+        int pmd_hc_state = ka_get_pmd_health_check_state(core_id);
+
+        switch (pmd_hc_state) {
+        case PMD_HC_ENABLE:
+            break;  /* Nothing is recorded in this state. */
+        case PMD_HC_DISABLE:
+            VLOG_DBG_RL(&rl, "PMD thread [%d] died, health check disabled",
+                        core_id);
+            break;
+        case PMD_HC_PROGRESS:
+            ka_set_pmd_state_ts(core_id, KA_STATE_CHECK, last_alive);
+            break;
+        /* Check done: count the failure, store 'core_state', stop checks. */
+        case PMD_HC_COMPLETE:
+            ka_inc_pmd_failures(core_id);
+            ka_set_pmd_state_ts(core_id, core_state, last_alive);
+            ka_disable_pmd_health_check(core_id);
+            break;
+
+        default:
+            VLOG_DBG_RL(&rl, "Unknown health check state %d", pmd_hc_state);
+            OVS_NOT_REACHED();
+        }
+    }
+}
+
 /*
  * This function shall be invoked periodically to write the core status and
  * last seen timestamp of the cores in to keepalive info structure.
@@ -634,11 +680,23 @@ dpdk_ka_update_core_state(void *ptr_data OVS_UNUSED, const int core_id,
     case RTE_KA_STATE_ALIVE:
     case RTE_KA_STATE_MISSING:
         ka_set_pmd_state_ts(core_id, KA_STATE_ALIVE, last_alive);
+
+        /* Health checks should be disabled when PMD is alive. */
+        if (OVS_UNLIKELY(ka_get_pmd_health_check_state(core_id) !=
+                PMD_HC_DISABLE)) {
+            ka_disable_pmd_health_check(core_id);
+        }
         break;
-    case RTE_KA_STATE_DOZING:
-    case RTE_KA_STATE_SLEEP:
     case RTE_KA_STATE_DEAD:
+        dpdk_ka_handle_failure(KA_STATE_DEAD, core_id, core_state,
+                               last_alive);
+        break;
     case RTE_KA_STATE_GONE:
+        dpdk_ka_handle_failure(KA_STATE_GONE, core_id, core_state,
+                               last_alive);
+        break;
+    case RTE_KA_STATE_DOZING:
+    case RTE_KA_STATE_SLEEP:
         ka_set_pmd_state_ts(core_id, core_state, last_alive);
         break;
     case RTE_KA_STATE_UNUSED:
-- 
2.4.11