[ovs-dev] [RFC PATCH v3 12/18] dpif-netdev: Add additional datapath health checks.

Bhanuprakash Bodireddy bhanuprakash.bodireddy at intel.com
Sun Jun 18 19:24:25 UTC 2017


This commit adds additional datapath health checks. The checks are
enabled only on a PMD heartbeat failure: when a PMD thread misses three
successive heartbeats, additional health checks need to be performed on
that PMD thread to confirm the failure.

The datapath health is monitored periodically from the keepalive thread.
Note that the PMD health checks are performed only on the PMD threads
whose health check is enabled.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy at intel.com>
---
 lib/dpif-netdev.c | 30 +++++++++++++++++++++
 lib/keepalive.c   | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/keepalive.h   | 16 +++++++++++
 3 files changed, 127 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 93bda20..06d2e23 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -971,6 +971,35 @@ sorted_poll_thread_list(struct dp_netdev *dp,
     *n = k;
 }
 
+static void
+pmd_health_check(struct dp_netdev_pmd_thread *pmd OVS_UNUSED)
+{
+    /* Intentionally empty: no checks are performed and 'pmd' is ignored. */
+}
+
+/* Runs pmd_health_check() on each PMD whose health check is not disabled. */
+static void
+get_datapath_health(struct dp_netdev *dp)
+{
+    static struct hmap *process_list = NULL;
+    if (!process_list) {
+        ka_load_process_list(&process_list);
+        if (!process_list) {
+            return;     /* List not loaded (keepalive disabled); skip. */
+        }
+    }
+
+    struct ka_process_info *pinfo;
+    HMAP_FOR_EACH (pinfo, node, process_list) {
+        /* Check only PMD threads whose health check is enabled. */
+        if (OVS_LIKELY(pinfo->healthcheck == PMD_HC_DISABLE)) {
+            continue;
+        }
+        /* NOTE(review): confirm lookup cannot return NULL or leak a ref. */
+        pmd_health_check(dp_netdev_get_pmd(dp, pinfo->core_id));
+    }
+}
+
 static void *
 ovs_keepalive(void *f_)
 {
@@ -982,6 +1011,7 @@ ovs_keepalive(void *f_)
         int n_pmds = cmap_count(&dp->poll_threads) - 1;
         if (n_pmds > 0) {
             dispatch_heartbeats();
+            get_datapath_health(dp);
             get_ka_stats();
         }
 
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 6edb440..997bebf 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -105,6 +105,7 @@ ka_register_thread(int tid, bool thread_is_pmd)
         pinfo->tid = tid;
         pinfo->heartbeats = true;
         pinfo->core_id = core_id;
+        pinfo->healthcheck = PMD_HC_DISABLE;
 
         char *pname = get_process_name(tid);
         if (pname) {
@@ -176,6 +177,78 @@ ka_mark_pmd_thread_sleep(void)
 }
 
 void
+ka_enable_pmd_health_check(unsigned core_id)
+{
+    if (ka_is_enabled()) {
+        struct ka_process_info *pinfo;
+        int tid = ka_get_pmd_tid(core_id);
+        ovs_mutex_lock(&ka_info->proclist_mutex);
+        HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+                                 &ka_info->process_list) {
+            if ((pinfo->core_id == core_id) && (pinfo->tid == tid)) {
+                pinfo->healthcheck = PMD_HC_ENABLE;
+            }
+        }
+        ovs_mutex_unlock(&ka_info->proclist_mutex);
+    }
+}
+
+void
+ka_disable_pmd_health_check(unsigned core_id)
+{
+    if (ka_is_enabled()) {
+        struct ka_process_info *pinfo;
+        int tid = ka_get_pmd_tid(core_id);
+        ovs_mutex_lock(&ka_info->proclist_mutex);
+        HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+                                 &ka_info->process_list) {
+            if ((pinfo->core_id == core_id) && (pinfo->tid == tid)) {
+                pinfo->healthcheck = PMD_HC_DISABLE;
+            }
+        }
+        ovs_mutex_unlock(&ka_info->proclist_mutex);
+    }
+}
+
+enum pmdhealth_check
+ka_get_pmd_health_check_state(unsigned core_id)
+    OVS_REQUIRES(ka_info->proclist_mutex)
+{
+    int hc = PMD_HC_DISABLE;
+    if (ka_is_enabled()) {
+        struct ka_process_info *pinfo;
+        int tid = ka_get_pmd_tid(core_id);
+        ovs_mutex_lock(&ka_info->proclist_mutex);
+        HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+                                 &ka_info->process_list) {
+            if ((pinfo->core_id == core_id) && (pinfo->tid == tid)) {
+                hc = pinfo->healthcheck;
+            }
+        }
+        ovs_mutex_unlock(&ka_info->proclist_mutex);
+    }
+
+    return hc;
+}
+
+void
+ka_set_pmd_health_check_state(unsigned core_id, enum pmdhealth_check state)
+{
+    if (ka_is_enabled()) {
+        struct ka_process_info *pinfo;
+        int tid = ka_get_pmd_tid(core_id);
+        ovs_mutex_lock(&ka_info->proclist_mutex);
+        HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+                                 &ka_info->process_list) {
+            if ((pinfo->core_id == core_id) && (pinfo->tid == tid)) {
+                pinfo->healthcheck = state;
+            }
+        }
+        ovs_mutex_unlock(&ka_info->proclist_mutex);
+    }
+}
+
+void
 ka_set_pmd_state_ts(unsigned core_id, enum keepalive_state state,
                     uint64_t last_alive)
 {
@@ -193,6 +266,14 @@ ka_set_pmd_state_ts(unsigned core_id, enum keepalive_state state,
     ovs_mutex_unlock(&ka_info->proclist_mutex);
 }
 
+void
+ka_load_process_list(struct hmap **process_list)
+{
+    if (ka_is_enabled()) {      /* Otherwise '*process_list' is untouched. */
+        *process_list = &ka_info->process_list;  /* Address, not a copy. */
+    }
+}
+
 /* Retrieve and return the keepalive timer interval from OVSDB. */
 static uint32_t
 get_ka_timer_interval(const struct smap *ovs_other_config OVS_UNUSED)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index 356e761..8877ca6 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -40,11 +40,19 @@ enum keepalive_state {
     KA_STATE_CHECK = 7
 };
 
+enum pmdhealth_check {
+    PMD_HC_DISABLE,     /* Set at thread registration; PMD is skipped. */
+    PMD_HC_ENABLE,      /* Set by ka_enable_pmd_health_check(). */
+    PMD_HC_PROGRESS,    /* Unused in this patch; presumably check running. */
+    PMD_HC_COMPLETE     /* Unused in this patch; presumably check finished. */
+};
+
 struct ka_process_info {
     char name[16];
     int tid;
     int core_id;
     bool heartbeats;
+    enum pmdhealth_check healthcheck;
     enum keepalive_state core_state;
     uint64_t core_last_seen_times;
     struct hmap_node node;
@@ -95,6 +103,13 @@ void ka_unregister_thread(int, bool);
 void ka_mark_pmd_thread_alive(void);
 void ka_mark_pmd_thread_sleep(void);
 
+void ka_init_pmd_health_check(void);        /* No definition in this patch. */
+void ka_enable_pmd_health_check(unsigned);
+void ka_disable_pmd_health_check(unsigned);
+bool ka_is_pmdhealth_check_enabled(unsigned);   /* Likewise not defined. */
+enum pmdhealth_check ka_get_pmd_health_check_state(unsigned);
+void ka_set_pmd_health_check_state(unsigned, enum pmdhealth_check);
+
 void ka_store_pmd_id(unsigned core);
 uint32_t get_ka_interval(void);
 int get_ka_init_status(void);
@@ -103,6 +118,7 @@ int ka_alloc_portstats(unsigned, int);
 void ka_destroy_portstats(void);
 void get_ka_stats(void);
 struct smap *ka_stats_run(void);
+void ka_load_process_list(struct hmap **);
 
 void dispatch_heartbeats(void);
 #endif /* keepalive.h */
-- 
2.4.11



More information about the dev mailing list