[ovs-dev] [PATCH v4 2/7] Keepalive: Add initial keepalive support.

Bhanuprakash Bodireddy bhanuprakash.bodireddy at intel.com
Tue Aug 22 09:41:04 UTC 2017


This commit introduces the initial keepalive support by adding
'keepalive' module and also helper and initialization functions
that will be invoked by later commits.

This commit adds new ovsdb column "keepalive" that shows the status
of the datapath threads. This is implemented for DPDK datapath and
only status of PMD threads is reported.

For eg:
  To enable keepalive feature.
  'ovs-vsctl --no-wait set Open_vSwitch . other_config:enable-keepalive=true'

  To set timer interval of 5000ms for monitoring packet processing cores.
  'ovs-vsctl --no-wait set Open_vSwitch . \
     other_config:keepalive-interval="5000"

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy at intel.com>
---
 lib/automake.mk            |   2 +
 lib/keepalive.c            | 183 +++++++++++++++++++++++++++++++++++++++++++++
 lib/keepalive.h            |  87 +++++++++++++++++++++
 vswitchd/bridge.c          |   3 +
 vswitchd/vswitch.ovsschema |   8 +-
 vswitchd/vswitch.xml       |  49 ++++++++++++
 6 files changed, 330 insertions(+), 2 deletions(-)
 create mode 100644 lib/keepalive.c
 create mode 100644 lib/keepalive.h

diff --git a/lib/automake.mk b/lib/automake.mk
index 2415f4c..0d99f0a 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -110,6 +110,8 @@ lib_libopenvswitch_la_SOURCES = \
 	lib/json.c \
 	lib/jsonrpc.c \
 	lib/jsonrpc.h \
+	lib/keepalive.c \
+	lib/keepalive.h \
 	lib/lacp.c \
 	lib/lacp.h \
 	lib/latch.h \
diff --git a/lib/keepalive.c b/lib/keepalive.c
new file mode 100644
index 0000000..ac73a42
--- /dev/null
+++ b/lib/keepalive.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <unistd.h>
+
+#include "keepalive.h"
+#include "lib/vswitch-idl.h"
+#include "openvswitch/vlog.h"
+#include "timeval.h"
+
+VLOG_DEFINE_THIS_MODULE(keepalive);
+
+static bool keepalive_enable = false;    /* Keepalive disabled by default */
+static bool ka_init_status = ka_init_failure; /* Keepalive initialization */
+static uint32_t keepalive_timer_interval;     /* keepalive timer interval */
+static struct keepalive_info *ka_info = NULL;
+
+inline bool
+ka_is_enabled(void)
+{
+    return keepalive_enable;
+}
+
+inline int
+ka_get_pmd_tid(unsigned core_idx)
+{
+    if (ka_is_enabled()) {
+        return ka_info->thread_id[core_idx];
+    }
+
+    return -EINVAL;
+}
+
+void
+ka_set_pmd_state_ts(unsigned core_id, enum keepalive_state state,
+                    uint64_t last_alive)
+{
+    struct ka_process_info *pinfo;
+    int tid = ka_get_pmd_tid(core_id);
+
+    ovs_mutex_lock(&ka_info->proclist_mutex);
+    HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+                             &ka_info->process_list) {
+        if ((pinfo->core_id == core_id) && (pinfo->tid == tid)) {
+            pinfo->core_state = state;
+            pinfo->core_last_seen_times = last_alive;
+        }
+    }
+    ovs_mutex_unlock(&ka_info->proclist_mutex);
+}
+
+/* Retrieve and return the keepalive timer interval from OVSDB. */
+static uint32_t
+ka_get_timer_interval(const struct smap *ovs_other_config OVS_UNUSED)
+{
+#define OVS_KEEPALIVE_TIMEOUT 1000    /* Default timeout set to 1000ms */
+    uint32_t ka_interval;
+
+    /* Timer granularity in milliseconds
+     * Defaults to OVS_KEEPALIVE_TIMEOUT(ms) if not set */
+    ka_interval = smap_get_int(ovs_other_config, "keepalive-interval",
+                  OVS_KEEPALIVE_TIMEOUT);
+
+    VLOG_INFO("Keepalive timer interval set to %"PRIu32" (ms)\n", ka_interval);
+    return ka_interval;
+}
+
+/*
+ * This function shall be invoked periodically to write the core status and
+ * last seen timestamp of the cores in to keepalive info structure.
+ */
+void
+ka_update_core_state(const int core_id, const enum keepalive_state core_state,
+                     uint64_t last_alive, void *ptr_data OVS_UNUSED)
+{
+    switch (core_state) {
+    case KA_STATE_ALIVE:
+    case KA_STATE_MISSING:
+        ka_set_pmd_state_ts(core_id, KA_STATE_ALIVE, last_alive);
+        break;
+    case KA_STATE_UNUSED:
+    case KA_STATE_DOZING:
+    case KA_STATE_SLEEP:
+    case KA_STATE_DEAD:
+    case KA_STATE_GONE:
+        ka_set_pmd_state_ts(core_id, core_state, last_alive);
+        break;
+    }
+}
+
+static void
+keepalive_register_relay_cb(ka_relay_cb cb, void *aux)
+{
+    ka_info->relay_cb = cb;
+    ka_info->relay_cb_data = aux;
+}
+
+static struct keepalive_info *
+keepalive_info_create(void)
+{
+    struct keepalive_info *ka_info;
+    ka_info = xzalloc(sizeof *ka_info);
+    if (ka_info != NULL) {
+        ka_info->init_time = time_wall_msec();
+    }
+
+    return ka_info;
+}
+
+static int
+ka_init__(void)
+{
+    if ((ka_info = keepalive_info_create()) == NULL) {
+        VLOG_ERR("OvS Keepalive - initialization failed.");
+        return -1;
+    }
+
+    keepalive_register_relay_cb(ka_update_core_state, NULL);
+    ovs_mutex_init(&ka_info->proclist_mutex);
+    hmap_init(&ka_info->process_list);
+
+    return 0;
+}
+
+void
+ka_init(const struct smap *ovs_other_config)
+{
+    if (smap_get_bool(ovs_other_config, "enable-keepalive", false)) {
+        static struct ovsthread_once once_enable = OVSTHREAD_ONCE_INITIALIZER;
+
+        if (ovsthread_once_start(&once_enable)) {
+            keepalive_enable = true;
+            VLOG_INFO("OvS Keepalive enabled.");
+
+            keepalive_timer_interval =
+                ka_get_timer_interval(ovs_other_config);
+
+            int err = ka_init__();
+            if (!err) {
+                VLOG_INFO("OvS Keepalive - initialized.");
+                ka_init_status = ka_init_success;
+            }
+
+            ovsthread_once_done(&once_enable);
+        }
+    }
+}
+
+void
+ka_destroy(void)
+{
+    if (ka_info) {
+        struct ka_process_info *pinfo;
+
+        ovs_mutex_lock(&ka_info->proclist_mutex);
+        HMAP_FOR_EACH_POP (pinfo, node, &ka_info->process_list) {
+            free(pinfo);
+        }
+        ovs_mutex_unlock(&ka_info->proclist_mutex);
+
+        hmap_destroy(&ka_info->process_list);
+        ovs_mutex_destroy(&ka_info->proclist_mutex);
+
+        free(ka_info);
+    }
+}
diff --git a/lib/keepalive.h b/lib/keepalive.h
new file mode 100644
index 0000000..d133398
--- /dev/null
+++ b/lib/keepalive.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2016 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef KEEPALIVE_H
+#define KEEPALIVE_H
+
+#include <stdint.h>
+#include "openvswitch/hmap.h"
+#include "ovs-thread.h"
+
+#define KA_DP_MAXCORES 128
+
+struct smap;
+
+enum keepalive_state {
+    KA_STATE_UNUSED = 0,
+    KA_STATE_ALIVE = 1,
+    KA_STATE_MISSING = 4,
+    KA_STATE_DEAD = 2,
+    KA_STATE_GONE = 3,
+    KA_STATE_DOZING = 5,
+    KA_STATE_SLEEP = 6,
+};
+
+typedef void (*ka_relay_cb)(int, enum keepalive_state, uint64_t, void *);
+
+struct ka_process_info {
+    int tid;
+    int core_id;
+    enum keepalive_state core_state;
+    uint64_t core_last_seen_times;
+    struct hmap_node node;
+};
+
+struct keepalive_info {
+    /* Mutex for 'process_list'. */
+    struct ovs_mutex proclist_mutex;
+
+    /* List of process/threads monitored by KA framework. */
+    struct hmap process_list OVS_GUARDED;
+
+    /* Store Datapath threads 'tid'.
+     * In case of DPDK there can be max of KA_DP_MAXCORES threads. */
+    pid_t thread_id[KA_DP_MAXCORES];
+
+    /* Datapath cores state. */
+    enum keepalive_state state_flags[KA_DP_MAXCORES];
+
+    uint64_t init_time;
+    /* Last seen timestamps. */
+    uint64_t last_alive[KA_DP_MAXCORES];
+
+    /* Active cores (non-zero if the core should be monitored). */
+    uint8_t active_cores[KA_DP_MAXCORES];
+
+    /** Relay handler. */
+    ka_relay_cb relay_cb;
+    void *relay_cb_data;
+};
+
+enum keepalive_status {
+    ka_init_failure = 0,
+    ka_init_success
+};
+
+void ka_init(const struct smap *);
+void ka_destroy(void);
+void ka_set_pmd_state_ts(unsigned, enum keepalive_state, uint64_t);
+void ka_update_core_state(const int, const enum keepalive_state,
+                          uint64_t, void *);
+
+int ka_get_pmd_tid(unsigned core);
+bool ka_is_enabled(void);
+#endif /* keepalive.h */
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index a8cbae7..aaa8d9b 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -34,6 +34,7 @@
 #include "hmapx.h"
 #include "if-notifier.h"
 #include "jsonrpc.h"
+#include "keepalive.h"
 #include "lacp.h"
 #include "mac-learning.h"
 #include "mcast-snooping.h"
@@ -506,6 +507,7 @@ bridge_exit(bool delete_datapath)
         bridge_destroy(br, delete_datapath);
     }
     ovsdb_idl_destroy(idl);
+    ka_destroy();
 }
 
 /* Looks at the list of managers in 'ovs_cfg' and extracts their remote IP
@@ -2953,6 +2955,7 @@ bridge_run(void)
     if (cfg) {
         netdev_set_flow_api_enabled(&cfg->other_config);
         dpdk_init(&cfg->other_config);
+        ka_init(&cfg->other_config);
     }
 
     /* Initialize the ofproto library.  This only needs to run once, but
diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
index 90e50b6..c56a64c 100644
--- a/vswitchd/vswitch.ovsschema
+++ b/vswitchd/vswitch.ovsschema
@@ -1,6 +1,6 @@
 {"name": "Open_vSwitch",
- "version": "7.15.1",
- "cksum": "3682332033 23608",
+ "version": "7.16.0",
+ "cksum": "3631938350 23762",
  "tables": {
    "Open_vSwitch": {
      "columns": {
@@ -30,6 +30,10 @@
          "type": {"key": "string", "value": "string",
                   "min": 0, "max": "unlimited"},
          "ephemeral": true},
+       "keepalive": {
+         "type": {"key": "string", "value": "string", "min": 0,
+                  "max": "unlimited"},
+         "ephemeral": true},
        "ovs_version": {
          "type": {"key": {"type": "string"},
                   "min": 0, "max": 1}},
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 074535b..4302fd5 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -568,6 +568,55 @@
           </p>
         </column>
       </group>
+
+      <group title="Keepalive">
+        <p>
+          The <code>keepalive</code> column contains key-value pairs that
+          report health of datapath threads in Open vSwitch.  These are updated
+          periodically (based on the keepalive-interval).
+        </p>
+
+        <column name="other_config" key="enable-keepalive"
+                type='{"type": "boolean"}'>
+          Keepalive is disabled by default to avoid overhead in the common
+          case when heartbeat monitoring is not useful.  Set this value to
+          <code>true</code> to enable keepalive <ref column="keepalive"/>
+          column or to <code>false</code> to explicitly disable it.
+        </column>
+
+        <column name="other_config" key="keepalive-interval"
+                type='{"type": "integer", "minInteger": 1}'>
+          <p>
+            Specifies the keepalive interval value.
+          </p>
+          <p>
+            If not specified, this will be set to 1000 milliseconds (default
+            value). Changing this value requires restarting the daemon.
+          </p>
+        </column>
+
+        <column name="keepalive" key="PMD_ID">
+          <p>
+            One such key-value pair, with <code>ID</code> replaced by the
+            PMD thread, will exist for each active PMD thread.  The value is a
+            comma-separated list of PMD thread status, core number and the
+            last seen timestamp of PMD thread. In respective order, these
+            values are:
+          </p>
+
+          <ol>
+            <li>Status of PMD thread.  Valid values include ALIVE, MISSING,
+            DEAD, GONE, DOZING, SLEEPING.</li>
+            <li>Core id of PMD thread.</li>
+            <li>Last seen timestamp(epoch) of the PMD core.</li>
+          </ol>
+
+          <p>
+            This is only valid for OvS-DPDK Datapath and PMD threads status
+            is implemented currently.
+          </p>
+        </column>
+      </group>
     </group>
 
     <group title="Version Reporting">
-- 
2.4.11



More information about the dev mailing list