[ovs-dev] [PATCH v6 1/8] Keepalive: Add initial keepalive configuration.

Kevin Traynor ktraynor at redhat.com
Mon Jan 22 16:13:52 UTC 2018


On 12/08/2017 12:04 PM, Bhanuprakash Bodireddy wrote:
> This commit introduces the keepalive configuration by adding
> 'keepalive' module and also helper and initialization functions
> that will be invoked by later commits.
> 
> This commit adds new ovsdb column "keepalive" that shows the status
> of the datapath threads. This is implemented for DPDK datapath and
> only status of PMD threads is reported.
> 
> Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodireddy at intel.com>
> ---
>  lib/automake.mk            |   2 +
>  lib/keepalive.c            | 147 +++++++++++++++++++++++++++++++++++++++++++++
>  lib/keepalive.h            |  86 ++++++++++++++++++++++++++
>  vswitchd/bridge.c          |   3 +
>  vswitchd/vswitch.ovsschema |   8 ++-
>  vswitchd/vswitch.xml       |  49 +++++++++++++++
>  6 files changed, 293 insertions(+), 2 deletions(-)
>  create mode 100644 lib/keepalive.c
>  create mode 100644 lib/keepalive.h
> 
> diff --git a/lib/automake.mk b/lib/automake.mk
> index effe5b5..91d65be 100644
> --- a/lib/automake.mk
> +++ b/lib/automake.mk
> @@ -110,6 +110,8 @@ lib_libopenvswitch_la_SOURCES = \
>  	lib/json.c \
>  	lib/jsonrpc.c \
>  	lib/jsonrpc.h \
> +	lib/keepalive.c \
> +	lib/keepalive.h \
>  	lib/lacp.c \
>  	lib/lacp.h \
>  	lib/latch.h \
> diff --git a/lib/keepalive.c b/lib/keepalive.c
> new file mode 100644
> index 0000000..ca8dccb
> --- /dev/null
> +++ b/lib/keepalive.c
> @@ -0,0 +1,147 @@
> +/*
> + * Copyright (c) 2017 Intel, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +
> +#include "keepalive.h"
> +#include "lib/vswitch-idl.h"
> +#include "openvswitch/vlog.h"
> +#include "seq.h"
> +#include "timeval.h"
> +
> +VLOG_DEFINE_THIS_MODULE(keepalive);
> +
> +static bool keepalive_enable = false;      /* Keepalive disabled by default. */
> +static uint32_t keepalive_timer_interval;  /* keepalive timer interval. */
> +static struct keepalive_info ka_info;
> +
> +/* Returns true if keepalive is enabled, false otherwise. */
> +bool
> +ka_is_enabled(void)
> +{
> +    return keepalive_enable;
> +}
> +
> +/* Finds the thread by 'tid' in the 'process_list' map and updates
> + * the thread state and last_seen_time stamp.  This is invoked
> + * periodically(based on keepalive-interval) as part of callback
> + * function in the context of keepalive thread.
> + */
> +static void
> +ka_set_thread_state_ts(pid_t tid, enum keepalive_state state,
> +                       uint64_t last_alive)
> +{
> +    struct ka_process_info *pinfo;
> +
> +    ovs_mutex_lock(&ka_info.proclist_mutex);
> +    HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
> +                             &ka_info.process_list) {
> +        if (pinfo->tid == tid) {
> +            pinfo->state = state;
> +            pinfo->last_seen_time = last_alive;
> +        }
> +    }
> +    ovs_mutex_unlock(&ka_info.proclist_mutex);
> +}
> +
> +/* Retrieve and return the keepalive timer interval from OVSDB. */
> +static uint32_t
> +ka_get_timer_interval(const struct smap *ovs_other_config)
> +{
> +    uint32_t ka_interval;
> +
> +    /* Timer granularity in milliseconds.
> +     * Defaults to OVS_KEEPALIVE_DEFAULT_TIMEOUT (ms) if not set. */
> +    ka_interval = smap_get_int(ovs_other_config, "keepalive-interval",
> +                               OVS_KEEPALIVE_DEFAULT_TIMEOUT);
> +
> +    VLOG_INFO("Keepalive timer interval set to %"PRIu32" (ms)\n", ka_interval);
> +    return ka_interval;
> +}
> +
> +/* Invoke periodically to update the status and last seen timestamp
> + * of the thread in the 'process_list' map. Runs in the context of
> + * keepalive thread.
> + */
> +static void
> +ka_update_thread_state(pid_t tid, const enum keepalive_state state,
> +                       uint64_t last_alive)
> +{
> +    switch (state) {
> +    case KA_STATE_ALIVE:
> +    case KA_STATE_MISSING:
> +        ka_set_thread_state_ts(tid, KA_STATE_ALIVE, last_alive);
> +        break;
> +    case KA_STATE_UNUSED:
> +    case KA_STATE_SLEEP:
> +    case KA_STATE_DEAD:
> +    case KA_STATE_GONE:
> +        ka_set_thread_state_ts(tid, state, last_alive);
> +        break;
> +    default:
> +        OVS_NOT_REACHED();
> +    }
> +}
> +
> +/* Register relay callback function. */
> +static void
> +ka_register_relay_cb(ka_relay_cb cb, void *aux)
> +{
> +    ka_info.relay_cb = cb;
> +    ka_info.relay_cb_data = aux;
> +}
> +
> +void
> +ka_init(const struct smap *ovs_other_config)
> +{
> +    if (smap_get_bool(ovs_other_config, "enable-keepalive", false)) {
> +        static struct ovsthread_once once_enable = OVSTHREAD_ONCE_INITIALIZER;
> +
> +        if (ovsthread_once_start(&once_enable)) {
> +            keepalive_enable =  true;

Nit: extra space after the '=' on the line above.

> +            VLOG_INFO("OvS Keepalive enabled.");
> +
> +            keepalive_timer_interval =
> +                ka_get_timer_interval(ovs_other_config);
> +
> +            ka_register_relay_cb(ka_update_thread_state, NULL);
> +            ovs_mutex_init(&ka_info.proclist_mutex);
> +            hmap_init(&ka_info.process_list);
> +
> +            ka_info.init_time = time_wall_msec();
> +
> +            ovsthread_once_done(&once_enable);
> +        }
> +    }
> +}
> +
> +void
> +ka_destroy(void)
> +{
> +    if (!ka_is_enabled()) {
> +       return;
> +    }
> +
> +    ovs_mutex_lock(&ka_info.proclist_mutex);
> +    struct ka_process_info *pinfo;
> +    HMAP_FOR_EACH_POP (pinfo, node, &ka_info.process_list) {
> +        free(pinfo);
> +    }
> +    ovs_mutex_unlock(&ka_info.proclist_mutex);
> +
> +    hmap_destroy(&ka_info.process_list);
> +    ovs_mutex_destroy(&ka_info.proclist_mutex);
> +}
> diff --git a/lib/keepalive.h b/lib/keepalive.h
> new file mode 100644
> index 0000000..a738daa
> --- /dev/null
> +++ b/lib/keepalive.h
> @@ -0,0 +1,86 @@
> +/*
> + * Copyright (c) 2017 Intel, Inc.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef KEEPALIVE_H
> +#define KEEPALIVE_H
> +
> +#include <stdint.h>
> +#include "openvswitch/hmap.h"
> +#include "ovs-thread.h"
> +
> +/* Default timeout set to 1000ms */
> +#define OVS_KEEPALIVE_DEFAULT_TIMEOUT 1000
> +
> +struct smap;
> +
> +/*
> + * Keepalive states with description
> + *
> + * KA_STATE_UNUSED  - Not registered to KA framework.
> + * KA_STATE_ALIVE   - Thread is alive.
> + * KA_STATE_MISSING - Thread missed first heartbeat.
> + * KA_STATE_DEAD    - Thread missed two heartbeats.
> + * KA_STATE_GONE    - Thread missed two or more heartbeats
> + *                    and is completely 'buried'.
> + * KA_STATE_SLEEP   - Thread is sleeping.
> + *
> + */

Simpler to just comment the enum entries directly.

The states are not really intuitive. According to this "gone" is worse
than "dead" - I know which I'd rather be :-)

> +enum keepalive_state {
> +    KA_STATE_UNUSED,
> +    KA_STATE_ALIVE,
> +    KA_STATE_DEAD,
> +    KA_STATE_GONE,
> +    KA_STATE_MISSING,
> +    KA_STATE_SLEEP,
> +};
> +
> +struct ka_process_info {
> +    /* Thread id of the process, retrieved using ovs_gettid(). */
> +    pid_t tid;
> +
> +    /* Core id the thread was last scheduled. */
> +    int core_id;
> +
> +    /* Last seen thread state. */
> +    enum keepalive_state state;
> +
> +    /* Last seen timestamp of the thread. */
> +    uint64_t last_seen_time;
> +    struct hmap_node node;
> +};
> +
> +typedef void (*ka_relay_cb)(int, enum keepalive_state, uint64_t);
> +
> +struct keepalive_info {
> +    /* Mutex for 'process_list'. */
> +    struct ovs_mutex proclist_mutex;
> +
> +    /* List of process/threads monitored by KA framework. */
> +    struct hmap process_list OVS_GUARDED;
> +
> +    /* Keepalive initialization time. */
> +    uint64_t init_time;
> +
> +    /* keepalive relay handler. */
> +    ka_relay_cb relay_cb;
> +    void *relay_cb_data;
> +};
> +
> +bool ka_is_enabled(void);
> +void ka_init(const struct smap *);
> +void ka_destroy(void);
> +
> +#endif /* keepalive.h */
> diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
> index 630c6fa..f70407f 100644
> --- a/vswitchd/bridge.c
> +++ b/vswitchd/bridge.c
> @@ -34,6 +34,7 @@
>  #include "hmapx.h"
>  #include "if-notifier.h"
>  #include "jsonrpc.h"
> +#include "keepalive.h"
>  #include "lacp.h"
>  #include "mac-learning.h"
>  #include "mcast-snooping.h"
> @@ -506,6 +507,7 @@ bridge_exit(bool delete_datapath)
>          bridge_destroy(br, delete_datapath);
>      }
>      ovsdb_idl_destroy(idl);
> +    ka_destroy();
>  }
>  
>  /* Looks at the list of managers in 'ovs_cfg' and extracts their remote IP
> @@ -2959,6 +2961,7 @@ bridge_run(void)
>      if (cfg) {
>          netdev_set_flow_api_enabled(&cfg->other_config);
>          dpdk_init(&cfg->other_config);
> +        ka_init(&cfg->other_config);
>      }
>  
>      /* Initialize the ofproto library.  This only needs to run once, but
> diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
> index 90e50b6..c56a64c 100644
> --- a/vswitchd/vswitch.ovsschema
> +++ b/vswitchd/vswitch.ovsschema
> @@ -1,6 +1,6 @@
>  {"name": "Open_vSwitch",
> - "version": "7.15.1",
> - "cksum": "3682332033 23608",
> + "version": "7.16.0",
> + "cksum": "3631938350 23762",
>   "tables": {
>     "Open_vSwitch": {
>       "columns": {
> @@ -30,6 +30,10 @@
>           "type": {"key": "string", "value": "string",
>                    "min": 0, "max": "unlimited"},
>           "ephemeral": true},
> +       "keepalive": {
> +         "type": {"key": "string", "value": "string", "min": 0,
> +                  "max": "unlimited"},
> +         "ephemeral": true},
>         "ovs_version": {
>           "type": {"key": {"type": "string"},
>                    "min": 0, "max": 1}},
> diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
> index c145e1a..512292a 100644
> --- a/vswitchd/vswitch.xml
> +++ b/vswitchd/vswitch.xml
> @@ -568,6 +568,55 @@
>            </p>
>          </column>
>        </group>
> +
> +      <group title="Keepalive">
> +        <p>
> +          The <code>keepalive</code> column contains key-value pairs that
> +          report health of datapath threads in Open vSwitch.  These are updated
> +          periodically (based on the keepalive-interval).
> +        </p>
> +
> +        <column name="other_config" key="enable-keepalive"
> +                type='{"type": "boolean"}'>
> +          Keepalive is disabled by default to avoid overhead in the common
> +          case when heartbeat monitoring is not useful.  Set this value to
> +          <code>true</code> to enable keepalive <ref column="keepalive"/>
> +          column or to <code>false</code> to explicitly disable it.
> +        </column>
> +
> +        <column name="other_config" key="keepalive-interval"
> +                type='{"type": "integer", "minInteger": 100}'>
> +          <p>
> +            Specifies the keepalive interval value in milliseconds.
> +          </p>
> +          <p>
> +            If not specified, this will be set to 1000 milliseconds (default
> +            value). Changing this value requires restarting the daemon.
> +          </p>
> +        </column>
> +
> +        <column name="keepalive" key="PMD_ID">
> +          <p>
> +            One such key-value pair, with <code>ID</code> replaced by the
> +            PMD thread, will exist for each active PMD thread.  The value is a
> +            comma-separated list of PMD thread status, core number and the
> +            last seen timestamp of PMD thread.  In respective order, these
> +            values are:
> +          </p>
> +
> +          <ol>
> +            <li>Status of PMD thread.  Valid status include ALIVE, MISSING,
> +            DEAD, GONE, SLEEP.</li>
> +            <li>Core id on which the PMD thread was scheduled.</li>
> +            <li>Last seen timestamp(epoch) of the PMD thread.</li>
> +          </ol>
> +
> +          <p>
> +            This is only valid for the OvS-DPDK datapath; currently only
> +            PMD thread status is implemented.
> +          </p>
> +        </column>
> +      </group>
>      </group>
>  
>      <group title="Version Reporting">
> 



More information about the dev mailing list