[ovs-dev] [PATCH v3] OVN pacemaker: Add the monitor action for Master role

Russell Bryant russell at ovn.org
Mon Dec 4 13:42:09 UTC 2017


On Mon, Dec 4, 2017 at 12:29 AM,  <nusiddiq at redhat.com> wrote:
> From: Numan Siddique <nusiddiq at redhat.com>
>
> The Pacemaker resource agent periodically calls the OVN OCF's "monitor"
> action to check the status. But the OVN OCF script doesn't add the
> action "monitor" for the role "Master", because of which the pacemaker
> resource agent does not call the "monitor" action at all for the master.
> If the OVN db servers exit for some reason, this goes completely undetected
> and none of the standby nodes is promoted to master.
>
> This patch adds the monitor action for the "Master" role. Also, the monitor
> action does not check the status of ovn-northd (if manage_northd is yes).
> This patch also checks the status of ovn-northd in the monitor action
> for the "Master" role. If any of the ovsdb-servers or ovn-northd is not
> running, the monitor action will return OCF_NOT_RUNNING and this will cause
> pacemaker to restart the OVN OCF resource.
>
> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1512568
> Signed-off-by: Numan Siddique <nusiddiq at redhat.com>
> CC: Russell Bryant <russell at ovn.org>
> ---
>
> v2 -> v3
> --------
> In ovsdb_server_demote, added a check of the status of
> ovn-northd if it is running as master. v2 was not working for the
> pacemaker OVN docker bundle resource.
>
> v1 -> v2
> -----
> Reverted the change to use 'ocf_attribute_target' as this function is
> only available in pacemaker 1.1.16-12
>
>  ovn/utilities/ovndb-servers.ocf | 49 ++++++++++++++++++++++++++++++++++-------
>  1 file changed, 41 insertions(+), 8 deletions(-)
>
> diff --git a/ovn/utilities/ovndb-servers.ocf b/ovn/utilities/ovndb-servers.ocf
> index 3f3008700..389307a84 100755
> --- a/ovn/utilities/ovndb-servers.ocf
> +++ b/ovn/utilities/ovndb-servers.ocf
> @@ -120,7 +120,11 @@ ovsdb_server_metadata() {
>      <action name="stop"         timeout="20s" />
>      <action name="promote"      timeout="50s" />
>      <action name="demote"       timeout="50s" />
> -    <action name="monitor"      timeout="20s"  depth="0" interval="10s" />
> +    <action name="monitor"      timeout="20s"  depth="0" interval="30s" />

Just making sure ... did you mean to leave this third "monitor" entry
here?  I don't really know how this works, but it looked like the next
two would replace this one.

> +    <action name="monitor"      timeout="20s"  depth="0" interval="10s"
> +     role="Master" />
> +    <action name="monitor"      timeout="20s"  depth="0" interval="30s"
> +     role="Slave"/>
>      <action name="meta-data"    timeout="5s" />
>      <action name="validate-all" timeout="20s" />
>    </actions>
> @@ -247,7 +251,7 @@ ovsdb_server_master_update() {
>  }
>
>  ovsdb_server_monitor() {
> -    ovsdb_server_check_status
> +    ovsdb_server_check_status $@
>      rc=$?
>
>      ovsdb_server_master_update $rc
> @@ -262,8 +266,21 @@ ovsdb_server_check_status() {
>          return $OCF_SUCCESS
>      fi
>
> +    check_northd="no"
> +    if [ "$MANAGE_NORTHD" == "yes" ] && [ "$1" != "ignore_northd" ]; then
> +        check_northd="yes"
> +    fi
> +
>      if [[ $sb_status == "running/active" && $nb_status == "running/active" ]]; then
> -        return $OCF_RUNNING_MASTER
> +        if [ "$check_northd" == "yes" ]; then
> +            # Verify if ovn-northd is running or not.
> +            ${OVN_CTL} status_northd | grep "ovn-northd is running"

Is the grep needed?  Can you just rely on the exit code of ovn-ctl?
This script will fail if the output of ovn-ctl is changed in the
future.

> +            if [ "$?" == "0" ] ; then
> +                return $OCF_RUNNING_MASTER
> +            fi
> +        else
> +            return $OCF_RUNNING_MASTER
> +        fi
>      fi
>
>      # TODO: What about service running but not in either state above?
> @@ -317,8 +334,13 @@ ovsdb_server_start() {
>      $@ start_ovsdb
>
>      while [ 1 = 1 ]; do
> -        # It is important that we don't return until we're in a functional state
> -        ovsdb_server_monitor
> +        # It is important that we don't return until we're in a functional
> +        # state. When checking the status of the ovsdb-server's ignore northd.
> +        # It is possible that when the resource is restarted ovsdb-server's
> +        # can be started as masters and ovn-northd would not have been started.
> +        # ovn-northd will be started once a node is promoted to master and
> +        # 'manage_northd' is set to yes.
> +        ovsdb_server_monitor ignore_northd
>          rc=$?
>          case $rc in
>              $OCF_SUCCESS)        return $rc;;
> @@ -350,7 +372,7 @@ ovsdb_server_stop() {
>          ${OVN_CTL} --ovn-manage-ovsdb=no stop_northd
>      fi
>
> -    ovsdb_server_check_status
> +    ovsdb_server_check_status ignore_northd
>      case $? in
>          $OCF_NOT_RUNNING)    return ${OCF_SUCCESS};;
>      esac
> @@ -360,7 +382,7 @@ ovsdb_server_stop() {
>
>      while [ 1 = 1 ]; do
>          # It is important that we don't return until we're stopped
> -        ovsdb_server_check_status
> +        ovsdb_server_check_status ignore_northd
>          rc=$?
>          case $rc in
>          $OCF_SUCCESS)
> @@ -381,7 +403,7 @@ ovsdb_server_stop() {
>  }
>
>  ovsdb_server_promote() {
> -    ovsdb_server_check_status
> +    ovsdb_server_check_status ignore_northd
>      rc=$?
>      case $rc in
>          ${OCF_SUCCESS}) ;;
> @@ -395,6 +417,11 @@ ovsdb_server_promote() {
>      ${OVN_CTL} promote_ovnnb
>      ${OVN_CTL} promote_ovnsb
>
> +    if [ "$MANAGE_NORTHD" = "yes" ]; then
> +        # Startup ovn-northd service
> +        ${OVN_CTL} --ovn-manage-ovsdb=no start_northd
> +    fi
> +
>      ocf_log debug "ovndb_servers: Promoting $host_name as the master"
>      # Record ourselves so that the agent has a better chance of doing
>      # the right thing at startup
> @@ -404,6 +431,8 @@ ovsdb_server_promote() {
>  }
>
>  ovsdb_server_demote() {
> +    # While demoting, check the status of ovn_northd.
> +    # In case ovn_northd is not running, we should return OCF_NOT_RUNNING.
>      ovsdb_server_check_status
>      if [ $? = $OCF_NOT_RUNNING ]; then
>          return $OCF_NOT_RUNNING
> @@ -452,6 +481,10 @@ ovsdb_server_demote() {
>          ${OVN_CTL} demote_ovnsb --db-sb-sync-from-addr=${INVALID_IP_ADDRESS}
>      fi
>
> +    if [ "$MANAGE_NORTHD" = "yes" ]; then
> +        # Stop ovn-northd service
> +        ${OVN_CTL} --ovn-manage-ovsdb=no stop_northd
> +    fi
>      ovsdb_server_master_update $OCF_SUCCESS
>      return $OCF_SUCCESS
>  }
> --
> 2.14.3
>



-- 
Russell Bryant


More information about the dev mailing list