openstack-resource-agents/ocf/nova-compute-wait
Andrew Beekhof 5b5c080a0b Ensure nova-compute unfences itself after starting
Change-Id: I687d01b346a45b0df96b66b767017356b0cf63c2
2016-11-22 21:07:37 +11:00

346 lines
9.3 KiB
Bash

#!/bin/sh
#
#
# nova-compute-wait agent manages compute daemons.
#
# Copyright (c) 2015
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
###
# Locate and load the OCF shell function library. OCF_FUNCTIONS_DIR is only
# defaulted (from OCF_ROOT) when the caller has not already set it.
# The expansions are quoted so a path containing whitespace or glob
# characters is neither split nor expanded.
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
###
# The requested action defaults to the first command-line argument.
: "${__OCF_ACTION=$1}"
#######################################################################
# Print the agent's OCF meta-data (XML) on stdout.
# Every <parameter> carries a <content> element; the "username" parameter
# previously lacked one, unlike all the other parameters.
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="nova-compute-wait" version="1.0">
<version>1.0</version>
<longdesc lang="en">
OpenStack Nova Compute Server.
</longdesc>
<shortdesc lang="en">OpenStack Nova Compute Server</shortdesc>
<parameters>
<parameter name="auth_url" unique="0" required="1">
<longdesc lang="en">
Deprecated option not in use
</longdesc>
<shortdesc lang="en">Deprecated</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="username" unique="0" required="1">
<longdesc lang="en">
Deprecated option not in use
</longdesc>
<shortdesc lang="en">Deprecated</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="password" unique="0" required="1">
<longdesc lang="en">
Deprecated option not in use
</longdesc>
<shortdesc lang="en">Deprecated</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="tenant_name" unique="0" required="1">
<longdesc lang="en">
Deprecated option not in use
</longdesc>
<shortdesc lang="en">Deprecated</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="domain" unique="0" required="0">
<longdesc lang="en">
DNS domain in which hosts live, useful when the cluster uses short names and nova uses FQDN
</longdesc>
<shortdesc lang="en">DNS domain</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="endpoint_type" unique="0" required="0">
<longdesc lang="en">
Deprecated option not in use
</longdesc>
<shortdesc lang="en">Deprecated</shortdesc>
<content type="string" default="" />
</parameter>
<parameter name="no_shared_storage" unique="0" required="0">
<longdesc lang="en">
Deprecated option not in use
</longdesc>
<shortdesc lang="en">Deprecated</shortdesc>
<content type="boolean" default="0" />
</parameter>
<parameter name="evacuation_delay" unique="0" required="0">
<longdesc lang="en">
How long to wait for nova to finish evacuating instances elsewhere
before starting nova-compute. Only used when the agent detects
evacuations might be in progress.
You may need to increase the start timeout when increasing this value.
</longdesc>
<shortdesc lang="en">Delay to allow evacuations time to complete</shortdesc>
<content type="integer" default="120" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="600" />
<action name="stop" timeout="300" />
<action name="monitor" timeout="20" interval="10" depth="0"/>
<action name="validate-all" timeout="20" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
#######################################################################
# SIGTERM is intentionally not fatal for this agent: the handler only logs,
# leaving it to lrmd to make sure the process really goes away.
sigterm_handler() {
    ocf_log info "They use TERM to bring us down. No such luck."
}
trap sigterm_handler TERM
# Print a short usage summary (script name plus supported actions) on stdout.
nova_usage() {
    printf '%s\n' \
        "usage: $0 {start|stop|monitor|validate-all|meta-data}" \
        "Expects to have a fully populated OCF RA-compliant environment set."
}
# Print the current value of the "evacuate" node attribute recorded for
# NOVA_HOST (quotes stripped). Prints nothing when attrd_updater produces
# no output.
nova_evacuate_state() {
    attrd_updater -p -n evacuate -N "${NOVA_HOST}" \
        | sed -e 's/.*value=//' \
        | tr -d '"'
}

# Start action.
#   1. Writes the systemd unfence drop-in (build_unfence_overlay).
#   2. If the "evacuate" attribute has a value, waits until it reads "no" and
#      then pauses OCF_RESKEY_evacuation_delay to let evacuations finish.
#      An empty value means the host was never fenced: no waiting at all.
#   3. Creates the state file that nova_monitor looks for.
# Globals:  NOVA_HOST, OCF_RESKEY_evacuation_delay, statefile (read);
#           state (written)
# Returns:  OCF_SUCCESS, or OCF_ERR_GENERIC when the state file cannot be
#           created.
nova_start() {
    build_unfence_overlay

    state=$(nova_evacuate_state)
    if [ -n "$state" ]; then
        # Poll every 5s. Sleeping *before* re-querying avoids both an
        # immediate duplicate query and a useless 5s nap once the state
        # has already turned to "no".
        while [ "x$state" != "xno" ]; do
            ocf_log info "Waiting for pending evacuations from ${NOVA_HOST}"
            sleep 5
            state=$(nova_evacuate_state)
        done

        # "no" may have been set only a moment ago; give nova some slack.
        ocf_log info "Pausing to give evacuations from ${NOVA_HOST} time to complete"
        sleep "${OCF_RESKEY_evacuation_delay}"
    fi

    # Without the state file monitor would report "not running", so a
    # failure here must not be reported as a successful start.
    if ! touch "$statefile"; then
        ocf_exit_reason "Unable to create state file: $statefile"
        return $OCF_ERR_GENERIC
    fi
    return $OCF_SUCCESS
}
# Stop action: drop the state file (a missing file is not an error because
# of "rm -f") and report success.
nova_stop() {
    rm -f "$statefile"
    return ${OCF_SUCCESS}
}
# Monitor action: the resource counts as running exactly when the state
# file exists as a regular file.
# Returns: OCF_SUCCESS when present, OCF_NOT_RUNNING otherwise.
nova_monitor() {
    if [ -f "$statefile" ]; then
        return ${OCF_SUCCESS}
    fi
    return ${OCF_NOT_RUNNING}
}
# Notify action: nothing to do, always reports success.
nova_notify() {
    return ${OCF_SUCCESS}
}
# Assemble the fence_compute command line from the OCF_RESKEY_* parameters
# and write it as an ExecStartPost drop-in for openstack-nova-compute.service
# under /run/systemd/system.
#
# When auth_url is not configured on this resource, the credentials are
# borrowed from the first stonith device registered for NOVA_HOST whose
# "pcs stonith show" output mentions fence_compute.
#
# Globals:  NOVA_HOST, OCF_RESKEY_* (read); fence_options, candidates,
#           candidate, fence_auth (written); OCF_RESKEY_* may be exported.
# Exits:    OCF_ERR_CONFIGURED when auth_url, username, password or
#           tenant_name is missing, or endpoint_type is not a known value.
build_unfence_overlay() {
    fence_options=""

    if [ -z "${OCF_RESKEY_auth_url}" ]; then
        candidates=$(/usr/sbin/stonith_admin -l "${NOVA_HOST}")
        # Intentionally unquoted: one word per listed token.
        for candidate in ${candidates}; do
            # Probe *this* candidate. The original probed an undefined
            # variable, which made pcs describe every device and so matched
            # on the first candidate regardless of its type. Tokens that
            # are not device names simply fail the probe and are skipped.
            if pcs stonith show "$candidate" 2>/dev/null | grep -q fence_compute; then
                ocf_log info "Unfencing nova based on: $candidate"
                # Turn "key-name=value" attributes into OCF_RESKEY_key_name
                # assignments. passwd/login are renamed to the
                # password/username names checked below.
                # NOTE(review): "s/-/_/g" also rewrites dashes inside values
                # (e.g. host names in auth-url) - confirm and tighten.
                fence_auth=$(pcs stonith show "$candidate" | grep Attributes: | sed -e s/Attributes:// -e s/-/_/g -e 's/[^ ]\+=/OCF_RESKEY_\0/g' -e s/passwd/password/g -e 's/OCF_RESKEY_login=/OCF_RESKEY_username=/g')
                # NOTE(review): eval on text parsed from pcs output; values
                # containing shell metacharacters would be interpreted.
                eval "export $fence_auth"
                break
            fi
        done
    fi

    # Copied from NovaEvacuate
    if [ -z "${OCF_RESKEY_auth_url}" ]; then
        ocf_exit_reason "auth_url not configured"
        exit $OCF_ERR_CONFIGURED
    fi
    fence_options="${fence_options} -k ${OCF_RESKEY_auth_url}"

    if [ -z "${OCF_RESKEY_username}" ]; then
        ocf_exit_reason "username not configured"
        exit $OCF_ERR_CONFIGURED
    fi
    fence_options="${fence_options} -l ${OCF_RESKEY_username}"

    if [ -z "${OCF_RESKEY_password}" ]; then
        ocf_exit_reason "password not configured"
        exit $OCF_ERR_CONFIGURED
    fi
    fence_options="${fence_options} -p ${OCF_RESKEY_password}"

    if [ -z "${OCF_RESKEY_tenant_name}" ]; then
        ocf_exit_reason "tenant_name not configured"
        exit $OCF_ERR_CONFIGURED
    fi
    fence_options="${fence_options} -t ${OCF_RESKEY_tenant_name}"

    # Optional settings are appended only when present.
    if [ -n "${OCF_RESKEY_domain}" ]; then
        fence_options="${fence_options} -d ${OCF_RESKEY_domain}"
    fi

    if [ -n "${OCF_RESKEY_region_name}" ]; then
        fence_options="${fence_options} --region-name ${OCF_RESKEY_region_name}"
    fi

    if [ -n "${OCF_RESKEY_insecure}" ]; then
        if ocf_is_true "${OCF_RESKEY_insecure}"; then
            fence_options="${fence_options} --insecure"
        fi
    fi

    if [ -n "${OCF_RESKEY_no_shared_storage}" ]; then
        if ocf_is_true "${OCF_RESKEY_no_shared_storage}"; then
            fence_options="${fence_options} --no-shared-storage"
        fi
    fi

    if [ -n "${OCF_RESKEY_endpoint_type}" ]; then
        case ${OCF_RESKEY_endpoint_type} in
            adminURL|publicURL|internalURL)
                ;;
            *)
                ocf_exit_reason "endpoint_type ${OCF_RESKEY_endpoint_type}" \
                    "not valid. Use adminURL or publicURL or internalURL"
                exit $OCF_ERR_CONFIGURED
                ;;
        esac
        fence_options="${fence_options} -e ${OCF_RESKEY_endpoint_type}"
    fi

    # Drop-in that runs "fence_compute -o on" for this host after the
    # nova-compute service has started.
    mkdir -p /run/systemd/system/openstack-nova-compute.service.d
    cat <<EOF >/run/systemd/system/openstack-nova-compute.service.d/unfence-20.conf
[Service]
ExecStartPost=/sbin/fence_compute ${fence_options} -o on -n ${NOVA_HOST}
EOF
}
# Check the local prerequisites and work out the host name nova uses.
#   - crudini, nova-compute and fence_compute must pass check_binary.
#   - /etc/nova/nova.conf must exist (exits OCF_ERR_CONFIGURED otherwise).
#   - The directory holding the state file must be writable.
#   - NOVA_HOST is taken from "host" in [DEFAULT] of nova.conf; when that
#     lookup fails or yields an empty value it falls back to the node name,
#     combined with OCF_RESKEY_domain when a domain is configured.
# Globals:  statefile, OCF_RESKEY_domain (read);
#           NOVA_HOST, state_dir, short_host, rc (written)
# Returns:  OCF_SUCCESS, or OCF_ERR_ARGS for an unusable state directory.
nova_validate() {
    rc=$OCF_SUCCESS

    check_binary crudini
    check_binary nova-compute
    check_binary fence_compute

    if [ ! -f /etc/nova/nova.conf ]; then
        ocf_exit_reason "/etc/nova/nova.conf not found"
        exit $OCF_ERR_CONFIGURED
    fi

    # Is the state directory writable? Probe with a file named after our PID.
    state_dir=$(dirname "$statefile")
    if ! touch "$state_dir/$$"; then
        ocf_exit_reason "Invalid state directory: $state_dir"
        return $OCF_ERR_ARGS
    fi
    rm -f "$state_dir/$$"

    NOVA_HOST=$(crudini --get /etc/nova/nova.conf DEFAULT host 2>/dev/null)
    # An empty "host =" entry is as unusable as a missing one, so both take
    # the fallback path.
    if [ $? != 0 ] || [ -z "${NOVA_HOST}" ]; then
        if [ -n "${OCF_RESKEY_domain}" ]; then
            short_host=$(uname -n | awk -F. '{print $1}')
            NOVA_HOST=${short_host}.${OCF_RESKEY_domain}
        else
            NOVA_HOST=$(uname -n)
        fi
    fi

    return $rc
}
# Marker file whose presence means "started" for this resource instance.
statefile="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.active"
# Default pause handed to sleep(1) once evacuations are reported finished.
: "${OCF_RESKEY_evacuation_delay=120}"

# Informational actions are answered before any validation takes place.
case $__OCF_ACTION in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        nova_usage
        exit $OCF_SUCCESS
        ;;
esac

# start and monitor only proceed when validation succeeded; previously the
# status returned by nova_validate was discarded, so an unusable state
# directory went unreported. On failure, the validation status becomes the
# exit code.
case $__OCF_ACTION in
    start)        nova_validate && nova_start;;
    stop)         nova_stop;;
    monitor)      nova_validate && nova_monitor;;
    notify)       nova_notify;;
    # NOTE(review): validate-all reports success without running
    # nova_validate - confirm whether that is intended.
    validate-all) exit $OCF_SUCCESS;;
    *)
        nova_usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc