08b85da715
Neutron already takes care of the HA for dhcp agents. See neutron setting `dhcp_agents_per_network` in `neutron.conf`. Having the OCF script call dhcp replication function of neutron-ha-tool would mean that the tool will try to plug each network to each DHCP agent, which contradicts neutron's settings. Change-Id: I87d9f7010092178c1677e14456b5e2606e5830dc
330 lines
10 KiB
Bash
330 lines
10 KiB
Bash
#!/bin/sh
#
#
# OpenStack HA tool for Neutron (neutron-ha-tool)
#
# This resource agent wraps the neutron-ha-tool Python script.
# It can be used to monitor neutron for the availability of the
# l3-agents and migrate routers away from agents that are
# currently offline.
# The neutron-ha-tool was originally part of the openstack-network
# cookbook for Chef. However, as of Icehouse it was dropped
# from upstream, and it is now maintained here:
#
# https://github.com/SUSE-Cloud/cookbook-openstack-network/blob/neutron-ha-tool-maintenance/files/default/neutron-ha-tool.py
#
# You can see a brief explanation of how this RA works in this
# video:
#
# https://youtu.be/vBZgtHgSdOY?t=33m39s
#
# Authors: Ralf Haferkamp
# Mainly inspired by the Neutron L3 resource agent written by Emilien Macchi
#
# Support: openstack@lists.openstack.org
# License: Apache Software License (ASL) 2.0
#
#
# See usage() function below for more details ...
#
# OCF instance parameters:
# OCF_RESKEY_binary
# OCF_RESKEY_os_auth_url
# OCF_RESKEY_os_region_name
# OCF_RESKEY_os_username
# OCF_RESKEY_os_password
# OCF_RESKEY_os_tenant_name
# OCF_RESKEY_os_insecure
# OCF_RESKEY_os_cacert
#######################################################################
# Initialization:

# Locate and load the OCF shell function library. The directory is only
# derived from OCF_ROOT when OCF_FUNCTIONS_DIR is not already set.
# NOTE(review): ocf_log, ocf_is_true, check_binary and the OCF_* return
# codes used further down are not defined in this file; presumably they
# come from ocf-shellfuncs.
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"

#######################################################################

# Fill in some defaults if no values are specified

OCF_RESKEY_binary_default="neutron-ha-tool"
OCF_RESKEY_os_auth_url_default="http://localhost:5000/v2"
OCF_RESKEY_os_region_name_default=""
OCF_RESKEY_os_username_default="admin"
OCF_RESKEY_os_password_default=""
OCF_RESKEY_os_tenant_name_default="admin"
OCF_RESKEY_os_insecure_default="0"
OCF_RESKEY_os_cacert_default=""

# "${VAR=default}" assigns only when VAR is unset, so a parameter that was
# explicitly set to the empty string is left alone. The expansions are
# quoted so the resulting value is never word-split or glob-expanded.
: "${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}"
: "${OCF_RESKEY_os_auth_url=${OCF_RESKEY_os_auth_url_default}}"
: "${OCF_RESKEY_os_region_name=${OCF_RESKEY_os_region_name_default}}"
: "${OCF_RESKEY_os_tenant_name=${OCF_RESKEY_os_tenant_name_default}}"
: "${OCF_RESKEY_os_username=${OCF_RESKEY_os_username_default}}"
: "${OCF_RESKEY_os_password=${OCF_RESKEY_os_password_default}}"
: "${OCF_RESKEY_os_insecure=${OCF_RESKEY_os_insecure_default}}"
: "${OCF_RESKEY_os_cacert=${OCF_RESKEY_os_cacert_default}}"

#######################################################################

# Print a short help text describing the supported actions to stdout.
# $0 is expanded inside the here-document, so the text shows the name the
# agent was invoked as.
usage() {
    cat <<UEND
usage: $0 (start|stop|validate-all|meta-data|status|monitor)

$0 manages the Neutron HA tool (neutron-ha-tool) as an HA resource

The 'start' operation triggers a migration of all routers on offline
l3-agents to l3-agents that are actually online.
The 'stop' is basically noop.
The 'validate-all' operation reports whether the parameters are valid.
The 'meta-data' operation reports this RA's meta-data information.
The 'status' operation reports whether the networking service is running.
The 'monitor' operation reports whether there are some routers assigned
to l3-agents that are currently offline.

UEND
}

# Print the OCF resource agent meta-data XML to stdout.
# The default="..." attributes are filled in from the
# OCF_RESKEY_*_default variables set at the top of this file.
# os_cacert holds a file name, so its content type is "string".
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="neutron-ha-tool">
<version>1.0</version>

<longdesc lang="en">
This resource agent wraps the Neutron HA Tool (neutron-ha-tool)
and can be used to check neutron for offline l3-agents that
have routers assigned and migrate those routers to a different
(online) l3-agent.
</longdesc>
<shortdesc lang="en">Manages the OpenStack Neutron HA Tool (neutron-ha-tool)</shortdesc>
<parameters>

<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the OpenStack Neutron HA Tool binary (neutron-ha-tool)
</longdesc>
<shortdesc lang="en">OpenStack Neutron HA Tool binary (neutron-ha-tool)</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>

<parameter name="os_auth_url" unique="0" required="0">
<longdesc lang="en">
The URL pointing to the Keystone instance to use for authentication.
</longdesc>
<shortdesc lang="en">Keystone URL</shortdesc>
<content type="string" default="${OCF_RESKEY_os_auth_url_default}" />
</parameter>

<parameter name="os_region_name" unique="0" required="0">
<longdesc lang="en">
The region name to use for authentication against keystone.
</longdesc>
<shortdesc lang="en">Keystone region name</shortdesc>
<content type="string" default="${OCF_RESKEY_os_region_name_default}" />
</parameter>

<parameter name="os_password" unique="0" required="0">
<longdesc lang="en">
The password to use for authentication against keystone.
</longdesc>
<shortdesc lang="en">Password for authentication</shortdesc>
<content type="string" default="${OCF_RESKEY_os_password_default}" />
</parameter>

<parameter name="os_tenant_name" unique="0" required="0">
<longdesc lang="en">
The Tenant to use for authentication against keystone.
</longdesc>
<shortdesc lang="en">Tenant name for authentication</shortdesc>
<content type="string" default="${OCF_RESKEY_os_tenant_name_default}" />
</parameter>

<parameter name="os_username" unique="0" required="0">
<longdesc lang="en">
OpenStack Username for authentication.
</longdesc>
<shortdesc lang="en">OpenStack Username</shortdesc>
<content type="string" default="${OCF_RESKEY_os_username_default}" />
</parameter>

<parameter name="os_insecure" unique="0" required="0">
<longdesc lang="en">
Disable SSL certificate verification.
</longdesc>
<shortdesc lang="en">Disable SSL certificate verification</shortdesc>
<content type="boolean" default="${OCF_RESKEY_os_insecure_default}" />
</parameter>

<parameter name="os_cacert" unique="0" required="0">
<longdesc lang="en">
Filename of a SSL CA Certificate Bundle to use for Server Certificate
verification.
</longdesc>
<shortdesc lang="en">SSL CA Bundle file</shortdesc>
<content type="string" default="${OCF_RESKEY_os_cacert_default}" />
</parameter>
</parameters>

<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="status" timeout="20" />
<action name="monitor" timeout="30" interval="20" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}

#######################################################################
# Functions invoked by resource manager actions

# Validate the configuration of this resource.
# Globals: OCF_RESKEY_binary (read), OCF_RESKEY_os_cacert (read)
# Returns: OCF_SUCCESS when the configuration looks usable;
#          OCF_ERR_GENERIC when os_cacert is set but is not a regular file.
# NOTE(review): check_binary is not defined in this file; presumably it
# terminates the agent itself when the binary cannot be found.
neutron_ha_tool_validate() {
    # Left unquoted on purpose: the binary is invoked unquoted elsewhere in
    # this file, so the value is treated the same way here.
    check_binary $OCF_RESKEY_binary

    # The CA bundle is optional; only check it when one was configured.
    if [ -n "$OCF_RESKEY_os_cacert" ] && [ ! -f "$OCF_RESKEY_os_cacert" ]; then
        ocf_log err "Failed to verify CA Certificate Bundle" \
            "($OCF_RESKEY_os_cacert)"
        return $OCF_ERR_GENERIC
    fi

    return $OCF_SUCCESS
}

# Status action.
# Returns: always OCF_SUCCESS.
neutron_ha_tool_status() {
    # There is not much to do here, since there is no daemon to check for.
    # Just pretend we're running successfully
    return $OCF_SUCCESS
}

# Monitor action: run the l3-agent check of neutron-ha-tool.
# Globals: statefile (read), OCF_RESKEY_binary (read),
#          OCF_RESKEY_os_insecure (read), INSECURE and rc (written)
# Returns: OCF_NOT_RUNNING when the state file does not exist;
#          OCF_ERR_GENERIC when the tool exits with status 2;
#          OCF_SUCCESS otherwise.
neutron_ha_tool_monitor() {
    if ! [ -e "$statefile" ]; then
        # neutron-ha-tool is run on a single node at a time, i.e. in
        # active/passive mode. So we use this state file to keep
        # track of whether it's active on the current node, and if
        # Pacemaker does a probe on a node where it's not active, we
        # skip the l3-agent check and always return OCF_NOT_RUNNING,
        # otherwise we'd get messages from pengine like:
        #
        # error: Resource neutron-ha-tool (ocf::neutron-ha-tool) is active on
        # 2 nodes attempting recovery
        # warning: See http://clusterlabs.org/wiki/FAQ#Resource_is_Too_Active
        # for more information.
        #
        # and Pacemaker could attempt unnecessary recovery according to the
        # value of the cluster-wide "multiple-active" option.
        ocf_log debug "neutron-ha-tool not currently active on this node; " \
            "skipping l3-agent check"
        return $OCF_NOT_RUNNING
    fi

    INSECURE=""
    if ocf_is_true "$OCF_RESKEY_os_insecure"; then
        INSECURE="--insecure"
    fi

    # $INSECURE stays unquoted so that an empty value adds no argument.
    ${OCF_RESKEY_binary} --l3-agent-check --quiet $INSECURE
    rc=$?

    if [ $rc -eq 2 ]; then
        ocf_log err "Some Neutron routers need migration."
        return $OCF_ERR_GENERIC
    fi

    # Any other non-zero status is still reported as success (only status 2
    # fails the monitor), but it is logged instead of being announced as
    # a successful check.
    if [ $rc -ne 0 ]; then
        ocf_log warn "Neutron HA Tool (neutron-ha-tool) l3-agent check exited" \
            "with unexpected status $rc; not treating this as a failure"
        return $OCF_SUCCESS
    fi

    ocf_log debug "Neutron HA Tool (neutron-ha-tool) monitor succeeded"
    return $OCF_SUCCESS
}

# Start action: mark this node as active and run the router migration.
# Globals: statefile (read), OCF_RESKEY_binary (read),
#          OCF_RESKEY_os_insecure (read), INSECURE, retry and rc (written)
# Returns: OCF_SUCCESS when the migration command exits with status 0;
#          OCF_ERR_GENERIC when the state file cannot be created or the
#          migration command exits non-zero.
neutron_ha_tool_start() {
    # The state file is what the monitor action checks to decide whether
    # the resource is active on this node.
    touch "$statefile"
    if ! [ -e "$statefile" ]; then
        ocf_log err "Failed to create $statefile - aborting!"
        return $OCF_ERR_GENERIC
    fi

    INSECURE=""
    if ocf_is_true "$OCF_RESKEY_os_insecure"; then
        INSECURE="--insecure"
    fi

    # Remain backwards-compatible with older neutron-ha-tool.py which
    # don't support --retry.
    retry=""
    if ${OCF_RESKEY_binary} --help | grep -q -- --retry; then
        retry="--retry"
    fi

    # $retry and $INSECURE stay unquoted so that empty values add no
    # arguments.
    ${OCF_RESKEY_binary} --l3-agent-migrate $retry --now $INSECURE
    rc=$?

    if [ $rc -ne 0 ]; then
        ocf_log err "Neutron HA Tool failed to migrate routers away from" \
            "offline L3 agents."
        return $OCF_ERR_GENERIC
    fi

    ocf_log debug "Neutron HA Tool (neutron-ha-tool) router migration" \
        "succeeded."
    return $OCF_SUCCESS
}

# Stop action: remove the state file that marks this node as active.
# Globals: statefile (read)
# Returns: OCF_SUCCESS when the state file is gone afterwards;
#          OCF_ERR_GENERIC when it still exists.
neutron_ha_tool_stop() {
    rm -f "$statefile"
    if [ -e "$statefile" ]; then
        ocf_log err "Uh-oh - failed to remove $statefile!"
        # If we can't even remove a file in tmpfs (/run), something
        # is *really* badly wrong, so fence the node.
        return $OCF_ERR_GENERIC
    fi
    return $OCF_SUCCESS
}

#######################################################################

# meta-data and usage/help are answered before validation, so they work
# even when the resource is not (yet) configured correctly.
case "$1" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        usage
        exit $OCF_SUCCESS
        ;;
esac

# Anything except meta-data and help must pass validation
neutron_ha_tool_validate || exit $?

# OPENSTACK env variables
# The values are quoted: with an unquoted "export VAR=$value" some sh
# implementations field-split the value, which would truncate e.g. a
# password containing spaces.
export OS_AUTH_URL="$OCF_RESKEY_os_auth_url"
export OS_REGION_NAME="$OCF_RESKEY_os_region_name"
export OS_TENANT_NAME="$OCF_RESKEY_os_tenant_name"
export OS_USERNAME="$OCF_RESKEY_os_username"
export OS_PASSWORD="$OCF_RESKEY_os_password"
# OS_CACERT is only exported when a CA bundle was configured.
if [ -n "$OCF_RESKEY_os_cacert" ]; then
    export OS_CACERT="$OCF_RESKEY_os_cacert"
fi

# Marker file created by start, removed by stop and checked by monitor.
statefile="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.active"

# What kind of method was invoked?
# The exit status of the script is the status of the selected action.
case "$1" in
    start)
        neutron_ha_tool_start
        ;;
    stop)
        neutron_ha_tool_stop
        ;;
    status)
        neutron_ha_tool_status
        ;;
    monitor)
        neutron_ha_tool_monitor
        ;;
    validate-all)
        # Nothing left to do: validation already ran above.
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac