#!/bin/sh
#
#
# OpenStack HA tool for Neutron (neutron-ha-tool)
#
# This resource agent wraps the neutron-ha-tool Python script.
# It can be used to monitor neutron for the availability of the
# l3-agents and migrate routers away from agents that are
# currently offline. Additionally it makes sure that dns and dhcp
# configuration is synchronized across all dhcp-agents. The
# neutron-ha-tool was originally part of the openstack-network
# cookbook for Chef. However as of icehouse it got dropped
# from upstream, and is now maintained here:
#
#   https://github.com/SUSE-Cloud/cookbook-openstack-network/blob/neutron-ha-tool-maintenance/files/default/neutron-ha-tool.py
#
# You can see a brief explanation of how this RA works in this
# video:
#
#   https://youtu.be/vBZgtHgSdOY?t=33m39s
#
# Authors: Ralf Haferkamp
# Mainly inspired by the Neutron L3 resource agent written by Emilien Macchi
#
# Support: openstack@lists.openstack.org
# License: Apache Software License (ASL) 2.0
#
#
# See usage() function below for more details ...
#
# OCF instance parameters:
#   OCF_RESKEY_binary
#   OCF_RESKEY_os_auth_url
#   OCF_RESKEY_os_region_name
#   OCF_RESKEY_os_username
#   OCF_RESKEY_os_password
#   OCF_RESKEY_os_tenant_name
#   OCF_RESKEY_os_insecure
#   OCF_RESKEY_os_cacert
#######################################################################
# Initialization:

: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"

#######################################################################

# Fill in some defaults if no values are specified

OCF_RESKEY_binary_default="neutron-ha-tool"
OCF_RESKEY_os_auth_url_default="http://localhost:5000/v2"
OCF_RESKEY_os_region_name_default=""
OCF_RESKEY_os_username_default="admin"
OCF_RESKEY_os_password_default=""
OCF_RESKEY_os_tenant_name_default="admin"
OCF_RESKEY_os_insecure_default="0"
OCF_RESKEY_os_cacert_default=""

# "${VAR=default}" only assigns when VAR is unset, so a parameter that
# was explicitly configured as the empty string is left empty.
: "${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}"
: "${OCF_RESKEY_os_auth_url=${OCF_RESKEY_os_auth_url_default}}"
: "${OCF_RESKEY_os_region_name=${OCF_RESKEY_os_region_name_default}}"
: "${OCF_RESKEY_os_tenant_name=${OCF_RESKEY_os_tenant_name_default}}"
: "${OCF_RESKEY_os_username=${OCF_RESKEY_os_username_default}}"
: "${OCF_RESKEY_os_password=${OCF_RESKEY_os_password_default}}"
: "${OCF_RESKEY_os_insecure=${OCF_RESKEY_os_insecure_default}}"
: "${OCF_RESKEY_os_cacert=${OCF_RESKEY_os_cacert_default}}"

#######################################################################

# Print a short summary of the supported actions to stdout.
#
# NOTE(review): the original usage text was not recoverable from the
# reviewed copy of this file; the wording below was rewritten to match
# the actions handled by the dispatcher at the bottom of the script.
usage() {
    cat <<UEND
usage: $0 {start|stop|status|monitor|validate-all|meta-data}

$0 manages the OpenStack Neutron HA Tool (neutron-ha-tool) as an HA resource.

The 'start' operation replicates networks to all DHCP agents and migrates
routers away from offline L3 agents.
The 'stop' operation marks the tool as inactive on this node.
The 'status' operation always reports success (there is no daemon).
The 'monitor' operation checks for routers hosted on offline L3 agents.
The 'validate-all' operation reports whether the parameters are valid.
The 'meta-data' operation reports this RA's meta-data information.
UEND
}

# Print the OCF resource agent meta-data (XML) to stdout.
#
# NOTE(review): the XML markup, the parameter attributes (unique,
# required, content type) and the action timeouts were not recoverable
# from the reviewed copy of this file and have been reconstructed.
# Only the description texts are original. Confirm the attributes and
# timeouts against the upstream agent before shipping.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="neutron-ha-tool">
<version>1.0</version>

<longdesc lang="en">
This resource agent wraps the Neutron HA Tool (neutron-ha-tool) and
can be used to check neutron for offline l3-agents that have routers
assigend and migrate those routers to a different (online) l3-agent.
</longdesc>
<shortdesc lang="en">Manages the OpenStack Neutron HA Tool (neutron-ha-tool)</shortdesc>
<parameters>

<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the OpenStack Neutron HA Tool binary (neutron-ha-tool)
</longdesc>
<shortdesc lang="en">OpenStack Neutron HA Tool binary (neutron-ha-tool)</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>

<parameter name="os_auth_url" unique="0" required="0">
<longdesc lang="en">
The URL pointing to the Keystone instance to use for authentication.
</longdesc>
<shortdesc lang="en">Keystone URL</shortdesc>
<content type="string" default="${OCF_RESKEY_os_auth_url_default}" />
</parameter>

<parameter name="os_region_name" unique="0" required="0">
<longdesc lang="en">
The region name to use for authentication against keystone.
</longdesc>
<shortdesc lang="en">Keystone region name</shortdesc>
<content type="string" default="${OCF_RESKEY_os_region_name_default}" />
</parameter>

<parameter name="os_password" unique="0" required="0">
<longdesc lang="en">
The password to use for authentication against keystone.
</longdesc>
<shortdesc lang="en">Password for authentication</shortdesc>
<content type="string" default="${OCF_RESKEY_os_password_default}" />
</parameter>

<parameter name="os_tenant_name" unique="0" required="0">
<longdesc lang="en">
The Tenant to use for authentication against keystone.
</longdesc>
<shortdesc lang="en">Tenant name for authentication</shortdesc>
<content type="string" default="${OCF_RESKEY_os_tenant_name_default}" />
</parameter>

<parameter name="os_username" unique="0" required="0">
<longdesc lang="en">
OpenStack Username for authentication.
</longdesc>
<shortdesc lang="en">OpenStack Username</shortdesc>
<content type="string" default="${OCF_RESKEY_os_username_default}" />
</parameter>

<parameter name="os_insecure" unique="0" required="0">
<longdesc lang="en">
Disable SSL certificate verification.
</longdesc>
<shortdesc lang="en">Disable SSL certificate verification</shortdesc>
<content type="boolean" default="${OCF_RESKEY_os_insecure_default}" />
</parameter>

<parameter name="os_cacert" unique="0" required="0">
<longdesc lang="en">
Filename of a SSL CA Certificate Bundle to use for Server Certificate verification.
</longdesc>
<shortdesc lang="en">SSL CA Bundle file</shortdesc>
<content type="string" default="${OCF_RESKEY_os_cacert_default}" />
</parameter>

</parameters>

<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="status" timeout="20" />
<action name="monitor" timeout="30" interval="20" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}

#######################################################################
# Functions invoked by resource manager actions

# Check that the configured binary is available and, when a CA bundle
# has been configured, that it exists as a regular file.
#
# Returns: 0 when the configuration looks usable, $OCF_ERR_GENERIC when
#          the CA bundle is missing. check_binary (from ocf-shellfuncs)
#          handles a missing binary itself.
neutron_ha_tool_validate() {
    # Deliberately left unquoted, like every invocation of the binary
    # below, so that existing word-splitting behaviour of a multi-word
    # "binary" parameter is preserved.
    # shellcheck disable=SC2086
    check_binary $OCF_RESKEY_binary

    if [ -n "$OCF_RESKEY_os_cacert" ] && [ ! -f "$OCF_RESKEY_os_cacert" ]; then
        ocf_log err "Failed to verify CA Certifcate Bundle" \
            "($OCF_RESKEY_os_cacert)"
        return $OCF_ERR_GENERIC
    fi

    return 0
}

# Report the resource status.
#
# Returns: always $OCF_SUCCESS.
neutron_ha_tool_status() {
    # There is not much to do here, since there is no daemon to check for.
    # Just pretend we're running successfully
    return $OCF_SUCCESS
}

# Run the l3-agent check if this node is the active one.
#
# Globals: statefile, OCF_RESKEY_binary, OCF_RESKEY_os_insecure (read);
#          INSECURE, rc (written)
# Returns: $OCF_NOT_RUNNING when the state file is absent,
#          $OCF_ERR_GENERIC when the tool exits with status 2,
#          $OCF_SUCCESS otherwise.
neutron_ha_tool_monitor() {
    if ! [ -e "$statefile" ]; then
        # neutron-ha-tool is run on a single node at a time, i.e. in
        # active/passive mode.  So we use this state file to keep
        # track of whether it's active on the current node, and if
        # Pacemaker does a probe on a node where it's not active, we
        # skip the l3-agent check and always return OCF_NOT_RUNNING,
        # otherwise we'd get messages from pengine like:
        #
        #   error: Resource neutron-ha-tool (ocf::neutron-ha-tool) is active on
        #     2 nodes attempting recovery
        #   warning: See http://clusterlabs.org/wiki/FAQ#Resource_is_Too_Active
        #     for more information.
        #
        # and Pacemaker could attempt unnecessary recovery according to the
        # value of the cluster-wide "multiple-active" option.
        ocf_log debug "neutron-ha-tool not currently active on this node; " \
            "skipping l3-agent check"
        return $OCF_NOT_RUNNING
    fi

    INSECURE=""
    if ocf_is_true "$OCF_RESKEY_os_insecure"; then
        INSECURE="--insecure"
    fi

    # $INSECURE must stay unquoted: when empty it has to vanish instead
    # of being passed as an empty argument.
    # shellcheck disable=SC2086
    ${OCF_RESKEY_binary} --l3-agent-check --quiet $INSECURE
    rc=$?

    case $rc in
        0)
            ocf_log debug "Neutron HA Tool (neutron-ha-tool) monitor succeeded"
            ;;
        2)
            ocf_log err "Some Neutron routers need migration."
            return $OCF_ERR_GENERIC
            ;;
        *)
            # Any other status is still reported as success, exactly as
            # before, so that a failure of the check itself does not
            # trigger recovery; but it is no longer swallowed silently.
            ocf_log warn "Neutron HA Tool (neutron-ha-tool) l3-agent check" \
                "exited with unexpected status $rc; ignoring"
            ;;
    esac

    return $OCF_SUCCESS
}

# Mark this node as active, then replicate networks to the DHCP agents
# and migrate routers away from offline L3 agents.
#
# Globals: statefile, OCF_RESKEY_binary, OCF_RESKEY_os_insecure (read);
#          INSECURE, retry, rc (written)
# Returns: $OCF_SUCCESS when both tool runs succeed, $OCF_ERR_GENERIC
#          when the state file cannot be created or either run fails.
neutron_ha_tool_start() {
    touch "$statefile"
    if ! [ -e "$statefile" ]; then
        ocf_log err "Failed to create $statefile - aborting!"
        return $OCF_ERR_GENERIC
    fi

    INSECURE=""
    if ocf_is_true "$OCF_RESKEY_os_insecure"; then
        INSECURE="--insecure"
    fi

    # Remain backwards-compatible with older neutron-ha-tool.py which
    # don't support --retry.
    retry=""
    if ${OCF_RESKEY_binary} --help | grep -q -- --retry; then
        retry="--retry"
    fi

    # $retry and $INSECURE must stay unquoted so that empty values
    # disappear from the command line.
    # shellcheck disable=SC2086
    ${OCF_RESKEY_binary} --replicate-dhcp $retry $INSECURE
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log err "Neutron HA Tool failed to replicate networks to DHCP" \
            "agents."
        return $OCF_ERR_GENERIC
    fi

    # shellcheck disable=SC2086
    ${OCF_RESKEY_binary} --l3-agent-migrate $retry --now $INSECURE
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log err "Neutron HA Tool failed to migrate routers away from" \
            "offline L3 agents."
        return $OCF_ERR_GENERIC
    fi

    ocf_log debug "Neutron HA Tool (neutron-ha-tool) router migration" \
        "succeeded."
    return $OCF_SUCCESS
}

# Mark this node as inactive by removing the state file.
#
# Globals: statefile (read)
# Returns: $OCF_SUCCESS when the state file is gone, $OCF_ERR_GENERIC
#          when it still exists after the removal attempt.
neutron_ha_tool_stop() {
    rm -f "$statefile"
    if [ -e "$statefile" ]; then
        ocf_log err "Uh-oh - failed to remove $statefile!"
        # If we can't even remove a file in tmpfs (/run), something
        # is *really* badly wrong, so fence the node.
        return $OCF_ERR_GENERIC
    fi

    return $OCF_SUCCESS
}

#######################################################################

case "$1" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        usage
        exit $OCF_SUCCESS
        ;;
esac

# Anything except meta-data and help must pass validation
neutron_ha_tool_validate || exit $?

# OPENSTACK env variables
#
# The values are quoted: under /bin/sh, shells that treat "export" as an
# ordinary utility field-split an unquoted value, which would truncate
# e.g. a password containing whitespace.
export OS_AUTH_URL="$OCF_RESKEY_os_auth_url"
export OS_REGION_NAME="$OCF_RESKEY_os_region_name"
export OS_TENANT_NAME="$OCF_RESKEY_os_tenant_name"
export OS_USERNAME="$OCF_RESKEY_os_username"
export OS_PASSWORD="$OCF_RESKEY_os_password"
if [ -n "$OCF_RESKEY_os_cacert" ]; then
    export OS_CACERT="$OCF_RESKEY_os_cacert"
fi

statefile="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.active"

# What kind of method was invoked?
# The script exits with the status of the selected function.
case "$1" in
    start)
        neutron_ha_tool_start
        ;;
    stop)
        neutron_ha_tool_stop
        ;;
    status)
        neutron_ha_tool_status
        ;;
    monitor)
        neutron_ha_tool_monitor
        ;;
    validate-all)
        # Validation already ran above; nothing more to do.
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac