Fix a lot.
This commit is contained in:
parent
291554610d
commit
52f27b1c38
@ -1,171 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
#
|
||||
# Neutron_Legacy_HA OCF
|
||||
#
|
||||
# Copyright (c) 2014 Hui Xiang
|
||||
# All Rights Reserved.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of version 2 of the GNU General Public License as
|
||||
# published by the Free Software Foundation.
|
||||
#
|
||||
# This program is distributed in the hope that it would be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
#
|
||||
# Further, this software is distributed without any warranty that it is
|
||||
# free of the rightful claim of any third person regarding infringement
|
||||
# or the like. Any license provided herein, whether implied or
|
||||
# otherwise, applies only to this software file. Patent licenses, if
|
||||
# any, provided herein do not apply to combinations of this program with
|
||||
# other software, or any other product whatsoever.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write the Free Software Foundation,
|
||||
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
|
||||
#
|
||||
|
||||
#######################################################################
|
||||
# Initialization:
|
||||
|
||||
# Locate the OCF shell function library unless the caller already chose one,
# then source it so ocf_log, ocf_run and the OCF_* return codes are available.
if [ -z "${OCF_FUNCTIONS+set}" ]; then
    OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs
fi
. ${OCF_FUNCTIONS}

# The requested action defaults to the first command-line argument.
if [ -z "${__OCF_ACTION+set}" ]; then
    __OCF_ACTION=$1
fi
|
||||
|
||||
#######################################################################
|
||||
|
||||
# Print the OCF resource-agent metadata (XML) for MonitorNeutron on stdout.
# The document is static, so the heredoc delimiter is quoted to make it
# explicit that no shell expansion takes place inside it.
meta_data() {
    cat <<'END'
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="MonitorNeutron" version="1.0">
<version>1.0</version>

<longdesc lang="en">
Monitor Neutron L3/DHCP agents for legacy HA.
</longdesc>
<shortdesc lang="en">Monitor Neutron L3/DHCP agents for legacy HA</shortdesc>

<parameters>
<parameter name="debug" unique="0">
<longdesc lang="en">
Enables to use default attrd_updater verbose logging on every call.
</longdesc>
<shortdesc lang="en">Verbose logging</shortdesc>
<content type="string" default="false"/>
</parameter>
</parameters>

<actions>
<action name="start" timeout="60" />
<action name="stop" timeout="20" />
<action name="reload" timeout="100" />
<action name="monitor" depth="0" timeout="60" interval="10"/>
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
</resource-agent>
END
}
|
||||
|
||||
#######################################################################
|
||||
# Each resource parameter falls back to its default only when the cluster
# manager did not export a value for it.

# Script that is launched by the start action.
OCF_RESKEY_binary_default="/usr/local/bin/monitor.py"
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}

# Account the script is run as (via su).
OCF_RESKEY_user_default="root"
: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}

# File in which the start action records the launched process id.
OCF_RESKEY_pid_default="$HA_RSCTMP/$OCF_RESOURCE_INSTANCE.pid"
: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
|
||||
|
||||
#######################################################################
|
||||
|
||||
# Log a message through ocf_log, but only when the "debug" resource
# parameter is set to "true".
#
# Arguments:
#   $1    - ocf_log level (e.g. info, err)
#   $2... - message words, joined with spaces
#
# The parameter is quoted and defaulted to "false" (matching the default
# advertised in the metadata) so that an unset OCF_RESKEY_debug no longer
# turns the test into the syntax error `[ = "true" ]`.
MonitorNeutron_conditional_log() {
    level=$1; shift
    if [ "${OCF_RESKEY_debug:-false}" = "true" ]; then
        ocf_log "$level" "$*"
    fi
}
|
||||
|
||||
# Print a short usage message on stdout.
#
# The heredoc is intentionally unquoted so that $0 expands to the agent's
# own path. A stale "#usage: ..." line used to live inside the heredoc; "#"
# is not a comment there, so it was printed verbatim and has been removed.
# "reload" is listed because the action dispatcher accepts it.
MonitorNeutron_usage() {
    cat <<END
usage: $0 {start|stop|reload|monitor|validate-all|meta-data}

Expects to have a fully populated OCF RA-compliant environment set.
END
}
|
||||
|
||||
# Launch the monitor script in the background as OCF_RESKEY_user and record
# the pid reported by the inner shell in OCF_RESKEY_pid.
#
# Returns:
#   OCF_SUCCESS      - the launcher command completed successfully
#   OCF_ERR_GENERIC  - su (or the pid-file redirection) failed
#
# Previously a failed launch fell through, logged "started" and returned 0,
# while a successful launch returned before the "started" message was logged.
MonitorNeutron_start() {
    ocf_log info "MonitorNeutron_start"

    # The double-quoted part is expanded here (binary and extra parameters);
    # the single-quoted part is passed literally so that the redirection,
    # the "&" and "$!" are evaluated by the inner shell started by su.
    su "${OCF_RESKEY_user}" -s /bin/sh -c \
        "python ${OCF_RESKEY_binary} ${OCF_RESKEY_additional_parameters}"' >> /dev/null 2>&1 & echo $!' \
        > "${OCF_RESKEY_pid}"
    rc=$?

    if [ $rc -ne 0 ]; then
        ocf_log err "MonitorNeutron failed to start (rc=$rc)"
        return $OCF_ERR_GENERIC
    fi

    ocf_log info "MonitorNeutron started"
    return $OCF_SUCCESS
}
|
||||
|
||||
# Stop the monitor process whose pid is recorded in OCF_RESKEY_pid.
#
# Returns:
#   OCF_SUCCESS      - no pid file, SIGTERM delivered, or SIGTERM failed
#                      (a failed kill is logged but reported as stopped,
#                      exactly as before)
#   OCF_ERR_GENERIC  - the pid file exists but is empty
#
# After a successful SIGTERM the pid file is now removed, so a later stop
# takes the "already stopped" branch instead of signalling a stale pid.
MonitorNeutron_stop() {
    if [ ! -f "$OCF_RESKEY_pid" ]; then
        ocf_log info "MonitorNeutron already stopped"
        return $OCF_SUCCESS
    fi

    pid=`cat "$OCF_RESKEY_pid"`
    if [ -z "$pid" ]; then
        ocf_log err "MonitorNeutron pid is empty"
        # The dispatcher exits with this function's status, so returning is
        # equivalent to the former in-function exit.
        return $OCF_ERR_GENERIC
    fi

    # Try SIGTERM
    ocf_run kill -s TERM $pid
    rc=$?
    if [ $rc -ne 0 ]; then
        ocf_log err "MonitorNeutron couldn't be stopped"
        # Best effort: keep reporting success so the cluster manager does
        # not treat a failed kill as a stop failure.
        return $OCF_SUCCESS
    fi

    rm -f "$OCF_RESKEY_pid"
    ocf_log info "MonitorNeutron stopped"
    return $OCF_SUCCESS
}
|
||||
|
||||
# Validate the resource configuration: the monitor script named by
# OCF_RESKEY_binary must exist as a regular file.
#
# Returns:
#   OCF_SUCCESS   - the file exists
#   OCF_ERR_ARGS  - the file is missing
#
# The failure branch used to return the undefined variable $OCF_ERRARGS,
# i.e. a bare "return" that propagated ocf_log's status (0), so validation
# succeeded even when the script was missing.
MonitorNeutron_validate() {
    if [ ! -f "$OCF_RESKEY_binary" ]; then
        ocf_log err "No file $OCF_RESKEY_binary exists !"
        return $OCF_ERR_ARGS
    fi
    return $OCF_SUCCESS
}
|
||||
|
||||
# Monitor action: always reports the resource as healthy.
#
# NOTE(review): the unconditional return below short-circuits the pid-file
# check that follows, so that check never runs. This looks deliberate;
# confirm before re-enabling it.
MonitorNeutron_monitor() {
    return $OCF_SUCCESS

    # Unreachable: success when the pid file exists, generic error otherwise.
    [ -f ${OCF_RESKEY_pid} ] && return $OCF_SUCCESS
    return $OCF_ERR_GENERIC
}
|
||||
|
||||
# Dispatch on the requested OCF action. Informational actions exit right
# away; the others fall out of the case statement with the handler's status.
case "$__OCF_ACTION" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        MonitorNeutron_usage
        exit $OCF_SUCCESS
        ;;
    start|reload)
        # reload simply runs the start handler again
        MonitorNeutron_start
        ;;
    stop)
        MonitorNeutron_stop
        ;;
    monitor)
        MonitorNeutron_monitor
        ;;
    validate-all)
        MonitorNeutron_validate
        ;;
    *)
        MonitorNeutron_usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac

# Propagate the status of whichever handler ran.
exit $?
|
3
files/monitor.conf
Normal file
3
files/monitor.conf
Normal file
@ -0,0 +1,3 @@
|
||||
[DEFAULT]
|
||||
verbose=True
|
||||
check_interval=15
|
109
files/monitor.py
109
files/monitor.py
@ -17,13 +17,15 @@ import fcntl
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
import logging as LOG
|
||||
|
||||
from oslo.config import cfg
|
||||
from neutron.agent.linux import ovs_lib
|
||||
from neutron.agent.linux import ip_lib
|
||||
from neutron.openstack.common import log as logging
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Daemon(object):
|
||||
@ -88,9 +90,9 @@ class Daemon(object):
|
||||
|
||||
|
||||
class MonitorNeutronAgentsDaemon(Daemon):
|
||||
def __init__(self, check_interval=None):
|
||||
def __init__(self):
|
||||
super(MonitorNeutronAgentsDaemon, self).__init__()
|
||||
self.check_interval = check_interval
|
||||
logging.setup('Neuron-HA-Monitor')
|
||||
LOG.info('Monitor Neutron Agent Loop Init')
|
||||
self.env = {}
|
||||
|
||||
@ -113,13 +115,13 @@ class MonitorNeutronAgentsDaemon(Daemon):
|
||||
raise Exception("OpenStack env data uncomplete.")
|
||||
return self.env
|
||||
|
||||
def get_hostname():
|
||||
def get_hostname(self):
|
||||
return subprocess.check_output(['uname', '-n'])
|
||||
|
||||
def get_root_helper():
|
||||
def get_root_helper(self):
|
||||
return 'sudo'
|
||||
|
||||
def unplug_device(conf, device):
|
||||
def unplug_device(self, conf, device):
|
||||
try:
|
||||
device.link.delete()
|
||||
except RuntimeError:
|
||||
@ -132,7 +134,7 @@ class MonitorNeutronAgentsDaemon(Daemon):
|
||||
else:
|
||||
LOG.debug(_('Unable to find bridge for device: %s'), device.name)
|
||||
|
||||
def cleanup_dhcp(networks):
|
||||
def cleanup_dhcp(self, networks):
|
||||
namespaces = []
|
||||
for network, agent in networks.iteritems():
|
||||
namespaces.append('qdhcp-' + network)
|
||||
@ -141,7 +143,7 @@ class MonitorNeutronAgentsDaemon(Daemon):
|
||||
LOG.info('Namespaces: %s is going to be deleted.' % namespaces)
|
||||
destroy_namespaces(namespaces)
|
||||
|
||||
def cleanup_router(routers):
|
||||
def cleanup_router(self, routers):
|
||||
namespaces = []
|
||||
for router, agent in routers.iteritems():
|
||||
namespaces.append('qrouter-' + router)
|
||||
@ -150,7 +152,7 @@ class MonitorNeutronAgentsDaemon(Daemon):
|
||||
LOG.info('Namespaces: %s is going to be deleted.' % namespaces)
|
||||
destroy_namespaces(namespaces)
|
||||
|
||||
def destroy_namespaces(namespaces):
|
||||
def destroy_namespaces(self, namespaces):
|
||||
try:
|
||||
root_helper = self.get_root_helper()
|
||||
for namespace in namespaces:
|
||||
@ -163,6 +165,40 @@ class MonitorNeutronAgentsDaemon(Daemon):
|
||||
except Exception:
|
||||
LOG.exception(_('Error unable to destroy namespace: %s'), namespace)
|
||||
|
||||
def l3_agents_reschedule(self, l3_agents, routers):
    """Spread the given routers round-robin over the given l3 agents.

    :param l3_agents: non-empty list of agent identifiers; rescheduling
        only happens when its first entry equals ``self.get_hostname()``.
    :param routers: mapping of router id to the agent it is removed from.

    The early-exit log message used to contain two ``%s`` placeholders
    but was formatted with a single (non-tuple) argument, which raised
    ``TypeError`` instead of logging. It now reports only the l3 agents.
    """
    if l3_agents[0] != self.get_hostname():
        LOG.info('Only the first agent could reschedule. l3 agents: %s',
                 l3_agents)
        return

    # NOTE(review): `quantum` is not defined in this method; presumably a
    # client object visible at module level - confirm against the caller.
    for index, router_id in enumerate(routers):
        target = l3_agents[index % len(l3_agents)]
        LOG.info('Moving router %s from %s to %s' %
                 (router_id, routers[router_id], target))
        quantum.remove_router_from_l3_agent(l3_agent=routers[router_id],
                                            router_id=router_id)
        quantum.add_router_to_l3_agent(l3_agent=target,
                                       body={'router_id': router_id})
|
||||
|
||||
def dhcp_agents_reschedule(self, dhcp_agents, networks):
    """Spread the given networks round-robin over the given dhcp agents.

    :param dhcp_agents: non-empty list of agent identifiers; nothing is
        moved unless its first entry equals ``self.get_hostname()``.
    :param networks: mapping of network id to the agent it is removed from.
    """
    leader = dhcp_agents[0]
    if leader != self.get_hostname():
        LOG.info('Only the first agent could reschedule. '
                 'dhcp agents: %s' % dhcp_agents)
        return

    agent_count = len(dhcp_agents)
    for position, network_id in enumerate(networks):
        target = dhcp_agents[position % agent_count]
        LOG.info('Moving network %s from %s to %s' %
                 (network_id, networks[network_id], target))
        quantum.remove_network_from_dhcp_agent(
            dhcp_agent=networks[network_id], network_id=network_id)
        quantum.add_network_to_dhcp_agent(
            dhcp_agent=target, body={'network_id': network_id})
|
||||
|
||||
def reassign_agent_resources(self):
|
||||
''' Use agent scheduler API to detect down agents and re-schedule '''
|
||||
DHCP_AGENT = "DHCP Agent"
|
||||
@ -219,44 +255,27 @@ class MonitorNeutronAgentsDaemon(Daemon):
|
||||
l3_agents.append(agent['id'])
|
||||
LOG.info('Active l3 agents: %s' % l3_agents)
|
||||
|
||||
if len(dhcp_agents) == 0 or len(l3_agents) == 0:
|
||||
LOG.info('Unable to relocate resources, there are %s dhcp_agents '
|
||||
'and %s l3_agents in this cluster' % (len(dhcp_agents),
|
||||
if not networks and not routers:
|
||||
LOG.info('No failed agents found, return.')
|
||||
return
|
||||
|
||||
if len(dhcp_agents) == 0 and len(l3_agents) == 0:
|
||||
LOG.error('Unable to relocate resources, there are %s dhcp_agents '
|
||||
'and %s l3_agents in this cluster' % (len(dhcp_agents),
|
||||
len(l3_agents)))
|
||||
return
|
||||
|
||||
if l3_agents[0] != self.get_hostname() or \
|
||||
dhcp_agents[0] != self.get_hostname():
|
||||
LOG.info('Only the first agent could reschedule. l3 agents: %s '
|
||||
'dhcp agents: %s' % (l3_agents, dhcp_agents))
|
||||
return
|
||||
if len(l3_agents) != 0:
|
||||
self.l3_agents_reschedule(l3_agents, routers)
|
||||
|
||||
index = 0
|
||||
for router_id in routers:
|
||||
agent = index % len(l3_agents)
|
||||
LOG.info('Moving router %s from %s to %s' %
|
||||
(router_id, routers[router_id], l3_agents[agent]))
|
||||
quantum.remove_router_from_l3_agent(l3_agent=routers[router_id],
|
||||
router_id=router_id)
|
||||
quantum.add_router_to_l3_agent(l3_agent=l3_agents[agent],
|
||||
body={'router_id': router_id})
|
||||
index += 1
|
||||
|
||||
index = 0
|
||||
for network_id in networks:
|
||||
agent = index % len(dhcp_agents)
|
||||
LOG.info('Moving network %s from %s to %s' %
|
||||
(network_id, networks[network_id], dhcp_agents[agent]))
|
||||
quantum.remove_network_from_dhcp_agent(
|
||||
dhcp_agent=networks[network_id], network_id=network_id)
|
||||
quantum.add_network_to_dhcp_agent(dhcp_agent=dhcp_agents[agent],
|
||||
body={'network_id': network_id})
|
||||
index += 1
|
||||
if len(dhcp_agents) != 0:
|
||||
self.dhcp_agents_reschedule(dhcp_agents, networks)
|
||||
|
||||
def run(self):
    """Run the monitor loop forever.

    Each iteration sleeps for ``cfg.CONF.check_interval`` seconds and then
    calls ``reassign_agent_resources()``. The extra hard-coded 15 second
    sleep was dropped so that the configured interval is the only delay
    between checks.
    """
    while True:
        LOG.info('Monitor Neutron Agent Loop Start')
        LOG.info('sleep %s' % cfg.CONF.check_interval)
        # check_interval is registered as a string option, hence float().
        time.sleep(float(cfg.CONF.check_interval))
        self.reassign_agent_resources()
|
||||
|
||||
|
||||
@ -265,16 +284,10 @@ if __name__ == '__main__':
|
||||
cfg.StrOpt('check_interval',
|
||||
default=15,
|
||||
help='Check Neutron Agents interval.'),
|
||||
# cfg.StrOpt('log_file',
|
||||
# default='/var/log/monitor.log',
|
||||
# help='log file'),
|
||||
]
|
||||
|
||||
cfg.CONF.register_cli_opts(opts)
|
||||
cfg.CONF(project='monitor_neutron_agents', default_config_files=[])
|
||||
log_file = '/tmp/monitor.log'
|
||||
print "log file: %s" % cfg.CONF.log_file
|
||||
LOG.basicConfig(filename=log_file, level=LOG.INFO)
|
||||
monitor_daemon = MonitorNeutronAgentsDaemon(
|
||||
check_interval=cfg.CONF.check_interval)
|
||||
logging.setup('Neuron-HA-Monitor')
|
||||
monitor_daemon = MonitorNeutronAgentsDaemon()
|
||||
monitor_daemon.start()
|
||||
|
@ -2,12 +2,8 @@
|
||||
|
||||
logger " ** "
|
||||
logger "Start running ns_ovs_cleanup.sh..."
|
||||
logger " ** "
|
||||
|
||||
logger "CRM_notify_task: $CRM_notify_task"
|
||||
logger "CRM_notify_desc: $CRM_notify_desc"
|
||||
logger "CRM_notify_rsc: $CRM_notify_rsc"
|
||||
logger "CRM_notify_node: $CRM_notify_node"
|
||||
logger "CRM_notify_task: $CRM_notify_task, CRM_notify_desc: $CRM_notify_desc"
|
||||
logger "CRM_notify_rsc: $CRM_notify_rsc, CRM_notify_node: $CRM_notify_node"
|
||||
logger " ** "
|
||||
|
||||
set -x
|
||||
@ -50,8 +46,8 @@ if [[ $CRM_notify_rsc == 'res_PingCheck' && ${CRM_notify_task} == 'start' ]]; th
|
||||
check_pid
|
||||
if [ $? -ne 0 ]; then
|
||||
logger "Executing monitor to reschedule Neutron agents..."
|
||||
#sudo python /usr/local/bin/monitor.py >> /dev/null 2>&1 & echo $! > $DEFAULT_PIDFILE
|
||||
sudo python /usr/local/bin/monitor.py >> /dev/null 2>&1 & echo $!
|
||||
sudo python /usr/local/bin/monitor.py --config-file /tmp/monitor.conf \
|
||||
--log-file /tmp/monitor.log >> /dev/null 2>&1 & echo $!
|
||||
sleep 3
|
||||
pid=`ps -aux | grep m\[o\]nitor.py | awk -F' ' '{print $2}'`
|
||||
if [ ! -z "$pid" ]; then
|
@ -1,8 +0,0 @@
|
||||
# vim: set ft=upstart et ts=2:
|
||||
description "Reassign Agent Resources for Legacy HA"
|
||||
author "Hui Xiang <hui.xiang@canonical.com>"
|
||||
|
||||
start on runlevel [2345]
|
||||
stop on runlevel [!2345]
|
||||
|
||||
exec start-stop-daemon --start --chuid neutron --exec /usr/local/bin/reassign_agent_services
|
@ -629,12 +629,6 @@ def copy_file(source_dir, des_dir, f, f_mod=None, update=False):
|
||||
raise
|
||||
|
||||
|
||||
def init_upstart_f_4_reassign_agent_resources():
    """Copy the reassign_agent_resources upstart job into /etc/init."""
    copy_file(LEGACY_HA_TEMPLATE_FILES, '/etc/init',
              'reassign_agent_resources.conf')
|
||||
|
||||
|
||||
def init_ocf_MonitorNeutron_f(update=False):
|
||||
ocf_f = 'MonitorNeutron'
|
||||
exec_dir = '/usr/lib/ocf/resource.d/pacemaker'
|
||||
@ -642,19 +636,19 @@ def init_ocf_MonitorNeutron_f(update=False):
|
||||
ocf_f, stat.S_IEXEC, update=update)
|
||||
|
||||
|
||||
def get_external_agent_f():
    """Return the full path of the monitor_neutron_ha.sh agent script."""
    return os.path.join('/usr/lib/ocf/resource.d/canonical',
                        'monitor_neutron_ha.sh')
|
||||
|
||||
|
||||
def init_external_agent_f(update=False):
    """Copy the monitor_neutron_ha.sh agent into its OCF directory.

    :param update: forwarded unchanged to ``copy_file``.

    The earlier ``ns_ovs_cleanup.sh`` / ``openstack`` assignments were dead
    stores (overwritten before use) and have been removed; the file that is
    copied and its destination are unchanged.
    """
    agent = 'monitor_neutron_ha.sh'
    exec_dir = '/usr/lib/ocf/resource.d/canonical'
    # The file is copied with the stat.S_IEXEC mode argument.
    copy_file(LEGACY_HA_TEMPLATE_FILES, exec_dir,
              agent, stat.S_IEXEC, update=update)
|
||||
|
||||
|
||||
def init_reassign_agent_services_binary():
    """Copy the reassign_agent_services script into /usr/local/bin/."""
    copy_file(LEGACY_HA_TEMPLATE_FILES, '/usr/local/bin/',
              'reassign_agent_services', stat.S_IEXEC)
|
||||
|
||||
|
||||
def init_monitor_daemon(update=False):
|
||||
service = 'monitor.py'
|
||||
exec_dir = '/usr/local/bin/'
|
||||
@ -662,20 +656,20 @@ def init_monitor_daemon(update=False):
|
||||
service, stat.S_IEXEC, update=update)
|
||||
|
||||
|
||||
def init_monitor_conf_files(update=False):
    """Copy monitor.conf into /tmp.

    :param update: forwarded unchanged to ``copy_file``.
    """
    # NOTE(review): /tmp is the destination used here; presumably it has
    # to match the --config-file path used when the monitor is launched.
    copy_file(LEGACY_HA_TEMPLATE_FILES, '/tmp', 'monitor.conf',
              update=update)
|
||||
|
||||
|
||||
def install_legacy_ha_files(update=False):
    """Install the legacy HA files when 'ha-legacy-mode' is enabled.

    :param update: forwarded unchanged to each installer.
    """
    if not config('ha-legacy-mode'):
        return

    # OCF agent, external agent script, then the monitor daemon itself.
    init_ocf_MonitorNeutron_f(update=update)
    init_external_agent_f(update=update)
    init_monitor_daemon(update=update)
|
||||
|
||||
|
||||
def get_external_agent_f():
    """Return the full path of the ns_ovs_cleanup.sh agent script."""
    return os.path.join('/usr/lib/ocf/resource.d/openstack',
                        'ns_ovs_cleanup.sh')
|
||||
|
||||
|
||||
def cache_env_data():
|
||||
env = NetworkServiceContext()()
|
||||
if not env:
|
||||
|
Loading…
Reference in New Issue
Block a user