Fix a lot.

This commit is contained in:
Hui Xiang 2014-12-16 15:50:05 +08:00
parent 291554610d
commit 52f27b1c38
6 changed files with 83 additions and 256 deletions

View File

@ -1,171 +0,0 @@
#!/bin/sh
#
#
# Neutron_Legacy_HA OCF
#
# Copyright (c) 2014 Hui Xiang
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
# Use ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs as the function
# library unless OCF_FUNCTIONS is already set, then source it.
# Presumably this library supplies ocf_log, ocf_run and the OCF_* return
# codes used below - none of them are defined in this script.
: ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}
. ${OCF_FUNCTIONS}
# The requested action defaults to the first command-line argument.
: ${__OCF_ACTION=$1}
#######################################################################
meta_data() {
# Print the resource-agent metadata XML on stdout.
# NOTE(review): only the "debug" parameter is declared here, while this
# script also reads OCF_RESKEY_binary, OCF_RESKEY_user, OCF_RESKEY_pid and
# OCF_RESKEY_additional_parameters - confirm whether they should be listed.
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="MonitorNeutron" version="1.0">
<version>1.0</version>
<longdesc lang="en">
Monitor Neutron L3/DHCP agents for legacy HA.
</longdesc>
<shortdesc lang="en">Monitor Neutron L3/DHCP agents for legacy HA</shortdesc>
<parameters>
<parameter name="debug" unique="0">
<longdesc lang="en">
Enables to use default attrd_updater verbose logging on every call.
</longdesc>
<shortdesc lang="en">Verbose logging</shortdesc>
<content type="string" default="false"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="60" />
<action name="stop" timeout="20" />
<action name="reload" timeout="100" />
<action name="monitor" depth="0" timeout="60" interval="10"/>
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
</resource-agent>
END
}
#######################################################################
# Built-in defaults for the resource parameters read by this agent.
OCF_RESKEY_binary_default="/usr/local/bin/monitor.py"
OCF_RESKEY_user_default="root"
# The pid file is named after the resource instance and lives in $HA_RSCTMP.
OCF_RESKEY_pid_default="$HA_RSCTMP/$OCF_RESOURCE_INSTANCE.pid"
# ${VAR=default} assigns only when VAR is unset, so values that are
# already present in the environment take precedence over the defaults.
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
#######################################################################
# Log a message through ocf_log, but only when the "debug" resource
# parameter is "true".
#   $1     - ocf_log level (for example: info, err)
#   $2...  - message words, joined with spaces
MonitorNeutron_conditional_log() {
    level=$1; shift
    # Quote the expansion and default it to "false": with OCF_RESKEY_debug
    # unset, the unquoted form expands to `[ = "true" ]`, which makes the
    # test itself fail with a syntax error instead of evaluating to false.
    if [ "${OCF_RESKEY_debug:-false}" = "true" ]; then
        ocf_log "$level" "$*"
    fi
}
# Print a short usage message on stdout.
# The stale "#usage: ..." line was removed: inside a here-document a leading
# '#' is not a comment, so it was printed verbatim. "reload" was added to the
# list because the action dispatcher accepts it.
MonitorNeutron_usage() {
    cat <<END
usage: $0 {start|stop|reload|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
# Start the monitor script in the background as $OCF_RESKEY_user and record
# the background job's pid in $OCF_RESKEY_pid.
# Returns $OCF_SUCCESS when the launch command succeeded and
# $OCF_ERR_GENERIC otherwise.
MonitorNeutron_start() {
    ocf_log info "MonitorNeutron_start"
    # The single-quoted tail is passed unexpanded to the inner shell, so the
    # redirection, the '&' and 'echo $!' are evaluated there; the outer
    # redirection captures the echoed pid into the pid file.
    su "${OCF_RESKEY_user}" -s /bin/sh -c "python ${OCF_RESKEY_binary} \
$OCF_RESKEY_additional_parameters"' \
>> /dev/null 2>&1 & echo $!' > "$OCF_RESKEY_pid"
    # The original test was inverted: it returned success silently and only
    # logged "started" (then fell through with status 0) when su had failed.
    if [ $? -ne 0 ]; then
        ocf_log err "MonitorNeutron failed to start"
        return $OCF_ERR_GENERIC
    fi
    ocf_log info "MonitorNeutron started"
    return $OCF_SUCCESS
}
# Stop the monitor by sending SIGTERM to the pid stored in $OCF_RESKEY_pid.
# Returns $OCF_SUCCESS when there is no pid file, when the signal was
# delivered, and (best effort, as before) when kill fails.
# Returns $OCF_ERR_GENERIC when the pid file exists but is empty.
MonitorNeutron_stop() {
    if [ ! -f "$OCF_RESKEY_pid" ]; then
        ocf_log info "MonitorNeutron already stopped"
        return $OCF_SUCCESS
    fi

    pid=`cat "$OCF_RESKEY_pid"`
    if [ -z "$pid" ]; then
        ocf_log err "MonitorNeutron pid is empty"
        # 'return' rather than 'exit': the dispatcher ends with 'exit $?',
        # so the agent's exit status is the same.
        return $OCF_ERR_GENERIC
    fi

    # Try SIGTERM
    ocf_run kill -s TERM $pid
    rc=$?
    if [ $rc -ne 0 ]; then
        # Deliberately reported as success, matching the previous behavior.
        ocf_log err "MonitorNeutron couldn't be stopped"
        return $OCF_SUCCESS
    fi

    # Drop the pid file once the signal was delivered; leaving it behind
    # makes a later stop send SIGTERM to a stale (possibly reused) pid.
    rm -f "$OCF_RESKEY_pid"
    ocf_log info "MonitorNeutron stopped"
    return $OCF_SUCCESS
}
# Validate the configuration: the monitor script named by
# $OCF_RESKEY_binary must exist as a regular file.
# Returns $OCF_SUCCESS when it does, $OCF_ERR_ARGS otherwise.
MonitorNeutron_validate() {
    if [ ! -f "$OCF_RESKEY_binary" ]; then
        ocf_log err "No file $OCF_RESKEY_binary exists !"
        # The original returned $OCF_ERRARGS, which is not defined; the
        # empty expansion made 'return' yield the status of ocf_log (0), so
        # a missing binary passed validation.
        return $OCF_ERR_ARGS
    fi
    return $OCF_SUCCESS
}
# Monitor action: as written, always reports $OCF_SUCCESS.
MonitorNeutron_monitor() {
# NOTE(review): this unconditional return makes the pid-file check below
# unreachable. Confirm whether the short-circuit is intentional before
# removing either part.
return $OCF_SUCCESS
# Unreachable: success when the pid file exists, generic error otherwise.
if [ -f ${OCF_RESKEY_pid} ]; then
return $OCF_SUCCESS
fi
return $OCF_ERR_GENERIC
}
# Dispatch on the requested action. meta-data, usage/help and unknown
# actions exit immediately; every other action exits with the status of
# the function that handled it.
case "$__OCF_ACTION" in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        MonitorNeutron_usage
        exit $OCF_SUCCESS
        ;;
    start|reload)
        # reload is handled exactly like start.
        MonitorNeutron_start
        ;;
    stop)
        MonitorNeutron_stop
        ;;
    monitor)
        MonitorNeutron_monitor
        ;;
    validate-all)
        MonitorNeutron_validate
        ;;
    *)
        MonitorNeutron_usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
exit $?

3
files/monitor.conf Normal file
View File

@ -0,0 +1,3 @@
[DEFAULT]
verbose=True
check_interval=15

View File

@ -17,13 +17,15 @@ import fcntl
import os
import signal
import sys
import subprocess
import time
import logging as LOG
from oslo.config import cfg
from neutron.agent.linux import ovs_lib
from neutron.agent.linux import ip_lib
from neutron.openstack.common import log as logging
LOG = logging.getLogger(__name__)
class Daemon(object):
@ -88,9 +90,9 @@ class Daemon(object):
class MonitorNeutronAgentsDaemon(Daemon):
def __init__(self, check_interval=None):
def __init__(self):
super(MonitorNeutronAgentsDaemon, self).__init__()
self.check_interval = check_interval
logging.setup('Neuron-HA-Monitor')
LOG.info('Monitor Neutron Agent Loop Init')
self.env = {}
@ -113,13 +115,13 @@ class MonitorNeutronAgentsDaemon(Daemon):
raise Exception("OpenStack env data uncomplete.")
return self.env
def get_hostname():
def get_hostname(self):
    """Return this node's name as printed by ``uname -n``.

    ``subprocess.check_output`` returns the command's raw output, which
    ends with a newline. It is stripped here so that equality comparisons
    against the returned name are not defeated by the trailing newline.
    """
    return subprocess.check_output(['uname', '-n']).strip()
def get_root_helper():
def get_root_helper(self):
    """Return the root helper command string, which is always ``'sudo'``."""
    helper = 'sudo'
    return helper
def unplug_device(conf, device):
def unplug_device(self, conf, device):
try:
device.link.delete()
except RuntimeError:
@ -132,7 +134,7 @@ class MonitorNeutronAgentsDaemon(Daemon):
else:
LOG.debug(_('Unable to find bridge for device: %s'), device.name)
def cleanup_dhcp(networks):
def cleanup_dhcp(self, networks):
namespaces = []
for network, agent in networks.iteritems():
namespaces.append('qdhcp-' + network)
@ -141,7 +143,7 @@ class MonitorNeutronAgentsDaemon(Daemon):
LOG.info('Namespaces: %s is going to be deleted.' % namespaces)
destroy_namespaces(namespaces)
def cleanup_router(routers):
def cleanup_router(self, routers):
namespaces = []
for router, agent in routers.iteritems():
namespaces.append('qrouter-' + router)
@ -150,7 +152,7 @@ class MonitorNeutronAgentsDaemon(Daemon):
LOG.info('Namespaces: %s is going to be deleted.' % namespaces)
destroy_namespaces(namespaces)
def destroy_namespaces(namespaces):
def destroy_namespaces(self, namespaces):
try:
root_helper = self.get_root_helper()
for namespace in namespaces:
@ -163,6 +165,40 @@ class MonitorNeutronAgentsDaemon(Daemon):
except Exception:
LOG.exception(_('Error unable to destroy namespace: %s'), namespace)
def l3_agents_reschedule(self, l3_agents, routers):
    """Move every router in ``routers`` onto ``l3_agents``, round-robin.

    :param l3_agents: sequence of L3 agent identifiers that routers are
        moved to. Rescheduling only happens when its first entry equals
        the value returned by ``self.get_hostname()``.
    :param routers: mapping of router id -> agent the router is removed
        from.
    """
    # Nothing to move routers to; also avoids IndexError on [0] and a
    # modulo by zero below.
    if not l3_agents:
        return

    if l3_agents[0] != self.get_hostname():
        # The original format string had two %s placeholders but a single
        # argument, so this branch raised TypeError instead of logging.
        LOG.info('Only the first agent could reschedule. l3 agents: %s',
                 l3_agents)
        return

    # NOTE(review): `quantum` is not bound anywhere in this method; unless
    # it is a module-level name outside this view, the calls below raise
    # NameError - confirm where the client is supposed to come from.
    agent_count = len(l3_agents)
    for index, router_id in enumerate(routers):
        target_agent = l3_agents[index % agent_count]
        LOG.info('Moving router %s from %s to %s' %
                 (router_id, routers[router_id], target_agent))
        quantum.remove_router_from_l3_agent(l3_agent=routers[router_id],
                                            router_id=router_id)
        quantum.add_router_to_l3_agent(l3_agent=target_agent,
                                       body={'router_id': router_id})
def dhcp_agents_reschedule(self, dhcp_agents, networks):
    """Move every network in ``networks`` onto ``dhcp_agents``, round-robin.

    :param dhcp_agents: sequence of DHCP agent identifiers that networks
        are moved to. Rescheduling only happens when its first entry
        equals the value returned by ``self.get_hostname()``.
    :param networks: mapping of network id -> agent the network is
        removed from.
    """
    # Nothing to move networks to; also avoids IndexError on [0] and a
    # modulo by zero below.
    if not dhcp_agents:
        return

    if dhcp_agents[0] != self.get_hostname():
        LOG.info('Only the first agent could reschedule. '
                 'dhcp agents: %s', dhcp_agents)
        return

    # NOTE(review): `quantum` is not bound anywhere in this method; unless
    # it is a module-level name outside this view, the calls below raise
    # NameError - confirm where the client is supposed to come from.
    agent_count = len(dhcp_agents)
    for index, network_id in enumerate(networks):
        target_agent = dhcp_agents[index % agent_count]
        LOG.info('Moving network %s from %s to %s' %
                 (network_id, networks[network_id], target_agent))
        quantum.remove_network_from_dhcp_agent(
            dhcp_agent=networks[network_id], network_id=network_id)
        quantum.add_network_to_dhcp_agent(dhcp_agent=target_agent,
                                          body={'network_id': network_id})
def reassign_agent_resources(self):
''' Use agent scheduler API to detect down agents and re-schedule '''
DHCP_AGENT = "DHCP Agent"
@ -219,44 +255,27 @@ class MonitorNeutronAgentsDaemon(Daemon):
l3_agents.append(agent['id'])
LOG.info('Active l3 agents: %s' % l3_agents)
if len(dhcp_agents) == 0 or len(l3_agents) == 0:
LOG.info('Unable to relocate resources, there are %s dhcp_agents '
'and %s l3_agents in this cluster' % (len(dhcp_agents),
if not networks and not routers:
LOG.info('No failed agents found, return.')
return
if len(dhcp_agents) == 0 and len(l3_agents) == 0:
LOG.error('Unable to relocate resources, there are %s dhcp_agents '
'and %s l3_agents in this cluster' % (len(dhcp_agents),
len(l3_agents)))
return
if l3_agents[0] != self.get_hostname() or \
dhcp_agents[0] != self.get_hostname():
LOG.info('Only the first agent could reschedule. l3 agents: %s '
'dhcp agents: %s' % (l3_agents, dhcp_agents))
return
if len(l3_agents) != 0:
self.l3_agents_reschedule(l3_agents, routers)
index = 0
for router_id in routers:
agent = index % len(l3_agents)
LOG.info('Moving router %s from %s to %s' %
(router_id, routers[router_id], l3_agents[agent]))
quantum.remove_router_from_l3_agent(l3_agent=routers[router_id],
router_id=router_id)
quantum.add_router_to_l3_agent(l3_agent=l3_agents[agent],
body={'router_id': router_id})
index += 1
index = 0
for network_id in networks:
agent = index % len(dhcp_agents)
LOG.info('Moving network %s from %s to %s' %
(network_id, networks[network_id], dhcp_agents[agent]))
quantum.remove_network_from_dhcp_agent(
dhcp_agent=networks[network_id], network_id=network_id)
quantum.add_network_to_dhcp_agent(dhcp_agent=dhcp_agents[agent],
body={'network_id': network_id})
index += 1
if len(dhcp_agents) != 0:
self.dhcp_agents_reschedule(dhcp_agents, networks)
def run(self):
while True:
LOG.info('Monitor Neutron Agent Loop Start')
time.sleep(15)
LOG.info('sleep %s' % cfg.CONF.check_interval)
time.sleep(float(cfg.CONF.check_interval))
self.reassign_agent_resources()
@ -265,16 +284,10 @@ if __name__ == '__main__':
cfg.StrOpt('check_interval',
default=15,
help='Check Neutron Agents interval.'),
# cfg.StrOpt('log_file',
# default='/var/log/monitor.log',
# help='log file'),
]
cfg.CONF.register_cli_opts(opts)
cfg.CONF(project='monitor_neutron_agents', default_config_files=[])
log_file = '/tmp/monitor.log'
print "log file: %s" % cfg.CONF.log_file
LOG.basicConfig(filename=log_file, level=LOG.INFO)
monitor_daemon = MonitorNeutronAgentsDaemon(
check_interval=cfg.CONF.check_interval)
logging.setup('Neuron-HA-Monitor')
monitor_daemon = MonitorNeutronAgentsDaemon()
monitor_daemon.start()

View File

@ -2,12 +2,8 @@
logger " ** "
logger "Start running ns_ovs_cleanup.sh..."
logger " ** "
logger "CRM_notify_task: $CRM_notify_task"
logger "CRM_notify_desc: $CRM_notify_desc"
logger "CRM_notify_rsc: $CRM_notify_rsc"
logger "CRM_notify_node: $CRM_notify_node"
logger "CRM_notify_task: $CRM_notify_task, CRM_notify_desc: $CRM_notify_desc"
logger "CRM_notify_rsc: $CRM_notify_rsc, CRM_notify_node: $CRM_notify_node"
logger " ** "
set -x
@ -50,8 +46,8 @@ if [[ $CRM_notify_rsc == 'res_PingCheck' && ${CRM_notify_task} == 'start' ]]; th
check_pid
if [ $? -ne 0 ]; then
logger "Executing monitor to reschedule Neutron agents..."
#sudo python /usr/local/bin/monitor.py >> /dev/null 2>&1 & echo $! > $DEFAULT_PIDFILE
sudo python /usr/local/bin/monitor.py >> /dev/null 2>&1 & echo $!
sudo python /usr/local/bin/monitor.py --config-file /tmp/monitor.conf \
--log-file /tmp/monitor.log >> /dev/null 2>&1 & echo $!
sleep 3
pid=`ps -aux | grep m\[o\]nitor.py | awk -F' ' '{print $2}'`
if [ ! -z "$pid" ]; then

View File

@ -1,8 +0,0 @@
# vim: set ft=upstart et ts=2:
description "Reassign Agent Resources for Legacy HA"
author "Hui Xiang <hui.xiang@canonical.com>"
start on runlevel [2345]
stop on runlevel [!2345]
exec start-stop-daemon --start --chuid neutron --exec /usr/local/bin/reassign_agent_services

View File

@ -629,12 +629,6 @@ def copy_file(source_dir, des_dir, f, f_mod=None, update=False):
raise
def init_upstart_f_4_reassign_agent_resources():
    """Copy reassign_agent_resources.conf from the legacy-HA template
    files into /etc/init."""
    copy_file(LEGACY_HA_TEMPLATE_FILES, '/etc/init',
              'reassign_agent_resources.conf')
def init_ocf_MonitorNeutron_f(update=False):
ocf_f = 'MonitorNeutron'
exec_dir = '/usr/lib/ocf/resource.d/pacemaker'
@ -642,19 +636,19 @@ def init_ocf_MonitorNeutron_f(update=False):
ocf_f, stat.S_IEXEC, update=update)
def get_external_agent_f():
    """Return the full path of the monitor_neutron_ha.sh external agent."""
    return os.path.join('/usr/lib/ocf/resource.d/canonical',
                        'monitor_neutron_ha.sh')
def init_external_agent_f(update=False):
agent = 'ns_ovs_cleanup.sh'
exec_dir = '/usr/lib/ocf/resource.d/openstack'
agent = 'monitor_neutron_ha.sh'
exec_dir = '/usr/lib/ocf/resource.d/canonical'
copy_file(LEGACY_HA_TEMPLATE_FILES, exec_dir,
agent, stat.S_IEXEC, update=update)
def init_reassign_agent_services_binary():
    """Copy reassign_agent_services from the legacy-HA template files into
    /usr/local/bin/, passing ``stat.S_IEXEC`` as the file mode."""
    copy_file(LEGACY_HA_TEMPLATE_FILES, '/usr/local/bin/',
              'reassign_agent_services', stat.S_IEXEC)
def init_monitor_daemon(update=False):
service = 'monitor.py'
exec_dir = '/usr/local/bin/'
@ -662,20 +656,20 @@ def init_monitor_daemon(update=False):
service, stat.S_IEXEC, update=update)
def init_monitor_conf_files(update=False):
    """Copy monitor.conf from the legacy-HA template files into /tmp.

    :param update: forwarded unchanged to ``copy_file`` as ``update``.
    """
    copy_file(LEGACY_HA_TEMPLATE_FILES, '/tmp', 'monitor.conf',
              update=update)
def install_legacy_ha_files(update=False):
    """Install the files used by legacy HA mode.

    Does nothing unless the ``ha-legacy-mode`` config option is truthy.

    :param update: forwarded unchanged to each installer as ``update``.
    """
    if not config('ha-legacy-mode'):
        return

    init_ocf_MonitorNeutron_f(update=update)
    init_external_agent_f(update=update)
    init_monitor_daemon(update=update)
    # The monitor daemon is launched with
    # `--config-file /tmp/monitor.conf`, so its config file has to be
    # installed together with the daemon itself.
    init_monitor_conf_files(update=update)
def get_external_agent_f():
    """Return the full path of the ns_ovs_cleanup.sh external agent."""
    return os.path.join('/usr/lib/ocf/resource.d/openstack',
                        'ns_ovs_cleanup.sh')
def cache_env_data():
env = NetworkServiceContext()()
if not env: