From 52f27b1c382bd8799580810860f39ba2d06e9d29 Mon Sep 17 00:00:00 2001 From: Hui Xiang Date: Tue, 16 Dec 2014 15:50:05 +0800 Subject: [PATCH] Fix a lot. --- files/MonitorNeutron | 171 ------------------ files/monitor.conf | 3 + files/monitor.py | 109 ++++++----- ...s_ovs_cleanup.sh => monitor_neutron_ha.sh} | 12 +- files/reassign_agent_resources.conf | 8 - hooks/quantum_utils.py | 36 ++-- 6 files changed, 83 insertions(+), 256 deletions(-) delete mode 100644 files/MonitorNeutron create mode 100644 files/monitor.conf rename files/{ns_ovs_cleanup.sh => monitor_neutron_ha.sh} (83%) delete mode 100644 files/reassign_agent_resources.conf diff --git a/files/MonitorNeutron b/files/MonitorNeutron deleted file mode 100644 index b589cb3e..00000000 --- a/files/MonitorNeutron +++ /dev/null @@ -1,171 +0,0 @@ -#!/bin/sh -# -# -# Neutron_Legacy_HA OCF -# -# Copyright (c) 2014 Hui Xiang -# All Rights Reserved. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. -# -# This program is distributed in the hope that it would be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# -# Further, this software is distributed without any warranty that it is -# free of the rightful claim of any third person regarding infringement -# or the like. Any license provided herein, whether implied or -# otherwise, applies only to this software file. Patent licenses, if -# any, provided herein do not apply to combinations of this program with -# other software, or any other product whatsoever. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write the Free Software Foundation, -# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. -# - -####################################################################### -# Initialization: - -: ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs} -. ${OCF_FUNCTIONS} -: ${__OCF_ACTION=$1} - -####################################################################### - -meta_data() { - cat < - - -1.0 - - -Monitor Neutron L3/DHCP agents for legacy HA. - -Monitor Neutron L3/DHCP agents for legacy HA - - - - -Enables to use default attrd_updater verbose logging on every call. - -Verbose logging - - - - - - - - - - - - - -END -} - -####################################################################### -OCF_RESKEY_binary_default="/usr/local/bin/monitor.py" -OCF_RESKEY_user_default="root" -OCF_RESKEY_pid_default="$HA_RSCTMP/$OCF_RESOURCE_INSTANCE.pid" - -: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} -: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} -: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} - -####################################################################### - -MonitorNeutron_conditional_log() { - level=$1; shift - if [ ${OCF_RESKEY_debug} = "true" ]; then - ocf_log $level "$*" - fi -} - -MonitorNeutron_usage() { - cat <> /dev/null 2>&1 & echo $!' > $OCF_RESKEY_pid - - if [ $? = $OCF_SUCCESS ]; then - return $OCF_SUCCESS - fi - - ocf_log info "MonitorNeutron started" -} - -MonitorNeutron_stop() { - # Try SIGTERM - if [ ! -f $OCF_RESKEY_pid ]; then - ocf_log info "MonitorNeutron already stopped" - #return $OCF_NOT_RUNNING - return $OCF_SUCCESS - fi - - pid=`cat $OCF_RESKEY_pid` - if [ -z "$pid" ]; then - ocf_log err "MonitorNeutron pid is empty" - exit $OCF_ERR_GENERIC - fi - - ocf_run kill -s TERM $pid - rc=$? - if [ $rc -ne 0 ]; then - ocf_log err "MonitorNeutron couldn't be stopped" - return $OCF_SUCCESS - #exit $OCF_ERR_GENERIC - fi - - ocf_log info "MonitorNeutron stopped" - return $OCF_SUCCESS -} - -MonitorNeutron_validate() { - # Is the state directory writable? - if [ ! -f $OCF_RESKEY_binary ]; then - ocf_log err "No file $OCF_RESKEY_binary exists !" - return $OCF_ERRARGS - fi - return $OCF_SUCCESS -} - -MonitorNeutron_monitor() { - return $OCF_SUCCESS - if [ -f ${OCF_RESKEY_pid} ]; then - return $OCF_SUCCESS - fi - return $OCF_ERR_GENERIC -} - -case $__OCF_ACTION in -meta-data) meta_data - exit $OCF_SUCCESS - ;; -start) MonitorNeutron_start;; -stop) MonitorNeutron_stop;; -reload) MonitorNeutron_start;; -monitor) MonitorNeutron_monitor;; -validate-all) MonitorNeutron_validate;; -usage|help) MonitorNeutron_usage - exit $OCF_SUCCESS - ;; -*) MonitorNeutron_usage - exit $OCF_ERR_UNIMPLEMENTED - ;; -esac -exit $? diff --git a/files/monitor.conf b/files/monitor.conf new file mode 100644 index 00000000..e938f789 --- /dev/null +++ b/files/monitor.conf @@ -0,0 +1,3 @@ +[DEFAULT] +verbose=True +check_interval=15 diff --git a/files/monitor.py b/files/monitor.py index f1178b62..ab71fc6d 100644 --- a/files/monitor.py +++ b/files/monitor.py @@ -17,13 +17,15 @@ import fcntl import os import signal import sys +import subprocess import time -import logging as LOG - from oslo.config import cfg from neutron.agent.linux import ovs_lib from neutron.agent.linux import ip_lib +from neutron.openstack.common import log as logging + +LOG = logging.getLogger(__name__) class Daemon(object): @@ -88,9 +90,9 @@ class Daemon(object): class MonitorNeutronAgentsDaemon(Daemon): - def __init__(self, check_interval=None): + def __init__(self): super(MonitorNeutronAgentsDaemon, self).__init__() - self.check_interval = check_interval + logging.setup('Neuron-HA-Monitor') LOG.info('Monitor Neutron Agent Loop Init') self.env = {} @@ -113,13 +115,13 @@ class MonitorNeutronAgentsDaemon(Daemon): raise Exception("OpenStack env data uncomplete.") return self.env - def get_hostname(): + def get_hostname(self): return subprocess.check_output(['uname', '-n']) - def get_root_helper(): + def get_root_helper(self): return 'sudo' - def unplug_device(conf, device): + def unplug_device(self, conf, device): try: device.link.delete() except RuntimeError: @@ -132,7 +134,7 @@ class MonitorNeutronAgentsDaemon(Daemon): else: LOG.debug(_('Unable to find bridge for device: %s'), device.name) - def cleanup_dhcp(networks): + def cleanup_dhcp(self, networks): namespaces = [] for network, agent in networks.iteritems(): namespaces.append('qdhcp-' + network) @@ -141,7 +143,7 @@ class MonitorNeutronAgentsDaemon(Daemon): LOG.info('Namespaces: %s is going to be deleted.' % namespaces) destroy_namespaces(namespaces) - def cleanup_router(routers): + def cleanup_router(self, routers): namespaces = [] for router, agent in routers.iteritems(): namespaces.append('qrouter-' + router) @@ -150,7 +152,7 @@ class MonitorNeutronAgentsDaemon(Daemon): LOG.info('Namespaces: %s is going to be deleted.' % namespaces) destroy_namespaces(namespaces) - def destroy_namespaces(namespaces): + def destroy_namespaces(self, namespaces): try: root_helper = self.get_root_helper() for namespace in namespaces: @@ -163,6 +165,40 @@ class MonitorNeutronAgentsDaemon(Daemon): except Exception: LOG.exception(_('Error unable to destroy namespace: %s'), namespace) + def l3_agents_reschedule(self, l3_agents, routers): + if l3_agents[0] != self.get_hostname(): + LOG.info('Only the first agent could reschedule. l3 agents: %s ' + 'dhcp agents: %s' % (l3_agents)) + return + + index = 0 + for router_id in routers: + agent = index % len(l3_agents) + LOG.info('Moving router %s from %s to %s' % + (router_id, routers[router_id], l3_agents[agent])) + quantum.remove_router_from_l3_agent(l3_agent=routers[router_id], + router_id=router_id) + quantum.add_router_to_l3_agent(l3_agent=l3_agents[agent], + body={'router_id': router_id}) + index += 1 + + def dhcp_agents_reschedule(self, dhcp_agents, networks): + if dhcp_agents[0] != self.get_hostname(): + LOG.info('Only the first agent could reschedule. ' + 'dhcp agents: %s' % dhcp_agents) + return + + index = 0 + for network_id in networks: + agent = index % len(dhcp_agents) + LOG.info('Moving network %s from %s to %s' % + (network_id, networks[network_id], dhcp_agents[agent])) + quantum.remove_network_from_dhcp_agent( + dhcp_agent=networks[network_id], network_id=network_id) + quantum.add_network_to_dhcp_agent(dhcp_agent=dhcp_agents[agent], + body={'network_id': network_id}) + index += 1 + def reassign_agent_resources(self): ''' Use agent scheduler API to detect down agents and re-schedule ''' DHCP_AGENT = "DHCP Agent" @@ -219,44 +255,27 @@ class MonitorNeutronAgentsDaemon(Daemon): l3_agents.append(agent['id']) LOG.info('Active l3 agents: %s' % l3_agents) - if len(dhcp_agents) == 0 or len(l3_agents) == 0: - LOG.info('Unable to relocate resources, there are %s dhcp_agents ' - 'and %s l3_agents in this cluster' % (len(dhcp_agents), + if not networks and not routers: + LOG.info('No failed agents found, return.') + return + + if len(dhcp_agents) == 0 and len(l3_agents) == 0: + LOG.error('Unable to relocate resources, there are %s dhcp_agents ' + 'and %s l3_agents in this cluster' % (len(dhcp_agents), len(l3_agents))) return - if l3_agents[0] != self.get_hostname() or \ - dhcp_agents[0] != self.get_hostname(): - LOG.info('Only the first agent could reschedule. l3 agents: %s ' - 'dhcp agents: %s' % (l3_agents, dhcp_agents)) - return + if len(l3_agents) != 0: + self.l3_agents_reschedule(l3_agents, routers) - index = 0 - for router_id in routers: - agent = index % len(l3_agents) - LOG.info('Moving router %s from %s to %s' % - (router_id, routers[router_id], l3_agents[agent])) - quantum.remove_router_from_l3_agent(l3_agent=routers[router_id], - router_id=router_id) - quantum.add_router_to_l3_agent(l3_agent=l3_agents[agent], - body={'router_id': router_id}) - index += 1 - - index = 0 - for network_id in networks: - agent = index % len(dhcp_agents) - LOG.info('Moving network %s from %s to %s' % - (network_id, networks[network_id], dhcp_agents[agent])) - quantum.remove_network_from_dhcp_agent( - dhcp_agent=networks[network_id], network_id=network_id) - quantum.add_network_to_dhcp_agent(dhcp_agent=dhcp_agents[agent], - body={'network_id': network_id}) - index += 1 + if len(dhcp_agents) != 0: + self.dhcp_agents_reschedule(dhcp_agents, networks) def run(self): while True: LOG.info('Monitor Neutron Agent Loop Start') - time.sleep(15) + LOG.info('sleep %s' % cfg.CONF.check_interval) + time.sleep(float(cfg.CONF.check_interval)) self.reassign_agent_resources() @@ -265,16 +284,10 @@ if __name__ == '__main__': cfg.StrOpt('check_interval', default=15, help='Check Neutron Agents interval.'), -# cfg.StrOpt('log_file', -# default='/var/log/monitor.log', -# help='log file'), ] cfg.CONF.register_cli_opts(opts) cfg.CONF(project='monitor_neutron_agents', default_config_files=[]) - log_file = '/tmp/monitor.log' - print "log file: %s" % cfg.CONF.log_file - LOG.basicConfig(filename=log_file, level=LOG.INFO) - monitor_daemon = MonitorNeutronAgentsDaemon( - check_interval=cfg.CONF.check_interval) + logging.setup('Neuron-HA-Monitor') + monitor_daemon = MonitorNeutronAgentsDaemon() monitor_daemon.start() diff --git a/files/ns_ovs_cleanup.sh b/files/monitor_neutron_ha.sh similarity index 83% rename from files/ns_ovs_cleanup.sh rename to files/monitor_neutron_ha.sh index d2d7e2db..fb288ff9 100755 --- a/files/ns_ovs_cleanup.sh +++ b/files/monitor_neutron_ha.sh @@ -2,12 +2,8 @@ logger " ** " logger "Start running ns_ovs_cleanup.sh..." -logger " ** " - -logger "CRM_notify_task: $CRM_notify_task" -logger "CRM_notify_desc: $CRM_notify_desc" -logger "CRM_notify_rsc: $CRM_notify_rsc" -logger "CRM_notify_node: $CRM_notify_node" +logger "CRM_notify_task: $CRM_notify_task, CRM_notify_desc: $CRM_notify_desc" +logger "CRM_notify_rsc: $CRM_notify_rsc, CRM_notify_node: $CRM_notify_node" logger " ** " set -x @@ -50,8 +46,8 @@ if [[ $CRM_notify_rsc == 'res_PingCheck' && ${CRM_notify_task} == 'start' ]]; th check_pid if [ $? -ne 0 ]; then logger "Executing monitor to reschedule Neutron agents..." - #sudo python /usr/local/bin/monitor.py >> /dev/null 2>&1 & echo $! > $DEFAULT_PIDFILE - sudo python /usr/local/bin/monitor.py >> /dev/null 2>&1 & echo $! + sudo python /usr/local/bin/monitor.py --config-file /tmp/monitor.conf \ + --log-file /tmp/monitor.log >> /dev/null 2>&1 & echo $! sleep 3 pid=`ps -aux | grep m\[o\]nitor.py | awk -F' ' '{print $2}'` if [ ! -z "$pid" ]; then diff --git a/files/reassign_agent_resources.conf b/files/reassign_agent_resources.conf deleted file mode 100644 index 55615f47..00000000 --- a/files/reassign_agent_resources.conf +++ /dev/null @@ -1,8 +0,0 @@ -# vim: set ft=upstart et ts=2: -description "Reassign Agent Resources for Legacy HA" -author "Hui Xiang " - -start on runlevel [2345] -stop on runlevel [!2345] - -exec start-stop-daemon --start --chuid neutron --exec /usr/local/bin/reassign_agent_services diff --git a/hooks/quantum_utils.py b/hooks/quantum_utils.py index 0de7ed86..78e12552 100644 --- a/hooks/quantum_utils.py +++ b/hooks/quantum_utils.py @@ -629,12 +629,6 @@ def copy_file(source_dir, des_dir, f, f_mod=None, update=False): raise -def init_upstart_f_4_reassign_agent_resources(): - upstart_f = 'reassign_agent_resources.conf' - exec_dir = '/etc/init' - copy_file(LEGACY_HA_TEMPLATE_FILES, exec_dir, upstart_f) - - def init_ocf_MonitorNeutron_f(update=False): ocf_f = 'MonitorNeutron' exec_dir = '/usr/lib/ocf/resource.d/pacemaker' @@ -642,19 +636,19 @@ def init_ocf_MonitorNeutron_f(update=False): ocf_f, stat.S_IEXEC, update=update) +def get_external_agent_f(): + agent = 'monitor_neutron_ha.sh' + exec_dir = '/usr/lib/ocf/resource.d/canonical' + return os.path.join(exec_dir, agent) + + def init_external_agent_f(update=False): - agent = 'ns_ovs_cleanup.sh' - exec_dir = '/usr/lib/ocf/resource.d/openstack' + agent = 'monitor_neutron_ha.sh' + exec_dir = '/usr/lib/ocf/resource.d/canonical' copy_file(LEGACY_HA_TEMPLATE_FILES, exec_dir, agent, stat.S_IEXEC, update=update) -def init_reassign_agent_services_binary(): - service = 'reassign_agent_services' - exec_dir = '/usr/local/bin/' - copy_file(LEGACY_HA_TEMPLATE_FILES, exec_dir, service, stat.S_IEXEC) - - def init_monitor_daemon(update=False): service = 'monitor.py' exec_dir = '/usr/local/bin/' @@ -662,20 +656,20 @@ def init_monitor_daemon(update=False): service, stat.S_IEXEC, update=update) +def init_monitor_conf_files(update=False): + conf = 'monitor.conf' + exec_dir = '/tmp' + copy_file(LEGACY_HA_TEMPLATE_FILES, exec_dir, + conf, update=update) + + def install_legacy_ha_files(update=False): if config('ha-legacy-mode'): init_ocf_MonitorNeutron_f(update=update) init_external_agent_f(update=update) - # init_reassign_agent_services_binary() init_monitor_daemon(update=update) -def get_external_agent_f(): - agent = 'ns_ovs_cleanup.sh' - exec_dir = '/usr/lib/ocf/resource.d/openstack' - return os.path.join(exec_dir, agent) - - def cache_env_data(): env = NetworkServiceContext()() if not env: