Add: Reschedule agents only when the executing agent is the first active one.
This commit is contained in:
parent
b0e705caaa
commit
06e6957394
@ -19,9 +19,12 @@ import signal
|
|||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from oslo.config import cfg
|
|
||||||
import logging as LOG
|
import logging as LOG
|
||||||
|
|
||||||
|
from oslo.config import cfg
|
||||||
|
from neutron.agent.linux import ovs_lib
|
||||||
|
from neutron.agent.linux import ip_lib
|
||||||
|
|
||||||
|
|
||||||
class Daemon(object):
|
class Daemon(object):
|
||||||
"""A generic daemon class.
|
"""A generic daemon class.
|
||||||
@ -110,6 +113,56 @@ class MonitorNeutronAgentsDaemon(Daemon):
|
|||||||
raise Exception("OpenStack env data uncomplete.")
|
raise Exception("OpenStack env data uncomplete.")
|
||||||
return self.env
|
return self.env
|
||||||
|
|
||||||
|
def get_hostname(self):
    """Return the host name of the machine this daemon runs on.

    The previous implementation shelled out to ``uname -n`` and returned
    the raw output, which ends with a newline and therefore never compared
    equal to a plain host name.  It also lacked ``self`` even though every
    caller invokes it as ``self.get_hostname()``.

    :returns: the node name as a string, without trailing whitespace.
    """
    # Local import: only part of the file's import list is visible here.
    import socket
    return socket.gethostname()
|
||||||
|
|
||||||
|
def get_root_helper(self):
    """Return the command prefix used to run privileged commands.

    Takes ``self`` because callers invoke it as ``self.get_root_helper()``.

    :returns: the literal string ``'sudo'``.
    """
    return 'sudo'
|
||||||
|
|
||||||
|
def unplug_device(self, device):
    """Delete a network device, falling back to removing it as an OVS port.

    The original signature was ``(conf, device)`` but the body referenced
    ``self`` and the only caller passes a single device, so the first
    parameter is now the instance.

    :param device: object exposing ``link.delete()`` and ``name``
        (presumably a neutron ``ip_lib`` device -- confirm against caller).
    """
    try:
        device.link.delete()
    except RuntimeError:
        root_helper = self.get_root_helper()
        # Maybe the device is an OVS port, so try to delete it from its bridge.
        bridge_name = ovs_lib.get_bridge_for_iface(root_helper, device.name)
        if bridge_name:
            bridge = ovs_lib.OVSBridge(bridge_name, root_helper)
            bridge.delete_port(device.name)
        else:
            LOG.debug(_('Unable to find bridge for device: %s'), device.name)
|
||||||
|
|
||||||
|
def cleanup_dhcp(self, networks):
    """Destroy the ``qdhcp-<network id>`` namespace of every given network.

    Takes ``self`` because the caller invokes it as
    ``self.cleanup_dhcp(networks)``; the namespace teardown is likewise
    reached through ``self``.

    :param networks: mapping keyed by network id; only the keys are used.
    """
    namespaces = ['qdhcp-' + network for network in networks]

    if namespaces:
        LOG.info('Namespaces: %s is going to be deleted.', namespaces)
        self.destroy_namespaces(namespaces)
|
||||||
|
|
||||||
|
def cleanup_router(self, routers):
    """Destroy the ``qrouter-<router id>`` namespace of every given router.

    Takes ``self`` because the caller invokes it as
    ``self.cleanup_router(routers)``; the namespace teardown is likewise
    reached through ``self``.

    :param routers: mapping keyed by router id; only the keys are used.
    """
    namespaces = ['qrouter-' + router for router in routers]

    if namespaces:
        LOG.info('Namespaces: %s is going to be deleted.', namespaces)
        self.destroy_namespaces(namespaces)
|
||||||
|
|
||||||
|
def destroy_namespaces(self, namespaces):
    """Unplug every non-loopback device in each namespace, then collect it.

    Errors are logged per namespace so that one failing namespace does not
    prevent the remaining ones from being cleaned up.  Takes ``self``
    because the body needs ``get_root_helper`` and ``unplug_device`` from
    the instance.

    :param namespaces: iterable of network namespace names.
    """
    root_helper = self.get_root_helper()
    for namespace in namespaces:
        try:
            ip = ip_lib.IPWrapper(root_helper, namespace)
            if ip.netns.exists(namespace):
                for device in ip.get_devices(exclude_loopback=True):
                    self.unplug_device(device)

            # NOTE(review): source indentation is lost; running this once per
            # namespace is assumed to be the intent -- confirm.
            ip.garbage_collect_namespace()
        except Exception:
            # Best-effort cleanup: record the failure and keep going.
            LOG.exception(_('Error unable to destroy namespace: %s'), namespace)
|
||||||
|
|
||||||
def reassign_agent_resources(self):
|
def reassign_agent_resources(self):
|
||||||
''' Use agent scheduler API to detect down agents and re-schedule '''
|
''' Use agent scheduler API to detect down agents and re-schedule '''
|
||||||
DHCP_AGENT = "DHCP Agent"
|
DHCP_AGENT = "DHCP Agent"
|
||||||
@ -145,8 +198,11 @@ class MonitorNeutronAgentsDaemon(Daemon):
|
|||||||
quantum.list_networks_on_dhcp_agent(
|
quantum.list_networks_on_dhcp_agent(
|
||||||
agent['id'])['networks']:
|
agent['id'])['networks']:
|
||||||
networks[network['id']] = agent['id']
|
networks[network['id']] = agent['id']
|
||||||
|
if agent['id'] == self.get_hostname():
|
||||||
|
self.cleanup_dhcp(networks)
|
||||||
else:
|
else:
|
||||||
dhcp_agents.append(agent['id'])
|
dhcp_agents.append(agent['id'])
|
||||||
|
LOG.info('Active dhcp agents: %s' % dhcp_agents)
|
||||||
|
|
||||||
agents = quantum.list_agents(agent_type=L3_AGENT)
|
agents = quantum.list_agents(agent_type=L3_AGENT)
|
||||||
routers = {}
|
routers = {}
|
||||||
@ -157,8 +213,11 @@ class MonitorNeutronAgentsDaemon(Daemon):
|
|||||||
quantum.list_routers_on_l3_agent(
|
quantum.list_routers_on_l3_agent(
|
||||||
agent['id'])['routers']:
|
agent['id'])['routers']:
|
||||||
routers[router['id']] = agent['id']
|
routers[router['id']] = agent['id']
|
||||||
|
if agent['id'] == self.get_hostname():
|
||||||
|
self.cleanup_router(routers)
|
||||||
else:
|
else:
|
||||||
l3_agents.append(agent['id'])
|
l3_agents.append(agent['id'])
|
||||||
|
LOG.info('Active l3 agents: %s' % l3_agents)
|
||||||
|
|
||||||
if len(dhcp_agents) == 0 or len(l3_agents) == 0:
|
if len(dhcp_agents) == 0 or len(l3_agents) == 0:
|
||||||
LOG.info('Unable to relocate resources, there are %s dhcp_agents '
|
LOG.info('Unable to relocate resources, there are %s dhcp_agents '
|
||||||
@ -166,6 +225,12 @@ class MonitorNeutronAgentsDaemon(Daemon):
|
|||||||
len(l3_agents)))
|
len(l3_agents)))
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if l3_agents[0] != self.get_hostname() or \
|
||||||
|
dhcp_agents[0] != self.get_hostname():
|
||||||
|
LOG.info('Only the first agent could reschedule. l3 agents: %s '
|
||||||
|
'dhcp agents: %s' % (l3_agents, dhcp_agents))
|
||||||
|
return
|
||||||
|
|
||||||
index = 0
|
index = 0
|
||||||
for router_id in routers:
|
for router_id in routers:
|
||||||
agent = index % len(l3_agents)
|
agent = index % len(l3_agents)
|
||||||
@ -200,15 +265,16 @@ if __name__ == '__main__':
|
|||||||
cfg.StrOpt('check_interval',
|
cfg.StrOpt('check_interval',
|
||||||
default=15,
|
default=15,
|
||||||
help='Check Neutron Agents interval.'),
|
help='Check Neutron Agents interval.'),
|
||||||
cfg.StrOpt('log_file',
|
# cfg.StrOpt('log_file',
|
||||||
default='/var/log/monitor.log',
|
# default='/var/log/monitor.log',
|
||||||
help='log file'),
|
# help='log file'),
|
||||||
]
|
]
|
||||||
|
|
||||||
cfg.CONF.register_cli_opts(opts)
|
cfg.CONF.register_cli_opts(opts)
|
||||||
cfg.CONF(project='monitor_neutron_agents', default_config_files=[])
|
cfg.CONF(project='monitor_neutron_agents', default_config_files=[])
|
||||||
|
log_file = '/tmp/monitor.log'
|
||||||
LOG.basicConfig(filename=cfg.CONF.log_file, level=LOG.INFO)
|
print "log file: %s" % cfg.CONF.log_file
|
||||||
|
LOG.basicConfig(filename=log_file, level=LOG.INFO)
|
||||||
monitor_daemon = MonitorNeutronAgentsDaemon(
|
monitor_daemon = MonitorNeutronAgentsDaemon(
|
||||||
check_interval=cfg.CONF.check_interval)
|
check_interval=cfg.CONF.check_interval)
|
||||||
monitor_daemon.start()
|
monitor_daemon.start()
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#! /bin/bash
|
#! /bin/bash
|
||||||
|
|
||||||
logger "Start running ns_ovs_cleanup.sh..."
|
|
||||||
logger " ** "
|
logger " ** "
|
||||||
|
logger "Start running ns_ovs_cleanup.sh..."
|
||||||
logger " ** "
|
logger " ** "
|
||||||
|
|
||||||
logger "CRM_notify_task: $CRM_notify_task"
|
logger "CRM_notify_task: $CRM_notify_task"
|
||||||
@ -9,15 +9,52 @@ logger "CRM_notify_desc: $CRM_notify_desc"
|
|||||||
logger "CRM_notify_rsc: $CRM_notify_rsc"
|
logger "CRM_notify_rsc: $CRM_notify_rsc"
|
||||||
logger "CRM_notify_node: $CRM_notify_node"
|
logger "CRM_notify_node: $CRM_notify_node"
|
||||||
logger " ** "
|
logger " ** "
|
||||||
logger " ** "
|
|
||||||
|
|
||||||
if [[ ${CRM_notify_task} == 'start' && $CRM_notify_rsc == 'res_PingCheck' ]]; then
|
set -x
|
||||||
|
|
||||||
|
DEFAULT_PIDFILE="/tmp/monitor.pid"
|
||||||
|
|
||||||
|
# Kill the running monitor.py process(es) and remove the pidfile.
#
# Uses the pid(s) recorded in $DEFAULT_PIDFILE when that file exists;
# otherwise falls back to scanning the process table for monitor.py.
function clean_pid
{
    logger "Clean pid."
    if [ -f "$DEFAULT_PIDFILE" ]; then
        pid=$(cat "$DEFAULT_PIDFILE")
        if [ -n "$pid" ]; then
            # $pid is intentionally unquoted for kill: the pidfile is written
            # from a ps scan and may hold several space-separated pids.
            sudo kill -s 9 $pid
        fi
        # Remove the pidfile even when it is empty so a stale file cannot linger.
        rm -f "$DEFAULT_PIDFILE"
        logger "pidfile $DEFAULT_PIDFILE is removed."
    else
        # The [o] bracket keeps grep from matching its own command line.
        pid=$(ps aux | grep 'm[o]nitor.py' | awk '{print $2}')
        if [ -n "$pid" ]; then
            sudo kill -s 9 $pid
            # Only report a kill when a process was actually found.
            logger "pid $pid is killed."
        fi
    fi
}
|
||||||
|
|
||||||
|
#if [[ ${CRM_notify_task} == 'start' && $CRM_notify_rsc == 'res_PingCheck' ]]; then
|
||||||
|
if [[ $CRM_notify_rsc == 'res_PingCheck' && ${CRM_notify_task} == 'start' ]]; then
|
||||||
if [[ ${CRM_notify_desc} == 'OK' ]]; then
|
if [[ ${CRM_notify_desc} == 'OK' ]]; then
|
||||||
hostname=`hostname`
|
hostname=`hostname`
|
||||||
logger "monitor error hostname: $CRM_notify_node"
|
clean_pid
|
||||||
logger "hostname: $hostname"
|
|
||||||
logger "Executing monitor to reschedule Neutron agents..."
|
logger "Executing monitor to reschedule Neutron agents..."
|
||||||
sudo python /usr/local/bin/monitor.py
|
#sudo python /usr/local/bin/monitor.py >> /dev/null 2>&1 & echo $! > $DEFAULT_PIDFILE
|
||||||
|
sudo python monitor.py >> /dev/null 2>&1 & echo $!
|
||||||
|
sleep 3
|
||||||
|
pid=`ps -aux | grep m\[o\]nitor.py | awk -F' ' '{print $2}'`
|
||||||
|
if [ ! -z "$pid" ]; then
|
||||||
|
echo $pid > $DEFAULT_PIDFILE
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
elif [[ $CRM_notify_rsc == 'res_PingCheck' && ${CRM_notify_task} == 'stop' ]]; then
|
||||||
|
if [[ ${CRM_notify_desc} == 'OK' ]]; then
|
||||||
|
clean_pid
|
||||||
|
fi
|
||||||
|
elif [[ $CRM_notify_rsc == 'res_PingCheck' && ${CRM_notify_task} == 'monitor' ]]; then
|
||||||
|
if [[ ${CRM_notify_desc} == 'unknown error' ]]; then
|
||||||
|
logger "TODO"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -236,7 +236,6 @@ def ha_relation_joined():
|
|||||||
resource_params = {
|
resource_params = {
|
||||||
'res_PingCheck': 'params host_list="{host}" dampen="5s" '
|
'res_PingCheck': 'params host_list="{host}" dampen="5s" '
|
||||||
'debug={debug} multiplier="1000" '
|
'debug={debug} multiplier="1000" '
|
||||||
'failure_score="10" '
|
|
||||||
'op monitor on-fail="restart" interval="10s" '
|
'op monitor on-fail="restart" interval="10s" '
|
||||||
'timeout="40s" '.format(host=dns_hosts,
|
'timeout="40s" '.format(host=dns_hosts,
|
||||||
debug=debug),
|
debug=debug),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user