Fix more.

This commit is contained in:
Hui Xiang 2014-12-12 17:57:19 +08:00
commit 6d1d866c0e
4 changed files with 72 additions and 48 deletions

View File

@ -98,8 +98,9 @@ END
MonitorNeutron_start() { MonitorNeutron_start() {
ocf_log info "MonitorNeutron_start" ocf_log info "MonitorNeutron_start"
su ${OCF_RESKEY_user} -s /bin/sh -c "python ${OCF_RESKEY_binary} $OCF_RESKEY_additional_parameters" \ su ${OCF_RESKEY_user} -s /bin/sh -c "python ${OCF_RESKEY_binary} \
' >> /dev/null 2>&1 & echo $!' > $OCF_RESKEY_pid $OCF_RESKEY_additional_parameters"' \
>> /dev/null 2>&1 & echo $!' > $OCF_RESKEY_pid
if [ $? = $OCF_SUCCESS ]; then if [ $? = $OCF_SUCCESS ]; then
return $OCF_SUCCESS return $OCF_SUCCESS

View File

@ -20,9 +20,7 @@ import sys
import time import time
from oslo.config import cfg from oslo.config import cfg
from neutron.openstack.common import log as logging import logging as LOG
LOG = logging.getLogger(__name__)
class Daemon(object): class Daemon(object):
@ -58,18 +56,16 @@ class Daemon(object):
# fork second time # fork second time
self._fork() self._fork()
# redirect standard file descriptors
# redirect standard file descriptors # redirect standard file descriptors
sys.stdout.flush() sys.stdout.flush()
sys.stderr.flush() sys.stderr.flush()
stdin = open(self.stdin, 'r') stdin = open(self.stdin, 'r')
stdout = open(self.stdout, 'a+') stdout = open(self.stdout, 'a+')
stderr = open(self.stderr, 'a+', 0) stderr = open(self.stderr, 'a+', 0)
#os.dup2(stdin.fileno(), sys.stdin.fileno()) os.dup2(stdin.fileno(), sys.stdin.fileno())
#os.dup2(stdout.fileno(), sys.stdout.fileno()) os.dup2(stdout.fileno(), sys.stdout.fileno())
#os.dup2(stderr.fileno(), sys.stderr.fileno()) os.dup2(stderr.fileno(), sys.stderr.fileno())
#atexit.register(self.delete_pid)
signal.signal(signal.SIGTERM, self.handle_sigterm) signal.signal(signal.SIGTERM, self.handle_sigterm)
def handle_sigterm(self, signum, frame): def handle_sigterm(self, signum, frame):
@ -93,19 +89,26 @@ class MonitorNeutronAgentsDaemon(Daemon):
super(MonitorNeutronAgentsDaemon, self).__init__() super(MonitorNeutronAgentsDaemon, self).__init__()
self.check_interval = check_interval self.check_interval = check_interval
LOG.info('Monitor Neutron Agent Loop Init') LOG.info('Monitor Neutron Agent Loop Init')
self.env = {}
def get_env(self): def get_env(self):
env = {} envrc_f = '/etc/legacy_ha_envrc'
env_data = '/etc/legacy_ha_env_data' envrc_f_m = False
if os.path.isfile(env_data): if os.path.isfile(envrc_f):
with open(env_data, 'r') as f: ctime = time.ctime(os.stat(envrc_f).st_ctime)
line = f.readline() mtime = time.ctime(os.stat(envrc_f).st_mtime)
data = line.split('=').strip() if ctime != mtime:
if data and data[0] and data[1]: envrc_f_m = True
env[data[0]] = env[data[1]]
else: if not self.env or envrc_f_m:
raise Exception("OpenStack env data uncomplete.") with open(envrc_f, 'r') as f:
return env for line in f:
data = line.strip().split('=')
if data and data[0] and data[1]:
self.env[data[0]] = data[1]
else:
raise Exception("OpenStack env data uncomplete.")
return self.env
def reassign_agent_resources(self): def reassign_agent_resources(self):
''' Use agent scheduler API to detect down agents and re-schedule ''' ''' Use agent scheduler API to detect down agents and re-schedule '''
@ -130,10 +133,6 @@ class MonitorNeutronAgentsDaemon(Daemon):
region_name=env['region']) region_name=env['region'])
partner_gateways = [] partner_gateways = []
#partner_gateways = [unit_private_ip().split('.')[0]]
#for partner_gateway in relations_of_type(reltype='cluster'):
# gateway_hostname = get_hostname(partner_gateway['private-address'])
# partner_gateways.append(gateway_hostname.partition('.')[0])
agents = quantum.list_agents(agent_type=DHCP_AGENT) agents = quantum.list_agents(agent_type=DHCP_AGENT)
dhcp_agents = [] dhcp_agents = []
@ -147,8 +146,7 @@ class MonitorNeutronAgentsDaemon(Daemon):
agent['id'])['networks']: agent['id'])['networks']:
networks[network['id']] = agent['id'] networks[network['id']] = agent['id']
else: else:
if agent['host'].partition('.')[0] in partner_gateways: dhcp_agents.append(agent['id'])
dhcp_agents.append(agent['id'])
agents = quantum.list_agents(agent_type=L3_AGENT) agents = quantum.list_agents(agent_type=L3_AGENT)
routers = {} routers = {}
@ -160,8 +158,7 @@ class MonitorNeutronAgentsDaemon(Daemon):
agent['id'])['routers']: agent['id'])['routers']:
routers[router['id']] = agent['id'] routers[router['id']] = agent['id']
else: else:
if agent['host'].split('.')[0] in partner_gateways: l3_agents.append(agent['id'])
l3_agents.append(agent['id'])
if len(dhcp_agents) == 0 or len(l3_agents) == 0: if len(dhcp_agents) == 0 or len(l3_agents) == 0:
LOG.info('Unable to relocate resources, there are %s dhcp_agents ' LOG.info('Unable to relocate resources, there are %s dhcp_agents '
@ -194,11 +191,9 @@ class MonitorNeutronAgentsDaemon(Daemon):
def run(self): def run(self):
while True: while True:
LOG.info('Monitor Neutron Agent Loop Start') LOG.info('Monitor Neutron Agent Loop Start')
print "Monitor Neutron Agent Loop Start" LOG.info("Start : %s" % time.ctime())
print "Start : %s" % time.ctime() time.sleep(self.check_interval)
#time.sleep(self.check_interval) LOG.info("End : %s" % time.ctime())
time.sleep( 15 )
print "End : %s" % time.ctime()
self.reassign_agent_resources() self.reassign_agent_resources()
@ -207,11 +202,15 @@ if __name__ == '__main__':
cfg.StrOpt('check_interval', cfg.StrOpt('check_interval',
default=15, default=15,
help='Check Neutron Agents interval.'), help='Check Neutron Agents interval.'),
cfg.StrOpt('log_file',
default='/var/log/monitor.log',
help='log file'),
] ]
cfg.CONF.register_cli_opts(opts) cfg.CONF.register_cli_opts(opts)
cfg.CONF(project='monitor_neutron_agents', default_config_files=[]) cfg.CONF(project='monitor_neutron_agents', default_config_files=[])
LOG.basicConfig(filename=cfg.CONF.log_file, level=LOG.INFO)
monitor_daemon = MonitorNeutronAgentsDaemon( monitor_daemon = MonitorNeutronAgentsDaemon(
check_interval=cfg.CONF.check_interval) check_interval=cfg.CONF.check_interval)
monitor_daemon.start() monitor_daemon.start()

View File

@ -192,6 +192,9 @@ def nm_changed():
ca_crt = b64decode(relation_get('ca_cert')) ca_crt = b64decode(relation_get('ca_cert'))
install_ca_cert(ca_crt) install_ca_cert(ca_crt)
if config('ha-legacy-mode'):
cache_env_data()
@hooks.hook("cluster-relation-departed") @hooks.hook("cluster-relation-departed")
@restart_on_change(restart_map()) @restart_on_change(restart_map())
@ -243,21 +246,20 @@ def ha_relation_joined():
'op monitor on-fail="restart" interval="10s"' 'op monitor on-fail="restart" interval="10s"'
.format(external_agent=external_agent), .format(external_agent=external_agent),
'res_MonitorHA': 'op monitor interval="5s"', 'res_MonitorHA': 'op monitor interval="5s"',
'nees_connectivity': 'location res_MonitorHA '
'rule pingd: defined pingd'
#'rule -inf: not_defined pingd or pingd lte 0'
} }
clones = { clones = {
'cl_PingCheck': 'res_PingCheck', 'cl_PingCheck': 'res_PingCheck',
'cl_ClusterMon': 'res_ClusterMon' 'cl_ClusterMon': 'res_ClusterMon'
} }
constraints = {'location': 'nees_connectivity res_MonitorHA '
'rule pingd: defined pingd'}
relation_set(corosync_bindiface=cluster_config['ha-bindiface'], relation_set(corosync_bindiface=cluster_config['ha-bindiface'],
corosync_mcastport=cluster_config['ha-mcastport'], corosync_mcastport=cluster_config['ha-mcastport'],
resources=resources, resources=resources,
resource_params=resource_params, resource_params=resource_params,
clones=clones) clones=clones,
constraints=constraints)
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -473,11 +473,7 @@ def reassign_agent_resources():
auth_url=auth_url, auth_url=auth_url,
region_name=env['region']) region_name=env['region'])
partner_gateways = [unit_private_ip().split('.')[0]] partner_gateways = get_quantum_gateway_cluster_nodes()
for partner_gateway in relations_of_type(reltype='cluster'):
gateway_hostname = get_hostname(partner_gateway['private-address'])
partner_gateways.append(gateway_hostname.partition('.')[0])
agents = quantum.list_agents(agent_type=DHCP_AGENT) agents = quantum.list_agents(agent_type=DHCP_AGENT)
dhcp_agents = [] dhcp_agents = []
l3_agents = [] l3_agents = []
@ -608,6 +604,14 @@ def get_dns_host():
return ' '.join(dns_hosts) return ' '.join(dns_hosts)
def get_quantum_gateway_cluster_nodes():
    """Return the short hostnames of this unit and of its 'cluster' peers.

    The first entry is derived from this unit's private address; one further
    entry is added for each item returned by
    ``relations_of_type(reltype='cluster')``, using its 'private-address'.
    Every entry is the part of the resolved hostname before the first '.'.

    Returns:
        list of str: short hostnames, local unit first.
    """
    # Seed the result as a list. get_hostname() yields a string (the loop
    # below calls .partition() on its result), so binding that string
    # directly would make the later .append() raise AttributeError.
    # NOTE(review): if get_hostname() can return None on a failed lookup,
    # .partition() will raise here and in the loop -- confirm with its docs.
    local_hostname = get_hostname(unit_private_ip())
    partner_gateways = [local_hostname.partition('.')[0]]
    for partner_gateway in relations_of_type(reltype='cluster'):
        gateway_hostname = get_hostname(partner_gateway['private-address'])
        partner_gateways.append(gateway_hostname.partition('.')[0])
    return partner_gateways
def copy_file(source_dir, des_dir, f, f_mod=None, update=False): def copy_file(source_dir, des_dir, f, f_mod=None, update=False):
if not os.path.isdir(des_dir): if not os.path.isdir(des_dir):
mkdir(des_dir) mkdir(des_dir)
@ -663,7 +667,7 @@ def install_legacy_ha_files(update=False):
if config('ha-legacy-mode'): if config('ha-legacy-mode'):
init_ocf_MonitorNeutron_f(update=update) init_ocf_MonitorNeutron_f(update=update)
init_external_agent_f(update=update) init_external_agent_f(update=update)
#init_reassign_agent_services_binary() # init_reassign_agent_services_binary()
init_monitor_daemon(update=update) init_monitor_daemon(update=update)
@ -679,6 +683,24 @@ def cache_env_data():
log('Unable to get NetworkServiceContext at this time', level=ERROR) log('Unable to get NetworkServiceContext at this time', level=ERROR)
return return
with open('/etc/legacy_ha_env_data', 'w') as f: no_envrc = False
for k, v in env.items(): envrc_f = '/etc/legacy_ha_envrc'
f.write(''.join([k, '=', v, '\n'])) if os.path.isfile(envrc_f):
with open(envrc_f, 'r') as f:
data = f.read()
data = data.strip().split('\n')
diff = False
for line in data:
k = line.split('=')[0]
v = line.split('=')[1]
if k not in env or v != env[k]:
diff = True
break
else:
no_envrc = True
if no_envrc or diff:
with open(envrc_f, 'w') as f:
for k, v in env.items():
f.write(''.join([k, '=', v, '\n']))