Merge from quantum-gateway chunk
Commit e777443c5a
@ -9,3 +9,4 @@ include:
    - contrib.python.packages
    - contrib.storage.linux
    - payload.execd
    - contrib.charmsupport
10 config.yaml
@ -104,6 +104,16 @@ options:
    default: nova
    type: string
    description: Database name
  nagios_context:
    default: "juju"
    type: string
    description: |
      Used by the nrpe-external-master subordinate charm.
      A string that will be prepended to instance name to set the host name
      in nagios. So for instance the hostname would be something like:
          juju-myservice-0
      If you're running multiple environments with the same services in them
      this allows you to differentiate between them.
  # Network configuration options
  # by default all access is over 'private-address'
  os-data-network:
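Note: the nagios_context option added above is consumed by the nrpe helpers introduced later in this commit. A minimal sketch of reading it from a hook (illustrative only; assumes it runs in a Juju hook environment with charmhelpers on the path)::

    from charmhelpers.core.hookenv import config

    # Defaults to "juju"; the nrpe helpers prepend it to the unit name to
    # build the Nagios host name, e.g. juju-myservice-0.
    nagios_context = config('nagios_context')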
@ -1,155 +0,0 @@
#!/bin/sh
#
#
# NeutronAgentMon OCF RA.
# Starts crm_mon in background which logs cluster status as
# html to the specified file.
#
# Copyright 2014 Canonical Ltd.
#
# Authors: Hui Xiang <hui.xiang@canonical.com>
#          Edward Hope-Morley <edward.hope-morley@canonical.com>
#
# OCF instance parameters:
#   OCF_RESKEY_file

#######################################################################
# Initialization:
: ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}
. ${OCF_FUNCTIONS}
: ${__OCF_ACTION=$1}

#######################################################################

meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="NeutronAgentMon">
<version>1.0</version>

<longdesc lang="en">
This is a NeutronAgentMon Resource Agent.
It monitors the 'neutron-ha-monitor daemon' status.
</longdesc>
<shortdesc lang="en">Monitor '/usr/local/bin/neutron-ha-monitor.py' in the background.</shortdesc>

<parameters>

<parameter name="file" unique="0">
<longdesc lang="en">
The file we want to run as a daemon.
</longdesc>
<shortdesc lang="en">The file we want to run as a daemon.</shortdesc>
<content type="string" default="/usr/local/bin/neutron-ha-monitor.py" />
</parameter>

</parameters>

<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="monitor" depth="0" timeout="20" interval="60" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
</resource-agent>
END
}

#######################################################################

NeutronAgentMon_usage() {
    cat <<END
usage: $0 {start|stop|monitor|validate-all|meta-data}

Expects to have a fully populated OCF RA-compliant environment set.
END
}

NeutronAgentMon_exit() {
    if [ $1 != 0 ]; then
        exit $OCF_ERR_GENERIC
    else
        exit $OCF_SUCCESS
    fi
}

NeutronAgentMon_start() {
    pid=`sudo ps -aux | grep neutron-ha-m\[o\]nitor.py | awk -F' ' '{print $2}'`
    if [ -z $pid ]; then
        ocf_log info "[NeutronAgentMon_start] Start Monitor daemon."
        sudo mkdir -p /var/log/neutron-ha
        sudo python /usr/local/bin/neutron-ha-monitor.py \
            --config-file /var/lib/juju-neutron-ha/neutron-ha-monitor.conf \
            --log-file /var/log/neutron-ha/monitor.log >> /dev/null 2>&1 & echo $!
        sleep 5
    else
        ocf_log warn "[NeutronAgentMon_start] Monitor daemon already running."
    fi
    NeutronAgentMon_exit $?
}

NeutronAgentMon_stop() {
    pid=`sudo ps -aux | grep neutron-ha-m\[o\]nitor.py | awk -F' ' '{print $2}'`
    if [ ! -z $pid ]; then
        sudo kill -s 9 $pid
        ocf_log info "[NeutronAgentMon_stop] Pid $pid is killed."
    else
        ocf_log warn "[NeutronAgentMon_stop] Monitor daemon already stopped."
    fi
    NeutronAgentMon_exit 0
}

NeutronAgentMon_monitor() {
    pid=`sudo ps -aux | grep neutron-ha-m\[o\]nitor.py | awk -F' ' '{print $2}'`
    if [ ! -z $pid ]; then
        ocf_log info "[NeutronAgentMon_monitor] success."
        exit $OCF_SUCCESS
    fi
    exit $OCF_NOT_RUNNING
}

NeutronAgentMon_validate() {
    # Check that the file to be monitored exists
    if [ -f $OCF_RESKEY_file ]; then
        echo "Validate OK"
        return $OCF_SUCCESS
    else
        ocf_log err "The file $OCF_RESKEY_file does not exist!"
        exit $OCF_ERR_ARGS
    fi
}

if [ $# -ne 1 ]; then
    NeutronAgentMon_usage
    exit $OCF_ERR_ARGS
fi

: ${OCF_RESKEY_update:="15000"}
: ${OCF_RESKEY_pidfile:="/tmp/NeutronAgentMon_${OCF_RESOURCE_INSTANCE}.pid"}
: ${OCF_RESKEY_htmlfile:="/tmp/NeutronAgentMon_${OCF_RESOURCE_INSTANCE}.html"}

OCF_RESKEY_update=`expr $OCF_RESKEY_update / 1000`

case $__OCF_ACTION in
meta-data)      meta_data
                exit $OCF_SUCCESS
                ;;
start)          NeutronAgentMon_start
                ;;
stop)           NeutronAgentMon_stop
                ;;
monitor)        NeutronAgentMon_monitor
                ;;
validate-all)   NeutronAgentMon_validate
                ;;
usage|help)     NeutronAgentMon_usage
                exit $OCF_SUCCESS
                ;;
*)              NeutronAgentMon_usage
                exit $OCF_ERR_UNIMPLEMENTED
                ;;
esac

exit $?
@ -1,4 +0,0 @@
[DEFAULT]
verbose=True
#debug=True
check_interval=8
@ -1,430 +0,0 @@
# Copyright 2014 Canonical Ltd.
#
# Authors: Hui Xiang <hui.xiang@canonical.com>
#          Joshua Zhang <joshua.zhang@canonical.com>
#          Edward Hope-Morley <edward.hope-morley@canonical.com>
#

"""
Helpers for monitoring Neutron agents, rescheduling failed agents, and
cleaning up resources on failed nodes.
"""

import os
import re
import sys
import signal
import socket
import subprocess
import time

from oslo.config import cfg
from neutron.agent.linux import ovs_lib
from neutron.agent.linux import ip_lib
from neutron.common import exceptions
from neutron.openstack.common import log as logging

LOG = logging.getLogger(__name__)


class Daemon(object):
    """A generic daemon class.

    Usage: subclass the Daemon class and override the run() method
    """
    def __init__(self, stdin='/dev/null', stdout='/dev/null',
                 stderr='/dev/null', procname='python'):
        self.stdin = stdin
        self.stdout = stdout
        self.stderr = stderr
        self.procname = procname

    def _fork(self):
        try:
            pid = os.fork()
            if pid > 0:
                sys.exit(0)
        except OSError:
            LOG.exception('Fork failed')
            sys.exit(1)

    def daemonize(self):
        """Daemonize process by doing Stevens double fork."""
        # fork first time
        self._fork()

        # decouple from parent environment
        os.chdir("/")
        os.setsid()
        os.umask(0)
        # fork second time
        self._fork()

        # redirect standard file descriptors
        sys.stdout.flush()
        sys.stderr.flush()
        stdin = open(self.stdin, 'r')
        stdout = open(self.stdout, 'a+')
        stderr = open(self.stderr, 'a+', 0)
        os.dup2(stdin.fileno(), sys.stdin.fileno())
        os.dup2(stdout.fileno(), sys.stdout.fileno())
        os.dup2(stderr.fileno(), sys.stderr.fileno())

        signal.signal(signal.SIGTERM, self.handle_sigterm)

    def handle_sigterm(self, signum, frame):
        sys.exit(0)

    def start(self):
        """Start the daemon."""
        self.daemonize()
        self.run()

    def run(self):
        """Override this method when subclassing Daemon.

        start() will call this method after the process has daemonized.
        """
        pass
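For illustration, a minimal subclass of the Daemon class above might look like this (hypothetical example, not part of the removed file; the file paths are made up)::

    class HeartbeatDaemon(Daemon):
        """Toy daemon: append a timestamp to a file every five seconds."""
        def run(self):
            while True:
                with open('/tmp/heartbeat.log', 'a') as f:
                    f.write('%s\n' % time.time())
                time.sleep(5)

    # start() performs the double fork, then calls run() in the daemonized child.
    HeartbeatDaemon(stdout='/tmp/heartbeat.out').start()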
class MonitorNeutronAgentsDaemon(Daemon):
    def __init__(self):
        super(MonitorNeutronAgentsDaemon, self).__init__()
        logging.setup('Neuron-HA-Monitor')
        LOG.info('Monitor Neutron Agent Loop Init')
        self.hostname = None
        self.env = {}

    def get_env(self):
        envrc_f = '/etc/legacy_ha_envrc'
        envrc_f_m = False
        if os.path.isfile(envrc_f):
            ctime = time.ctime(os.stat(envrc_f).st_ctime)
            mtime = time.ctime(os.stat(envrc_f).st_mtime)
            if ctime != mtime:
                envrc_f_m = True

            if not self.env or envrc_f_m:
                with open(envrc_f, 'r') as f:
                    for line in f:
                        data = line.strip().split('=')
                        if data and data[0] and data[1]:
                            self.env[data[0]] = data[1]
                        else:
                            raise Exception("OpenStack env data incomplete.")
        return self.env

    def get_hostname(self):
        if not self.hostname:
            self.hostname = socket.gethostname()
        return self.hostname

    def get_root_helper(self):
        return 'sudo'

    def list_monitor_res(self):
        # List the nodes where the crm resource 'cl_monitor' is running
        nodes = []
        cmd = ['crm', 'resource', 'show', 'cl_monitor']
        output = subprocess.check_output(cmd)
        pattern = re.compile('resource cl_monitor is running on: (.*) ')
        nodes = pattern.findall(output)
        return nodes
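For reference, the pattern above matches the output of 'crm resource show cl_monitor'; a small sketch with a made-up node name::

    sample = 'resource cl_monitor is running on: node1 \n'
    re.compile('resource cl_monitor is running on: (.*) ').findall(sample)
    # -> ['node1'], i.e. the nodes currently hosting the cl_monitor resource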
    def get_crm_res_lead_node(self):
        nodes = self.list_monitor_res()
        if nodes:
            return nodes[0].strip()
        else:
            LOG.error('Failed to get crm resource.')
            return None

    def unplug_device(self, device):
        try:
            device.link.delete()
        except RuntimeError:
            root_helper = self.get_root_helper()
            # Maybe the device is OVS port, so try to delete
            bridge_name = ovs_lib.get_bridge_for_iface(root_helper,
                                                       device.name)
            if bridge_name:
                bridge = ovs_lib.OVSBridge(bridge_name, root_helper)
                bridge.delete_port(device.name)
            else:
                LOG.debug('Unable to find bridge for device: %s', device.name)

    def get_pattern(self, key, text):
        if not key or not text:
            LOG.debug('Invalid key(%s) or text(%s)' % (key, text))
            return None

        pattern = re.compile('%s' % key)
        result = pattern.findall(text)
        return result

    def _cleanup(self, key1, key2):
        namespaces = []
        if key1:
            for k in key1.iterkeys():
                namespaces.append(key2 + '-' + k)
        else:
            try:
                cmd = ['sudo', 'ip', 'netns']
                ns = subprocess.check_output(cmd)
                namespaces = self.get_pattern('(%s.*)' % key2, ns)
            except RuntimeError as e:
                LOG.error('Failed to list namespace, (%s)' % e)

        if namespaces:
            LOG.info('Namespaces: %s is going to be deleted.' % namespaces)
            self.destroy_namespaces(namespaces)

    def cleanup_dhcp(self, networks):
        self._cleanup(networks, 'qdhcp')

    def cleanup_router(self, routers):
        self._cleanup(routers, 'qrouter')

    def destroy_namespaces(self, namespaces):
        try:
            root_helper = self.get_root_helper()
            for namespace in namespaces:
                ip = ip_lib.IPWrapper(root_helper, namespace)
                if ip.netns.exists(namespace):
                    for device in ip.get_devices(exclude_loopback=True):
                        self.unplug_device(device)

                    ip.garbage_collect_namespace()
        except Exception:
            LOG.exception('Error unable to destroy namespace: %s', namespace)

    def is_same_host(self, host):
        return str(host).strip() == self.get_hostname()

    def validate_reschedule(self):
        crm_no_1_node = self.get_crm_res_lead_node()
        if not crm_no_1_node:
            LOG.error('No crm first node could be found.')
            return False

        if not self.is_same_host(crm_no_1_node):
            LOG.warn('Only the first crm node %s could reschedule. '
                     % crm_no_1_node)
            return False
        return True

    def l3_agents_reschedule(self, l3_agents, routers, quantum):
        if not self.validate_reschedule():
            return

        index = 0
        for router_id in routers:
            agent = index % len(l3_agents)
            LOG.info('Moving router %s from %s to %s' %
                     (router_id, routers[router_id], l3_agents[agent]))
            try:
                quantum.remove_router_from_l3_agent(l3_agent=routers[router_id],
                                                    router_id=router_id)
            except exceptions.NeutronException as e:
                LOG.error('Remove router raised exception: %s' % e)
            try:
                quantum.add_router_to_l3_agent(l3_agent=l3_agents[agent],
                                               body={'router_id': router_id})
            except exceptions.NeutronException as e:
                LOG.error('Add router raised exception: %s' % e)
            index += 1

    def dhcp_agents_reschedule(self, dhcp_agents, networks, quantum):
        if not self.validate_reschedule():
            return

        index = 0
        for network_id in networks:
            agent = index % len(dhcp_agents)
            LOG.info('Moving network %s from %s to %s' % (network_id,
                     networks[network_id], dhcp_agents[agent]))
            try:
                quantum.remove_network_from_dhcp_agent(
                    dhcp_agent=networks[network_id], network_id=network_id)
            except exceptions.NeutronException as e:
                LOG.error('Remove network raised exception: %s' % e)
            try:
                quantum.add_network_to_dhcp_agent(
                    dhcp_agent=dhcp_agents[agent],
                    body={'network_id': network_id})
            except exceptions.NeutronException as e:
                LOG.error('Add network raised exception: %s' % e)
            index += 1

    def get_quantum_client(self):
        env = self.get_env()
        if not env:
            LOG.info('Unable to re-assign resources at this time')
            return None

        try:
            from quantumclient.v2_0 import client
        except ImportError:
            # Try to import neutronclient instead for havana+
            from neutronclient.v2_0 import client

        auth_url = '%(auth_protocol)s://%(keystone_host)s:%(auth_port)s/v2.0' \
                   % env
        quantum = client.Client(username=env['service_username'],
                                password=env['service_password'],
                                tenant_name=env['service_tenant'],
                                auth_url=auth_url,
                                region_name=env['region'])
        return quantum
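get_env() and get_quantum_client() assume /etc/legacy_ha_envrc holds KEY=VALUE lines for the keys used above; a hedged illustration with made-up values::

    env = {
        'auth_protocol': 'http',
        'keystone_host': '10.0.0.10',
        'auth_port': '5000',
        'service_username': 'quantum',
        'service_password': 'secret',
        'service_tenant': 'services',
        'region': 'RegionOne',
    }
    # get_quantum_client() would then authenticate against:
    #   http://10.0.0.10:5000/v2.0
    auth_url = '%(auth_protocol)s://%(keystone_host)s:%(auth_port)s/v2.0' % env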
    def reassign_agent_resources(self, quantum=None):
        """Use agent scheduler API to detect down agents and re-schedule"""
        if not quantum:
            LOG.error('Failed to get quantum client.')
            return

        try:
            DHCP_AGENT = "DHCP Agent"
            L3_AGENT = "L3 Agent"
            agents = quantum.list_agents(agent_type=DHCP_AGENT)
        except exceptions.NeutronException as e:
            LOG.error('Failed to get quantum agents, %s' % e)
            return

        dhcp_agents = []
        l3_agents = []
        networks = {}
        for agent in agents['agents']:
            hosted_networks = quantum.list_networks_on_dhcp_agent(
                agent['id'])['networks']
            if not agent['alive']:
                LOG.info('DHCP Agent %s down' % agent['id'])
                for network in hosted_networks:
                    networks[network['id']] = agent['id']
                if self.is_same_host(agent['host']):
                    self.cleanup_dhcp(networks)
            else:
                dhcp_agents.append(agent['id'])
                LOG.info('Active dhcp agents: %s' % agent['id'])
                if not hosted_networks and self.is_same_host(agent['host']):
                    self.cleanup_dhcp(None)

        agents = quantum.list_agents(agent_type=L3_AGENT)
        routers = {}
        for agent in agents['agents']:
            hosted_routers = quantum.list_routers_on_l3_agent(
                agent['id'])['routers']
            if not agent['alive']:
                LOG.info('L3 Agent %s down' % agent['id'])
                for router in hosted_routers:
                    routers[router['id']] = agent['id']
                if self.is_same_host(agent['host']):
                    self.cleanup_router(routers)
            else:
                l3_agents.append(agent['id'])
                LOG.info('Active l3 agents: %s' % agent['id'])
                if not hosted_routers and self.is_same_host(agent['host']):
                    self.cleanup_router(None)

        if not networks and not routers:
            LOG.info('No networks and routers hosted on failed agents.')
            return

        if len(dhcp_agents) == 0 and len(l3_agents) == 0:
            LOG.error('Unable to relocate resources, there are %s dhcp_agents '
                      'and %s l3_agents in this cluster' % (len(dhcp_agents),
                                                            len(l3_agents)))
            return

        if len(l3_agents) > 0:
            self.l3_agents_reschedule(l3_agents, routers, quantum)
            # a new l3 node will not create a tunnel unless the ovs process
            # is restarted

        if len(dhcp_agents) > 0:
            self.dhcp_agents_reschedule(dhcp_agents, networks, quantum)

    def check_ovs_tunnel(self, quantum=None):
        if not quantum:
            LOG.error('Failed to get quantum client.')
            return

        try:
            OVS_AGENT = 'Open vSwitch agent'
            agents = quantum.list_agents(agent_type=OVS_AGENT)
        except exceptions.NeutronException as e:
            LOG.error('No ovs agent found on localhost, error:%s.' % e)
            return

        for agent in agents['agents']:
            if self.is_same_host(agent['host']):
                conf = agent['configurations']
                if 'gre' in conf['tunnel_types'] and conf['l2_population'] \
                        and conf['devices']:
                    LOG.warning('local ovs agent:%s' % agent)
                    ovs_output = subprocess.check_output(['ovs-vsctl',
                                                          'list-ports', 'br-tun'])
                    ports = ovs_output.strip().split('\n')
                    look_up_gre_port = False
                    for port in ports:
                        if port.startswith('gre-'):
                            look_up_gre_port = True
                            break
                    if not look_up_gre_port:
                        try:
                            LOG.error('Found namespace, but no ovs tunnel is '
                                      'created, restart ovs agent.')
                            cmd = ['sudo', 'service', 'neutron-plugin-openvswitch-agent',
                                   'restart']
                            subprocess.call(cmd)
                        except subprocess.CalledProcessError:
                            LOG.error('Failed to restart neutron-plugin-openvswitch-agent.')

    def check_local_agents(self):
        services = ['openvswitch-switch', 'neutron-dhcp-agent',
                    'neutron-metadata-agent', 'neutron-vpn-agent']
        for s in services:
            status = ['sudo', 'service', s, 'status']
            restart = ['sudo', 'service', s, 'restart']
            start = ['sudo', 'service', s, 'start']
            stop = 'neutron-vpn-agent stop/waiting'
            try:
                output = subprocess.check_output(status)
                if output.strip() == stop:
                    subprocess.check_output(start)
                    if s == 'neutron-metadata-agent':
                        subprocess.check_output(['sudo', 'service',
                                                 'neutron-vpn-agent',
                                                 'restart'])
            except subprocess.CalledProcessError:
                LOG.error('Restart service: %s' % s)
                subprocess.check_output(restart)
                if s == 'neutron-metadata-agent':
                    subprocess.check_output(['sudo', 'service',
                                             'neutron-vpn-agent',
                                             'restart'])

    def run(self):
        while True:
            LOG.info('Monitor Neutron HA Agent Loop Start')
            quantum = self.get_quantum_client()
            self.reassign_agent_resources(quantum=quantum)
            self.check_ovs_tunnel(quantum=quantum)
            self.check_local_agents()
            LOG.info('sleep %s' % cfg.CONF.check_interval)
            time.sleep(float(cfg.CONF.check_interval))


if __name__ == '__main__':
    opts = [
        cfg.StrOpt('check_interval',
                   default=8,
                   help='Check Neutron Agents interval.'),
    ]

    cfg.CONF.register_cli_opts(opts)
    cfg.CONF(project='monitor_neutron_agents', default_config_files=[])
    logging.setup('Neuron-HA-Monitor')
    monitor_daemon = MonitorNeutronAgentsDaemon()
    monitor_daemon.start()
0 hooks/charmhelpers/contrib/charmsupport/__init__.py (Normal file)
308 hooks/charmhelpers/contrib/charmsupport/nrpe.py (Normal file)
@ -0,0 +1,308 @@
"""Compatibility with the nrpe-external-master charm"""
# Copyright 2012 Canonical Ltd.
#
# Authors:
#  Matthew Wedgwood <matthew.wedgwood@canonical.com>

import subprocess
import pwd
import grp
import os
import re
import shlex
import yaml

from charmhelpers.core.hookenv import (
    config,
    local_unit,
    log,
    relation_ids,
    relation_set,
    relations_of_type,
)

from charmhelpers.core.host import service

# This module adds compatibility with the nrpe-external-master and plain nrpe
# subordinate charms. To use it in your charm:
#
# 1. Update metadata.yaml
#
#   provides:
#     (...)
#     nrpe-external-master:
#       interface: nrpe-external-master
#       scope: container
#
#   and/or
#
#   provides:
#     (...)
#     local-monitors:
#       interface: local-monitors
#       scope: container

#
# 2. Add the following to config.yaml
#
#    nagios_context:
#      default: "juju"
#      type: string
#      description: |
#        Used by the nrpe subordinate charms.
#        A string that will be prepended to instance name to set the host name
#        in nagios. So for instance the hostname would be something like:
#            juju-myservice-0
#        If you're running multiple environments with the same services in them
#        this allows you to differentiate between them.
#    nagios_servicegroups:
#      default: ""
#      type: string
#      description: |
#        A comma-separated list of nagios servicegroups.
#        If left empty, the nagios_context will be used as the servicegroup
#
# 3. Add custom checks (Nagios plugins) to files/nrpe-external-master
#
# 4. Update your hooks.py with something like this:
#
#    from charmsupport.nrpe import NRPE
#    (...)
#    def update_nrpe_config():
#        nrpe_compat = NRPE()
#        nrpe_compat.add_check(
#            shortname = "myservice",
#            description = "Check MyService",
#            check_cmd = "check_http -w 2 -c 10 http://localhost"
#            )
#        nrpe_compat.add_check(
#            "myservice_other",
#            "Check for widget failures",
#            check_cmd = "/srv/myapp/scripts/widget_check"
#            )
#        nrpe_compat.write()
#
#    def config_changed():
#        (...)
#        update_nrpe_config()
#
#    def nrpe_external_master_relation_changed():
#        update_nrpe_config()
#
#    def local_monitors_relation_changed():
#        update_nrpe_config()
#
# 5. ln -s hooks.py nrpe-external-master-relation-changed
#    ln -s hooks.py local-monitors-relation-changed


class CheckException(Exception):
    pass


class Check(object):
    shortname_re = '[A-Za-z0-9-_]+$'
    service_template = ("""
#---------------------------------------------------
# This file is Juju managed
#---------------------------------------------------
define service {{
    use                             active-service
    host_name                       {nagios_hostname}
    service_description             {nagios_hostname}[{shortname}] """
                        """{description}
    check_command                   check_nrpe!{command}
    servicegroups                   {nagios_servicegroup}
}}
""")

    def __init__(self, shortname, description, check_cmd):
        super(Check, self).__init__()
        # XXX: could be better to calculate this from the service name
        if not re.match(self.shortname_re, shortname):
            raise CheckException("shortname must match {}".format(
                Check.shortname_re))
        self.shortname = shortname
        self.command = "check_{}".format(shortname)
        # Note: a set of invalid characters is defined by the
        # Nagios server config
        # The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()=
        self.description = description
        self.check_cmd = self._locate_cmd(check_cmd)

    def _locate_cmd(self, check_cmd):
        search_path = (
            '/usr/lib/nagios/plugins',
            '/usr/local/lib/nagios/plugins',
        )
        parts = shlex.split(check_cmd)
        for path in search_path:
            if os.path.exists(os.path.join(path, parts[0])):
                command = os.path.join(path, parts[0])
                if len(parts) > 1:
                    command += " " + " ".join(parts[1:])
                return command
        log('Check command not found: {}'.format(parts[0]))
        return ''

    def write(self, nagios_context, hostname, nagios_servicegroups=None):
        nrpe_check_file = '/etc/nagios/nrpe.d/{}.cfg'.format(
            self.command)
        with open(nrpe_check_file, 'w') as nrpe_check_config:
            nrpe_check_config.write("# check {}\n".format(self.shortname))
            nrpe_check_config.write("command[{}]={}\n".format(
                self.command, self.check_cmd))

        if not os.path.exists(NRPE.nagios_exportdir):
            log('Not writing service config as {} is not accessible'.format(
                NRPE.nagios_exportdir))
        else:
            self.write_service_config(nagios_context, hostname,
                                      nagios_servicegroups)

    def write_service_config(self, nagios_context, hostname,
                             nagios_servicegroups=None):
        for f in os.listdir(NRPE.nagios_exportdir):
            if re.search('.*{}.cfg'.format(self.command), f):
                os.remove(os.path.join(NRPE.nagios_exportdir, f))

        if not nagios_servicegroups:
            nagios_servicegroups = nagios_context

        templ_vars = {
            'nagios_hostname': hostname,
            'nagios_servicegroup': nagios_servicegroups,
            'description': self.description,
            'shortname': self.shortname,
            'command': self.command,
        }
        nrpe_service_text = Check.service_template.format(**templ_vars)
        nrpe_service_file = '{}/service__{}_{}.cfg'.format(
            NRPE.nagios_exportdir, hostname, self.command)
        with open(nrpe_service_file, 'w') as nrpe_service_config:
            nrpe_service_config.write(str(nrpe_service_text))

    def run(self):
        subprocess.call(self.check_cmd)


class NRPE(object):
    nagios_logdir = '/var/log/nagios'
    nagios_exportdir = '/var/lib/nagios/export'
    nrpe_confdir = '/etc/nagios/nrpe.d'

    def __init__(self, hostname=None):
        super(NRPE, self).__init__()
        self.config = config()
        self.nagios_context = self.config['nagios_context']
        if 'nagios_servicegroups' in self.config:
            self.nagios_servicegroups = self.config['nagios_servicegroups']
        else:
            self.nagios_servicegroups = 'juju'
        self.unit_name = local_unit().replace('/', '-')
        if hostname:
            self.hostname = hostname
        else:
            self.hostname = "{}-{}".format(self.nagios_context, self.unit_name)
        self.checks = []

    def add_check(self, *args, **kwargs):
        self.checks.append(Check(*args, **kwargs))

    def write(self):
        try:
            nagios_uid = pwd.getpwnam('nagios').pw_uid
            nagios_gid = grp.getgrnam('nagios').gr_gid
        except:
            log("Nagios user not set up, nrpe checks not updated")
            return

        if not os.path.exists(NRPE.nagios_logdir):
            os.mkdir(NRPE.nagios_logdir)
            os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid)

        nrpe_monitors = {}
        monitors = {"monitors": {"remote": {"nrpe": nrpe_monitors}}}
        for nrpecheck in self.checks:
            nrpecheck.write(self.nagios_context, self.hostname,
                            self.nagios_servicegroups)
            nrpe_monitors[nrpecheck.shortname] = {
                "command": nrpecheck.command,
            }

        service('restart', 'nagios-nrpe-server')

        for rid in relation_ids("local-monitors"):
            relation_set(relation_id=rid, monitors=yaml.dump(monitors))


def get_nagios_hostcontext(relation_name='nrpe-external-master'):
    """
    Query relation with nrpe subordinate, return the nagios_host_context

    :param str relation_name: Name of relation nrpe sub joined to
    """
    for rel in relations_of_type(relation_name):
        if 'nagios_hostname' in rel:
            return rel['nagios_host_context']


def get_nagios_hostname(relation_name='nrpe-external-master'):
    """
    Query relation with nrpe subordinate, return the nagios_hostname

    :param str relation_name: Name of relation nrpe sub joined to
    """
    for rel in relations_of_type(relation_name):
        if 'nagios_hostname' in rel:
            return rel['nagios_hostname']


def get_nagios_unit_name(relation_name='nrpe-external-master'):
    """
    Return the nagios unit name prepended with host_context if needed

    :param str relation_name: Name of relation nrpe sub joined to
    """
    host_context = get_nagios_hostcontext(relation_name)
    if host_context:
        unit = "%s:%s" % (host_context, local_unit())
    else:
        unit = local_unit()
    return unit


def add_init_service_checks(nrpe, services, unit_name):
    """
    Add checks for each service in list

    :param NRPE nrpe: NRPE object to add check to
    :param list services: List of services to check
    :param str unit_name: Unit name to use in check description
    """
    for svc in services:
        upstart_init = '/etc/init/%s.conf' % svc
        sysv_init = '/etc/init.d/%s' % svc
        if os.path.exists(upstart_init):
            nrpe.add_check(
                shortname=svc,
                description='process check {%s}' % unit_name,
                check_cmd='check_upstart_job %s' % svc
            )
        elif os.path.exists(sysv_init):
            cronpath = '/etc/cron.d/nagios-service-check-%s' % svc
            cron_file = ('*/5 * * * * root '
                         '/usr/local/lib/nagios/plugins/check_exit_status.pl '
                         '-s /etc/init.d/%s status > '
                         '/var/lib/nagios/service-check-%s.txt\n' % (svc,
                                                                     svc)
                         )
            f = open(cronpath, 'w')
            f.write(cron_file)
            f.close()
            nrpe.add_check(
                shortname=svc,
                description='process check {%s}' % unit_name,
                check_cmd='check_status_file.py -f '
                          '/var/lib/nagios/service-check-%s.txt' % svc,
            )
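A short usage sketch of the helpers above, mirroring what the charm's update_nrpe_config hook does later in this commit (the service name is illustrative)::

    hostname = get_nagios_hostname()
    current_unit = get_nagios_unit_name()
    nrpe_setup = NRPE(hostname=hostname)
    # One check per init job, then write the nrpe and export config files.
    add_init_service_checks(nrpe_setup, ['neutron-dhcp-agent'], current_unit)
    nrpe_setup.write()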
159 hooks/charmhelpers/contrib/charmsupport/volumes.py (Normal file)
@ -0,0 +1,159 @@
'''
Functions for managing volumes in juju units. One volume is supported per unit.
Subordinates may have their own storage, provided it is on its own partition.

Configuration stanzas::

  volume-ephemeral:
    type: boolean
    default: true
    description: >
      If false, a volume is mounted as specified in "volume-map"
      If true, ephemeral storage will be used, meaning that log data
      will only exist as long as the machine. YOU HAVE BEEN WARNED.
  volume-map:
    type: string
    default: {}
    description: >
      YAML map of units to device names, e.g:
      "{ rsyslog/0: /dev/vdb, rsyslog/1: /dev/vdb }"
      Service units will raise a configure-error if volume-ephemeral
      is 'true' and no volume-map value is set. Use 'juju set' to set a
      value and 'juju resolved' to complete configuration.

Usage::

    from charmsupport.volumes import configure_volume, VolumeConfigurationError
    from charmsupport.hookenv import log, ERROR
    def pre_mount_hook():
        stop_service('myservice')
    def post_mount_hook():
        start_service('myservice')

    if __name__ == '__main__':
        try:
            configure_volume(before_change=pre_mount_hook,
                             after_change=post_mount_hook)
        except VolumeConfigurationError:
            log('Storage could not be configured', ERROR)

'''

# XXX: Known limitations
# - fstab is neither consulted nor updated

import os
from charmhelpers.core import hookenv
from charmhelpers.core import host
import yaml


MOUNT_BASE = '/srv/juju/volumes'


class VolumeConfigurationError(Exception):
    '''Volume configuration data is missing or invalid'''
    pass


def get_config():
    '''Gather and sanity-check volume configuration data'''
    volume_config = {}
    config = hookenv.config()

    errors = False

    if config.get('volume-ephemeral') in (True, 'True', 'true', 'Yes', 'yes'):
        volume_config['ephemeral'] = True
    else:
        volume_config['ephemeral'] = False

    try:
        volume_map = yaml.safe_load(config.get('volume-map', '{}'))
    except yaml.YAMLError as e:
        hookenv.log("Error parsing YAML volume-map: {}".format(e),
                    hookenv.ERROR)
        errors = True
    if volume_map is None:
        # probably an empty string
        volume_map = {}
    elif not isinstance(volume_map, dict):
        hookenv.log("Volume-map should be a dictionary, not {}".format(
            type(volume_map)))
        errors = True

    volume_config['device'] = volume_map.get(os.environ['JUJU_UNIT_NAME'])
    if volume_config['device'] and volume_config['ephemeral']:
        # asked for ephemeral storage but also defined a volume ID
        hookenv.log('A volume is defined for this unit, but ephemeral '
                    'storage was requested', hookenv.ERROR)
        errors = True
    elif not volume_config['device'] and not volume_config['ephemeral']:
        # asked for permanent storage but did not define volume ID
        hookenv.log('Ephemeral storage was requested, but there is no volume '
                    'defined for this unit.', hookenv.ERROR)
        errors = True

    unit_mount_name = hookenv.local_unit().replace('/', '-')
    volume_config['mountpoint'] = os.path.join(MOUNT_BASE, unit_mount_name)

    if errors:
        return None
    return volume_config


def mount_volume(config):
    if os.path.exists(config['mountpoint']):
        if not os.path.isdir(config['mountpoint']):
            hookenv.log('Not a directory: {}'.format(config['mountpoint']))
            raise VolumeConfigurationError()
    else:
        host.mkdir(config['mountpoint'])
    if os.path.ismount(config['mountpoint']):
        unmount_volume(config)
    if not host.mount(config['device'], config['mountpoint'], persist=True):
        raise VolumeConfigurationError()


def unmount_volume(config):
    if os.path.ismount(config['mountpoint']):
        if not host.umount(config['mountpoint'], persist=True):
            raise VolumeConfigurationError()


def managed_mounts():
    '''List of all mounted managed volumes'''
    return filter(lambda mount: mount[0].startswith(MOUNT_BASE), host.mounts())


def configure_volume(before_change=lambda: None, after_change=lambda: None):
    '''Set up storage (or don't) according to the charm's volume configuration.
    Returns the mount point or "ephemeral". before_change and after_change
    are optional functions to be called if the volume configuration changes.
    '''

    config = get_config()
    if not config:
        hookenv.log('Failed to read volume configuration', hookenv.CRITICAL)
        raise VolumeConfigurationError()

    if config['ephemeral']:
        if os.path.ismount(config['mountpoint']):
            before_change()
            unmount_volume(config)
            after_change()
        return 'ephemeral'
    else:
        # persistent storage
        if os.path.ismount(config['mountpoint']):
            mounts = dict(managed_mounts())
            if mounts.get(config['mountpoint']) != config['device']:
                before_change()
                unmount_volume(config)
                mount_volume(config)
                after_change()
        else:
            before_change()
            mount_volume(config)
            after_change()
        return config['mountpoint']
@ -53,6 +53,7 @@ UBUNTU_OPENSTACK_RELEASE = OrderedDict([
    ('saucy', 'havana'),
    ('trusty', 'icehouse'),
    ('utopic', 'juno'),
    ('vivid', 'kilo'),
])


@ -64,6 +65,7 @@ OPENSTACK_CODENAMES = OrderedDict([
    ('2013.2', 'havana'),
    ('2014.1', 'icehouse'),
    ('2014.2', 'juno'),
    ('2015.1', 'kilo'),
])

# The ugly duckling
@ -84,6 +86,7 @@ SWIFT_CODENAMES = OrderedDict([
    ('2.0.0', 'juno'),
    ('2.1.0', 'juno'),
    ('2.2.0', 'juno'),
    ('2.2.1', 'kilo'),
])

DEFAULT_LOOPBACK_SIZE = '5G'
@ -289,6 +292,9 @@ def configure_installation_source(rel):
        'juno': 'trusty-updates/juno',
        'juno/updates': 'trusty-updates/juno',
        'juno/proposed': 'trusty-proposed/juno',
        'kilo': 'trusty-updates/kilo',
        'kilo/updates': 'trusty-updates/kilo',
        'kilo/proposed': 'trusty-proposed/kilo',
    }

    try:
@ -64,9 +64,16 @@ CLOUD_ARCHIVE_POCKETS = {
    'trusty-juno/updates': 'trusty-updates/juno',
    'trusty-updates/juno': 'trusty-updates/juno',
    'juno/proposed': 'trusty-proposed/juno',
    'juno/proposed': 'trusty-proposed/juno',
    'trusty-juno/proposed': 'trusty-proposed/juno',
    'trusty-proposed/juno': 'trusty-proposed/juno',
    # Kilo
    'kilo': 'trusty-updates/kilo',
    'trusty-kilo': 'trusty-updates/kilo',
    'trusty-kilo/updates': 'trusty-updates/kilo',
    'trusty-updates/kilo': 'trusty-updates/kilo',
    'kilo/proposed': 'trusty-proposed/kilo',
    'trusty-kilo/proposed': 'trusty-proposed/kilo',
    'trusty-proposed/kilo': 'trusty-proposed/kilo',
}

# The order of this list is very important. Handlers should be listed in from
1 hooks/nrpe-external-master-relation-changed (Symbolic link)
@ -0,0 +1 @@
quantum_hooks.py
1 hooks/nrpe-external-master-relation-joined (Symbolic link)
@ -0,0 +1 @@
quantum_hooks.py
@ -36,10 +36,13 @@ from charmhelpers.contrib.openstack.utils import (
from charmhelpers.payload.execd import execd_preinstall
from charmhelpers.core.sysctl import create as create_sysctl

from charmhelpers.contrib.charmsupport import nrpe

import sys
from quantum_utils import (
    register_configs,
    restart_map,
    services,
    do_openstack_upgrade,
    get_packages,
    get_early_packages,
@ -92,6 +95,7 @@ def config_changed():
    global CONFIGS
    if openstack_upgrade_available(get_common_package()):
        CONFIGS = do_openstack_upgrade()
    update_nrpe_config()

    sysctl_dict = config('sysctl')
    if sysctl_dict:
@ -234,6 +238,32 @@ def stop():
    stop_services()


@hooks.hook('nrpe-external-master-relation-joined',
            'nrpe-external-master-relation-changed')
def update_nrpe_config():
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    hostname = nrpe.get_nagios_hostname()
    current_unit = nrpe.get_nagios_unit_name()
    nrpe_setup = nrpe.NRPE(hostname=hostname)
    nrpe.add_init_service_checks(nrpe_setup, services(), current_unit)

    cronpath = '/etc/cron.d/nagios-netns-check'
    cron_template = ('*/5 * * * * root '
                     '/usr/local/lib/nagios/plugins/check_netns.sh '
                     '> /var/lib/nagios/netns-check.txt\n'
                     )
    f = open(cronpath, 'w')
    f.write(cron_template)
    f.close()
    nrpe_setup.add_check(
        shortname="netns",
        description='Network Namespace check {%s}' % current_unit,
        check_cmd='check_status_file.py -f /var/lib/nagios/netns-check.txt'
    )
    nrpe_setup.write()


@hooks.hook('ha-relation-joined')
@hooks.hook('ha-relation-changed')
def ha_relation_joined():
@ -16,6 +16,9 @@ description: |
categories:
  - openstack
provides:
  nrpe-external-master:
    interface: nrpe-external-master
    scope: container
  quantum-network-service:
    interface: quantum
requires:
@ -42,6 +42,7 @@ TO_PATCH = [
    'b64decode',
    'is_relation_made',
    'create_sysctl',
    'update_nrpe_config',
    'update_legacy_ha_files',
    'add_hostname_to_hosts'
]