Merge from quantum-gateway chunk

Hui Xiang 2015-01-15 17:40:34 +08:00
commit e777443c5a
15 changed files with 528 additions and 590 deletions

View File

@@ -9,3 +9,4 @@ include:
- contrib.python.packages
- contrib.storage.linux
- payload.execd
- contrib.charmsupport
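The new contrib.charmsupport entry pulls the NRPE helper shown later in this diff into the synced charm-helpers tree. A one-line sketch of the import it enables once charm-helpers-sync has been run (assuming the synced copy sits under hooks/charmhelpers/, as is conventional for these charms):

# Available once contrib.charmsupport has been synced into the charm.
from charmhelpers.contrib.charmsupport import nrpe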

View File

@@ -104,6 +104,16 @@ options:
default: nova
type: string
description: Database name
nagios_context:
default: "juju"
type: string
description: |
Used by the nrpe-external-master subordinate charm.
A string that will be prepended to instance name to set the host name
in nagios. So for instance the hostname would be something like:
juju-myservice-0
If you're running multiple environments with the same services in them
this allows you to differentiate between them.
# Network configuration options
# by default all access is over 'private-address'
os-data-network:
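To make the nagios_context description above concrete: the NRPE helper added later in this diff builds the Nagios host name as <nagios_context>-<unit-name>. A minimal sketch of that composition (the unit name is hypothetical):

# Mirrors NRPE.__init__ in charmsupport/nrpe.py further down in this diff.
nagios_context = 'juju'
unit_name = 'quantum-gateway/0'.replace('/', '-')   # hypothetical unit
hostname = '{}-{}'.format(nagios_context, unit_name)
print(hostname)   # -> juju-quantum-gateway-0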

View File

@@ -1,155 +0,0 @@
#!/bin/sh
#
#
# NeutronAgentMon OCF RA.
# Starts the neutron-ha-monitor daemon in the background and
# reports whether it is still running.
#
# Copyright 2014 Canonical Ltd.
#
# Authors: Hui Xiang <hui.xiang@canonical.com>
# Edward Hope-Morley <edward.hope-morley@canonical.com>
#
# OCF instance parameters:
# OCF_RESKEY_file
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS=${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs}
. ${OCF_FUNCTIONS}
: ${__OCF_ACTION=$1}
#######################################################################
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="NeutronAgentMon">
<version>1.0</version>
<longdesc lang="en">
This is a NeutronAgentMon Resource Agent.
It monitors the status of the neutron-ha-monitor daemon.
</longdesc>
<shortdesc lang="en">Monitor '/usr/local/bin/neutron-ha-monitor.py' in the background.</shortdesc>
<parameters>
<parameter name="file" unique="0">
<longdesc lang="en">
The file we want to run as a daemon.
</longdesc>
<shortdesc lang="en">The file we want to run as a daemon.</shortdesc>
<content type="string" default="/usr/local/bin/neutron-ha-monitor.py" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="monitor" depth="0" timeout="20" interval="60" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
</resource-agent>
END
}
#######################################################################
NeutronAgentMon_usage() {
cat <<END
usage: $0 {start|stop|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
NeutronAgentMon_exit() {
if [ $1 != 0 ]; then
exit $OCF_ERR_GENERIC
else
exit $OCF_SUCCESS
fi
}
NeutronAgentMon_start() {
pid=`sudo ps -aux | grep neutron-ha-m\[o\]nitor.py | awk -F' ' '{print $2}'`
if [ -z $pid ]; then
ocf_log info "[NeutronAgentMon_start] Start Monitor daemon."
sudo mkdir -p /var/log/neutron-ha
sudo python /usr/local/bin/neutron-ha-monitor.py \
--config-file /var/lib/juju-neutron-ha/neutron-ha-monitor.conf \
--log-file /var/log/neutron-ha/monitor.log >> /dev/null 2>&1 & echo $!
sleep 5
else
ocf_log warn "[NeutronAgentMon_start] Monitor daemon already running."
fi
NeutronAgentMon_exit $?
}
NeutronAgentMon_stop() {
pid=`sudo ps -aux | grep neutron-ha-m\[o\]nitor.py | awk -F' ' '{print $2}'`
if [ ! -z $pid ]; then
sudo kill -s 9 $pid
ocf_log info "[NeutronAgentMon_stop] Pid $pid is killed."
else
ocf_log warn "[NeutronAgentMon_stop] Monitor daemon already stopped."
fi
NeutronAgentMon_exit 0
}
NeutronAgentMon_monitor() {
pid=`sudo ps -aux | grep neutron-ha-m\[o\]nitor.py | awk -F' ' '{print $2}'`
if [ ! -z $pid ]; then
ocf_log info "[NeutronAgentMon_monitor] success."
exit $OCF_SUCCESS
fi
exit $OCF_NOT_RUNNING
}
NeutronAgentMon_validate() {
# Existence of the user
if [ -f $OCF_RESKEY_file ]; then
echo "Validate OK"
return $OCF_SUCCESS
else
ocf_log err "The file $OCF_RESKEY_file does not exist!"
exit $OCF_ERR_ARGS
fi
}
if [ $# -ne 1 ]; then
NeutronAgentMon_usage
exit $OCF_ERR_ARGS
fi
: ${OCF_RESKEY_update:="15000"}
: ${OCF_RESKEY_pidfile:="/tmp/NeutronAgentMon_${OCF_RESOURCE_INSTANCE}.pid"}
: ${OCF_RESKEY_htmlfile:="/tmp/NeutronAgentMon_${OCF_RESOURCE_INSTANCE}.html"}
OCF_RESKEY_update=`expr $OCF_RESKEY_update / 1000`
case $__OCF_ACTION in
meta-data) meta_data
exit $OCF_SUCCESS
;;
start) NeutronAgentMon_start
;;
stop) NeutronAgentMon_stop
;;
monitor) NeutronAgentMon_monitor
;;
validate-all) NeutronAgentMon_validate
;;
usage|help) NeutronAgentMon_usage
exit $OCF_SUCCESS
;;
*) NeutronAgentMon_usage
exit $OCF_ERR_UNIMPLEMENTED
;;
esac
exit $?
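All of the start/stop/monitor actions above ultimately reduce to "does neutron-ha-monitor.py appear in the process list?". A rough Python equivalent of that liveness check, kept as a sketch (the ps-scraping approach and the script name come from the agent above; nothing here is the charm's actual code):

import subprocess

def monitor_daemon_running(script='neutron-ha-monitor.py'):
    # Return True if any process command line mentions the monitor script.
    ps = subprocess.check_output(['ps', '-eo', 'pid,args'],
                                 universal_newlines=True)
    return any(script in line for line in ps.splitlines())

if __name__ == '__main__':
    print('running' if monitor_daemon_running() else 'not running')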

View File

@@ -1,4 +0,0 @@
[DEFAULT]
verbose=True
#debug=True
check_interval=8

View File

@@ -1,430 +0,0 @@
# Copyright 2014 Canonical Ltd.
#
# Authors: Hui Xiang <hui.xiang@canonical.com>
# Joshua Zhang <joshua.zhang@canonical.com>
# Edward Hope-Morley <edward.hope-morley@canonical.com>
#
"""
Helpers for monitoring Neutron agents, rescheduling resources away from
failed agents, and cleaning up resources left on failed nodes.
"""
import os
import re
import sys
import signal
import socket
import subprocess
import time
from oslo.config import cfg
from neutron.agent.linux import ovs_lib
from neutron.agent.linux import ip_lib
from neutron.common import exceptions
from neutron.openstack.common import log as logging
LOG = logging.getLogger(__name__)
class Daemon(object):
"""A generic daemon class.
Usage: subclass the Daemon class and override the run() method
"""
def __init__(self, stdin='/dev/null', stdout='/dev/null',
stderr='/dev/null', procname='python'):
self.stdin = stdin
self.stdout = stdout
self.stderr = stderr
self.procname = procname
def _fork(self):
try:
pid = os.fork()
if pid > 0:
sys.exit(0)
except OSError:
LOG.exception('Fork failed')
sys.exit(1)
def daemonize(self):
"""Daemonize process by doing Stevens double fork."""
# fork first time
self._fork()
# decouple from parent environment
os.chdir("/")
os.setsid()
os.umask(0)
# fork second time
self._fork()
# redirect standard file descriptors
sys.stdout.flush()
sys.stderr.flush()
stdin = open(self.stdin, 'r')
stdout = open(self.stdout, 'a+')
stderr = open(self.stderr, 'a+', 0)
os.dup2(stdin.fileno(), sys.stdin.fileno())
os.dup2(stdout.fileno(), sys.stdout.fileno())
os.dup2(stderr.fileno(), sys.stderr.fileno())
signal.signal(signal.SIGTERM, self.handle_sigterm)
def handle_sigterm(self, signum, frame):
sys.exit(0)
def start(self):
"""Start the daemon."""
self.daemonize()
self.run()
def run(self):
"""Override this method when subclassing Daemon.
start() will call this method after the process has daemonized.
"""
pass
class MonitorNeutronAgentsDaemon(Daemon):
def __init__(self):
super(MonitorNeutronAgentsDaemon, self).__init__()
logging.setup('Neutron-HA-Monitor')
LOG.info('Monitor Neutron Agent Loop Init')
self.hostname = None
self.env = {}
def get_env(self):
envrc_f = '/etc/legacy_ha_envrc'
envrc_f_m = False
if os.path.isfile(envrc_f):
ctime = time.ctime(os.stat(envrc_f).st_ctime)
mtime = time.ctime(os.stat(envrc_f).st_mtime)
if ctime != mtime:
envrc_f_m = True
if not self.env or envrc_f_m:
with open(envrc_f, 'r') as f:
for line in f:
data = line.strip().split('=')
if data and data[0] and data[1]:
self.env[data[0]] = data[1]
else:
raise Exception("OpenStack env data incomplete.")
return self.env
def get_hostname(self):
if not self.hostname:
self.hostname = socket.gethostname()
return self.hostname
def get_root_helper(self):
return 'sudo'
def list_monitor_res(self):
# List crm resource 'cl_monitor' running node
nodes = []
cmd = ['crm', 'resource', 'show', 'cl_monitor']
output = subprocess.check_output(cmd)
pattern = re.compile('resource cl_monitor is running on: (.*) ')
nodes = pattern.findall(output)
return nodes
def get_crm_res_lead_node(self):
nodes = self.list_monitor_res()
if nodes:
return nodes[0].strip()
else:
LOG.error('Failed to get crm resource.')
return None
def unplug_device(self, device):
try:
device.link.delete()
except RuntimeError:
root_helper = self.get_root_helper()
# Maybe the device is OVS port, so try to delete
bridge_name = ovs_lib.get_bridge_for_iface(root_helper,
device.name)
if bridge_name:
bridge = ovs_lib.OVSBridge(bridge_name, root_helper)
bridge.delete_port(device.name)
else:
LOG.debug('Unable to find bridge for device: %s', device.name)
def get_pattern(self, key, text):
if not key or not text:
LOG.debug('Invalid key(%s) or text(%s)' % (key, text))
return None
pattern = re.compile('%s' % key)
result = pattern.findall(text)
return result
def _cleanup(self, key1, key2):
namespaces = []
if key1:
for k in key1.iterkeys():
namespaces.append(key2 + '-' + k)
else:
try:
cmd = ['sudo', 'ip', 'netns']
ns = subprocess.check_output(cmd)
namespaces = self.get_pattern('(%s.*)' % key2, ns)
except RuntimeError as e:
LOG.error('Failed to list namespace, (%s)' % e)
if namespaces:
LOG.info('Namespaces: %s is going to be deleted.' % namespaces)
self.destroy_namespaces(namespaces)
def cleanup_dhcp(self, networks):
self._cleanup(networks, 'qdhcp')
def cleanup_router(self, routers):
self._cleanup(routers, 'qrouter')
def destroy_namespaces(self, namespaces):
try:
root_helper = self.get_root_helper()
for namespace in namespaces:
ip = ip_lib.IPWrapper(root_helper, namespace)
if ip.netns.exists(namespace):
for device in ip.get_devices(exclude_loopback=True):
self.unplug_device(device)
ip.garbage_collect_namespace()
except Exception:
LOG.exception('Unable to destroy namespace: %s', namespace)
def is_same_host(self, host):
return str(host).strip() == self.get_hostname()
def validate_reschedule(self):
crm_no_1_node = self.get_crm_res_lead_node()
if not crm_no_1_node:
LOG.error('No crm first node could be found.')
return False
if not self.is_same_host(crm_no_1_node):
LOG.warn('Only the first crm node %s could reschedule. '
% crm_no_1_node)
return False
return True
def l3_agents_reschedule(self, l3_agents, routers, quantum):
if not self.validate_reschedule():
return
index = 0
for router_id in routers:
agent = index % len(l3_agents)
LOG.info('Moving router %s from %s to %s' %
(router_id, routers[router_id], l3_agents[agent]))
try:
quantum.remove_router_from_l3_agent(l3_agent=routers[router_id],
router_id=router_id)
except exceptions.NeutronException as e:
LOG.error('Remove router raised exception: %s' % e)
try:
quantum.add_router_to_l3_agent(l3_agent=l3_agents[agent],
body={'router_id': router_id})
except exceptions.NeutronException as e:
LOG.error('Add router raised exception: %s' % e)
index += 1
def dhcp_agents_reschedule(self, dhcp_agents, networks, quantum):
if not self.validate_reschedule():
return
index = 0
for network_id in networks:
agent = index % len(dhcp_agents)
LOG.info('Moving network %s from %s to %s' % (network_id,
networks[network_id], dhcp_agents[agent]))
try:
quantum.remove_network_from_dhcp_agent(
dhcp_agent=networks[network_id], network_id=network_id)
except exceptions.NeutronException as e:
LOG.error('Remove network raised exception: %s' % e)
try:
quantum.add_network_to_dhcp_agent(
dhcp_agent=dhcp_agents[agent],
body={'network_id': network_id})
except exceptions.NeutronException as e:
LOG.error('Add network raised exception: %s' % e)
index += 1
def get_quantum_client(self):
env = self.get_env()
if not env:
LOG.info('Unable to re-assign resources at this time')
return None
try:
from quantumclient.v2_0 import client
except ImportError:
# Try to import neutronclient instead for havana+
from neutronclient.v2_0 import client
auth_url = '%(auth_protocol)s://%(keystone_host)s:%(auth_port)s/v2.0' \
% env
quantum = client.Client(username=env['service_username'],
password=env['service_password'],
tenant_name=env['service_tenant'],
auth_url=auth_url,
region_name=env['region'])
return quantum
def reassign_agent_resources(self, quantum=None):
"""Use agent scheduler API to detect down agents and re-schedule"""
if not quantum:
LOG.error('Failed to get quantum client.')
return
try:
DHCP_AGENT = "DHCP Agent"
L3_AGENT = "L3 Agent"
agents = quantum.list_agents(agent_type=DHCP_AGENT)
except exceptions.NeutronException as e:
LOG.error('Failed to get quantum agents, %s' % e)
return
dhcp_agents = []
l3_agents = []
networks = {}
for agent in agents['agents']:
hosted_networks = quantum.list_networks_on_dhcp_agent(
agent['id'])['networks']
if not agent['alive']:
LOG.info('DHCP Agent %s down' % agent['id'])
for network in hosted_networks:
networks[network['id']] = agent['id']
if self.is_same_host(agent['host']):
self.cleanup_dhcp(networks)
else:
dhcp_agents.append(agent['id'])
LOG.info('Active dhcp agents: %s' % agent['id'])
if not hosted_networks and self.is_same_host(agent['host']):
self.cleanup_dhcp(None)
agents = quantum.list_agents(agent_type=L3_AGENT)
routers = {}
for agent in agents['agents']:
hosted_routers = quantum.list_routers_on_l3_agent(
agent['id'])['routers']
if not agent['alive']:
LOG.info('L3 Agent %s down' % agent['id'])
for router in hosted_routers:
routers[router['id']] = agent['id']
if self.is_same_host(agent['host']):
self.cleanup_router(routers)
else:
l3_agents.append(agent['id'])
LOG.info('Active l3 agents: %s' % agent['id'])
if not hosted_routers and self.is_same_host(agent['host']):
self.cleanup_router(None)
if not networks and not routers:
LOG.info('No networks and routers hosted on failed agents.')
return
if len(dhcp_agents) == 0 and len(l3_agents) == 0:
LOG.error('Unable to relocate resources, there are %s dhcp_agents '
'and %s l3_agents in this cluster' % (len(dhcp_agents),
len(l3_agents)))
return
if len(l3_agents) > 0:
self.l3_agents_reschedule(l3_agents, routers, quantum)
# a newly scheduled l3 node will not create a tunnel unless the ovs agent is restarted
if len(dhcp_agents) > 0:
self.dhcp_agents_reschedule(dhcp_agents, networks, quantum)
def check_ovs_tunnel(self, quantum=None):
if not quantum:
LOG.error('Failed to get quantum client.')
return
try:
OVS_AGENT = 'Open vSwitch agent'
agents = quantum.list_agents(agent_type=OVS_AGENT)
except exceptions.NeutronException as e:
LOG.error('No ovs agent found on localhost, error:%s.' % e)
return
for agent in agents['agents']:
if self.is_same_host(agent['host']):
conf = agent['configurations']
if 'gre' in conf['tunnel_types'] and conf['l2_population'] \
and conf['devices']:
LOG.warning('local ovs agent:%s' % agent)
ovs_output = subprocess.check_output(['ovs-vsctl',
'list-ports', 'br-tun'])
ports = ovs_output.strip().split('\n')
look_up_gre_port = False
for port in ports:
if port.startswith('gre-'):
look_up_gre_port = True
break
if not look_up_gre_port:
try:
LOG.error('Found namespace, but no ovs tunnel is created, '
'restarting ovs agent.')
cmd = ['sudo', 'service', 'neutron-plugin-openvswitch-agent',
'restart']
subprocess.call(cmd)
except subprocess.CalledProcessError:
LOG.error('Failed to restart neutron-plugin-openvswitch-agent.')
def check_local_agents(self):
services = ['openvswitch-switch', 'neutron-dhcp-agent',
'neutron-metadata-agent', 'neutron-vpn-agent']
for s in services:
status = ['sudo', 'service', s, 'status']
restart = ['sudo', 'service', s, 'restart']
start = ['sudo', 'service', s, 'start']
stop = '%s stop/waiting' % s
try:
output = subprocess.check_output(status)
if output.strip() == stop:
subprocess.check_output(start)
if s == 'neutron-metadata-agent':
subprocess.check_output(['sudo', 'service',
'neutron-vpn-agent',
'restart'])
except subprocess.CalledProcessError:
LOG.error('Restart service: %s' % s)
subprocess.check_output(restart)
if s == 'neutron-metadata-agent':
subprocess.check_output(['sudo', 'service',
'neutron-vpn-agent',
'restart'])
def run(self):
while True:
LOG.info('Monitor Neutron HA Agent Loop Start')
quantum = self.get_quantum_client()
self.reassign_agent_resources(quantum=quantum)
self.check_ovs_tunnel(quantum=quantum)
self.check_local_agents()
LOG.info('sleep %s' % cfg.CONF.check_interval)
time.sleep(float(cfg.CONF.check_interval))
if __name__ == '__main__':
opts = [
cfg.StrOpt('check_interval',
default=8,
help='Check Neutron Agents interval.'),
]
cfg.CONF.register_cli_opts(opts)
cfg.CONF(project='monitor_neutron_agents', default_config_files=[])
logging.setup('Neutron-HA-Monitor')
monitor_daemon = MonitorNeutronAgentsDaemon()
monitor_daemon.start()
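The l3_agents_reschedule/dhcp_agents_reschedule methods above spread orphaned routers and networks across the surviving agents with a simple index % len(agents) round-robin. A standalone sketch of just that distribution step (IDs are made up; no Neutron client is involved):

def round_robin(resources, agents):
    # Assign each orphaned resource to a surviving agent, cycling through them.
    assignment = {}
    for index, resource_id in enumerate(resources):
        assignment[resource_id] = agents[index % len(agents)]
    return assignment

orphaned_routers = ['router-1', 'router-2', 'router-3']   # hosted on a dead agent
surviving_agents = ['l3-agent-a', 'l3-agent-b']
print(round_robin(orphaned_routers, surviving_agents))
# e.g. {'router-1': 'l3-agent-a', 'router-2': 'l3-agent-b', 'router-3': 'l3-agent-a'}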

View File

@@ -0,0 +1,308 @@
"""Compatibility with the nrpe-external-master charm"""
# Copyright 2012 Canonical Ltd.
#
# Authors:
# Matthew Wedgwood <matthew.wedgwood@canonical.com>
import subprocess
import pwd
import grp
import os
import re
import shlex
import yaml
from charmhelpers.core.hookenv import (
config,
local_unit,
log,
relation_ids,
relation_set,
relations_of_type,
)
from charmhelpers.core.host import service
# This module adds compatibility with the nrpe-external-master and plain nrpe
# subordinate charms. To use it in your charm:
#
# 1. Update metadata.yaml
#
# provides:
# (...)
# nrpe-external-master:
# interface: nrpe-external-master
# scope: container
#
# and/or
#
# provides:
# (...)
# local-monitors:
# interface: local-monitors
# scope: container
#
# 2. Add the following to config.yaml
#
# nagios_context:
# default: "juju"
# type: string
# description: |
# Used by the nrpe subordinate charms.
# A string that will be prepended to instance name to set the host name
# in nagios. So for instance the hostname would be something like:
# juju-myservice-0
# If you're running multiple environments with the same services in them
# this allows you to differentiate between them.
# nagios_servicegroups:
# default: ""
# type: string
# description: |
# A comma-separated list of nagios servicegroups.
# If left empty, the nagios_context will be used as the servicegroup
#
# 3. Add custom checks (Nagios plugins) to files/nrpe-external-master
#
# 4. Update your hooks.py with something like this:
#
# from charmsupport.nrpe import NRPE
# (...)
# def update_nrpe_config():
# nrpe_compat = NRPE()
# nrpe_compat.add_check(
# shortname = "myservice",
# description = "Check MyService",
# check_cmd = "check_http -w 2 -c 10 http://localhost"
# )
# nrpe_compat.add_check(
# "myservice_other",
# "Check for widget failures",
# check_cmd = "/srv/myapp/scripts/widget_check"
# )
# nrpe_compat.write()
#
# def config_changed():
# (...)
# update_nrpe_config()
#
# def nrpe_external_master_relation_changed():
# update_nrpe_config()
#
# def local_monitors_relation_changed():
# update_nrpe_config()
#
# 5. ln -s hooks.py nrpe-external-master-relation-changed
# ln -s hooks.py local-monitors-relation-changed
class CheckException(Exception):
pass
class Check(object):
shortname_re = '[A-Za-z0-9-_]+$'
service_template = ("""
#---------------------------------------------------
# This file is Juju managed
#---------------------------------------------------
define service {{
use active-service
host_name {nagios_hostname}
service_description {nagios_hostname}[{shortname}] """
"""{description}
check_command check_nrpe!{command}
servicegroups {nagios_servicegroup}
}}
""")
def __init__(self, shortname, description, check_cmd):
super(Check, self).__init__()
# XXX: could be better to calculate this from the service name
if not re.match(self.shortname_re, shortname):
raise CheckException("shortname must match {}".format(
Check.shortname_re))
self.shortname = shortname
self.command = "check_{}".format(shortname)
# Note: a set of invalid characters is defined by the
# Nagios server config
# The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()=
self.description = description
self.check_cmd = self._locate_cmd(check_cmd)
def _locate_cmd(self, check_cmd):
search_path = (
'/usr/lib/nagios/plugins',
'/usr/local/lib/nagios/plugins',
)
parts = shlex.split(check_cmd)
for path in search_path:
if os.path.exists(os.path.join(path, parts[0])):
command = os.path.join(path, parts[0])
if len(parts) > 1:
command += " " + " ".join(parts[1:])
return command
log('Check command not found: {}'.format(parts[0]))
return ''
def write(self, nagios_context, hostname, nagios_servicegroups=None):
nrpe_check_file = '/etc/nagios/nrpe.d/{}.cfg'.format(
self.command)
with open(nrpe_check_file, 'w') as nrpe_check_config:
nrpe_check_config.write("# check {}\n".format(self.shortname))
nrpe_check_config.write("command[{}]={}\n".format(
self.command, self.check_cmd))
if not os.path.exists(NRPE.nagios_exportdir):
log('Not writing service config as {} is not accessible'.format(
NRPE.nagios_exportdir))
else:
self.write_service_config(nagios_context, hostname,
nagios_servicegroups)
def write_service_config(self, nagios_context, hostname,
nagios_servicegroups=None):
for f in os.listdir(NRPE.nagios_exportdir):
if re.search('.*{}.cfg'.format(self.command), f):
os.remove(os.path.join(NRPE.nagios_exportdir, f))
if not nagios_servicegroups:
nagios_servicegroups = nagios_context
templ_vars = {
'nagios_hostname': hostname,
'nagios_servicegroup': nagios_servicegroups,
'description': self.description,
'shortname': self.shortname,
'command': self.command,
}
nrpe_service_text = Check.service_template.format(**templ_vars)
nrpe_service_file = '{}/service__{}_{}.cfg'.format(
NRPE.nagios_exportdir, hostname, self.command)
with open(nrpe_service_file, 'w') as nrpe_service_config:
nrpe_service_config.write(str(nrpe_service_text))
def run(self):
subprocess.call(self.check_cmd)
class NRPE(object):
nagios_logdir = '/var/log/nagios'
nagios_exportdir = '/var/lib/nagios/export'
nrpe_confdir = '/etc/nagios/nrpe.d'
def __init__(self, hostname=None):
super(NRPE, self).__init__()
self.config = config()
self.nagios_context = self.config['nagios_context']
if 'nagios_servicegroups' in self.config:
self.nagios_servicegroups = self.config['nagios_servicegroups']
else:
self.nagios_servicegroups = 'juju'
self.unit_name = local_unit().replace('/', '-')
if hostname:
self.hostname = hostname
else:
self.hostname = "{}-{}".format(self.nagios_context, self.unit_name)
self.checks = []
def add_check(self, *args, **kwargs):
self.checks.append(Check(*args, **kwargs))
def write(self):
try:
nagios_uid = pwd.getpwnam('nagios').pw_uid
nagios_gid = grp.getgrnam('nagios').gr_gid
except:
log("Nagios user not set up, nrpe checks not updated")
return
if not os.path.exists(NRPE.nagios_logdir):
os.mkdir(NRPE.nagios_logdir)
os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid)
nrpe_monitors = {}
monitors = {"monitors": {"remote": {"nrpe": nrpe_monitors}}}
for nrpecheck in self.checks:
nrpecheck.write(self.nagios_context, self.hostname,
self.nagios_servicegroups)
nrpe_monitors[nrpecheck.shortname] = {
"command": nrpecheck.command,
}
service('restart', 'nagios-nrpe-server')
for rid in relation_ids("local-monitors"):
relation_set(relation_id=rid, monitors=yaml.dump(monitors))
def get_nagios_hostcontext(relation_name='nrpe-external-master'):
"""
Query relation with nrpe subordinate, return the nagios_host_context
:param str relation_name: Name of relation nrpe sub joined to
"""
for rel in relations_of_type(relation_name):
if 'nagios_hostname' in rel:
return rel['nagios_host_context']
def get_nagios_hostname(relation_name='nrpe-external-master'):
"""
Query relation with nrpe subordinate, return the nagios_hostname
:param str relation_name: Name of relation nrpe sub joined to
"""
for rel in relations_of_type(relation_name):
if 'nagios_hostname' in rel:
return rel['nagios_hostname']
def get_nagios_unit_name(relation_name='nrpe-external-master'):
"""
Return the nagios unit name prepended with host_context if needed
:param str relation_name: Name of relation nrpe sub joined to
"""
host_context = get_nagios_hostcontext(relation_name)
if host_context:
unit = "%s:%s" % (host_context, local_unit())
else:
unit = local_unit()
return unit
def add_init_service_checks(nrpe, services, unit_name):
"""
Add checks for each service in list
:param NRPE nrpe: NRPE object to add check to
:param list services: List of services to check
:param str unit_name: Unit name to use in check description
"""
for svc in services:
upstart_init = '/etc/init/%s.conf' % svc
sysv_init = '/etc/init.d/%s' % svc
if os.path.exists(upstart_init):
nrpe.add_check(
shortname=svc,
description='process check {%s}' % unit_name,
check_cmd='check_upstart_job %s' % svc
)
elif os.path.exists(sysv_init):
cronpath = '/etc/cron.d/nagios-service-check-%s' % svc
cron_file = ('*/5 * * * * root '
'/usr/local/lib/nagios/plugins/check_exit_status.pl '
'-s /etc/init.d/%s status > '
'/var/lib/nagios/service-check-%s.txt\n' % (svc,
svc)
)
f = open(cronpath, 'w')
f.write(cron_file)
f.close()
nrpe.add_check(
shortname=svc,
description='process check {%s}' % unit_name,
check_cmd='check_status_file.py -f '
'/var/lib/nagios/service-check-%s.txt' % svc,
)
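The comment block near the top already walks through the basic NRPE().add_check() flow; the add_init_service_checks() helper at the end is the piece the quantum-gateway hook below leans on. A hedged usage sketch (service names are illustrative, and the relation/config lookups only return real data inside a Juju hook environment):

# Sketch only: get_nagios_hostname()/get_nagios_unit_name() read relation data,
# so outside a hook they return None.
from charmhelpers.contrib.charmsupport import nrpe

hostname = nrpe.get_nagios_hostname()
current_unit = nrpe.get_nagios_unit_name()
checks = nrpe.NRPE(hostname=hostname)
# Adds an upstart- or sysv-init-based process check per service.
nrpe.add_init_service_checks(checks,
                             ['neutron-dhcp-agent', 'neutron-metadata-agent'],
                             current_unit)
checks.write()   # writes /etc/nagios/nrpe.d/*.cfg and the nagios export files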

View File

@@ -0,0 +1,159 @@
'''
Functions for managing volumes in juju units. One volume is supported per unit.
Subordinates may have their own storage, provided it is on its own partition.
Configuration stanzas::
volume-ephemeral:
type: boolean
default: true
description: >
If false, a volume is mounted as specified in "volume-map"
If true, ephemeral storage will be used, meaning that log data
will only exist as long as the machine. YOU HAVE BEEN WARNED.
volume-map:
type: string
default: {}
description: >
YAML map of units to device names, e.g:
"{ rsyslog/0: /dev/vdb, rsyslog/1: /dev/vdb }"
Service units will raise a configure-error if volume-ephemeral
is 'false' and no volume-map value is set. Use 'juju set' to set a
value and 'juju resolved' to complete configuration.
Usage::
from charmsupport.volumes import configure_volume, VolumeConfigurationError
from charmsupport.hookenv import log, ERROR
def pre_mount_hook():
stop_service('myservice')
def post_mount_hook():
start_service('myservice')
if __name__ == '__main__':
try:
configure_volume(before_change=pre_mount_hook,
after_change=post_mount_hook)
except VolumeConfigurationError:
log('Storage could not be configured', ERROR)
'''
# XXX: Known limitations
# - fstab is neither consulted nor updated
import os
from charmhelpers.core import hookenv
from charmhelpers.core import host
import yaml
MOUNT_BASE = '/srv/juju/volumes'
class VolumeConfigurationError(Exception):
'''Volume configuration data is missing or invalid'''
pass
def get_config():
'''Gather and sanity-check volume configuration data'''
volume_config = {}
config = hookenv.config()
errors = False
if config.get('volume-ephemeral') in (True, 'True', 'true', 'Yes', 'yes'):
volume_config['ephemeral'] = True
else:
volume_config['ephemeral'] = False
try:
volume_map = yaml.safe_load(config.get('volume-map', '{}'))
except yaml.YAMLError as e:
hookenv.log("Error parsing YAML volume-map: {}".format(e),
hookenv.ERROR)
errors = True
if volume_map is None:
# probably an empty string
volume_map = {}
elif not isinstance(volume_map, dict):
hookenv.log("Volume-map should be a dictionary, not {}".format(
type(volume_map)))
errors = True
volume_config['device'] = volume_map.get(os.environ['JUJU_UNIT_NAME'])
if volume_config['device'] and volume_config['ephemeral']:
# asked for ephemeral storage but also defined a volume ID
hookenv.log('A volume is defined for this unit, but ephemeral '
'storage was requested', hookenv.ERROR)
errors = True
elif not volume_config['device'] and not volume_config['ephemeral']:
# asked for permanent storage but did not define volume ID
hookenv.log('Persistent storage was requested, but there is no volume '
'defined for this unit.', hookenv.ERROR)
errors = True
unit_mount_name = hookenv.local_unit().replace('/', '-')
volume_config['mountpoint'] = os.path.join(MOUNT_BASE, unit_mount_name)
if errors:
return None
return volume_config
def mount_volume(config):
if os.path.exists(config['mountpoint']):
if not os.path.isdir(config['mountpoint']):
hookenv.log('Not a directory: {}'.format(config['mountpoint']))
raise VolumeConfigurationError()
else:
host.mkdir(config['mountpoint'])
if os.path.ismount(config['mountpoint']):
unmount_volume(config)
if not host.mount(config['device'], config['mountpoint'], persist=True):
raise VolumeConfigurationError()
def unmount_volume(config):
if os.path.ismount(config['mountpoint']):
if not host.umount(config['mountpoint'], persist=True):
raise VolumeConfigurationError()
def managed_mounts():
'''List of all mounted managed volumes'''
return filter(lambda mount: mount[0].startswith(MOUNT_BASE), host.mounts())
def configure_volume(before_change=lambda: None, after_change=lambda: None):
'''Set up storage (or don't) according to the charm's volume configuration.
Returns the mount point or "ephemeral". before_change and after_change
are optional functions to be called if the volume configuration changes.
'''
config = get_config()
if not config:
hookenv.log('Failed to read volume configuration', hookenv.CRITICAL)
raise VolumeConfigurationError()
if config['ephemeral']:
if os.path.ismount(config['mountpoint']):
before_change()
unmount_volume(config)
after_change()
return 'ephemeral'
else:
# persistent storage
if os.path.ismount(config['mountpoint']):
mounts = dict(managed_mounts())
if mounts.get(config['mountpoint']) != config['device']:
before_change()
unmount_volume(config)
mount_volume(config)
after_change()
else:
before_change()
mount_volume(config)
after_change()
return config['mountpoint']
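To make the volume-map plumbing above concrete: the config value is a YAML map keyed by unit name, and get_config() picks out this unit's device and derives a per-unit mount point under MOUNT_BASE. A self-contained sketch of that lookup (unit and device names are hypothetical):

import os
import yaml

MOUNT_BASE = '/srv/juju/volumes'
volume_map_raw = '{ quantum-gateway/0: /dev/vdb, quantum-gateway/1: /dev/vdc }'
unit_name = 'quantum-gateway/0'

volume_map = yaml.safe_load(volume_map_raw) or {}
device = volume_map.get(unit_name)                          # '/dev/vdb'
mountpoint = os.path.join(MOUNT_BASE, unit_name.replace('/', '-'))
print('{} -> {}'.format(device, mountpoint))
# /dev/vdb -> /srv/juju/volumes/quantum-gateway-0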

View File

@@ -53,6 +53,7 @@ UBUNTU_OPENSTACK_RELEASE = OrderedDict([
('saucy', 'havana'),
('trusty', 'icehouse'),
('utopic', 'juno'),
('vivid', 'kilo'),
])
@@ -64,6 +65,7 @@ OPENSTACK_CODENAMES = OrderedDict([
('2013.2', 'havana'),
('2014.1', 'icehouse'),
('2014.2', 'juno'),
('2015.1', 'kilo'),
])
# The ugly duckling
@@ -84,6 +86,7 @@ SWIFT_CODENAMES = OrderedDict([
('2.0.0', 'juno'),
('2.1.0', 'juno'),
('2.2.0', 'juno'),
('2.2.1', 'kilo'),
])
DEFAULT_LOOPBACK_SIZE = '5G'
@@ -289,6 +292,9 @@ def configure_installation_source(rel):
'juno': 'trusty-updates/juno',
'juno/updates': 'trusty-updates/juno',
'juno/proposed': 'trusty-proposed/juno',
'kilo': 'trusty-updates/kilo',
'kilo/updates': 'trusty-updates/kilo',
'kilo/proposed': 'trusty-proposed/kilo',
}
try:
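The kilo rows above slot into three lookup tables: Ubuntu series to OpenStack release, OpenStack version string to codename, and install-source alias to cloud-archive pocket. A tiny sketch of how the first two get used (the dicts are trimmed to the rows touched by this diff):

from collections import OrderedDict

UBUNTU_OPENSTACK_RELEASE = OrderedDict([('utopic', 'juno'), ('vivid', 'kilo')])
OPENSTACK_CODENAMES = OrderedDict([('2014.2', 'juno'), ('2015.1', 'kilo')])

print(UBUNTU_OPENSTACK_RELEASE['vivid'])    # kilo
print(OPENSTACK_CODENAMES['2015.1'])        # kilo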

View File

@@ -64,9 +64,16 @@ CLOUD_ARCHIVE_POCKETS = {
'trusty-juno/updates': 'trusty-updates/juno',
'trusty-updates/juno': 'trusty-updates/juno',
'juno/proposed': 'trusty-proposed/juno',
'juno/proposed': 'trusty-proposed/juno',
'trusty-juno/proposed': 'trusty-proposed/juno',
'trusty-proposed/juno': 'trusty-proposed/juno',
# Kilo
'kilo': 'trusty-updates/kilo',
'trusty-kilo': 'trusty-updates/kilo',
'trusty-kilo/updates': 'trusty-updates/kilo',
'trusty-updates/kilo': 'trusty-updates/kilo',
'kilo/proposed': 'trusty-proposed/kilo',
'trusty-kilo/proposed': 'trusty-proposed/kilo',
'trusty-proposed/kilo': 'trusty-proposed/kilo',
}
# The order of this list is very important. Handlers should be listed in from
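CLOUD_ARCHIVE_POCKETS maps the user-facing openstack-origin/source aliases onto the actual Ubuntu Cloud Archive pocket. A hedged sketch of the resolution step (the deb-line format is the usual cloud-archive form; the real add_source() implementation lives elsewhere in fetch.py):

# Trimmed to the kilo entries added above.
CLOUD_ARCHIVE_POCKETS = {
    'kilo': 'trusty-updates/kilo',
    'trusty-kilo': 'trusty-updates/kilo',
    'kilo/proposed': 'trusty-proposed/kilo',
}

def cloud_archive_line(source):
    # Turn e.g. 'cloud:trusty-kilo' into an apt deb line (sketch only).
    alias = source.split(':', 1)[1]
    pocket = CLOUD_ARCHIVE_POCKETS[alias]
    return ('deb http://ubuntu-cloud.archive.canonical.com/ubuntu '
            '{} main'.format(pocket))

print(cloud_archive_line('cloud:trusty-kilo'))
# deb http://ubuntu-cloud.archive.canonical.com/ubuntu trusty-updates/kilo main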

View File

@@ -0,0 +1 @@
quantum_hooks.py

View File

@@ -0,0 +1 @@
quantum_hooks.py

View File

@@ -36,10 +36,13 @@ from charmhelpers.contrib.openstack.utils import (
from charmhelpers.payload.execd import execd_preinstall
from charmhelpers.core.sysctl import create as create_sysctl
from charmhelpers.contrib.charmsupport import nrpe
import sys
from quantum_utils import (
register_configs,
restart_map,
services,
do_openstack_upgrade,
get_packages,
get_early_packages,
@@ -92,6 +95,7 @@ def config_changed():
global CONFIGS
if openstack_upgrade_available(get_common_package()):
CONFIGS = do_openstack_upgrade()
update_nrpe_config()
sysctl_dict = config('sysctl')
if sysctl_dict:
@@ -234,6 +238,32 @@ def stop():
stop_services()
@hooks.hook('nrpe-external-master-relation-joined',
'nrpe-external-master-relation-changed')
def update_nrpe_config():
# python-dbus is used by check_upstart_job
apt_install('python-dbus')
hostname = nrpe.get_nagios_hostname()
current_unit = nrpe.get_nagios_unit_name()
nrpe_setup = nrpe.NRPE(hostname=hostname)
nrpe.add_init_service_checks(nrpe_setup, services(), current_unit)
cronpath = '/etc/cron.d/nagios-netns-check'
cron_template = ('*/5 * * * * root '
'/usr/local/lib/nagios/plugins/check_netns.sh '
'> /var/lib/nagios/netns-check.txt\n'
)
f = open(cronpath, 'w')
f.write(cron_template)
f.close()
nrpe_setup.add_check(
shortname="netns",
description='Network Namespace check {%s}' % current_unit,
check_cmd='check_status_file.py -f /var/lib/nagios/netns-check.txt'
)
nrpe_setup.write()
@hooks.hook('ha-relation-joined')
@hooks.hook('ha-relation-changed')
def ha_relation_joined():
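The two symlinks added earlier (nrpe-external-master-relation-joined/changed pointing at quantum_hooks.py) are what route those relation events into update_nrpe_config() above: Juju executes the symlink, and the charmhelpers Hooks dispatcher picks the handler from the hook name in sys.argv. A minimal, standalone sketch of that dispatch pattern (this is the general charmhelpers idiom, not the charm's actual main block):

import sys
from charmhelpers.core.hookenv import Hooks, UnregisteredHookError, log

hooks = Hooks()

@hooks.hook('nrpe-external-master-relation-joined',
            'nrpe-external-master-relation-changed')
def update_nrpe_config():
    log('would (re)write the NRPE check definitions here')

if __name__ == '__main__':
    try:
        # Juju invokes the hook via the symlink, so argv[0] carries its name.
        hooks.execute(sys.argv)
    except UnregisteredHookError as e:
        log('Unknown hook {} - skipping.'.format(e))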

View File

@@ -16,6 +16,9 @@ description: |
categories:
- openstack
provides:
nrpe-external-master:
interface: nrpe-external-master
scope: container
quantum-network-service:
interface: quantum
requires:

View File

@@ -42,6 +42,7 @@ TO_PATCH = [
'b64decode',
'is_relation_made',
'create_sysctl',
'update_nrpe_config',
'update_legacy_ha_files',
'add_hostname_to_hosts'
]
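Adding 'update_nrpe_config' to TO_PATCH means the hook tests replace that function with a mock before exercising config_changed(), so the new call can be asserted without touching NRPE or the filesystem. The mechanism underneath is ordinary mock patching; a tiny self-contained illustration (the FakeHooks class is a stand-in, not the charm's real test harness):

import mock   # on Python 3: from unittest import mock

class FakeHooks(object):
    # Stand-in for the hooks module under test.
    def update_nrpe_config(self):
        raise RuntimeError('would rewrite /etc/nagios/nrpe.d in a real hook')

    def config_changed(self):
        # ...other config handling elided...
        self.update_nrpe_config()

hooks = FakeHooks()
with mock.patch.object(hooks, 'update_nrpe_config') as fake_nrpe:
    hooks.config_changed()
    fake_nrpe.assert_called_once_with()
print('config_changed called update_nrpe_config')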