From c1d49b62ca84556af2c5876db26b281f37e566ef Mon Sep 17 00:00:00 2001 From: James Page Date: Wed, 20 Mar 2013 16:08:54 +0000 Subject: [PATCH] Quantum HA v2 --- config.yaml | 14 --- hooks/cluster-relation-departed | 1 + hooks/hooks.py | 13 ++- hooks/lib/__init__.py | 0 hooks/lib/cluster_utils.py | 130 ++++++++++++++++++++++++++ hooks/{ => lib}/utils.py | 161 +++++++++++++++++++++++--------- hooks/quantum_utils.py | 80 +++++++++++++++- metadata.yaml | 5 +- 8 files changed, 339 insertions(+), 65 deletions(-) create mode 120000 hooks/cluster-relation-departed create mode 100644 hooks/lib/__init__.py create mode 100644 hooks/lib/cluster_utils.py rename hooks/{ => lib}/utils.py (56%) diff --git a/config.yaml b/config.yaml index ad69c428..5716c0c6 100644 --- a/config.yaml +++ b/config.yaml @@ -7,7 +7,6 @@ options: Supported values include: . ovs - OpenVSwitch - nvp - Nicira Network Virtualization Platform ext-port: type: string description: | @@ -23,16 +22,3 @@ options: - cloud:precise-folsom/proposed - cloud:precise-folsom - deb http://my.archive.com/ubuntu main|KEYID - # HA configuration settings - ha-bindiface: - type: string - default: eth0 - description: | - Default network interface on which HA cluster will bind to communication - with the other members of the HA Cluster. - ha-mcastport: - type: int - default: 5405 - description: | - Default multicast port number that will be used to communicate between - HA Cluster nodes. diff --git a/hooks/cluster-relation-departed b/hooks/cluster-relation-departed new file mode 120000 index 00000000..9416ca6a --- /dev/null +++ b/hooks/cluster-relation-departed @@ -0,0 +1 @@ +hooks.py \ No newline at end of file diff --git a/hooks/hooks.py b/hooks/hooks.py index 1dd1d9df..09bd6622 100755 --- a/hooks/hooks.py +++ b/hooks/hooks.py @@ -1,6 +1,7 @@ #!/usr/bin/python -import utils +import lib.utils as utils +import lib.cluster_utils as cluster import sys import quantum_utils as qutils import os @@ -247,7 +248,7 @@ def nm_changed(): def store_ca_cert(): ca_cert = get_ca_cert() if ca_cert: - utils.install_ca(ca_cert) + qutils.install_ca(ca_cert) def get_ca_cert(): @@ -263,6 +264,11 @@ def restart_agents(): utils.restart(*qutils.GATEWAY_AGENTS[PLUGIN]) +def cluster_departed(): + conf = get_keystone_conf() + if conf and cluster.eligible_leader(None): + qutils.reassign_agent_resources(conf) + utils.do_hooks({ "install": install, "config-changed": config_changed, @@ -271,7 +277,8 @@ utils.do_hooks({ "shared-db-relation-changed": db_changed, "amqp-relation-joined": amqp_joined, "amqp-relation-changed": amqp_changed, - "quantum-network-service-relation-changed": nm_changed + "quantum-network-service-relation-changed": nm_changed, + "cluster-relation-departed": cluster_departed }) sys.exit(0) diff --git a/hooks/lib/__init__.py b/hooks/lib/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hooks/lib/cluster_utils.py b/hooks/lib/cluster_utils.py new file mode 100644 index 00000000..b7d00f8b --- /dev/null +++ b/hooks/lib/cluster_utils.py @@ -0,0 +1,130 @@ +# +# Copyright 2012 Canonical Ltd. +# +# This file is sourced from lp:openstack-charm-helpers +# +# Authors: +# James Page +# Adam Gandelman +# + +from lib.utils import ( + juju_log, + relation_ids, + relation_list, + relation_get, + get_unit_hostname, + config_get + ) +import subprocess +import os + + +def is_clustered(): + for r_id in (relation_ids('ha') or []): + for unit in (relation_list(r_id) or []): + clustered = relation_get('clustered', + rid=r_id, + unit=unit) + if clustered: + return True + return False + + +def is_leader(resource): + cmd = [ + "crm", "resource", + "show", resource + ] + try: + status = subprocess.check_output(cmd) + except subprocess.CalledProcessError: + return False + else: + if get_unit_hostname() in status: + return True + else: + return False + + +def peer_units(): + peers = [] + for r_id in (relation_ids('cluster') or []): + for unit in (relation_list(r_id) or []): + peers.append(unit) + return peers + + +def oldest_peer(peers): + local_unit_no = int(os.getenv('JUJU_UNIT_NAME').split('/')[1]) + for peer in peers: + remote_unit_no = int(peer.split('/')[1]) + if remote_unit_no < local_unit_no: + return False + return True + + +def eligible_leader(resource): + if is_clustered(): + if not is_leader(resource): + juju_log('INFO', 'Deferring action to CRM leader.') + return False + else: + peers = peer_units() + if peers and not oldest_peer(peers): + juju_log('INFO', 'Deferring action to oldest service unit.') + return False + return True + + +def https(): + ''' + Determines whether enough data has been provided in configuration + or relation data to configure HTTPS + . + returns: boolean + ''' + if config_get('use-https') == "yes": + return True + if config_get('ssl_cert') and config_get('ssl_key'): + return True + for r_id in relation_ids('identity-service'): + for unit in relation_list(r_id): + if (relation_get('https_keystone', rid=r_id, unit=unit) and + relation_get('ssl_cert', rid=r_id, unit=unit) and + relation_get('ssl_key', rid=r_id, unit=unit) and + relation_get('ca_cert', rid=r_id, unit=unit)): + return True + return False + + +def determine_api_port(public_port): + ''' + Determine correct API server listening port based on + existence of HTTPS reverse proxy and/or haproxy. + + public_port: int: standard public port for given service + + returns: int: the correct listening port for the API service + ''' + i = 0 + if len(peer_units()) > 0 or is_clustered(): + i += 1 + if https(): + i += 1 + return public_port - (i * 10) + + +def determine_haproxy_port(public_port): + ''' + Description: Determine correct proxy listening port based on public IP + + existence of HTTPS reverse proxy. + + public_port: int: standard public port for given service + + returns: int: the correct listening port for the HAProxy service + ''' + i = 0 + if https(): + i += 1 + return public_port - (i * 10) diff --git a/hooks/utils.py b/hooks/lib/utils.py similarity index 56% rename from hooks/utils.py rename to hooks/lib/utils.py index 3cf7135a..113da5fd 100644 --- a/hooks/utils.py +++ b/hooks/lib/utils.py @@ -1,18 +1,19 @@ - # # Copyright 2012 Canonical Ltd. # +# This file is sourced from lp:openstack-charm-helpers +# # Authors: # James Page # Paul Collins +# Adam Gandelman # +import json import os import subprocess import socket import sys -import apt_pkg as apt -import base64 def do_hooks(hooks): @@ -65,12 +66,12 @@ deb http://ubuntu-cloud.archive.canonical.com/ubuntu {} main """ CLOUD_ARCHIVE_POCKETS = { - 'precise-folsom': 'precise-updates/folsom', - 'precise-folsom/updates': 'precise-updates/folsom', - 'precise-folsom/proposed': 'precise-proposed/folsom', - 'precise-grizzly': 'precise-updates/grizzly', - 'precise-grizzly/updates': 'precise-updates/grizzly', - 'precise-grizzly/proposed': 'precise-proposed/grizzly' + 'folsom': 'precise-updates/folsom', + 'folsom/updates': 'precise-updates/folsom', + 'folsom/proposed': 'precise-proposed/folsom', + 'grizzly': 'precise-updates/grizzly', + 'grizzly/updates': 'precise-updates/grizzly', + 'grizzly/proposed': 'precise-proposed/grizzly' } @@ -87,8 +88,8 @@ def configure_source(): if source.startswith('cloud:'): install('ubuntu-cloud-keyring') pocket = source.split(':')[1] - with open('/etc/apt/sources.list.d/cloud-archive.list', 'w') as sfile: - sfile.write(CLOUD_ARCHIVE.format(CLOUD_ARCHIVE_POCKETS[pocket])) + with open('/etc/apt/sources.list.d/cloud-archive.list', 'w') as apt: + apt.write(CLOUD_ARCHIVE.format(CLOUD_ARCHIVE_POCKETS[pocket])) if source.startswith('deb'): l = len(source.split('|')) if l == 2: @@ -102,8 +103,8 @@ def configure_source(): elif l == 1: apt_line = source - with open('/etc/apt/sources.list.d/quantum.list', 'w') as sfile: - sfile.write(apt_line + "\n") + with open('/etc/apt/sources.list.d/quantum.list', 'w') as apt: + apt.write(apt_line + "\n") cmd = [ 'apt-get', 'update' @@ -132,22 +133,49 @@ def juju_log(severity, message): subprocess.check_call(cmd) +cache = {} + + +def cached(func): + def wrapper(*args, **kwargs): + global cache + key = str((func, args, kwargs)) + try: + return cache[key] + except KeyError: + res = func(*args, **kwargs) + cache[key] = res + return res + return wrapper + + +@cached def relation_ids(relation): cmd = [ 'relation-ids', relation ] - return subprocess.check_output(cmd).split() # IGNORE:E1103 + result = str(subprocess.check_output(cmd)).split() + if result == "": + return None + else: + return result +@cached def relation_list(rid): cmd = [ 'relation-list', '-r', rid, ] - return subprocess.check_output(cmd).split() # IGNORE:E1103 + result = str(subprocess.check_output(cmd)).split() + if result == "": + return None + else: + return result +@cached def relation_get(attribute, unit=None, rid=None): cmd = [ 'relation-get', @@ -165,6 +193,29 @@ def relation_get(attribute, unit=None, rid=None): return value +@cached +def relation_get_dict(relation_id=None, remote_unit=None): + """Obtain all relation data as dict by way of JSON""" + cmd = [ + 'relation-get', '--format=json' + ] + if relation_id: + cmd.append('-r') + cmd.append(relation_id) + if remote_unit: + remote_unit_orig = os.getenv('JUJU_REMOTE_UNIT', None) + os.environ['JUJU_REMOTE_UNIT'] = remote_unit + j = subprocess.check_output(cmd) + if remote_unit and remote_unit_orig: + os.environ['JUJU_REMOTE_UNIT'] = remote_unit_orig + d = json.loads(j) + settings = {} + # convert unicode to strings + for k, v in d.iteritems(): + settings[str(k)] = str(v) + return settings + + def relation_set(**kwargs): cmd = [ 'relation-set' @@ -172,14 +223,16 @@ def relation_set(**kwargs): args = [] for k, v in kwargs.items(): if k == 'rid': - cmd.append('-r') - cmd.append(v) + if v: + cmd.append('-r') + cmd.append(v) else: args.append('{}={}'.format(k, v)) cmd += args subprocess.check_call(cmd) +@cached def unit_get(attribute): cmd = [ 'unit-get', @@ -192,67 +245,85 @@ def unit_get(attribute): return value +@cached def config_get(attribute): cmd = [ 'config-get', - attribute + '--format', + 'json', ] - value = subprocess.check_output(cmd).strip() # IGNORE:E1103 - if value == "": + out = subprocess.check_output(cmd).strip() # IGNORE:E1103 + cfg = json.loads(out) + + try: + return cfg[attribute] + except KeyError: return None - else: - return value +@cached def get_unit_hostname(): return socket.gethostname() +@cached def get_host_ip(hostname=unit_get('private-address')): try: # Test to see if already an IPv4 address socket.inet_aton(hostname) return hostname except socket.error: - pass - answers = dns.resolver.query(hostname, 'A') - if answers: - return answers[0].address - else: - return None + answers = dns.resolver.query(hostname, 'A') + if answers: + return answers[0].address + return None -def _service_ctl(service, action): - subprocess.call(['service', service, action]) +def _svc_control(service, action): + subprocess.check_call(['service', service, action]) def restart(*services): for service in services: - _service_ctl(service, 'restart') + _svc_control(service, 'restart') def stop(*services): for service in services: - _service_ctl(service, 'stop') + _svc_control(service, 'stop') def start(*services): for service in services: - _service_ctl(service, 'start') + _svc_control(service, 'start') -def get_os_version(package=None): - apt.init() - cache = apt.Cache() - pkg = cache[package or 'quantum-common'] - if pkg.current_ver: - return apt.upstream_version(pkg.current_ver.ver_str) +def reload(*services): + for service in services: + try: + _svc_control(service, 'reload') + except subprocess.CalledProcessError: + # Reload failed - either service does not support reload + # or it was not running - restart will fixup most things + _svc_control(service, 'restart') + + +def running(service): + try: + output = subprocess.check_output(['service', service, 'status']) + except subprocess.CalledProcessError: + return False else: - return None + if ("start/running" in output or + "is running" in output): + return True + else: + return False -def install_ca(ca_cert): - with open('/usr/local/share/ca-certificates/keystone_juju_ca_cert.crt', - 'w') as crt: - crt.write(base64.b64decode(ca_cert)) - subprocess.check_call(['update-ca-certificates', '--fresh']) +def is_relation_made(relation, key='private-address'): + for r_id in (relation_ids(relation) or []): + for unit in (relation_list(r_id) or []): + if relation_get(key, rid=r_id, unit=unit): + return True + return False diff --git a/hooks/quantum_utils.py b/hooks/quantum_utils.py index 057329ea..dd90a182 100644 --- a/hooks/quantum_utils.py +++ b/hooks/quantum_utils.py @@ -1,8 +1,10 @@ import subprocess import os import uuid -from utils import juju_log as log -from utils import get_os_version +import base64 +import apt_pkg as apt +from lib.utils import juju_log as log +from lib.utils import get_unit_hostname OVS = "ovs" @@ -39,6 +41,16 @@ GATEWAY_AGENTS = { } +def get_os_version(package=None): + apt.init() + cache = apt.Cache() + pkg = cache[package or 'quantum-common'] + if pkg.current_ver: + return apt.upstream_version(pkg.current_ver.ver_str) + else: + return None + + if get_os_version('quantum-common') >= "2013.1": for plugin in GATEWAY_AGENTS: GATEWAY_AGENTS[plugin].append("quantum-metadata-agent") @@ -122,3 +134,67 @@ def flush_local_configuration(): agent_cmd.append('--config-file=/etc/quantum/{}'\ .format(agent_conf)) subprocess.call(agent_cmd) + + +def install_ca(ca_cert): + with open('/usr/local/share/ca-certificates/keystone_juju_ca_cert.crt', + 'w') as crt: + crt.write(base64.b64decode(ca_cert)) + subprocess.check_call(['update-ca-certificates', '--fresh']) + +DHCP_AGENT = "DHCP Agent" +L3_AGENT = "L3 Agent" + + +def reassign_agent_resources(env): + ''' Use agent scheduler API to detect down agents and re-schedule ''' + from quantumclient.v2_0 import client + # TODO: Fixup for https keystone + auth_url = 'http://%(auth_host)s:%(auth_port)s/v2.0' % env + quantum = client.Client(username=env['service_username'], + password=env['service_password'], + tenant_name=env['service_tenant'], + auth_url=auth_url, + region_name=env['region']) + + hostname = get_unit_hostname() + agents = quantum.list_agents(agent_type=DHCP_AGENT) + dhcp_agent_id = l3_agent_id = None + networks = {} + for agent in agents['agents']: + if not agent['alive']: + log('INFO', 'DHCP Agent %s down' % agent['id']) + for network in \ + quantum.list_networks_on_dhcp_agent(agent['id'])['networks']: + networks[network['id']] = agent['id'] + if agent['host'] == hostname: + dhcp_agent_id = agent['id'] + + agents = quantum.list_agents(agent_type=L3_AGENT) + routers = {} + for agent in agents['agents']: + if not agent['alive']: + log('INFO', 'L3 Agent %s down' % agent['id']) + for router in \ + quantum.list_routers_on_l3_agent(agent['id'])['routers']: + routers[router['id']] = agent['id'] + if agent['host'] == hostname: + l3_agent_id = agent['id'] + + for router_id in routers: + log('INFO', + 'Moving router %s from %s to %s' % \ + (router_id, routers[router_id], l3_agent_id)) + quantum.remove_router_from_l3_agent(l3_agent=routers[router_id], + router_id=router_id) + quantum.add_router_to_l3_agent(l3_agent=l3_agent_id, + body={'router_id': router_id}) + + for network_id in networks: + log('INFO', + 'Moving network %s from %s to %s' % \ + (network_id, networks[network_id], dhcp_agent_id)) + quantum.remove_network_from_dhcp_agent(dhcp_agent=networks[network_id], + network_id=network_id) + quantum.add_network_to_dhcp_agent(dhcp_agent=dhcp_agent_id, + body={'network_id': network_id}) diff --git a/metadata.yaml b/metadata.yaml index 5b6686b4..860e59b3 100644 --- a/metadata.yaml +++ b/metadata.yaml @@ -20,4 +20,7 @@ requires: shared-db: interface: mysql-shared amqp: - interface: rabbitmq \ No newline at end of file + interface: rabbitmq +peers: + cluster: + interface: quantum-gateway-ha \ No newline at end of file