Quantum HA v2

This commit is contained in:
James Page 2013-03-20 16:08:54 +00:00
parent af24205a58
commit c1d49b62ca
8 changed files with 339 additions and 65 deletions

View File

@ -7,7 +7,6 @@ options:
Supported values include:
.
ovs - OpenVSwitch
nvp - Nicira Network Virtualization Platform
ext-port:
type: string
description: |
@ -23,16 +22,3 @@ options:
- cloud:precise-folsom/proposed
- cloud:precise-folsom
- deb http://my.archive.com/ubuntu main|KEYID
# HA configuration settings
ha-bindiface:
type: string
default: eth0
description: |
Default network interface on which HA cluster will bind to communication
with the other members of the HA Cluster.
ha-mcastport:
type: int
default: 5405
description: |
Default multicast port number that will be used to communicate between
HA Cluster nodes.

View File

@ -0,0 +1 @@
hooks.py

View File

@ -1,6 +1,7 @@
#!/usr/bin/python
import utils
import lib.utils as utils
import lib.cluster_utils as cluster
import sys
import quantum_utils as qutils
import os
@ -247,7 +248,7 @@ def nm_changed():
def store_ca_cert():
ca_cert = get_ca_cert()
if ca_cert:
utils.install_ca(ca_cert)
qutils.install_ca(ca_cert)
def get_ca_cert():
@ -263,6 +264,11 @@ def restart_agents():
utils.restart(*qutils.GATEWAY_AGENTS[PLUGIN])
def cluster_departed():
conf = get_keystone_conf()
if conf and cluster.eligible_leader(None):
qutils.reassign_agent_resources(conf)
utils.do_hooks({
"install": install,
"config-changed": config_changed,
@ -271,7 +277,8 @@ utils.do_hooks({
"shared-db-relation-changed": db_changed,
"amqp-relation-joined": amqp_joined,
"amqp-relation-changed": amqp_changed,
"quantum-network-service-relation-changed": nm_changed
"quantum-network-service-relation-changed": nm_changed,
"cluster-relation-departed": cluster_departed
})
sys.exit(0)

0
hooks/lib/__init__.py Normal file
View File

130
hooks/lib/cluster_utils.py Normal file
View File

@ -0,0 +1,130 @@
#
# Copyright 2012 Canonical Ltd.
#
# This file is sourced from lp:openstack-charm-helpers
#
# Authors:
# James Page <james.page@ubuntu.com>
# Adam Gandelman <adamg@ubuntu.com>
#
from lib.utils import (
juju_log,
relation_ids,
relation_list,
relation_get,
get_unit_hostname,
config_get
)
import subprocess
import os
def is_clustered():
for r_id in (relation_ids('ha') or []):
for unit in (relation_list(r_id) or []):
clustered = relation_get('clustered',
rid=r_id,
unit=unit)
if clustered:
return True
return False
def is_leader(resource):
cmd = [
"crm", "resource",
"show", resource
]
try:
status = subprocess.check_output(cmd)
except subprocess.CalledProcessError:
return False
else:
if get_unit_hostname() in status:
return True
else:
return False
def peer_units():
peers = []
for r_id in (relation_ids('cluster') or []):
for unit in (relation_list(r_id) or []):
peers.append(unit)
return peers
def oldest_peer(peers):
local_unit_no = int(os.getenv('JUJU_UNIT_NAME').split('/')[1])
for peer in peers:
remote_unit_no = int(peer.split('/')[1])
if remote_unit_no < local_unit_no:
return False
return True
def eligible_leader(resource):
if is_clustered():
if not is_leader(resource):
juju_log('INFO', 'Deferring action to CRM leader.')
return False
else:
peers = peer_units()
if peers and not oldest_peer(peers):
juju_log('INFO', 'Deferring action to oldest service unit.')
return False
return True
def https():
'''
Determines whether enough data has been provided in configuration
or relation data to configure HTTPS
.
returns: boolean
'''
if config_get('use-https') == "yes":
return True
if config_get('ssl_cert') and config_get('ssl_key'):
return True
for r_id in relation_ids('identity-service'):
for unit in relation_list(r_id):
if (relation_get('https_keystone', rid=r_id, unit=unit) and
relation_get('ssl_cert', rid=r_id, unit=unit) and
relation_get('ssl_key', rid=r_id, unit=unit) and
relation_get('ca_cert', rid=r_id, unit=unit)):
return True
return False
def determine_api_port(public_port):
'''
Determine correct API server listening port based on
existence of HTTPS reverse proxy and/or haproxy.
public_port: int: standard public port for given service
returns: int: the correct listening port for the API service
'''
i = 0
if len(peer_units()) > 0 or is_clustered():
i += 1
if https():
i += 1
return public_port - (i * 10)
def determine_haproxy_port(public_port):
'''
Description: Determine correct proxy listening port based on public IP +
existence of HTTPS reverse proxy.
public_port: int: standard public port for given service
returns: int: the correct listening port for the HAProxy service
'''
i = 0
if https():
i += 1
return public_port - (i * 10)

View File

@ -1,18 +1,19 @@
#
# Copyright 2012 Canonical Ltd.
#
# This file is sourced from lp:openstack-charm-helpers
#
# Authors:
# James Page <james.page@ubuntu.com>
# Paul Collins <paul.collins@canonical.com>
# Adam Gandelman <adamg@ubuntu.com>
#
import json
import os
import subprocess
import socket
import sys
import apt_pkg as apt
import base64
def do_hooks(hooks):
@ -65,12 +66,12 @@ deb http://ubuntu-cloud.archive.canonical.com/ubuntu {} main
"""
CLOUD_ARCHIVE_POCKETS = {
'precise-folsom': 'precise-updates/folsom',
'precise-folsom/updates': 'precise-updates/folsom',
'precise-folsom/proposed': 'precise-proposed/folsom',
'precise-grizzly': 'precise-updates/grizzly',
'precise-grizzly/updates': 'precise-updates/grizzly',
'precise-grizzly/proposed': 'precise-proposed/grizzly'
'folsom': 'precise-updates/folsom',
'folsom/updates': 'precise-updates/folsom',
'folsom/proposed': 'precise-proposed/folsom',
'grizzly': 'precise-updates/grizzly',
'grizzly/updates': 'precise-updates/grizzly',
'grizzly/proposed': 'precise-proposed/grizzly'
}
@ -87,8 +88,8 @@ def configure_source():
if source.startswith('cloud:'):
install('ubuntu-cloud-keyring')
pocket = source.split(':')[1]
with open('/etc/apt/sources.list.d/cloud-archive.list', 'w') as sfile:
sfile.write(CLOUD_ARCHIVE.format(CLOUD_ARCHIVE_POCKETS[pocket]))
with open('/etc/apt/sources.list.d/cloud-archive.list', 'w') as apt:
apt.write(CLOUD_ARCHIVE.format(CLOUD_ARCHIVE_POCKETS[pocket]))
if source.startswith('deb'):
l = len(source.split('|'))
if l == 2:
@ -102,8 +103,8 @@ def configure_source():
elif l == 1:
apt_line = source
with open('/etc/apt/sources.list.d/quantum.list', 'w') as sfile:
sfile.write(apt_line + "\n")
with open('/etc/apt/sources.list.d/quantum.list', 'w') as apt:
apt.write(apt_line + "\n")
cmd = [
'apt-get',
'update'
@ -132,22 +133,49 @@ def juju_log(severity, message):
subprocess.check_call(cmd)
cache = {}
def cached(func):
def wrapper(*args, **kwargs):
global cache
key = str((func, args, kwargs))
try:
return cache[key]
except KeyError:
res = func(*args, **kwargs)
cache[key] = res
return res
return wrapper
@cached
def relation_ids(relation):
cmd = [
'relation-ids',
relation
]
return subprocess.check_output(cmd).split() # IGNORE:E1103
result = str(subprocess.check_output(cmd)).split()
if result == "":
return None
else:
return result
@cached
def relation_list(rid):
cmd = [
'relation-list',
'-r', rid,
]
return subprocess.check_output(cmd).split() # IGNORE:E1103
result = str(subprocess.check_output(cmd)).split()
if result == "":
return None
else:
return result
@cached
def relation_get(attribute, unit=None, rid=None):
cmd = [
'relation-get',
@ -165,6 +193,29 @@ def relation_get(attribute, unit=None, rid=None):
return value
@cached
def relation_get_dict(relation_id=None, remote_unit=None):
"""Obtain all relation data as dict by way of JSON"""
cmd = [
'relation-get', '--format=json'
]
if relation_id:
cmd.append('-r')
cmd.append(relation_id)
if remote_unit:
remote_unit_orig = os.getenv('JUJU_REMOTE_UNIT', None)
os.environ['JUJU_REMOTE_UNIT'] = remote_unit
j = subprocess.check_output(cmd)
if remote_unit and remote_unit_orig:
os.environ['JUJU_REMOTE_UNIT'] = remote_unit_orig
d = json.loads(j)
settings = {}
# convert unicode to strings
for k, v in d.iteritems():
settings[str(k)] = str(v)
return settings
def relation_set(**kwargs):
cmd = [
'relation-set'
@ -172,14 +223,16 @@ def relation_set(**kwargs):
args = []
for k, v in kwargs.items():
if k == 'rid':
cmd.append('-r')
cmd.append(v)
if v:
cmd.append('-r')
cmd.append(v)
else:
args.append('{}={}'.format(k, v))
cmd += args
subprocess.check_call(cmd)
@cached
def unit_get(attribute):
cmd = [
'unit-get',
@ -192,67 +245,85 @@ def unit_get(attribute):
return value
@cached
def config_get(attribute):
cmd = [
'config-get',
attribute
'--format',
'json',
]
value = subprocess.check_output(cmd).strip() # IGNORE:E1103
if value == "":
out = subprocess.check_output(cmd).strip() # IGNORE:E1103
cfg = json.loads(out)
try:
return cfg[attribute]
except KeyError:
return None
else:
return value
@cached
def get_unit_hostname():
return socket.gethostname()
@cached
def get_host_ip(hostname=unit_get('private-address')):
try:
# Test to see if already an IPv4 address
socket.inet_aton(hostname)
return hostname
except socket.error:
pass
answers = dns.resolver.query(hostname, 'A')
if answers:
return answers[0].address
else:
return None
answers = dns.resolver.query(hostname, 'A')
if answers:
return answers[0].address
return None
def _service_ctl(service, action):
subprocess.call(['service', service, action])
def _svc_control(service, action):
subprocess.check_call(['service', service, action])
def restart(*services):
for service in services:
_service_ctl(service, 'restart')
_svc_control(service, 'restart')
def stop(*services):
for service in services:
_service_ctl(service, 'stop')
_svc_control(service, 'stop')
def start(*services):
for service in services:
_service_ctl(service, 'start')
_svc_control(service, 'start')
def get_os_version(package=None):
apt.init()
cache = apt.Cache()
pkg = cache[package or 'quantum-common']
if pkg.current_ver:
return apt.upstream_version(pkg.current_ver.ver_str)
def reload(*services):
for service in services:
try:
_svc_control(service, 'reload')
except subprocess.CalledProcessError:
# Reload failed - either service does not support reload
# or it was not running - restart will fixup most things
_svc_control(service, 'restart')
def running(service):
try:
output = subprocess.check_output(['service', service, 'status'])
except subprocess.CalledProcessError:
return False
else:
return None
if ("start/running" in output or
"is running" in output):
return True
else:
return False
def install_ca(ca_cert):
with open('/usr/local/share/ca-certificates/keystone_juju_ca_cert.crt',
'w') as crt:
crt.write(base64.b64decode(ca_cert))
subprocess.check_call(['update-ca-certificates', '--fresh'])
def is_relation_made(relation, key='private-address'):
for r_id in (relation_ids(relation) or []):
for unit in (relation_list(r_id) or []):
if relation_get(key, rid=r_id, unit=unit):
return True
return False

View File

@ -1,8 +1,10 @@
import subprocess
import os
import uuid
from utils import juju_log as log
from utils import get_os_version
import base64
import apt_pkg as apt
from lib.utils import juju_log as log
from lib.utils import get_unit_hostname
OVS = "ovs"
@ -39,6 +41,16 @@ GATEWAY_AGENTS = {
}
def get_os_version(package=None):
apt.init()
cache = apt.Cache()
pkg = cache[package or 'quantum-common']
if pkg.current_ver:
return apt.upstream_version(pkg.current_ver.ver_str)
else:
return None
if get_os_version('quantum-common') >= "2013.1":
for plugin in GATEWAY_AGENTS:
GATEWAY_AGENTS[plugin].append("quantum-metadata-agent")
@ -122,3 +134,67 @@ def flush_local_configuration():
agent_cmd.append('--config-file=/etc/quantum/{}'\
.format(agent_conf))
subprocess.call(agent_cmd)
def install_ca(ca_cert):
with open('/usr/local/share/ca-certificates/keystone_juju_ca_cert.crt',
'w') as crt:
crt.write(base64.b64decode(ca_cert))
subprocess.check_call(['update-ca-certificates', '--fresh'])
DHCP_AGENT = "DHCP Agent"
L3_AGENT = "L3 Agent"
def reassign_agent_resources(env):
''' Use agent scheduler API to detect down agents and re-schedule '''
from quantumclient.v2_0 import client
# TODO: Fixup for https keystone
auth_url = 'http://%(auth_host)s:%(auth_port)s/v2.0' % env
quantum = client.Client(username=env['service_username'],
password=env['service_password'],
tenant_name=env['service_tenant'],
auth_url=auth_url,
region_name=env['region'])
hostname = get_unit_hostname()
agents = quantum.list_agents(agent_type=DHCP_AGENT)
dhcp_agent_id = l3_agent_id = None
networks = {}
for agent in agents['agents']:
if not agent['alive']:
log('INFO', 'DHCP Agent %s down' % agent['id'])
for network in \
quantum.list_networks_on_dhcp_agent(agent['id'])['networks']:
networks[network['id']] = agent['id']
if agent['host'] == hostname:
dhcp_agent_id = agent['id']
agents = quantum.list_agents(agent_type=L3_AGENT)
routers = {}
for agent in agents['agents']:
if not agent['alive']:
log('INFO', 'L3 Agent %s down' % agent['id'])
for router in \
quantum.list_routers_on_l3_agent(agent['id'])['routers']:
routers[router['id']] = agent['id']
if agent['host'] == hostname:
l3_agent_id = agent['id']
for router_id in routers:
log('INFO',
'Moving router %s from %s to %s' % \
(router_id, routers[router_id], l3_agent_id))
quantum.remove_router_from_l3_agent(l3_agent=routers[router_id],
router_id=router_id)
quantum.add_router_to_l3_agent(l3_agent=l3_agent_id,
body={'router_id': router_id})
for network_id in networks:
log('INFO',
'Moving network %s from %s to %s' % \
(network_id, networks[network_id], dhcp_agent_id))
quantum.remove_network_from_dhcp_agent(dhcp_agent=networks[network_id],
network_id=network_id)
quantum.add_network_to_dhcp_agent(dhcp_agent=dhcp_agent_id,
body={'network_id': network_id})

View File

@ -21,3 +21,6 @@ requires:
interface: mysql-shared
amqp:
interface: rabbitmq
peers:
cluster:
interface: quantum-gateway-ha