Avoid restart races for nova-api-metadata

It's possible that the nova-api-metadata will startup during the
time that the nova-conductor processes on the nova-cloud-controller
units are still starting up, resulting in a messaging timeout which
causes the daemon to exit 0.

Upstart will restart a service in this scenario, however systemd is
configured in packaging to only restart 'on-failure' so will not
attempt to restart.

This points to two other bugs - one that a messaging timeout results
in a exit code of 0, and that the OpenStack services under systemd
behave differently to under upstart.

Install an override file for systemd based installs to mimic the
behaviour of upstart, and deal with a code logic problem in the
restart_trigger handling to ensure that the charm does at least
try to restart the nova-api-metadata service at the right points
in time.

Change-Id: Ia08b7840efa33fd301d0e2c55bb30ae1a102cbfa
Closes-Bug: 1547122
This commit is contained in:
James Page 2016-07-12 10:57:28 +01:00
parent 1fd4c20f5d
commit 68ea83e60d
5 changed files with 90 additions and 7 deletions

4
files/override.conf Normal file
View File

@ -0,0 +1,4 @@
# Override default Restart policy for nova-api-metadata to always
# dealing with messaging timeout on startup with conductor services
[Service]
Restart=always

View File

@ -69,6 +69,7 @@ from neutron_utils import (
use_l3ha, use_l3ha,
NEUTRON_COMMON, NEUTRON_COMMON,
assess_status, assess_status,
install_systemd_override,
) )
hooks = Hooks() hooks = Hooks()
@ -104,6 +105,10 @@ def install():
# Legacy HA for Icehouse # Legacy HA for Icehouse
update_legacy_ha_files() update_legacy_ha_files()
# Install systemd overrides to remove service startup race between
# n-gateway and n-cloud-controller services.
install_systemd_override()
@hooks.hook('config-changed') @hooks.hook('config-changed')
@restart_on_change(restart_map()) @restart_on_change(restart_map())
@ -161,6 +166,10 @@ def upgrade_charm():
config_changed() config_changed()
update_legacy_ha_files(force=True) update_legacy_ha_files(force=True)
# Install systemd overrides to remove service startup race between
# n-gateway and n-cloud-controller services.
install_systemd_override()
@hooks.hook('amqp-nova-relation-joined') @hooks.hook('amqp-nova-relation-joined')
def amqp_nova_joined(relation_id=None): def amqp_nova_joined(relation_id=None):
@ -229,8 +238,7 @@ def nm_changed():
restart_nonce = relation_get('restart_trigger') restart_nonce = relation_get('restart_trigger')
if restart_nonce is not None: if restart_nonce is not None:
db = kv() db = kv()
previous_nonce = db.get('restart_nonce', previous_nonce = db.get('restart_nonce')
restart_nonce)
if previous_nonce != restart_nonce: if previous_nonce != restart_nonce:
if not is_unit_paused_set(): if not is_unit_paused_set():
service_restart('nova-api-metadata') service_restart('nova-api-metadata')

View File

@ -13,6 +13,7 @@ from charmhelpers.core.host import (
service_stop, service_stop,
service_restart, service_restart,
write_file, write_file,
init_is_systemd,
) )
from charmhelpers.core.hookenv import ( from charmhelpers.core.hookenv import (
charm_dir, charm_dir,
@ -478,6 +479,25 @@ SERVICE_RENAMES = {
} }
# Override file for systemd
SYSTEMD_NOVA_OVERRIDE = (
'/etc/systemd/system/nova-api-metadata.service.d/override.conf'
)
def install_systemd_override():
'''
Install systemd override files for nova-api-metadata
and reload systemd daemon if required.
'''
if init_is_systemd() and not os.path.exists(SYSTEMD_NOVA_OVERRIDE):
mkdir(os.path.dirname(SYSTEMD_NOVA_OVERRIDE))
shutil.copy(os.path.join('files',
os.path.basename(SYSTEMD_NOVA_OVERRIDE)),
SYSTEMD_NOVA_OVERRIDE)
subprocess.check_call(['systemctl', 'daemon-reload'])
def remap_service(service_name): def remap_service(service_name):
''' '''
Remap service names based on openstack release to deal Remap service names based on openstack release to deal

View File

@ -57,6 +57,7 @@ TO_PATCH = [
'kv', 'kv',
'service_restart', 'service_restart',
'is_unit_paused_set', 'is_unit_paused_set',
'install_systemd_override',
] ]
@ -93,6 +94,7 @@ class TestQuantumHooks(CharmTestCase):
self.assertTrue(self.get_early_packages.called) self.assertTrue(self.get_early_packages.called)
self.assertTrue(self.get_packages.called) self.assertTrue(self.get_packages.called)
self.assertTrue(self.execd_preinstall.called) self.assertTrue(self.execd_preinstall.called)
self.assertTrue(self.install_systemd_override.called)
def test_install_hook_precise_nocloudarchive(self): def test_install_hook_precise_nocloudarchive(self):
self.test_config.set('openstack-origin', 'distro') self.test_config.set('openstack-origin', 'distro')
@ -238,6 +240,7 @@ class TestQuantumHooks(CharmTestCase):
self._call_hook('upgrade-charm') self._call_hook('upgrade-charm')
self.assertTrue(_install.called) self.assertTrue(_install.called)
self.assertTrue(_config_changed.called) self.assertTrue(_config_changed.called)
self.assertTrue(self.install_systemd_override.called)
def test_amqp_joined(self): def test_amqp_joined(self):
self._call_hook('amqp-relation-joined') self._call_hook('amqp-relation-joined')
@ -283,7 +286,7 @@ class TestQuantumHooks(CharmTestCase):
def _relation_get(key): def _relation_get(key):
data = { data = {
'ca_cert': 'cert', 'ca_cert': 'cert',
'restart_nonce': None, 'restart_trigger': None,
} }
return data.get(key) return data.get(key)
self.relation_get.side_effect = _relation_get self.relation_get.side_effect = _relation_get
@ -291,7 +294,30 @@ class TestQuantumHooks(CharmTestCase):
self.assertTrue(self.CONFIGS.write_all.called) self.assertTrue(self.CONFIGS.write_all.called)
self.install_ca_cert.assert_called_with('cert') self.install_ca_cert.assert_called_with('cert')
def test_nm_changed_restart_nonce(self):
'''Ensure first set of restart_trigger restarts nova-api-metadata'''
def _relation_get(key):
data = {
'ca_cert': 'cert',
'restart_trigger': '1111111222222333333',
}
return data.get(key)
self.relation_get.side_effect = _relation_get
self.is_unit_paused_set.return_value = False
kv_mock = MagicMock()
self.kv.return_value = kv_mock
kv_mock.get.return_value = None
self._call_hook('quantum-network-service-relation-changed')
self.assertTrue(self.CONFIGS.write_all.called)
self.install_ca_cert.assert_called_with('cert')
self.service_restart.assert_called_with('nova-api-metadata')
kv_mock.get.assert_called_with('restart_nonce')
kv_mock.set.assert_called_with('restart_nonce',
'1111111222222333333')
self.assertTrue(kv_mock.flush.called)
def test_nm_changed_restart_nonce_changed(self): def test_nm_changed_restart_nonce_changed(self):
'''Ensure change of restart_trigger restarts nova-api-metadata'''
def _relation_get(key): def _relation_get(key):
data = { data = {
'ca_cert': 'cert', 'ca_cert': 'cert',
@ -307,13 +333,13 @@ class TestQuantumHooks(CharmTestCase):
self.assertTrue(self.CONFIGS.write_all.called) self.assertTrue(self.CONFIGS.write_all.called)
self.install_ca_cert.assert_called_with('cert') self.install_ca_cert.assert_called_with('cert')
self.service_restart.assert_called_with('nova-api-metadata') self.service_restart.assert_called_with('nova-api-metadata')
kv_mock.get.assert_called_with('restart_nonce', kv_mock.get.assert_called_with('restart_nonce')
'1111111222222333333')
kv_mock.set.assert_called_with('restart_nonce', kv_mock.set.assert_called_with('restart_nonce',
'1111111222222333333') '1111111222222333333')
self.assertTrue(kv_mock.flush.called) self.assertTrue(kv_mock.flush.called)
def test_nm_changed_restart_nonce_nochange(self): def test_nm_changed_restart_nonce_nochange(self):
'''Ensure no change in restart_trigger skips restarts'''
def _relation_get(key): def _relation_get(key):
data = { data = {
'ca_cert': 'cert', 'ca_cert': 'cert',
@ -329,8 +355,7 @@ class TestQuantumHooks(CharmTestCase):
self.assertTrue(self.CONFIGS.write_all.called) self.assertTrue(self.CONFIGS.write_all.called)
self.install_ca_cert.assert_called_with('cert') self.install_ca_cert.assert_called_with('cert')
self.assertFalse(self.service_restart.called) self.assertFalse(self.service_restart.called)
kv_mock.get.assert_called_with('restart_nonce', kv_mock.get.assert_called_with('restart_nonce')
'1111111222222333333')
self.assertFalse(kv_mock.set.called) self.assertFalse(kv_mock.set.called)
self.assertFalse(kv_mock.flush.called) self.assertFalse(kv_mock.flush.called)

View File

@ -42,6 +42,7 @@ TO_PATCH = [
'mkdir', 'mkdir',
'copy2', 'copy2',
'NeutronAPIContext', 'NeutronAPIContext',
'init_is_systemd',
] ]
openstack_origin_git = \ openstack_origin_git = \
@ -1335,3 +1336,28 @@ class TestNeutronAgentReallocation(CharmTestCase):
asf.assert_called_once_with('some-config') asf.assert_called_once_with('some-config')
# ports=None whilst port checks are disabled. # ports=None whilst port checks are disabled.
f.assert_called_once_with('assessor', services=['s1'], ports=None) f.assert_called_once_with('assessor', services=['s1'], ports=None)
@patch.object(neutron_utils, 'subprocess')
@patch.object(neutron_utils, 'shutil')
@patch('os.path.exists')
def test_install_systemd_override_systemd(self, _os_exists, _shutil,
_subprocess):
'''
Ensure systemd override is only installed on systemd based systems
'''
self.init_is_systemd.return_value = True
_os_exists.return_value = False
neutron_utils.install_systemd_override()
_os_exists.assert_called_with(
'/etc/systemd/system/nova-api-metadata.service.d/override.conf'
)
self.mkdir.assert_called_with(
'/etc/systemd/system/nova-api-metadata.service.d'
)
_shutil.copy.assert_called_with(
'files/override.conf',
'/etc/systemd/system/nova-api-metadata.service.d/override.conf'
)
_subprocess.check_call.assert_called_with(
['systemctl', 'daemon-reload']
)