From 68ea83e60d4930b6c80a910c3ef5465c5f34416b Mon Sep 17 00:00:00 2001 From: James Page Date: Tue, 12 Jul 2016 10:57:28 +0100 Subject: [PATCH] Avoid restart races for nova-api-metadata It's possible that the nova-api-metadata will startup during the time that the nova-conductor processes on the nova-cloud-controller units are still starting up, resulting in a messaging timeout which causes the daemon to exit 0. Upstart will restart a service in this scenario, however systemd is configured in packaging to only restart 'on-failure' so will not attempt to restart. This points to two other bugs - one that a messaging timeout results in a exit code of 0, and that the OpenStack services under systemd behave differently to under upstart. Install an override file for systemd based installs to mimic the behaviour of upstart, and deal with a code logic problem in the restart_trigger handling to ensure that the charm does at least try to restart the nova-api-metadata service at the right points in time. Change-Id: Ia08b7840efa33fd301d0e2c55bb30ae1a102cbfa Closes-Bug: 1547122 --- files/override.conf | 4 ++++ hooks/neutron_hooks.py | 12 +++++++++-- hooks/neutron_utils.py | 20 ++++++++++++++++++ unit_tests/test_neutron_hooks.py | 35 +++++++++++++++++++++++++++----- unit_tests/test_neutron_utils.py | 26 ++++++++++++++++++++++++ 5 files changed, 90 insertions(+), 7 deletions(-) create mode 100644 files/override.conf diff --git a/files/override.conf b/files/override.conf new file mode 100644 index 00000000..718e982a --- /dev/null +++ b/files/override.conf @@ -0,0 +1,4 @@ +# Override default Restart policy for nova-api-metadata to always +# dealing with messaging timeout on startup with conductor services +[Service] +Restart=always \ No newline at end of file diff --git a/hooks/neutron_hooks.py b/hooks/neutron_hooks.py index f9d1acc1..cf7feda2 100755 --- a/hooks/neutron_hooks.py +++ b/hooks/neutron_hooks.py @@ -69,6 +69,7 @@ from neutron_utils import ( use_l3ha, NEUTRON_COMMON, assess_status, + install_systemd_override, ) hooks = Hooks() @@ -104,6 +105,10 @@ def install(): # Legacy HA for Icehouse update_legacy_ha_files() + # Install systemd overrides to remove service startup race between + # n-gateway and n-cloud-controller services. + install_systemd_override() + @hooks.hook('config-changed') @restart_on_change(restart_map()) @@ -161,6 +166,10 @@ def upgrade_charm(): config_changed() update_legacy_ha_files(force=True) + # Install systemd overrides to remove service startup race between + # n-gateway and n-cloud-controller services. + install_systemd_override() + @hooks.hook('amqp-nova-relation-joined') def amqp_nova_joined(relation_id=None): @@ -229,8 +238,7 @@ def nm_changed(): restart_nonce = relation_get('restart_trigger') if restart_nonce is not None: db = kv() - previous_nonce = db.get('restart_nonce', - restart_nonce) + previous_nonce = db.get('restart_nonce') if previous_nonce != restart_nonce: if not is_unit_paused_set(): service_restart('nova-api-metadata') diff --git a/hooks/neutron_utils.py b/hooks/neutron_utils.py index 87ad8f91..f10402dd 100644 --- a/hooks/neutron_utils.py +++ b/hooks/neutron_utils.py @@ -13,6 +13,7 @@ from charmhelpers.core.host import ( service_stop, service_restart, write_file, + init_is_systemd, ) from charmhelpers.core.hookenv import ( charm_dir, @@ -478,6 +479,25 @@ SERVICE_RENAMES = { } +# Override file for systemd +SYSTEMD_NOVA_OVERRIDE = ( + '/etc/systemd/system/nova-api-metadata.service.d/override.conf' +) + + +def install_systemd_override(): + ''' + Install systemd override files for nova-api-metadata + and reload systemd daemon if required. + ''' + if init_is_systemd() and not os.path.exists(SYSTEMD_NOVA_OVERRIDE): + mkdir(os.path.dirname(SYSTEMD_NOVA_OVERRIDE)) + shutil.copy(os.path.join('files', + os.path.basename(SYSTEMD_NOVA_OVERRIDE)), + SYSTEMD_NOVA_OVERRIDE) + subprocess.check_call(['systemctl', 'daemon-reload']) + + def remap_service(service_name): ''' Remap service names based on openstack release to deal diff --git a/unit_tests/test_neutron_hooks.py b/unit_tests/test_neutron_hooks.py index d46224a3..969b79f8 100644 --- a/unit_tests/test_neutron_hooks.py +++ b/unit_tests/test_neutron_hooks.py @@ -57,6 +57,7 @@ TO_PATCH = [ 'kv', 'service_restart', 'is_unit_paused_set', + 'install_systemd_override', ] @@ -93,6 +94,7 @@ class TestQuantumHooks(CharmTestCase): self.assertTrue(self.get_early_packages.called) self.assertTrue(self.get_packages.called) self.assertTrue(self.execd_preinstall.called) + self.assertTrue(self.install_systemd_override.called) def test_install_hook_precise_nocloudarchive(self): self.test_config.set('openstack-origin', 'distro') @@ -238,6 +240,7 @@ class TestQuantumHooks(CharmTestCase): self._call_hook('upgrade-charm') self.assertTrue(_install.called) self.assertTrue(_config_changed.called) + self.assertTrue(self.install_systemd_override.called) def test_amqp_joined(self): self._call_hook('amqp-relation-joined') @@ -283,7 +286,7 @@ class TestQuantumHooks(CharmTestCase): def _relation_get(key): data = { 'ca_cert': 'cert', - 'restart_nonce': None, + 'restart_trigger': None, } return data.get(key) self.relation_get.side_effect = _relation_get @@ -291,7 +294,30 @@ class TestQuantumHooks(CharmTestCase): self.assertTrue(self.CONFIGS.write_all.called) self.install_ca_cert.assert_called_with('cert') + def test_nm_changed_restart_nonce(self): + '''Ensure first set of restart_trigger restarts nova-api-metadata''' + def _relation_get(key): + data = { + 'ca_cert': 'cert', + 'restart_trigger': '1111111222222333333', + } + return data.get(key) + self.relation_get.side_effect = _relation_get + self.is_unit_paused_set.return_value = False + kv_mock = MagicMock() + self.kv.return_value = kv_mock + kv_mock.get.return_value = None + self._call_hook('quantum-network-service-relation-changed') + self.assertTrue(self.CONFIGS.write_all.called) + self.install_ca_cert.assert_called_with('cert') + self.service_restart.assert_called_with('nova-api-metadata') + kv_mock.get.assert_called_with('restart_nonce') + kv_mock.set.assert_called_with('restart_nonce', + '1111111222222333333') + self.assertTrue(kv_mock.flush.called) + def test_nm_changed_restart_nonce_changed(self): + '''Ensure change of restart_trigger restarts nova-api-metadata''' def _relation_get(key): data = { 'ca_cert': 'cert', @@ -307,13 +333,13 @@ class TestQuantumHooks(CharmTestCase): self.assertTrue(self.CONFIGS.write_all.called) self.install_ca_cert.assert_called_with('cert') self.service_restart.assert_called_with('nova-api-metadata') - kv_mock.get.assert_called_with('restart_nonce', - '1111111222222333333') + kv_mock.get.assert_called_with('restart_nonce') kv_mock.set.assert_called_with('restart_nonce', '1111111222222333333') self.assertTrue(kv_mock.flush.called) def test_nm_changed_restart_nonce_nochange(self): + '''Ensure no change in restart_trigger skips restarts''' def _relation_get(key): data = { 'ca_cert': 'cert', @@ -329,8 +355,7 @@ class TestQuantumHooks(CharmTestCase): self.assertTrue(self.CONFIGS.write_all.called) self.install_ca_cert.assert_called_with('cert') self.assertFalse(self.service_restart.called) - kv_mock.get.assert_called_with('restart_nonce', - '1111111222222333333') + kv_mock.get.assert_called_with('restart_nonce') self.assertFalse(kv_mock.set.called) self.assertFalse(kv_mock.flush.called) diff --git a/unit_tests/test_neutron_utils.py b/unit_tests/test_neutron_utils.py index a5baffe6..55a71f2e 100644 --- a/unit_tests/test_neutron_utils.py +++ b/unit_tests/test_neutron_utils.py @@ -42,6 +42,7 @@ TO_PATCH = [ 'mkdir', 'copy2', 'NeutronAPIContext', + 'init_is_systemd', ] openstack_origin_git = \ @@ -1335,3 +1336,28 @@ class TestNeutronAgentReallocation(CharmTestCase): asf.assert_called_once_with('some-config') # ports=None whilst port checks are disabled. f.assert_called_once_with('assessor', services=['s1'], ports=None) + + @patch.object(neutron_utils, 'subprocess') + @patch.object(neutron_utils, 'shutil') + @patch('os.path.exists') + def test_install_systemd_override_systemd(self, _os_exists, _shutil, + _subprocess): + ''' + Ensure systemd override is only installed on systemd based systems + ''' + self.init_is_systemd.return_value = True + _os_exists.return_value = False + neutron_utils.install_systemd_override() + _os_exists.assert_called_with( + '/etc/systemd/system/nova-api-metadata.service.d/override.conf' + ) + self.mkdir.assert_called_with( + '/etc/systemd/system/nova-api-metadata.service.d' + ) + _shutil.copy.assert_called_with( + 'files/override.conf', + '/etc/systemd/system/nova-api-metadata.service.d/override.conf' + ) + _subprocess.check_call.assert_called_with( + ['systemctl', 'daemon-reload'] + )