From c274ab9f1a9b270c0ec2cb2d4aebe052aa37d416 Mon Sep 17 00:00:00 2001 From: Lingxian Kong Date: Wed, 7 Jul 2021 09:34:17 +1200 Subject: [PATCH] Keep user defined configuration after resizing instance * Never remove user-defined config; the function save_configuration is renamed to reset_configuration in trove-guestagent. * Improve some log messages. * Do not remove the InnoDB log files after resize, as removing them causes the error: Can't open and lock privilege tables: Table './mysql/user' is marked as crashed and should be repaired Story: 2009033 Task: 42773 Change-Id: I9e3165ed9b38b15714542e35456415e65d438497 --- .../notes/xena-fix-resize-instance.yaml | 4 ++ trove/guestagent/api.py | 11 +++-- trove/guestagent/common/configuration.py | 19 +++++---- trove/guestagent/datastore/manager.py | 12 ++++-- .../datastore/mysql_common/manager.py | 4 +- .../datastore/mysql_common/service.py | 5 --- .../guestagent/datastore/postgres/manager.py | 2 +- .../guestagent/datastore/postgres/service.py | 3 -- trove/guestagent/datastore/service.py | 7 +++- trove/taskmanager/models.py | 40 ++++++++----------- 10 files changed, 58 insertions(+), 49 deletions(-) create mode 100644 releasenotes/notes/xena-fix-resize-instance.yaml diff --git a/releasenotes/notes/xena-fix-resize-instance.yaml b/releasenotes/notes/xena-fix-resize-instance.yaml new file mode 100644 index 0000000000..32412dafc8 --- /dev/null +++ b/releasenotes/notes/xena-fix-resize-instance.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - Fixed an issue that the replication configuration is lost after resizing + instance. diff --git a/trove/guestagent/api.py b/trove/guestagent/api.py index eb50b464c9..fc14610097 100644 --- a/trove/guestagent/api.py +++ b/trove/guestagent/api.py @@ -408,7 +408,10 @@ class API(object): self._call("restart", self.agent_high_timeout, version=version) def start_db_with_conf_changes(self, config_contents, ds_version): - """Start the database server.""" + """Start the database with given configuration. 
+ + This method is called after resize. + """ LOG.debug("Sending the call to start the database process on " "the Guest with a timeout of %s.", self.agent_high_timeout) @@ -424,8 +427,10 @@ class API(object): version=version, **start_args) def reset_configuration(self, configuration): - """Ignore running state of the database server; just change - the config file to a new flavor. + """Reset the database base configuration. + + Ignore running state of the database server, just change the config + file to a new flavor. """ LOG.debug("Sending the call to change the database conf file on the " "Guest with a timeout of %s.", diff --git a/trove/guestagent/common/configuration.py b/trove/guestagent/common/configuration.py index 9ea7d4e1fe..b3165080f2 100644 --- a/trove/guestagent/common/configuration.py +++ b/trove/guestagent/common/configuration.py @@ -132,20 +132,23 @@ class ConfigurationManager(object): return base_options - def save_configuration(self, options): + def reset_configuration(self, options, remove_overrides=False): """Write given contents to the base configuration file. - Remove all existing overrides (both system and user). - :param options Contents of the configuration file. - :type options string or dict + Remove all existing overrides (both system and user) as required. + + :param options: Contents of the configuration file (string or dict). + :param remove_overrides: Remove the overrides or not. """ if isinstance(options, dict): # Serialize a dict of options for writing. 
- self.save_configuration(self._codec.serialize(options)) + self.reset_configuration(self._codec.serialize(options), + remove_overrides=remove_overrides) else: - self._override_strategy.remove(self.USER_GROUP) - self._override_strategy.remove(self.SYSTEM_PRE_USER_GROUP) - self._override_strategy.remove(self.SYSTEM_POST_USER_GROUP) + if remove_overrides: + self._override_strategy.remove(self.USER_GROUP) + self._override_strategy.remove(self.SYSTEM_PRE_USER_GROUP) + self._override_strategy.remove(self.SYSTEM_POST_USER_GROUP) operating_system.write_file( self._base_config_path, options, as_root=self._requires_root) diff --git a/trove/guestagent/datastore/manager.py b/trove/guestagent/datastore/manager.py index f290e49ab8..2bdd3c4866 100644 --- a/trove/guestagent/datastore/manager.py +++ b/trove/guestagent/datastore/manager.py @@ -328,6 +328,10 @@ class Manager(periodic_task.PeriodicTasks): pass def start_db_with_conf_changes(self, context, config_contents, ds_version): + """Start the database with given configuration. + + This method is called after resize. + """ self.app.start_db_with_conf_changes(config_contents, ds_version) def stop_db(self, context): @@ -392,15 +396,17 @@ class Manager(periodic_task.PeriodicTasks): # Configuration ############### def reset_configuration(self, context, configuration): - """The default implementation should be sufficient if a + """Reset database base configuration. + + The default implementation should be sufficient if a configuration_manager is provided. Even if one is not, this method needs to be implemented to allow the rollback of flavor-resize on the guestagent side. 
""" - LOG.info("Resetting configuration.") if self.configuration_manager: + LOG.info("Resetting configuration.") config_contents = configuration['config_contents'] - self.configuration_manager.save_configuration(config_contents) + self.configuration_manager.reset_configuration(config_contents) def apply_overrides_on_prepare(self, context, overrides): self.update_overrides(context, overrides) diff --git a/trove/guestagent/datastore/mysql_common/manager.py b/trove/guestagent/datastore/mysql_common/manager.py index e27b47ffaf..538c29e1ed 100644 --- a/trove/guestagent/datastore/mysql_common/manager.py +++ b/trove/guestagent/datastore/mysql_common/manager.py @@ -79,7 +79,7 @@ class MySqlManager(manager.Manager): # Prepare mysql configuration LOG.info('Preparing database configuration') - self.app.configuration_manager.save_configuration(config_contents) + self.app.configuration_manager.reset_configuration(config_contents) self.app.update_overrides(overrides) # Restore data from backup and reset root password @@ -310,7 +310,7 @@ class MySqlManager(manager.Manager): try: # Prepare mysql configuration LOG.debug('Preparing database configuration') - self.app.configuration_manager.save_configuration(config_contents) + self.app.configuration_manager.reset_configuration(config_contents) self.app.update_overrides(config_overrides) # Start database service. 
diff --git a/trove/guestagent/datastore/mysql_common/service.py b/trove/guestagent/datastore/mysql_common/service.py index 64abe54204..4efa091cca 100644 --- a/trove/guestagent/datastore/mysql_common/service.py +++ b/trove/guestagent/datastore/mysql_common/service.py @@ -641,11 +641,6 @@ class BaseMySqlApp(service.BaseDbApp): LOG.exception("Could not delete logfile.") raise - def reset_configuration(self, configuration): - LOG.info("Resetting configuration.") - self.configuration_manager.save_configuration(configuration) - self.wipe_ib_logfiles() - def restart(self): LOG.info("Restarting mysql") diff --git a/trove/guestagent/datastore/postgres/manager.py b/trove/guestagent/datastore/postgres/manager.py index 2a2da7f0ab..b233d5b350 100644 --- a/trove/guestagent/datastore/postgres/manager.py +++ b/trove/guestagent/datastore/postgres/manager.py @@ -54,7 +54,7 @@ class PostgresManager(manager.Manager): as_root=True) LOG.info('Preparing database config files') - self.app.configuration_manager.save_configuration(config_contents) + self.app.configuration_manager.reset_configuration(config_contents) self.app.set_data_dir(self.app.datadir) self.app.update_overrides(overrides) diff --git a/trove/guestagent/datastore/postgres/service.py b/trove/guestagent/datastore/postgres/service.py index 4c2435f039..a2cc986516 100644 --- a/trove/guestagent/datastore/postgres/service.py +++ b/trove/guestagent/datastore/postgres/service.py @@ -147,9 +147,6 @@ class PgSqlApp(service.BaseDbApp): if overrides: self.configuration_manager.apply_user_override(overrides) - def reset_configuration(self, configuration): - self.configuration_manager.save_configuration(configuration) - def apply_overrides(self, overrides): """Reload config.""" cmd = "pg_ctl reload" diff --git a/trove/guestagent/datastore/service.py b/trove/guestagent/datastore/service.py index fe30652d2a..a10f821de6 100644 --- a/trove/guestagent/datastore/service.py +++ b/trove/guestagent/datastore/service.py @@ -378,7 +378,8 @@ class 
BaseDbApp(object): self.configuration_manager.remove_user_override() def reset_configuration(self, configuration): - pass + LOG.info("Resetting configuration.") + self.configuration_manager.reset_configuration(configuration) def stop_db(self, update_db=False): LOG.info("Stopping database.") @@ -399,6 +400,10 @@ class BaseDbApp(object): pass def start_db_with_conf_changes(self, config_contents, ds_version): + """Start the database with given configuration. + + This method is called after resize. + """ LOG.info(f"Starting database service with new configuration and " f"datastore version {ds_version}.") diff --git a/trove/taskmanager/models.py b/trove/taskmanager/models.py index 5be6f08c1d..9626e29f54 100755 --- a/trove/taskmanager/models.py +++ b/trove/taskmanager/models.py @@ -58,7 +58,6 @@ from trove.common.utils import try_recover from trove.extensions.mysql import models as mysql_models from trove.instance import models as inst_models from trove.instance import service_status as srvstatus -from trove.instance.models import BuiltInstance from trove.instance.models import DBInstance from trove.instance.models import FreshInstance from trove.instance.models import Instance @@ -1093,7 +1092,7 @@ class FreshInstanceTasks(FreshInstance, NotifyMixin, ConfigurationMixin): return sg_id -class BuiltInstanceTasks(BuiltInstance, NotifyMixin, ConfigurationMixin): +class BuiltInstanceTasks(Instance, NotifyMixin, ConfigurationMixin): """ BuiltInstanceTasks contains the tasks related an instance that already associated with a compute server. 
@@ -1193,7 +1192,7 @@ class BuiltInstanceTasks(BuiltInstance, NotifyMixin, ConfigurationMixin): self.reset_task_status() def attach_replica(self, master, restart=False): - LOG.info("Attaching replica %s to master %s", self.id, master.id) + LOG.info("Attaching replica %s to primary %s", self.id, master.id) try: replica_info = master.guest.get_replica_context() flavor = self.nova_client.flavors.get(self.flavor_id) @@ -1856,15 +1855,17 @@ class ResizeActionBase(object): pass def _assert_datastore_is_ok(self): + LOG.info(f"Re-config database for instance {self.instance.id} after " + f"resize") self._start_datastore() def _assert_processes_are_ok(self): """Checks the procs; if anything is wrong, reverts the operation.""" # Tell the guest to turn back on, and make sure it can start. + LOG.info(f"Waiting for database status changed after resizing " + f"{self.instance.id}") self._assert_guest_is_ok() - LOG.debug("Nova guest is ok.") self._assert_datastore_is_ok() - LOG.debug("Datastore is ok.") def _confirm_nova_action(self): LOG.debug("Instance %s calling Compute confirm resize...", @@ -1893,7 +1894,7 @@ class ResizeActionBase(object): def execute(self): """Initiates the action.""" try: - LOG.debug("Instance %s calling stop_db...", self.instance.id) + LOG.info(f"Stopping database service for {self.instance.id}") self.instance.guest.stop_db(do_not_start_on_reboot=True) except Exception as e: if self.ignore_stop_error: @@ -1927,25 +1928,17 @@ class ResizeActionBase(object): def _perform_nova_action(self): """Calls Nova to resize or migrate an instance, and confirms.""" - LOG.debug("Begin resize method _perform_nova_action instance: %s", - self.instance.id) need_to_revert = False try: - LOG.debug("Initiating nova action") self._initiate_nova_action() - LOG.debug("Waiting for nova action completed") self._wait_for_nova_action() - LOG.debug("Asserting nova status is ok") self._assert_nova_status_is_ok() need_to_revert = True - LOG.debug("Asserting nova action success") 
self._assert_nova_action_was_successful() - LOG.debug("Asserting processes are OK") self._assert_processes_are_ok() - LOG.debug("Confirming nova action") self._confirm_nova_action() except Exception: - LOG.exception("Exception during nova action.") + LOG.exception(f"Failed to resize instance {self.instance.id}") if need_to_revert: LOG.error("Reverting action for instance %s", self.instance.id) @@ -1953,13 +1946,12 @@ class ResizeActionBase(object): self._wait_for_revert_nova_action() if self.instance.server_status_matches(['ACTIVE']): - LOG.error("Restarting datastore.") + LOG.error(f"Restarting instance {self.instance.id}") self.instance.guest.restart() else: - LOG.error("Cannot restart datastore because " - "Nova server status is not ACTIVE") + LOG.error(f"Cannot restart instance {self.instance.id} " + f"because Nova server status is not ACTIVE") - LOG.error("Error resizing instance %s.", self.instance.id) raise self._record_action_success() @@ -1967,8 +1959,8 @@ class ResizeActionBase(object): self.instance.id) def _wait_for_nova_action(self): - LOG.info(f"Waiting for Nova server status changed to " - f"{self.wait_status}") + LOG.debug(f"Waiting for Nova server status changed to " + f"{self.wait_status} for {self.instance.id}") def update_server_info(): self.instance.refresh_compute_server_info() @@ -2014,6 +2006,7 @@ class ResizeAction(ResizeActionBase): raise TroveError(msg) def _initiate_nova_action(self): + LOG.info(f"Resizing Nova server for instance {self.instance.id}") self.instance.server.resize(self.new_flavor_id) def _revert_nova_action(self): @@ -2087,7 +2080,7 @@ class RebuildAction(ResizeActionBase): self.instance.datastore.name, self.instance.datastore_version.name) - LOG.debug(f"Rebuilding Nova server {self.instance.server.id}") + LOG.info(f"Rebuilding Nova server for instance {self.instance.id}") # Before Nova version 2.57, userdata is not supported when doing # rebuild, have to use injected files instead. 
self.instance.server.rebuild( @@ -2120,7 +2113,8 @@ class RebuildAction(ResizeActionBase): self.instance.datastore_version.version, config_contents=config_contents, config_overrides=overrides) - LOG.info(f"Waiting for instance {self.instance.id} healthy") + LOG.info(f"Waiting for instance {self.instance.id} healthy after " + f"rebuild") self._assert_guest_is_ok() self.wait_for_healthy() LOG.info(f"Finished to rebuild {self.instance.id}")