Fixes a race condition in resize flavor

When resizing an instance's flavor, Trove asks the guest to start the
datastore via an RPC call and then, to be safe, checked the
service_status table in the database to make sure it was updated to
RUNNING. That second check was not only superfluous, it was causing
resizes to fail in the post-conductor world: although the guest sends
a message to conductor describing the datastore's current state before
it informs taskmanager that it has finished the call, conductor itself
might not yet have updated the database. This commit changes things so
that taskmanager polls until the service_status is RUNNING after
calling the guest. If there is an error, the call to the guest will
fail anyway.

Change-Id: I4e1ca75a150ed58233c21372d21c6337596e43d0
Closes-Bug: 1278282
This commit is contained in:
Tim Simpson 2014-02-11 09:41:25 -06:00
parent c72edbdec1
commit c85e5260e8
2 changed files with 14 additions and 6 deletions

View File

@ -1181,13 +1181,12 @@ class ResizeActionBase(ConfigurationMixin):
def _assert_mysql_is_ok(self):
    """Start MySQL on the guest and wait for its status to become RUNNING.

    The status is polled via ``_datastore_is_online`` every 2 seconds for
    up to ``RESIZE_TIME_OUT`` seconds. Any exception raised by
    ``utils.poll_until`` (presumably a poll time-out; confirm against
    ``utils``) propagates to the caller.
    """
    # Tell the guest to turn on MySQL, and ensure the status becomes
    # RUNNING.
    self._start_mysql()
    # Do not check the status once and fail immediately: the guest reports
    # its state through conductor, which may not have written it to the
    # database yet when the start call returns. Poll instead.
    utils.poll_until(
        self._datastore_is_online,
        sleep_time=2,
        time_out=RESIZE_TIME_OUT)
def _assert_processes_are_ok(self):
"""Checks the procs; if anything is wrong, reverts the operation."""
@ -1202,6 +1201,11 @@ class ResizeActionBase(ConfigurationMixin):
% self.instance.id)
self.instance.server.confirm_resize()
def _datastore_is_online(self):
    """Refresh the instance's service status and report if it is RUNNING.

    Returns True when the refreshed ``service_status`` equals
    ``rd_instance.ServiceStatuses.RUNNING``, otherwise False.
    """
    instance = self.instance
    # Re-read the status first so the comparison uses the latest value.
    instance._refresh_compute_service_status()
    running = rd_instance.ServiceStatuses.RUNNING
    return instance.service_status == running
def _revert_nova_action(self):
LOG.debug(_("Instance %s calling Compute revert resize...")
% self.instance.id)

View File

@ -178,6 +178,8 @@ class ResizeTests(ResizeTestBase):
self.instance.service_status = rd_instance.ServiceStatuses.SHUTDOWN
utils.poll_until(mox.IgnoreArg(), sleep_time=2, time_out=120)
self._start_mysql()
utils.poll_until(mox.IgnoreArg(), sleep_time=2,
time_out=120).AndRaise(PollTimeOut)
self.instance.guest.reset_configuration(mox.IgnoreArg())
self.instance.server.revert_resize()
self._server_changes_to("ACTIVE", OLD_FLAVOR_ID)
@ -190,6 +192,7 @@ class ResizeTests(ResizeTestBase):
self.instance.service_status = rd_instance.ServiceStatuses.RUNNING
utils.poll_until(mox.IgnoreArg(), sleep_time=2, time_out=120)
self._start_mysql()
utils.poll_until(mox.IgnoreArg(), sleep_time=2, time_out=120)
self.server.status = "SHUTDOWN"
self.instance.server.confirm_resize()
@ -236,4 +239,5 @@ class MigrateTests(ResizeTestBase):
self.instance.service_status = rd_instance.ServiceStatuses.RUNNING
utils.poll_until(mox.IgnoreArg(), sleep_time=2, time_out=120)
self._start_mysql()
utils.poll_until(mox.IgnoreArg(), sleep_time=2, time_out=120)
self.instance.server.confirm_resize()