Negative Taskmanager Resize/Migration fixes.

* The revert barrier should come right after the VERIFY_RESIZE status is
  confirmed, but before the new flavor is confirmed for a resize action.
* Verify the Nova server status before restarting MySQL.
  If anything goes wrong during a resize/migrate action, taskmanager
  first checks that the Nova server status is ACTIVE and only then makes
  the call to restart MySQL.

Fixes: bug #1102523

Change-Id: Ibca436d7fdcdef9f1afcec111da84891cd15353c
This commit is contained in:
Joe Cruz 2013-01-18 08:55:09 -06:00 committed by Joe Cruz
parent 52344398de
commit d9c9a91642
4 changed files with 53 additions and 11 deletions

View File

@ -83,6 +83,7 @@ server_delete_time_out=10
use_nova_server_volume = False
dns_time_out = 120
resize_time_out = 120
revert_time_out = 120
# ============ notifier queue kombu connection options ========================

View File

@ -90,6 +90,7 @@ common_opts = [
cfg.StrOpt('service_options', default=['mysql']),
cfg.IntOpt('dns_time_out', default=60 * 2),
cfg.IntOpt('resize_time_out', default=60 * 10),
cfg.IntOpt('revert_time_out', default=60 * 10),
]

View File

@ -50,6 +50,7 @@ CONF = cfg.CONF
VOLUME_TIME_OUT = CONF.volume_time_out # seconds.
DNS_TIME_OUT = CONF.dns_time_out # seconds.
RESIZE_TIME_OUT = CONF.resize_time_out # seconds.
REVERT_TIME_OUT = CONF.revert_time_out # seconds.
use_nova_server_volume = CONF.use_nova_server_volume
@ -423,7 +424,7 @@ class ResizeActionBase(object):
sleep_time=2,
time_out=RESIZE_TIME_OUT)
def _assert_nova_was_successful(self):
def _assert_nova_status_is_ok(self):
# Make sure Nova thinks things went well.
if self.instance.server.status != "VERIFY_RESIZE":
msg = "Migration failed! status=%s and not %s" \
@ -480,11 +481,13 @@ class ResizeActionBase(object):
self._initiate_nova_action()
LOG.debug("Waiting for nova action")
self._wait_for_nova_action()
LOG.debug("Asserting success")
self._assert_nova_was_successful()
LOG.debug("Asserting processes are OK")
LOG.debug("Asserting nova status is ok")
self._assert_nova_status_is_ok()
need_to_revert = True
LOG.debug("* * * REVERT BARRIER PASSED * * *")
LOG.debug("Asserting nova action success")
self._assert_nova_action_was_successful()
LOG.debug("Asserting processes are OK")
self._assert_processes_are_ok()
LOG.debug("Confirming nova action")
self._confirm_nova_action()
@ -494,7 +497,15 @@ class ResizeActionBase(object):
LOG.error("Reverting action for instance %s" %
self.instance.id)
self._revert_nova_action()
self.instance.guest.restart()
self._wait_for_revert_nova_action()
if self.instance.server.status == 'ACTIVE':
LOG.error("Restarting MySQL.")
self.instance.guest.restart()
else:
LOG.error("Can not restart MySQL because "
"Nova server status is not ACTIVE")
LOG.error("Error resizing instance %s." % self.instance.id)
raise ex
@ -511,6 +522,16 @@ class ResizeActionBase(object):
sleep_time=2,
time_out=RESIZE_TIME_OUT)
def _wait_for_revert_nova_action(self):
# Wait for the server to return to ACTIVE after revert.
# Blocks in utils.poll_until, re-checking every 2 seconds with a time
# limit of REVERT_TIME_OUT (read from CONF.revert_time_out, in seconds).
# NOTE(review): poll_until presumably raises PollTimeOut when the limit
# is reached (the tests mock it that way) -- confirm in utils.
def update_server_info():
# Poll predicate: true once the server status reads 'ACTIVE'.
# NOTE(review): _refresh_compute_server_info presumably reloads
# self.instance.server from Nova -- confirm.
self.instance._refresh_compute_server_info()
return self.instance.server.status == 'ACTIVE'
utils.poll_until(
update_server_info,
sleep_time=2,
time_out=REVERT_TIME_OUT)
class ResizeAction(ResizeActionBase):
@ -519,13 +540,12 @@ class ResizeAction(ResizeActionBase):
self.new_flavor_id = new_flavor_id
self.new_memory_size = new_memory_size
def _assert_nova_was_successful(self):
def _assert_nova_action_was_successful(self):
# Do check to make sure the status and flavor id are correct.
if str(self.instance.server.flavor['id']) != str(self.new_flavor_id):
msg = "Assertion failed! flavor_id=%s and not %s" \
% (self.instance.server.flavor['id'], self.new_flavor_id)
raise ReddwarfError(msg)
super(ResizeAction, self)._assert_nova_was_successful()
def _initiate_nova_action(self):
self.instance.server.resize(self.new_flavor_id)
@ -541,6 +561,9 @@ class ResizeAction(ResizeActionBase):
class MigrateAction(ResizeActionBase):
def _assert_nova_action_was_successful(self):
# Migrate counterpart of the check ResizeAction uses to compare flavor
# ids; a migration keeps the flavor, so this only logs and never raises.
LOG.debug("Currently no assertions for a Migrate Action")
def _initiate_nova_action(self):
LOG.debug("Migrating instance %s without flavor change ..."
% self.instance.id)

View File

@ -61,6 +61,7 @@ class ResizeTestBase(TestCase):
self.mock.StubOutWithMock(self.instance, 'update_db')
self.mock.StubOutWithMock(self.instance,
'_set_service_status_to_paused')
self.poll_until_mocked = False
self.action = None
def _teardown(self):
@ -81,7 +82,9 @@ class ResizeTestBase(TestCase):
self.server.status = new_status
self.instance.server.flavor['id'] = new_flavor_id
self.mock.StubOutWithMock(utils, "poll_until")
if not self.poll_until_mocked:
self.mock.StubOutWithMock(utils, "poll_until")
self.poll_until_mocked = True
utils.poll_until(mox.IgnoreArg(), sleep_time=2, time_out=120)\
.WithSideEffects(lambda ignore, sleep_time, time_out: change())
@ -113,6 +116,7 @@ class ResizeTests(ResizeTestBase):
def test_nova_wont_resize(self):
self._stop_mysql()
self.server.resize(NEW_FLAVOR_ID).AndRaise(BadRequest)
self.server.status = "ACTIVE"
self.guest.restart()
def test_nova_resize_timeout(self):
@ -122,19 +126,19 @@ class ResizeTests(ResizeTestBase):
self.mock.StubOutWithMock(utils, 'poll_until')
utils.poll_until(mox.IgnoreArg(), sleep_time=2, time_out=120)\
.AndRaise(PollTimeOut)
self.guest.restart()
def test_nova_doesnt_change_flavor(self):
self._stop_mysql()
self.server.resize(NEW_FLAVOR_ID)
self._server_changes_to("VERIFY_RESIZE", OLD_FLAVOR_ID)
self.instance.server.revert_resize()
self._server_changes_to("ACTIVE", OLD_FLAVOR_ID)
self.guest.restart()
def test_nova_resize_fails(self):
self._stop_mysql()
self.server.resize(NEW_FLAVOR_ID)
self._server_changes_to("ACTIVE", OLD_FLAVOR_ID)
self.guest.restart()
self._server_changes_to("ERROR", OLD_FLAVOR_ID)
def test_nova_resizes_in_weird_state(self):
self._stop_mysql()
@ -150,6 +154,7 @@ class ResizeTests(ResizeTestBase):
utils.poll_until(mox.IgnoreArg(), sleep_time=2, time_out=120)\
.AndRaise(PollTimeOut)
self.instance.server.revert_resize()
self._server_changes_to("ACTIVE", OLD_FLAVOR_ID)
self.guest.restart()
def test_mysql_is_not_okay(self):
@ -160,6 +165,8 @@ class ResizeTests(ResizeTestBase):
utils.poll_until(mox.IgnoreArg(), sleep_time=2, time_out=120)
self._start_mysql()
self.instance.server.revert_resize()
self._server_changes_to("ACTIVE", OLD_FLAVOR_ID)
self.guest.restart()
def test_confirm_resize_fails(self):
self._stop_mysql()
@ -171,6 +178,16 @@ class ResizeTests(ResizeTestBase):
self.server.status = "SHUTDOWN"
self.instance.server.confirm_resize()
def test_revert_nova_fails(self):
# Scenario: Nova completes the resize, a later poll_until call times
# out, revert_resize is requested, and the server then reports ERROR.
# No guest.restart() expectation is recorded for this scenario.
self._stop_mysql()
self._nova_resizes_successfully()
self.instance._set_service_status_to_paused()
self.instance.service_status = ServiceStatuses.PAUSED
# This poll raises PollTimeOut instead of completing.
utils.poll_until(mox.IgnoreArg(), sleep_time=2, time_out=120)\
.AndRaise(PollTimeOut)
self.instance.server.revert_resize()
# The poll that follows the revert leaves the server in ERROR.
self._server_changes_to("ERROR", OLD_FLAVOR_ID)
@test(groups=[GROUP, GROUP + '.migrate'])
class MigrateTests(ResizeTestBase):