Fixes the way to check db instance status

This PR changes the way to check instance status. Since Victoria, Trove
has changed the db instance status name from `RUNNING` to `HEALTHY`[1].

Original problem:
Some clustering databases like Apache Cassandra fail to update cluster
status because they check db instance status using `RUNNING`.

[1]: a0a10f0b94

Story: 2010147
Task: 45791
Change-Id: Iaa032fb46ed51b6e416e7d4efdfd272924ba146b
This commit is contained in:
Hirotaka Wakabayashi 2022-07-15 18:36:06 +09:00 committed by wu.chunyang
parent 87ba3b7876
commit c167159c4a
4 changed files with 72 additions and 1 deletions

View File

@ -0,0 +1,6 @@
---
fixes:
- |
Fix the way to check instance status. Since Victoria, Trove has changed
the database instance status name from `RUNNING` to `HEALTHY`, but
some clustering databases like Apache Cassandra still used `RUNNING`.

View File

@ -238,7 +238,7 @@ class CassandraClusterTasks(task_models.ClusterTasks):
node['guest'].node_cleanup()
LOG.debug("Waiting for node to finish its "
"cleanup: %s", nid)
if not self._all_instances_running([nid], cluster_id):
if not self._all_instances_healthy([nid], cluster_id):
LOG.warning("Node did not complete cleanup "
"successfully: %s", nid)

View File

@ -230,6 +230,17 @@ class ClusterTasks(Cluster):
]
)
def _all_instances_healthy(self, instance_ids, cluster_id, shard_id=None):
    """Block until every listed instance reports the HEALTHY status.

    :param instance_ids: ids of the instances to poll.
    :param cluster_id: cluster the instances belong to.
    :param shard_id: optional shard to restrict the check to.
    :returns: True if all instances reached HEALTHY, False otherwise.
    """
    # Bail out early if any instance lands in a terminal failure state.
    abort_statuses = [
        srvstatus.ServiceStatuses.FAILED,
        srvstatus.ServiceStatuses.FAILED_TIMEOUT_GUESTAGENT
    ]
    return self._all_instances_acquire_status(
        instance_ids, cluster_id, shard_id,
        srvstatus.ServiceStatuses.HEALTHY,
        fast_fail_statuses=abort_statuses)
def _all_instances_acquire_status(
self, instance_ids, cluster_id, shard_id, expected_status,
fast_fail_statuses=None):

View File

@ -34,6 +34,60 @@ from trove.instance.service_status import ServiceStatuses
from trove.tests.unittests import trove_testtools
class CassandraClusterTasksTest(trove_testtools.TestCase):
    """Unit tests for the Cassandra cluster task workflows."""

    def _make_member(self, instance_id, name, compute_id, volume_id):
        # Build one cluster-member DBInstance record on shard-1.
        return DBInstance(InstanceTasks.NONE, id=instance_id, name=name,
                          compute_instance_id=compute_id,
                          task_id=InstanceTasks.NONE._code,
                          task_description=InstanceTasks.NONE._db_text,
                          volume_id=volume_id,
                          datastore_version_id="1",
                          cluster_id=self.cluster_id,
                          shard_id="shard-1",
                          type="member")

    def setUp(self):
        """Create a fake two-member cluster and its ClusterTasks runner."""
        super(CassandraClusterTasksTest, self).setUp()
        self.cluster_id = "1234"
        self.cluster_name = "test1"
        self.tenant_id = "2345"
        # NOTE(review): str(datetime.date) stringifies the class itself;
        # kept as-is since these timestamps are never inspected by the tests.
        stamp = str(datetime.date)
        self.db_cluster = DBCluster(ClusterTaskStatus.NONE,
                                    id=self.cluster_id,
                                    created=stamp,
                                    updated=stamp,
                                    name=self.cluster_name,
                                    task_id=ClusterTaskStatus.NONE._code,
                                    tenant_id=self.tenant_id,
                                    datastore_version_id="1",
                                    deleted=False)
        self.dbinst1 = self._make_member("1", "member1",
                                         "compute-1", "volume-1")
        self.dbinst2 = self._make_member("2", "member2",
                                         "compute-2", "volume-2")
        datastore = Mock()
        datastore.name = 'cassandra'
        datastore_version = Mock()
        datastore_version.name = '4.0.0'
        self.clustertasks = ClusterTasks(Mock(),
                                         self.db_cluster,
                                         datastore=datastore,
                                         datastore_version=datastore_version)

    @patch.object(DBInstance, 'find_by')
    @patch.object(InstanceServiceStatus, 'find_by')
    def test_all_instances_healthy(self, mock_find, mock_db_find):
        """_all_instances_healthy is True when every member is HEALTHY."""
        mock_find.return_value.get_status.return_value = (
            ServiceStatuses.HEALTHY)
        mock_db_find.return_value.get_task_status.return_value = (
            InstanceTasks.NONE)
        self.assertTrue(
            self.clustertasks._all_instances_healthy(["1", "2"],
                                                     self.cluster_id))
class MongoDbClusterTasksTest(trove_testtools.TestCase):
def setUp(self):
super(MongoDbClusterTasksTest, self).setUp()