From c1c51db08d28528f6428fa611b3d0f88fdd65182 Mon Sep 17 00:00:00 2001
From: Robert Church
Date: Mon, 21 May 2018 09:10:15 -0500
Subject: [PATCH] Update upgrade code for removing Ceph Cache Tiering

Story: 2002884
Task: 22846

Change-Id: I31cf3eb7de935676790cf7e4c1b40307d7110390
Signed-off-by: Don Penney
Signed-off-by: Jack Ding
---
 .../ceph-manager/ceph_manager/constants.py |  2 +-
 .../ceph-manager/ceph_manager/monitor.py   | 44 +++++++++----------
 .../ceph-manager/ceph_manager/server.py    |  7 +--
 3 files changed, 23 insertions(+), 30 deletions(-)

diff --git a/ceph-manager/ceph-manager/ceph_manager/constants.py b/ceph-manager/ceph-manager/ceph_manager/constants.py
index ede99b2a..6cfbba4f 100644
--- a/ceph-manager/ceph-manager/ceph_manager/constants.py
+++ b/ceph-manager/ceph-manager/ceph_manager/constants.py
@@ -65,7 +65,7 @@ CEPH_MANAGER_TOPIC = 'sysinv.ceph_manager'
 SYSINV_CONFIG_FILE = '/etc/sysinv/sysinv.conf'
 
 # Titanium Cloud version strings
-TITANIUM_SERVER_VERSION_16_10 = '16.10'
+TITANIUM_SERVER_VERSION_18_03 = '18.03'
 
 CEPH_HEALTH_WARN_REQUIRE_JEWEL_OSDS_NOT_SET = (
     "all OSDs are running jewel or later but the "
diff --git a/ceph-manager/ceph-manager/ceph_manager/monitor.py b/ceph-manager/ceph-manager/ceph_manager/monitor.py
index c0960fbd..44c56b22 100644
--- a/ceph-manager/ceph-manager/ceph_manager/monitor.py
+++ b/ceph-manager/ceph-manager/ceph_manager/monitor.py
@@ -22,20 +22,18 @@ import exception
 
 LOG = logging.getLogger(__name__)
 
-# When upgrading from 16.10 to 17.x Ceph goes from Hammer release
-# to Jewel release. After all storage nodes are upgraded to 17.x
-# the cluster is in HEALTH_WARN until administrator explicitly
-# enables require_jewel_osds flag - which signals Ceph that it
-# can safely transition from Hammer to Jewel
+# In 18.03 R5, ceph cache tiering was disabled and prevented from being
+# re-enabled. When upgrading from 18.03 (R5) to R6 we need to remove the
+# cache-tier from the crushmap.
 #
-# This class is needed only when upgrading from 16.10 to 17.x
-# TODO: remove it after 1st 17.x release
+# This class is needed only when upgrading from R5 to R6
+# TODO: remove it after 1st R6 release
 #
 class HandleUpgradesMixin(object):
 
     def __init__(self, service):
         self.service = service
-        self.surpress_require_jewel_osds_warning = False
+        self.wait_for_upgrade_complete = False
 
     def setup(self, config):
         self._set_upgrade(self.service.retry_get_software_upgrade_status())
@@ -45,9 +43,10 @@ class HandleUpgradesMixin(object):
         from_version = upgrade.get('from_version')
         if (state
                 and state != constants.UPGRADE_COMPLETED
-                and from_version == constants.TITANIUM_SERVER_VERSION_16_10):
-            LOG.info(_LI("Surpress require_jewel_osds health warning"))
-            self.surpress_require_jewel_osds_warning = True
+                and from_version == constants.TITANIUM_SERVER_VERSION_18_03):
+
+            LOG.info(_LI("Wait for ceph upgrade to complete before monitoring cluster."))
+            self.wait_for_upgrade_complete = True
 
     def set_flag_require_jewel_osds(self):
         try:
@@ -73,7 +72,7 @@ class HandleUpgradesMixin(object):
         health = self.auto_heal(health)
         # filter out require_jewel_osds warning
         #
-        if not self.surpress_require_jewel_osds_warning:
+        if not self.wait_for_upgrade_complete:
             return health
         if health['health'] != constants.CEPH_HEALTH_WARN:
             return health
@@ -114,17 +113,16 @@ class HandleUpgradesMixin(object):
             state = upgrade.get('state')
             # surpress require_jewel_osds in case upgrade is
             # in progress but not completed or aborting
-            if (not self.surpress_require_jewel_osds_warning
+            if (not self.wait_for_upgrade_complete
                     and (upgrade.get('from_version')
-                         == constants.TITANIUM_SERVER_VERSION_16_10)
+                         == constants.TITANIUM_SERVER_VERSION_18_03)
                     and state not in [
                         None,
                         constants.UPGRADE_COMPLETED,
                         constants.UPGRADE_ABORTING,
                         constants.UPGRADE_ABORT_COMPLETING,
                         constants.UPGRADE_ABORTING_ROLLBACK]):
-                LOG.info(_LI("Surpress require_jewel_osds health warning"))
-                self.surpress_require_jewel_osds_warning = True
+                self.wait_for_upgrade_complete = True
             # set require_jewel_osds in case upgrade is
             # not in progress or completed
             if (state in [None, constants.UPGRADE_COMPLETED]):
@@ -135,16 +133,14 @@ class HandleUpgradesMixin(object):
                 self.set_flag_require_jewel_osds()
                 health = self._remove_require_jewel_osds_warning(health)
                 LOG.info(_LI("Unsurpress require_jewel_osds health warning"))
-                self.surpress_require_jewel_osds_warning = False
+                self.wait_for_upgrade_complete = False
             # unsurpress require_jewel_osds in case upgrade
             # is aborting
-            if (self.surpress_require_jewel_osds_warning
-                    and state in [
-                        constants.UPGRADE_ABORTING,
-                        constants.UPGRADE_ABORT_COMPLETING,
-                        constants.UPGRADE_ABORTING_ROLLBACK]):
-                LOG.info(_LI("Unsurpress require_jewel_osds health warning"))
-                self.surpress_require_jewel_osds_warning = False
+            if (state in [
+                    constants.UPGRADE_ABORTING,
+                    constants.UPGRADE_ABORT_COMPLETING,
+                    constants.UPGRADE_ABORTING_ROLLBACK]):
+                self.wait_for_upgrade_complete = False
         return health
 
 
diff --git a/ceph-manager/ceph-manager/ceph_manager/server.py b/ceph-manager/ceph-manager/ceph_manager/server.py
index c8b96a72..72edf406 100644
--- a/ceph-manager/ceph-manager/ceph_manager/server.py
+++ b/ceph-manager/ceph-manager/ceph_manager/server.py
@@ -97,9 +97,6 @@ class RpcEndpoint(PeriodicTasks):
         return self.service.monitor.cluster_is_up
 
 
-# This class is needed only when upgrading from 16.10 to 17.x
-# TODO: remove it after 1st 17.x release
-#
 class SysinvConductorUpgradeApi(object):
     def __init__(self):
         self.sysinv_conductor = None
@@ -113,10 +110,10 @@ class SysinvConductorUpgradeApi(object):
         return upgrade
 
     @retry(wait_fixed=1000,
-           retry_on_exception=lambda exception:
+           retry_on_exception=lambda e:
                LOG.warn(_LW(
                    "Getting software upgrade status failed "
-                   "with: %s. Retrying... ") % str(exception)) or True)
+                   "with: %s. Retrying... ") % str(e)) or True)
     def retry_get_software_upgrade_status(self):
         return self.get_software_upgrade_status()
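
The gating behavior this patch introduces can be summarized with a small
standalone sketch (illustrative only: the UpgradeGate class, the module-level
constants, and the upgrade dict below are simplified stand-ins for the real
sysinv/ceph_manager objects, not the module's actual API):

# Minimal sketch of the wait_for_upgrade_complete gate, assuming a
# simplified upgrade-status dict; not the real ceph_manager module.

UPGRADE_COMPLETED = 'completed'          # stand-in for constants.UPGRADE_COMPLETED
TITANIUM_SERVER_VERSION_18_03 = '18.03'  # stand-in for the constants module value
CEPH_HEALTH_WARN = 'HEALTH_WARN'         # stand-in for constants.CEPH_HEALTH_WARN


class UpgradeGate(object):
    def __init__(self):
        # Armed only while an upgrade from 18.03 (R5) is in flight.
        self.wait_for_upgrade_complete = False

    def set_upgrade(self, upgrade):
        # Arm the gate when an upgrade from 18.03 exists and has not
        # yet completed; mirrors _set_upgrade() in the patched module.
        state = upgrade.get('state')
        if (state
                and state != UPGRADE_COMPLETED
                and upgrade.get('from_version')
                == TITANIUM_SERVER_VERSION_18_03):
            self.wait_for_upgrade_complete = True

    def filter_health_status(self, health):
        # With the gate disarmed, health reports pass through untouched;
        # while armed, the real module strips upgrade-transient warnings
        # from HEALTH_WARN reports at the marked point below.
        if not self.wait_for_upgrade_complete:
            return health
        if health['health'] != CEPH_HEALTH_WARN:
            return health
        return health  # warning filtering happens here in the real module


if __name__ == '__main__':
    gate = UpgradeGate()
    gate.set_upgrade({'state': 'started', 'from_version': '18.03'})
    assert gate.wait_for_upgrade_complete
    print(gate.filter_health_status({'health': 'HEALTH_OK'}))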