From dd9ca375dcf651ba892bd73425cfa1e39eab651c Mon Sep 17 00:00:00 2001 From: Ngairangbam Mili Date: Tue, 2 Jul 2024 09:04:22 +0000 Subject: [PATCH] Host Software Deployment (USM) Story: 2010676 Task: 50141 Change-Id: I2ab679259b22c71d1e544a036d2cfa5cc8f81a2c Signed-off-by: Ngairangbam Mili --- doc/source/_vendor/rl-strings.txt | 27 - ...-regionone-and-subclouds-using-the-cli.rst | 5 +- ...ing-the-systemcontroller-using-the-cli.rst | 2 - ...ates-to-systemcontroller-using-horizon.rst | 12 +- .../aborting-simplex-system-upgrades.rst | 110 --- ...onfigure-firmware-update-orchestration.rst | 5 +- .../configuring-update-orchestration.rst | 194 ----- ...version-and-update-level-using-horizon.rst | 35 - ...version-and-update-level-using-the-cli.rst | 58 -- ...ersus-reboot-required-software-updates.rst | 23 - .../index-updates-kub-03d4d10fa0be.rst | 141 ++-- ...-service-software-update-using-horizon.rst | 81 --- ...service-software-updates-using-the-cli.rst | 131 ---- ...equired-software-updates-using-horizon.rst | 126 ---- ...equired-software-updates-using-the-cli.rst | 300 -------- ...e-updates-before-initial-commissioning.rst | 105 --- .../kubernetes/managing-software-updates.rst | 112 --- ...-host-software-deployment-ee17ec6f71a4.rst | 674 ++++++++++++++++++ ...-host-software-deployment-24f47e80e518.rst | 86 +++ ...-host-software-deployment-9295ce1e6e29.rst | 460 ++++++++++++ .../kubernetes/manual-upgrade-overview.rst | 47 -- ...-host-software-deployment-d234754c7d20.rst | 670 +++++++++++++++++ ...-host-software-deployment-3f542895daf8.rst | 135 ++++ ...-host-software-deployment-c6b12f13a8a1.rst | 255 +++++++ .../orchestration-upgrade-overview.rst | 139 ---- .../overview-of-upgrade-abort-procedure.rst | 29 - ...-an-orchestrated-upgrade-using-the-cli.rst | 340 --------- .../performing-an-orchestrated-upgrade.rst | 184 ----- .../populating-the-storage-area.rst | 82 --- .../kubernetes/reclaiming-disk-space.rst | 95 --- ...oving-reboot-required-software-updates.rst | 117 --- ...de-after-the-second-controller-upgrade.rst | 160 ----- ...e-before-the-second-controller-upgrade.rst | 78 -- .../software-update-space-reclamation.rst | 18 - ...-updates-and-upgrades-software-updates.rst | 12 +- .../updates/kubernetes/software-upgrades.rst | 28 +- .../kubernetes/update-orchestration-cli.rst | 69 -- .../update-orchestration-overview.rst | 95 --- .../update-status-and-lifecycle.rst | 76 -- ...pgrading-all-in-one-duplex-or-standard.rst | 566 --------------- .../upgrading-all-in-one-simplex.rst | 483 ------------- 41 files changed, 2355 insertions(+), 4010 deletions(-) delete mode 100644 doc/source/updates/kubernetes/aborting-simplex-system-upgrades.rst delete mode 100644 doc/source/updates/kubernetes/configuring-update-orchestration.rst delete mode 100644 doc/source/updates/kubernetes/identifying-the-software-version-and-update-level-using-horizon.rst delete mode 100644 doc/source/updates/kubernetes/identifying-the-software-version-and-update-level-using-the-cli.rst delete mode 100644 doc/source/updates/kubernetes/in-service-versus-reboot-required-software-updates.rst delete mode 100644 doc/source/updates/kubernetes/installing-in-service-software-update-using-horizon.rst delete mode 100644 doc/source/updates/kubernetes/installing-in-service-software-updates-using-the-cli.rst delete mode 100644 doc/source/updates/kubernetes/installing-reboot-required-software-updates-using-horizon.rst delete mode 100644 doc/source/updates/kubernetes/installing-reboot-required-software-updates-using-the-cli.rst delete mode 100644 
doc/source/updates/kubernetes/installing-software-updates-before-initial-commissioning.rst delete mode 100644 doc/source/updates/kubernetes/managing-software-updates.rst create mode 100644 doc/source/updates/kubernetes/manual-host-software-deployment-ee17ec6f71a4.rst create mode 100644 doc/source/updates/kubernetes/manual-removal-host-software-deployment-24f47e80e518.rst create mode 100644 doc/source/updates/kubernetes/manual-rollback-host-software-deployment-9295ce1e6e29.rst delete mode 100644 doc/source/updates/kubernetes/manual-upgrade-overview.rst create mode 100644 doc/source/updates/kubernetes/orchestrated-deployment-host-software-deployment-d234754c7d20.rst create mode 100644 doc/source/updates/kubernetes/orchestrated-removal-host-software-deployment-3f542895daf8.rst create mode 100644 doc/source/updates/kubernetes/orchestrated-rollback-host-software-deployment-c6b12f13a8a1.rst delete mode 100644 doc/source/updates/kubernetes/orchestration-upgrade-overview.rst delete mode 100644 doc/source/updates/kubernetes/overview-of-upgrade-abort-procedure.rst delete mode 100644 doc/source/updates/kubernetes/performing-an-orchestrated-upgrade-using-the-cli.rst delete mode 100644 doc/source/updates/kubernetes/performing-an-orchestrated-upgrade.rst delete mode 100644 doc/source/updates/kubernetes/populating-the-storage-area.rst delete mode 100644 doc/source/updates/kubernetes/reclaiming-disk-space.rst delete mode 100644 doc/source/updates/kubernetes/removing-reboot-required-software-updates.rst delete mode 100644 doc/source/updates/kubernetes/rolling-back-a-software-upgrade-after-the-second-controller-upgrade.rst delete mode 100644 doc/source/updates/kubernetes/rolling-back-a-software-upgrade-before-the-second-controller-upgrade.rst delete mode 100644 doc/source/updates/kubernetes/software-update-space-reclamation.rst delete mode 100644 doc/source/updates/kubernetes/update-orchestration-cli.rst delete mode 100644 doc/source/updates/kubernetes/update-orchestration-overview.rst delete mode 100644 doc/source/updates/kubernetes/update-status-and-lifecycle.rst delete mode 100644 doc/source/updates/kubernetes/upgrading-all-in-one-duplex-or-standard.rst delete mode 100644 doc/source/updates/kubernetes/upgrading-all-in-one-simplex.rst diff --git a/doc/source/_vendor/rl-strings.txt b/doc/source/_vendor/rl-strings.txt index c1359f2ee..6a91a4631 100644 --- a/doc/source/_vendor/rl-strings.txt +++ b/doc/source/_vendor/rl-strings.txt @@ -535,45 +535,18 @@ .. |apply-update-to-the-openstack-application| replace:: :ref:`Apply Update to the OpenStack Application ` .. |software-updates-and-upgrades-overview| replace:: :ref:`Overview ` .. |index-updates-e3b970bb69ce| replace:: :ref:`Updates and Upgrades ` -.. |update-orchestration-cli| replace:: :ref:`Update Orchestration CLI ` .. |about-kubernetes-orchestrated-upgrades| replace:: :ref:`About Kubernetes Version Upgrade Cloud Orchestration ` -.. |managing-software-updates| replace:: :ref:`Manage Software Updates ` -.. |update-status-and-lifecycle| replace:: :ref:`Update Status and Lifecycle ` .. |the-kubernetes-update-orchestration-process| replace:: :ref:`Kubernetes Version Upgrade Cloud Orchestration Overview ` -.. |identifying-the-software-version-and-update-level-using-horizon| replace:: :ref:`Identify the Software Version and Update Level Using Horizon ` -.. |removing-reboot-required-software-updates| replace:: :ref:`Remove Reboot-Required Software Updates ` -.. 
|installing-reboot-required-software-updates-using-horizon| replace:: :ref:`Install Reboot-Required Software Updates Using Horizon ` -.. |performing-an-orchestrated-upgrade| replace:: :ref:`Perform an Orchestrated Upgrade ` -.. |orchestration-upgrade-overview| replace:: :ref:`Upgrade Orchestration Overview ` .. |handle-firmware-update-orchestration-failures| replace:: :ref:`Handle Firmware Update Orchestration Failures ` -.. |configuring-update-orchestration| replace:: :ref:`Configure Update Orchestration ` -.. |manual-upgrade-overview| replace:: :ref:`Manual Upgrade Overview ` -.. |upgrading-all-in-one-simplex| replace:: :ref:`Upgrade All-in-One Simplex ` -.. |software-update-space-reclamation| replace:: :ref:`Software Update Space Reclamation ` -.. |aborting-simplex-system-upgrades| replace:: :ref:`Abort Simplex System Upgrades ` -.. |rolling-back-a-software-upgrade-after-the-second-controller-upgrade| replace:: :ref:`Roll Back a Software Upgrade After the Second Controller Upgrade ` -.. |overview-of-upgrade-abort-procedure| replace:: :ref:`Overview of Upgrade Abort Procedure ` -.. |rolling-back-a-software-upgrade-before-the-second-controller-upgrade| replace:: :ref:`Roll Back a Software Upgrade Before the Second Controller Upgrade ` .. |firmware-update-operations-requiring-manual-migration| replace:: :ref:`Firmware Update Operations Requiring Manual Migration ` -.. |reclaiming-disk-space| replace:: :ref:`Reclaim Disk Space ` .. |firmware-update-orchestration-using-the-cli| replace:: :ref:`Firmware Update Orchestration Using the CLI ` .. |configure-firmware-update-orchestration| replace:: :ref:`Configure Firmware Update Orchestration ` .. |configuring-kubernetes-update-orchestration| replace:: :ref:`Create Kubernetes Version Upgrade Cloud Orchestration Strategy ` -.. |identifying-the-software-version-and-update-level-using-the-cli| replace:: :ref:`Identify the Software Version and Update Level Using the CLI ` .. |software-upgrades| replace:: :ref:`Software Upgrades ` -.. |performing-an-orchestrated-upgrade-using-the-cli| replace:: :ref:`Perform an Orchestrated Upgrade Using the CLI ` -.. |installing-software-updates-before-initial-commissioning| replace:: :ref:`Install Software Updates Before Initial Commissioning ` -.. |update-orchestration-overview| replace:: :ref:`Update Orchestration Overview ` -.. |in-service-versus-reboot-required-software-updates| replace:: :ref:`In-Service Versus Reboot-Required Software Updates ` -.. |installing-in-service-software-updates-using-the-cli| replace:: :ref:`Install In-Service Software Updates Using the CLI ` -.. |installing-in-service-software-update-using-horizon| replace:: :ref:`Install In-Service Software Update Using Horizon ` -.. |populating-the-storage-area| replace:: :ref:`Populate the Storage Area ` .. |configuring-kubernetes-multi-version-upgrade-orchestration-aio-b0b59a346466| replace:: :ref:`Configure Kubernetes Multi-Version Upgrade Cloud Orchestration for AIO-SX ` .. |manual-kubernetes-components-upgrade| replace:: :ref:`Manual Kubernetes Version Upgrade ` .. |overview-of-firmware-update-orchestration| replace:: :ref:`Overview ` .. |handling-kubernetes-update-orchestration-failures| replace:: :ref:`Handle Kubernetes Version Upgrade Orchestration Failures ` -.. |upgrading-all-in-one-duplex-or-standard| replace:: :ref:`Upgrade All-in-One Duplex / Standard ` -.. |installing-reboot-required-software-updates-using-the-cli| replace:: :ref:`Install Reboot-Required Software Updates Using the CLI ` .. 
|the-firmware-update-orchestration-process| replace:: :ref:`The Firmware Update Orchestration Process ` .. |software-updates-and-upgrades-software-updates| replace:: :ref:`Software Updates ` .. |manual-kubernetes-multi-version-upgrade-in-aio-sx-13e05ba19840| replace:: :ref:`Manual Kubernetes Multi-Version Upgrade in AIO-SX ` diff --git a/doc/source/dist_cloud/kubernetes/update-orchestration-of-central-clouds-regionone-and-subclouds-using-the-cli.rst b/doc/source/dist_cloud/kubernetes/update-orchestration-of-central-clouds-regionone-and-subclouds-using-the-cli.rst index d10d960b0..574ed1b1d 100644 --- a/doc/source/dist_cloud/kubernetes/update-orchestration-of-central-clouds-regionone-and-subclouds-using-the-cli.rst +++ b/doc/source/dist_cloud/kubernetes/update-orchestration-of-central-clouds-regionone-and-subclouds-using-the-cli.rst @@ -329,8 +329,9 @@ individual subclouds. **alarm restriction type** relaxed or strict — determines whether the orchestration is aborted for - alarms that are not management-affecting. For more information, refer - to |updates-doc|: :ref:`configuring-update-orchestration`. + alarms that are not management-affecting. + + .. For more information, refer to |updates-doc|: :ref:`configuring-update-orchestration`. **default instance action** .. note:: diff --git a/doc/source/dist_cloud/kubernetes/upgrading-the-systemcontroller-using-the-cli.rst b/doc/source/dist_cloud/kubernetes/upgrading-the-systemcontroller-using-the-cli.rst index 16e7e6b17..bb25aa434 100644 --- a/doc/source/dist_cloud/kubernetes/upgrading-the-systemcontroller-using-the-cli.rst +++ b/doc/source/dist_cloud/kubernetes/upgrading-the-systemcontroller-using-the-cli.rst @@ -77,8 +77,6 @@ Follow the steps below to manually upgrade the system controller: :start-after: wrsbegin :end-before: wrsend - For more information, see |updates-doc|: :ref:`Managing Software Updates `. - #. Confirm that the system is healthy. Check the current system health status, resolve any alarms and other issues diff --git a/doc/source/dist_cloud/kubernetes/uploading-and-applying-updates-to-systemcontroller-using-horizon.rst b/doc/source/dist_cloud/kubernetes/uploading-and-applying-updates-to-systemcontroller-using-horizon.rst index 33013cb5e..fb4272b7d 100644 --- a/doc/source/dist_cloud/kubernetes/uploading-and-applying-updates-to-systemcontroller-using-horizon.rst +++ b/doc/source/dist_cloud/kubernetes/uploading-and-applying-updates-to-systemcontroller-using-horizon.rst @@ -50,8 +50,9 @@ Update the RegionOne To fully deploy the Central Cloud's RegionOne through Horizon: -#. Upload and apply updates to SystemController region, for more details see - :ref:`configuring-update-orchestration`. +#. Upload and apply updates to SystemController region. + + .. For more details see :ref:`configuring-update-orchestration`. #. Update the RegionOne region: @@ -69,11 +70,10 @@ To fully deploy the Central Cloud's RegionOne through Horizon: #. Click **Apply Strategy** to apply the update strategy. -To update the RegionOne using the CLI see :ref:`update-orchestration-cli`. +.. To update the RegionOne using the CLI see :ref:`update-orchestration-cli`. -.. note:: - This procedure closely resembles what is described in + .. This procedure closely resembles what is described in :ref:`configuring-update-orchestration`. The key difference lies in the necessity to preselect RegionOne. @@ -81,4 +81,4 @@ To update the RegionOne using the CLI see :ref:`update-orchestration-cli`. 
 To update the software on the System Controller and subclouds, you must use
 the |prod-dc| Update Orchestration. For more information, see
-:ref:`update-orchestration-of-central-clouds-regionone-and-subclouds`.
\ No newline at end of file
+:ref:`update-orchestration-of-central-clouds-regionone-and-subclouds`.
diff --git a/doc/source/updates/kubernetes/aborting-simplex-system-upgrades.rst b/doc/source/updates/kubernetes/aborting-simplex-system-upgrades.rst
deleted file mode 100644
index 72ea8040a..000000000
--- a/doc/source/updates/kubernetes/aborting-simplex-system-upgrades.rst
+++ /dev/null
@@ -1,110 +0,0 @@
-
-.. syj1592947192958
-.. _aborting-simplex-system-upgrades:
-
-=============================
-Abort Simplex System Upgrades
-=============================
-
-You can abort a Simplex System upgrade before or after upgrading controller-0.
-The upgrade abort procedure can only be applied before the
-:command:`upgrade-complete` command is issued. Once this command is issued,
-the upgrade cannot be aborted. If you must return to the previous release,
-restore the system using the backup data taken prior to the upgrade.
-
-Before starting, verify the upgrade data under ``/opt/platform-backup``. This
-data must be present to perform the abort process.
-
-.. _aborting-simplex-system-upgrades-section-N10025-N1001B-N10001:
-
-.. contents:: |minitoc|
-   :local:
-   :depth: 1
-
------------------------------
-Before upgrading controller-0
------------------------------
-
-.. _aborting-simplex-system-upgrades-ol-nlw-zbp-xdb:
-
-#. Abort the upgrade with the :command:`upgrade-abort` command.
-
-   .. code-block:: none
-
-      ~(keystone_admin)$ system upgrade-abort
-
-   The upgrade state is set to ``aborting``. Once this is executed, it cannot
-   be cancelled; the upgrade must be completely aborted.
-
-#. Complete the upgrade.
-
-   .. code-block:: none
-
-      ~(keystone_admin)$ system upgrade-complete
-
-   At this time, any upgrade data generated as part of the
-   :command:`upgrade-start` command will be deleted. This includes the
-   upgrade data in ``/opt/platform-backup``.
-
-.. _aborting-simplex-system-upgrades-section-N10063-N1001B-N10001:
-
-----------------------------
-After upgrading controller-0
-----------------------------
-
-After controller-0 has been upgraded, it is possible to roll back the software
-upgrade. This involves performing a system restore with the previous release.
-
-.. _aborting-simplex-system-upgrades-ol-jmw-kcp-xdb:
-
-#. Manually install the previous release of the |prod-long| Simplex software
-   from the network or a USB device.
-
-#. Verify and configure IP connectivity. External connectivity is required to
-   run the Ansible restore playbook. The |prod-long| boot image sends |DHCP|
-   requests on all interfaces, so if a |DHCP| server is present in your
-   environment, the server may already have obtained an IP address and
-   external IP connectivity. Verify this using the :command:`ip addr`
-   command. Otherwise, manually configure an IP address and default IP route.
-
-   .. note::
-
-      If there are patches on the |AIO-SX| system prior to the upgrade, the
-      Ansible restore playbook will install the patches and cause a reboot
-      of the system. The reboot will lead to the loss of the IP connectivity
-      configuration. Verify and re-configure IP connectivity, then re-run
-      the Ansible restore playbook to complete the platform restore.
-
-#. Restore the system data. The restore data is preserved in
-   ``/opt/platform-backup``.
-
-   The system will be restored to the state when the :command:`upgrade-start`
-   command was issued. Follow the process in :ref:`Run Restore Playbook Locally
Follow the process in :ref:`Run Restore Playbook Locally - on the Controller `. - - Specify the upgrade data filename as `backup_filename` and the - `initial_backup_dir` as ``/opt/platform-backup``. - - The user images will also need to be restored as described in the - Postrequisites section. - -#. Unlock controller-0 - - .. code-block:: none - - ~(keystone_admin)$ system host-unlock controller-0 - - -#. Abort the upgrade with the :command:`upgrade-abort` command. - - .. code-block:: none - - ~(keystone_admin)$ system upgrade-abort - - The upgrade state is set to ``aborting``. Once this is executed, it cannot - be cancelled; the upgrade must be completely aborted. - -#. Complete the upgrade. - - .. code-block:: none - - ~(keystone_admin)$ system upgrade-complete diff --git a/doc/source/updates/kubernetes/configure-firmware-update-orchestration.rst b/doc/source/updates/kubernetes/configure-firmware-update-orchestration.rst index 803a4e4fe..b98a20ecd 100644 --- a/doc/source/updates/kubernetes/configure-firmware-update-orchestration.rst +++ b/doc/source/updates/kubernetes/configure-firmware-update-orchestration.rst @@ -39,8 +39,9 @@ ignored even when the default strict restrictions are selected: - Hosts that need to be updated must be in the ``unlocked-enabled`` state. -- The firmware update image must be in the ``applied`` state. For more - information, see :ref:`Managing Software Updates `. +- The firmware update image must be in the ``applied`` state. + + .. For more information, see :ref:`Managing Software Updates `. .. rubric:: |proc| diff --git a/doc/source/updates/kubernetes/configuring-update-orchestration.rst b/doc/source/updates/kubernetes/configuring-update-orchestration.rst deleted file mode 100644 index 7661f1ca0..000000000 --- a/doc/source/updates/kubernetes/configuring-update-orchestration.rst +++ /dev/null @@ -1,194 +0,0 @@ - -.. gep1552920534437 -.. _configuring-update-orchestration: - -============================== -Configure Update Orchestration -============================== - -You can configure update orchestration using the Horizon Web interface. - -.. rubric:: |context| - -The update orchestration interface is found in Horizon on the Patch -Orchestration tab, available from **Admin** \> **Platform** \> **Software -Management** in the left-hand pane. - -.. note:: - Management-affecting alarms cannot be ignored at the indicated severity - level or higher by using relaxed alarm rules during an orchestrated update - operation. For a list of management-affecting alarms, see |fault-doc|: - :ref:`Alarm Messages <100-series-alarm-messages-starlingx>`. To display - management-affecting active alarms, use the following command: - - .. code-block:: none - - ~(keystone_admin)]$ fm alarm-list --mgmt_affecting - - During an orchestrated update operation, the following alarms are ignored - even when strict restrictions are selected: - - - 200.001, Maintenance host lock alarm - - - 900.001, Patch in progress - - - 900.005, Upgrade in progress - - - 900.101, Software patch auto apply in progress - -.. _configuring-update-orchestration-ul-qhy-q1p-v1b: - -.. rubric:: |prereq| - -You cannot successfully create an update (patch) strategy if any hosts show -**Patch Current** = **Pending**, indicating that the update status of these -hosts has not yet been updated. The creation attempt fails, and you must try -again. You can use :command:`sw-patch query-hosts` to review the current update -status before creating an update strategy. - -.. rubric:: |proc| - -#. 
Upload and apply your updates as described in :ref:`Manage Software Updates
-   <managing-software-updates>` (do not lock any hosts or use
-   :command:`host-install` to install the updates on any hosts).
-
-#. Select **Platform** \> **Software Management**, then select the **Patch
-   Orchestration** tab.
-
-#. Click the **Create Strategy** button.
-
-   The Create Strategy dialog appears.
-
-   .. image:: figures/zcj1567178380908.png
-      :height: 365px
-      :width: 475px
-
-#. Create an update strategy by specifying settings for the parameters in the
-   Create Strategy dialog box.
-
-   **Description**
-      Provides information about current alarms, including whether an alarm
-      is Management Affecting.
-
-   **Controller Apply Type**
-      - Serial (default): controllers will be updated one at a time
-        (standby controller first)
-
-      - Ignore: controllers will not be updated
-
-   **Storage Apply Type**
-      - Serial (default): storage hosts will be updated one at a time
-
-      - Parallel: storage hosts will be updated in parallel, ensuring that
-        only one storage node in each replication group is updated at a
-        time.
-
-      - Ignore: storage hosts will not be updated
-
-   **Worker Apply Type**
-      - Serial (default): worker hosts will be updated one at a time
-
-      - Parallel: worker hosts will be updated in parallel
-
-        - At most, the specified maximum number of worker hosts will be
-          updated at the same time.
-
-        - For a reboot parallel update only, worker hosts with no pods
-          are updated before worker hosts with pods.
-
-      - Parallel: specify the maximum number of worker hosts to update in
-        parallel (minimum: 2, maximum: 100)
-
-      - Ignore: worker hosts will not be updated
-
-   **Default Instance Action**
-      This parameter only applies to systems with the |prefix|-openstack
-      application.
-
-      - Stop-Start (default): hosted application VMs will be stopped
-        before a host is updated (applies to reboot updates only)
-
-      - Migrate: hosted application VMs will be migrated off a host before
-        it is updated (applies to reboot updates only).
-
-   **Alarm Restrictions**
-      This option lets you specify how update orchestration behaves when
-      alarms are present.
-
-      You can use the CLI command :command:`fm alarm-list --mgmt_affecting`
-      to view the alarms that are management affecting.
-
-      **Strict**
-         The default strict option will result in update orchestration
-         failing if there are any alarms present in the system (except for a
-         small list of alarms).
-
-      **Relaxed**
-         This option allows orchestration to proceed if alarms are present,
-         as long as none of these alarms are management affecting.
-
-#. Click **Create Strategy** to save the update orchestration strategy.
-
-   .. note::
-      The update orchestration process ensures that no hosts are reported as
-      **Patch Current** = **Pending**. If any hosts have this status, the
-      creation attempt fails with an error message. Wait a few minutes and
-      try again. You can also use :command:`sw-patch query-hosts` to review
-      the current update status.
-
-   Examine the update strategy. Pay careful attention to:
-
-   - The sets of hosts that will be updated together in each stage.
-
-   - The sets of hosted application pods that will be impacted in each stage.
-
-   - The update strategy has one or more stages, with each stage consisting of
-     one or more hosts to be updated at the same time. Each stage is split into
-     steps (for example, :command:`query-alarms`, :command:`lock-hosts`,
-     :command:`sw-patch-hosts`). Note the following about stages:
-
-     .. note::
-
-        - Controller hosts are updated first, followed by storage hosts and
-          then worker hosts.
-
-        - Worker hosts with no hosted application pods are updated before
-          worker hosts with hosted application pods.
-
-        - The final step in each stage is ``system-stabilize``, which waits
-          for a period of time (up to several minutes) and ensures that the
-          system is free of alarms. This ensures that the update orchestrator
-          does not continue to update more hosts if applying the update has
-          caused an issue resulting in an alarm.
-
-#. Click the **Apply Strategy** button to apply the update strategy. You can
-   optionally apply a single stage at a time by clicking the **Apply Stage**
-   button.
-
-   When applying a single stage, you can only apply the next stage; you cannot
-   skip stages.
-
-#. To abort the update, click the **Abort Strategy** button.
-
-   - While an update strategy is being applied, it can be aborted. This
-     results in:
-
-     - The current step being allowed to complete.
-
-     - If necessary, an abort phase will be created and applied, which will
-       attempt to unlock any hosts that were locked.
-
-   .. note::
-      If an update strategy is aborted after hosts were locked, but before
-      they were updated, the hosts will not be unlocked, as this would result
-      in the updates being installed. You must either install the updates on
-      the hosts or remove the updates before unlocking the hosts.
-
-#. Delete the update strategy.
-
-   After an update strategy has been applied (or aborted), it must be deleted
-   before another update strategy can be created. If an update strategy
-   application fails, you must address the issue that caused the failure, then
-   delete and re-create the strategy before attempting to apply it again.
diff --git a/doc/source/updates/kubernetes/identifying-the-software-version-and-update-level-using-horizon.rst b/doc/source/updates/kubernetes/identifying-the-software-version-and-update-level-using-horizon.rst
deleted file mode 100644
index 4e6ac4e87..000000000
--- a/doc/source/updates/kubernetes/identifying-the-software-version-and-update-level-using-horizon.rst
+++ /dev/null
@@ -1,35 +0,0 @@
-
-.. kiv1552920729184
-.. _identifying-the-software-version-and-update-level-using-horizon:
-
-============================================================
-Identify the Software Version and Update Level Using Horizon
-============================================================
-
-You can view the current software version and update level from the Horizon Web
-interface. The system type is also shown.
-
-.. rubric:: |proc|
-
-#. In the |prod| Horizon, open the **System Configuration** page.
-
-   The **System Configuration** page is available from **Admin** \>
-   **Platform** \> **System Configuration** in the left-hand pane.
-
-#. Select the **Systems** tab to view the software version.
-
-   The software version is shown in the **Version** field.
-
-   The type of system selected at installation (Standard or All-in-one) is
-   shown in the **System Type** field. The mode (**simplex**, **duplex**, or
-   **standard**) is shown in the **System Mode** field.
-
-#. In the |prod| Horizon interface, open the **Software Management** page.
-
-   The **Software Management** page is available from **Admin** \> **Platform**
-   \> **Software Management** in the left-hand pane.
-
-#. Select the **Patches** tab to view update information.
-
-   The **Patches** tab shows the Patch ID, a Summary description, the Status
-   of the Patch, and an Actions button for selecting an appropriate action.
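The update orchestration strategy lifecycle described above (create, apply or
apply-stage, abort, delete) can also be driven from the CLI. The following is
a minimal sketch, assuming the pre-USM :command:`sw-manager patch-strategy`
commands are still available and that the option names mirror the Create
Strategy dialog parameters described above:

.. code-block:: none

   # Pre-checks: management-affecting alarms and per-host patch status
   ~(keystone_admin)]$ fm alarm-list --mgmt_affecting
   ~(keystone_admin)]$ sudo sw-patch query-hosts

   # Create a strategy: serial controllers, two worker hosts in parallel,
   # relaxed alarm restrictions
   ~(keystone_admin)]$ sw-manager patch-strategy create \
       --controller-apply-type serial \
       --worker-apply-type parallel --max-parallel-worker-hosts 2 \
       --alarm-restrictions relaxed

   # Review the stages, then apply and monitor progress
   ~(keystone_admin)]$ sw-manager patch-strategy show --details
   ~(keystone_admin)]$ sw-manager patch-strategy apply
   ~(keystone_admin)]$ sw-manager patch-strategy show

   # Delete the strategy once it has been applied (or aborted)
   ~(keystone_admin)]$ sw-manager patch-strategy delete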
diff --git a/doc/source/updates/kubernetes/identifying-the-software-version-and-update-level-using-the-cli.rst b/doc/source/updates/kubernetes/identifying-the-software-version-and-update-level-using-the-cli.rst
deleted file mode 100644
index 0dc6f24c1..000000000
--- a/doc/source/updates/kubernetes/identifying-the-software-version-and-update-level-using-the-cli.rst
+++ /dev/null
@@ -1,58 +0,0 @@
-
-.. lob1552920716157
-.. _identifying-the-software-version-and-update-level-using-the-cli:
-
-============================================================
-Identify the Software Version and Update Level Using the CLI
-============================================================
-
-You can view the current software version and update level from the CLI. The
-system type is also shown.
-
-.. rubric:: |context|
-
-For more about working with software updates, see :ref:`Manage Software Updates
-<managing-software-updates>`.
-
-.. rubric:: |proc|
-
-.. _identifying-the-software-version-and-update-level-using-the-cli-steps-smg-b4r-hkb:
-
-- To find the software version from the CLI, use the :command:`system show`
-  command.
-
-  .. code-block:: none
-
-     ~(keystone_admin)]$ system show
-     +----------------------+----------------------------------------------------+
-     | Property             | Value                                              |
-     +----------------------+----------------------------------------------------+
-     | contact              | None                                               |
-     | created_at           | 2020-02-27T15:29:26.140606+00:00                   |
-     | description          | yow-cgcs-ironpass-1_4                              |
-     | https_enabled        | False                                              |
-     | location             | None                                               |
-     | name                 | yow-cgcs-ironpass-1-4                              |
-     | region_name          | RegionOne                                          |
-     | sdn_enabled          | False                                              |
-     | security_feature     | spectre_meltdown_v1                                |
-     | service_project_name | services                                           |
-     | software_version     | nn.nn                                              |
-     | system_mode          | duplex                                             |
-     | system_type          | Standard                                           |
-     | timezone             | UTC                                                |
-     | updated_at           | 2020-02-28T16:19:56.987581+00:00                   |
-     | uuid                 | 90212c98-7e27-4a14-8981-b8f5b777b26b               |
-     | vswitch_type         | none                                               |
-     +----------------------+----------------------------------------------------+
-
-  .. note::
-     The **system_mode** field is shown only for a |prod| Simplex or Duplex
-     system.
-
-- To list applied software updates from the CLI, use the :command:`sw-patch
-  query` command.
-
-  .. code-block:: none
-
-     ~(keystone_admin)]$ sudo sw-patch query
diff --git a/doc/source/updates/kubernetes/in-service-versus-reboot-required-software-updates.rst b/doc/source/updates/kubernetes/in-service-versus-reboot-required-software-updates.rst
deleted file mode 100644
index 6f3e50c52..000000000
--- a/doc/source/updates/kubernetes/in-service-versus-reboot-required-software-updates.rst
+++ /dev/null
@@ -1,23 +0,0 @@
-
-.. gwe1552920505159
-.. _in-service-versus-reboot-required-software-updates:
-
-==================================================
-In-Service Versus Reboot-Required Software Updates
-==================================================
-
-In-Service (Reboot-not-Required) and Reboot-Required software updates are
-available, depending on the nature of the update to be performed.
-
-In-Service software updates provide a mechanism to issue updates that do not
-require a reboot, allowing the update to be installed on in-service nodes and
-restarting affected processes as needed.
-
-Depending on the area of software being updated and the type of software change,
-installation of the update may or may not require the |prod| hosts to be
-rebooted. For example, a software update to the kernel would require the host to
-be rebooted in order to apply the update.
Software updates are classified as -reboot-required or reboot-not-required (also referred to as -in-service) type updates to indicate this. For reboot-required updates, the -hosted application pods are automatically relocated to an alternate host as part -of the update procedure, prior to applying the update and rebooting the host. diff --git a/doc/source/updates/kubernetes/index-updates-kub-03d4d10fa0be.rst b/doc/source/updates/kubernetes/index-updates-kub-03d4d10fa0be.rst index 76d1f0f74..162295725 100644 --- a/doc/source/updates/kubernetes/index-updates-kub-03d4d10fa0be.rst +++ b/doc/source/updates/kubernetes/index-updates-kub-03d4d10fa0be.rst @@ -19,42 +19,61 @@ Introduction software-updates-and-upgrades-software-updates software-upgrades ------------------------ -Manual software updates ------------------------ +------------------------ +Host software deployment +------------------------ .. toctree:: :maxdepth: 1 - managing-software-updates - in-service-versus-reboot-required-software-updates - identifying-the-software-version-and-update-level-using-horizon - identifying-the-software-version-and-update-level-using-the-cli - populating-the-storage-area - update-status-and-lifecycle - installing-software-updates-before-initial-commissioning - installing-reboot-required-software-updates-using-horizon - installing-reboot-required-software-updates-using-the-cli - installing-in-service-software-update-using-horizon - installing-in-service-software-updates-using-the-cli - removing-reboot-required-software-updates - software-update-space-reclamation - reclaiming-disk-space + manual-host-software-deployment-ee17ec6f71a4 + manual-rollback-host-software-deployment-9295ce1e6e29 + manual-removal-host-software-deployment-24f47e80e518 + orchestrated-deployment-host-software-deployment-d234754c7d20 + orchestrated-rollback-host-software-deployment-c6b12f13a8a1 + orchestrated-removal-host-software-deployment-3f542895daf8 ----------------------------- -Orchestrated Software Update ----------------------------- +-------------------------- +Kubernetes Version Upgrade +-------------------------- + +****** +Manual +****** .. toctree:: :maxdepth: 1 - update-orchestration-overview - configuring-update-orchestration - update-orchestration-cli + manual-kubernetes-components-upgrade + manual-kubernetes-multi-version-upgrade-in-aio-sx-13e05ba19840 ----------------------------------------- +************ +Orchestrated +************ + +.. toctree:: + :maxdepth: 1 + + about-kubernetes-orchestrated-upgrades + the-kubernetes-update-orchestration-process + configuring-kubernetes-update-orchestration + configuring-kubernetes-multi-version-upgrade-orchestration-aio-b0b59a346466 + handling-kubernetes-update-orchestration-failures + +-------- +Appendix +-------- + +.. toctree:: + :maxdepth: 1 + +********** +Deprecated +********** + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ N3000 FPGA Firmware Update Orchestration ----------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. toctree:: :maxdepth: 1 @@ -66,78 +85,6 @@ N3000 FPGA Firmware Update Orchestration firmware-update-orchestration-using-the-cli handle-firmware-update-orchestration-failures ---------------------------------- -Manual Kubernetes Version Upgrade ---------------------------------- - -.. 
toctree:: - :maxdepth: 1 - - manual-kubernetes-components-upgrade - manual-kubernetes-multi-version-upgrade-in-aio-sx-13e05ba19840 - ----------------------------------------------- -Kubernetes Version Upgrade Cloud Orchestration ----------------------------------------------- - -.. toctree:: - :maxdepth: 1 - - about-kubernetes-orchestrated-upgrades - the-kubernetes-update-orchestration-process - configuring-kubernetes-update-orchestration - configuring-kubernetes-multi-version-upgrade-orchestration-aio-b0b59a346466 - handling-kubernetes-update-orchestration-failures - ----------------------------------- -Manual Platform components upgrade ----------------------------------- - -.. toctree:: - :maxdepth: 1 - - manual-upgrade-overview - -****************** -All-in-one Simplex -****************** - -.. toctree:: - :maxdepth: 1 - - upgrading-all-in-one-simplex - aborting-simplex-system-upgrades - -****************** -All-in-one Duplex -****************** - -.. toctree:: - :maxdepth: 1 - - upgrading-all-in-one-duplex-or-standard - overview-of-upgrade-abort-procedure - -****************** -Roll back upgrades -****************** - -.. toctree:: - :maxdepth: 1 - - rolling-back-a-software-upgrade-before-the-second-controller-upgrade - rolling-back-a-software-upgrade-after-the-second-controller-upgrade - ---------------------------------------- -Orchestrated Platform component upgrade ---------------------------------------- - -.. toctree:: - :maxdepth: 1 - - orchestration-upgrade-overview - performing-an-orchestrated-upgrade - performing-an-orchestrated-upgrade-using-the-cli ------------------------------ Appendix - Kubernetes Platform diff --git a/doc/source/updates/kubernetes/installing-in-service-software-update-using-horizon.rst b/doc/source/updates/kubernetes/installing-in-service-software-update-using-horizon.rst deleted file mode 100644 index 4e1aa8931..000000000 --- a/doc/source/updates/kubernetes/installing-in-service-software-update-using-horizon.rst +++ /dev/null @@ -1,81 +0,0 @@ - -.. jfc1552920636790 -.. _installing-in-service-software-update-using-horizon: - -================================================ -Install In-Service Software Update Using Horizon -================================================ - -The procedure for applying an in-service update is similar to that of a -reboot-required update, except that the host does not need to be locked and -unlocked as part of applying the update. - -.. rubric:: |proc| - -.. _installing-in-service-software-update-using-horizon-steps-x1b-qnv-vw: - -#. Log in to the Horizon Web interface as the **admin** user. - -#. In |prod| Horizon, open the Software Management page. - - The **Software Management** page is available from **Admin** \> **Platform** - \> **Software Management** in the left-hand pane. - -#. Select the **Patches** tab to see the current update status. - - The **Patches** tab shows the current status of all updates uploaded to the - system. If there are no updates, an empty **Patch Table** is displayed. - -#. Upload the update (patch) file to the update storage area. - - Click the **Upload Patch** button to display an upload window from which - you can browse your workstation's file system to select the update file. - Click the **Upload Patch** button once the selection is done. - - The update file is transferred to the Active Controller and is copied to - the update storage area, but it has yet to be applied to the cluster. This - is reflected in the **Patches** tab. - -#. Apply the update. 
-
-   Click the **Apply Patch** button associated with the update. Alternatively,
-   select the update first using the selection boxes on the left, and then
-   click the **Apply Patches** button at the top. You can use this selection
-   process to apply all updates, or a selected subset, in a single operation.
-
-   The **Patches** tab is updated to show that the update is in the
-   *Partial-Apply* state.
-
-#. Install the update on controller-0.
-
-   #. Select the **Hosts** tab.
-
-      The **Hosts** tab on the **Host Inventory** page reflects the new status
-      of the hosts with respect to the new update state. In this example, the
-      update only applies to controller software, as can be seen by the
-      worker host's status field being empty, indicating that it is 'patch
-      current'.
-
-      .. image:: figures/ekn1453233538504.png
-
-   #. Select the **Install Patches** option from the **Edit Host** button
-      associated with controller-0 to install the update.
-
-      A confirmation window is presented giving you a last opportunity to
-      cancel the operation before proceeding.
-
-#. Repeat steps 6a and 6b above with controller-1 to install the update
-   on controller-1.
-
-#. Repeat steps 6a and 6b above for the worker and/or storage hosts (if
-   present).
-
-   This step does not apply for |prod| Simplex or Duplex systems.
-
-#. Verify the state of the update.
-
-   Visit the **Patches** tab again. The update is now in the *Applied* state.
-
-.. rubric:: |result|
-
-The update is now applied, and all affected hosts have been updated.
diff --git a/doc/source/updates/kubernetes/installing-in-service-software-updates-using-the-cli.rst b/doc/source/updates/kubernetes/installing-in-service-software-updates-using-the-cli.rst
deleted file mode 100644
index 5ce27bf43..000000000
--- a/doc/source/updates/kubernetes/installing-in-service-software-updates-using-the-cli.rst
+++ /dev/null
@@ -1,131 +0,0 @@
-
-.. hfj1552920618138
-.. _installing-in-service-software-updates-using-the-cli:
-
-=================================================
-Install In-Service Software Updates Using the CLI
-=================================================
-
-The procedure for applying an in-service update is similar to that of a
-reboot-required update, except that the host does not need to be locked and
-unlocked as part of applying the update.
-
-.. rubric:: |proc|
-
-#. Upload the update (patch).
-
-   .. code-block:: none
-
-      $ sudo sw-patch upload INSVC_HORIZON_SYSINV.patch
-      INSVC_HORIZON_SYSINV is now available
-
-#. Confirm that the update is available.
-
-   .. code-block:: none
-
-      $ sudo sw-patch query
-      Patch ID             RR  Release  Patch State
-      ==================== ==  =======  ===========
-      INSVC_HORIZON_SYSINV N   nn.nn    Available
-
-#. Check the status of the hosts.
-
-   .. code-block:: none
-
-      $ sudo sw-patch query-hosts
-      Hostname     IP Address      Patch Current  Reboot Required  Release  State
-      ============ ============== ============= =============== ======= =====
-      worker-0     192.168.204.24  Yes            No               nn.nn    idle
-      controller-0 192.168.204.3   Yes            No               nn.nn    idle
-      controller-1 192.168.204.4   Yes            No               nn.nn    idle
-
-#. Ensure that the original update files have been deleted from the root drive.
-
-   After they are uploaded to the storage area, the original files are no
-   longer required. You must use the command-line interface to delete them, in
-   order to ensure enough disk space to complete the installation.
-
-   .. code-block:: none
-
-      $ rm <patch-file>
-
-   .. caution::
-      If the original files are not deleted before the updates are applied,
-      the installation may fail due to a full disk.
-
-#. Apply the update (patch).
-
-   .. code-block:: none
-
-      $ sudo sw-patch apply INSVC_HORIZON_SYSINV
-      INSVC_HORIZON_SYSINV is now in the repo
-
-   The update state transitions to Partial-Apply:
-
-   .. code-block:: none
-
-      $ sudo sw-patch query
-      Patch ID             RR  Release  Patch State
-      ==================== ==  =======  =============
-      INSVC_HORIZON_SYSINV N   nn.nn    Partial-Apply
-
-   As it is an in-service update, the hosts report that they are not 'patch
-   current', but they do not require a reboot.
-
-   .. code-block:: none
-
-      $ sudo sw-patch query-hosts
-      Hostname     IP Address      Patch Current  Reboot Required  Release  State
-      ============ ============== ============= =============== ======= =====
-      worker-0     192.168.204.24  No             No               nn.nn    idle
-      controller-0 192.168.204.3   No             No               nn.nn    idle
-      controller-1 192.168.204.4   No             No               nn.nn    idle
-
-#. Install the update on controller-0.
-
-   .. code-block:: none
-
-      $ sudo sw-patch host-install controller-0
-      .............
-      Installation was successful.
-
-#. Query the hosts to check status.
-
-   .. code-block:: none
-
-      $ sudo sw-patch query-hosts
-      Hostname     IP Address      Patch Current  Reboot Required  Release  State
-      ============ ============== ============= =============== ======= =====
-      worker-0     192.168.204.24  No             No               nn.nn    idle
-      controller-0 192.168.204.3   Yes            No               nn.nn    idle
-      controller-1 192.168.204.4   No             No               nn.nn    idle
-
-   The controller-0 host reports that it is now 'patch current' and does not
-   require a reboot, without having been locked or rebooted.
-
-#. Install the update on worker-0 (and other worker nodes and storage nodes,
-   if present).
-
-   .. code-block:: none
-
-      $ sudo sw-patch host-install worker-0
-      ....
-      Installation was successful.
-
-   You can query the hosts to confirm that all nodes are now 'patch current',
-   and that the update has transitioned to the Applied state.
-
-   .. code-block:: none
-
-      $ sudo sw-patch query-hosts
-      Hostname     IP Address      Patch Current  Reboot Required  Release  State
-      ============ ============== ============= =============== ======= =====
-      worker-0     192.168.204.24  Yes            No               nn.nn    idle
-      controller-0 192.168.204.3   Yes            No               nn.nn    idle
-      controller-1 192.168.204.4   Yes            No               nn.nn    idle
-
-      $ sudo sw-patch query
-      Patch ID             RR  Release  Patch State
-      ==================== ==  =======  ===========
-      INSVC_HORIZON_SYSINV N   nn.nn    Applied
diff --git a/doc/source/updates/kubernetes/installing-reboot-required-software-updates-using-horizon.rst b/doc/source/updates/kubernetes/installing-reboot-required-software-updates-using-horizon.rst
deleted file mode 100644
index 9596a92b0..000000000
--- a/doc/source/updates/kubernetes/installing-reboot-required-software-updates-using-horizon.rst
+++ /dev/null
@@ -1,126 +0,0 @@
-
-.. phg1552920664442
-.. _installing-reboot-required-software-updates-using-horizon:
-
-======================================================
-Install Reboot-Required Software Updates Using Horizon
-======================================================
-
-You can use the Horizon Web interface to upload, delete, apply, and remove
-software updates.
-
-.. rubric:: |context|
-
-This section presents an example of a software update workflow using a single
-update. The main steps of the procedure are:
-
-.. _installing-reboot-required-software-updates-using-horizon-ul-mbr-wsr-s5:
-
-- Upload the updates.
-
-- Lock the host\(s).
-
-- Install updates; any unlocked nodes will reject the request.
-
-- Unlock the host\(s). Unlocking the host\(s) automatically triggers a
-  reboot.
-
-.. rubric:: |proc|
-
-.. _installing-reboot-required-software-updates-using-horizon-steps-lnt-14y-hjb:
-
-#. Log in to the Horizon Web interface as the **admin** user.
-
-#. In Horizon, open the **Software Management** page.
-
-   The **Software Management** page is available from **Admin** \> **Platform**
-   \> **Software Management** in the left-hand pane.
-
-#. Select the **Patches** tab to see the current status.
-
-   The **Patches** tab shows the current status of all updates uploaded to the
-   system. If there are no updates, an empty **Patch Table** is displayed.
-
-#. Upload the update (patch) file to the update storage area.
-
-   Click the **Upload Patches** button to display an upload window from which
-   you can browse your workstation's file system to select the update file.
-   Click the **Upload Patches** button once the selection is done.
-
-   The update file is transferred to the Active Controller and is copied to
-   the storage area, but it has yet to be applied to the cluster. This is
-   reflected on the **Patches** tab.
-
-#. Apply the update.
-
-   Click the **Apply Patch** button associated with the update. Alternatively,
-   select the update first using the selection boxes on the left, and then
-   click the **Apply Patches** button at the top. You can use this selection
-   process to apply all updates, or a selected subset, in a single operation.
-
-   The Patches page is updated to show that the update is in the
-   *Partial-Apply* state.
-
-#. Install the update on controller-0.
-
-   .. _installing-reboot-required-software-updates-using-horizon-step-N10107-N10028-N1001C-N10001:
-
-   #. Select the **Hosts** tab.
-
-      The **Hosts** tab on the **Host Inventory** page reflects the new status
-      of the hosts with respect to the new update state. As shown below, both
-      controllers are now reported as not 'patch current' and requiring
-      reboot.
-
-      .. image:: figures/ekn1453233538504.png
-
-   #. Transfer active services to the standby controller by selecting the
-      **Swact Host** option from the **Edit Host** button associated with the
-      active controller host.
-
-      .. note::
-         Access to Horizon may be lost briefly during the active controller
-         transition. You may have to log in again.
-
-   #. Select the **Lock Host** option from the **Edit Host** button associated
-      with **controller-0**.
-
-   #. Select the **Install Patches** option from the **Edit Host** button
-      associated with **controller-0** to install the update.
-
-      A confirmation window is presented giving you a last opportunity to
-      cancel the operation before proceeding.
-
-      Wait for the update install to complete.
-
-   #. Select the **Unlock Host** option from the **Edit Host** button
-      associated with controller-0.
-
-#. Repeat steps :ref:`6
-   <installing-reboot-required-software-updates-using-horizon-step-N10107-N10028-N1001C-N10001>`
-   a to e with **controller-1** to install the update on controller-1.
-
-   .. note::
-      For |prod| Simplex systems, this step does not apply.
-
-#. Repeat steps :ref:`6
-   <installing-reboot-required-software-updates-using-horizon-step-N10107-N10028-N1001C-N10001>`
-   a to e for the worker and/or storage hosts.
-
-   .. note::
-      For |prod| Simplex or Duplex systems, this step does not apply.
-
-#. Verify the state of the update.
-
-   Visit the **Patches** page. The update is now in the *Applied* state.
-
-.. rubric:: |result|
-
-The update is now applied, and all affected hosts have been updated.
-
-Updates can be removed using the **Remove Patches** button from the **Patches**
-tab. The workflow is similar to the one presented in this section, with the
-exception that updates are being removed from each host instead of being
-applied.
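The removal workflow can also be performed from the CLI, following the same
lock/install/unlock pattern as an install. The following is a minimal sketch,
assuming a previously applied reboot-required update with the hypothetical ID
``PATCH_0001``:

.. code-block:: none

   # Move the update from the Applied to the Partial-Remove state
   ~(keystone_admin)]$ sudo sw-patch remove PATCH_0001

   # Affected hosts now report Patch Current = No until the removal is
   # installed on them
   ~(keystone_admin)]$ sudo sw-patch query-hosts

   # On each affected host: lock, install (which removes the update), unlock
   ~(keystone_admin)]$ system host-swact controller-0   # only if controller-0 is active
   ~(keystone_admin)]$ system host-lock controller-0
   ~(keystone_admin)]$ sudo sw-patch host-install controller-0
   ~(keystone_admin)]$ system host-unlock controller-0

   # When all affected hosts have been updated, the update returns to the
   # Available state
   ~(keystone_admin)]$ sudo sw-patch query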
diff --git a/doc/source/updates/kubernetes/installing-reboot-required-software-updates-using-the-cli.rst b/doc/source/updates/kubernetes/installing-reboot-required-software-updates-using-the-cli.rst
deleted file mode 100644
index e772869e0..000000000
--- a/doc/source/updates/kubernetes/installing-reboot-required-software-updates-using-the-cli.rst
+++ /dev/null
@@ -1,300 +0,0 @@
-
-.. ffh1552920650754
-.. _installing-reboot-required-software-updates-using-the-cli:
-
-======================================================
-Install Reboot-Required Software Updates Using the CLI
-======================================================
-
-You can install reboot-required software updates using the CLI.
-
-.. rubric:: |proc|
-
-.. _installing-reboot-required-software-updates-using-the-cli-steps-v1q-vlv-vw:
-
-#. Log in as user **sysadmin** to the active controller and source the script
-   ``/etc/platform/openrc`` to obtain administrative privileges.
-
-#. Verify that the updates are available using the :command:`sw-patch query`
-   command.
-
-   .. parsed-literal::
-
-      ~(keystone_admin)]$ sudo sw-patch query
-
-      Patch ID              Patch State
-      ===================== ===========
-      |pn|-nn.nn_PATCH_0001 Available
-      |pn|-nn.nn_PATCH_0002 Available
-      |pn|-nn.nn_PATCH_0003 Available
-
-   where *nn.nn* in the update (patch) filename is the |prod| release number.
-
-#. Ensure the original update files have been deleted from the root drive.
-
-   After the updates are uploaded to the storage area, the original files are
-   no longer required. You must use the command-line interface to delete them,
-   in order to ensure enough disk space to complete the installation.
-
-   .. code-block:: none
-
-      $ rm <patch-file>
-
-   .. caution::
-      If the original files are not deleted before the updates are applied,
-      the installation may fail due to a full disk.
-
-#. Apply the update.
-
-   .. parsed-literal::

-      ~(keystone_admin)]$ sudo sw-patch apply |pn|-nn.nn_PATCH_0001
-      |pn|-nn.nn_PATCH_0001 is now in the repo
-
-   where *nn.nn* in the update filename is the |prod-long| release number.
-
-   The update is now in the Partial-Apply state, ready for installation from
-   the software updates repository on the impacted hosts.
-
-#. Apply all available updates in a single operation, for example:
-
-   .. parsed-literal::
-
-      ~(keystone_admin)]$ sudo sw-patch apply --all
-      |pn|-|pvr|-PATCH_0001 is now in the repo
-      |pn|-|pvr|-PATCH_0002 is now in the repo
-      |pn|-|pvr|-PATCH_0003 is now in the repo
-
-   In this example, there are three updates ready for installation from the
-   software updates repository.
-
-#. Query the updating status of all hosts in the cluster.
-
-   You can query the updating status of all hosts at any time as illustrated
-   below.
-
-   .. note::
-      The reported status is the accumulated result of all applied and
-      removed updates in the software updates repository, and not just the
-      status due to a particular update.
-
-   .. code-block:: none
-
-      ~(keystone_admin)]$ sudo sw-patch query-hosts
-
-      Hostname     IP Address      Patch Current  Reboot Required  Release  State
-      ============ ============== ============= =============== ======= =====
-      worker-0     192.168.204.12  Yes            No               nn.nn    idle
-      controller-0 192.168.204.3   No             Yes              nn.nn    idle
-      controller-1 192.168.204.4   No             Yes              nn.nn    idle
-
-   For each host in the cluster, the following status fields are displayed:
-
-   **Patch Current**
-      Indicates whether there are updates pending for installation or removal
-      on the host or not. If *Yes*, then all relevant updates in the software
-      updates repository have been installed on, or removed from, the host
-      already. If *No*, then there is at least one update in either the
-      Partial-Apply or Partial-Remove state that has not been applied to the
-      host.
-
-      The **Patch Current** field of the :command:`query-hosts` command will
-      briefly report *Pending* after you apply or remove an update, until
-      that host has checked against the repository to see if it is impacted
-      by the patching operation.
-
-   **Reboot Required**
-      Indicates whether the host must be rebooted or not as a result of one
-      or more updates that have been either applied or removed, or because it
-      is not 'patch current'.
-
-   **Release**
-      Indicates the running software release version.
-
-   **State**
-      There are four possible states:
-
-      **idle**
-         In a wait state.
-
-      **installing**
-         Installing (or removing) updates.
-
-      **install-failed**
-         The operation failed, either due to an update error or because
-         something killed the process. Check the ``patching.log`` on the
-         node in question.
-
-      **install-rejected**
-         The node is unlocked; therefore, the request to install has been
-         rejected. This state persists until there is another install
-         request, or the node is reset.
-
-      Once the state has gone back to idle, the install operation is complete
-      and you can safely unlock the node.
-
-   In this example, **worker-0** is up to date: no updates need to be
-   installed, and no reboot is required. By contrast, the controllers are not
-   'patch current', and therefore a reboot is required as part of installing
-   the update.
-
-#. Install all pending updates on **controller-0**.
-
-   #. Switch the active controller services.
-
-      .. code-block:: none
-
-         ~(keystone_admin)]$ system host-swact controller-0
-
-      Before updating a controller node, you must transfer any active
-      services running on the host to the other controller. Only then is it
-      safe to lock the host.
-
-   #. Lock the host.
-
-      You must lock the target host (controller, worker, or storage) before
-      installing updates.
-
-      .. code-block:: none
-
-         ~(keystone_admin)]$ system host-lock controller-0
-
-   #. Install the update.
-
-      .. code-block:: none
-
-         ~(keystone_admin)]$ sudo sw-patch host-install controller-0
-
-      .. note::
-         You can use the :command:`sudo sw-patch host-install-async <hostname>`
-         command if you are launching multiple installs in parallel.
-
-   #. Unlock the host.
-
-      .. code-block:: none
-
-         ~(keystone_admin)]$ system host-unlock controller-0
-
-      Unlocking the host forces a reset of the host followed by a reboot.
-      This ensures that the host is restarted in a known state.
-
-      All updates are now installed on controller-0. Querying the current
-      update status displays the following information:
-
-      .. code-block:: none
-
-         ~(keystone_admin)]$ sudo sw-patch query-hosts
-
-         Hostname     IP Address      Patch Current  Reboot Required  Release  State
-         ============ ============== ============= =============== ======= =====
-         worker-0     192.168.204.95  Yes            No               nn.nn    idle
-         worker-1     192.168.204.63  Yes            No               nn.nn    idle
-         worker-2     192.168.204.99  Yes            No               nn.nn    idle
-         worker-3     192.168.204.49  Yes            No               nn.nn    idle
-         controller-0 192.168.204.3   Yes            No               nn.nn    idle
-         controller-1 192.168.204.4   No             Yes              nn.nn    idle
-         storage-0    192.168.204.37  Yes            No               nn.nn    idle
-         storage-1    192.168.204.90  Yes            No               nn.nn    idle
-
-#. Install all pending updates on controller-1.
-
-   .. note::
-      For |prod| Simplex systems, this step does not apply.
-
-   Repeat the previous step targeting controller-1.
-
-   All updates are now installed on controller-1 as well. Querying the
-   current updating status displays the following information:
-
-   .. code-block:: none
-
-      ~(keystone_admin)]$ sudo sw-patch query-hosts
-
-      Hostname     IP Address      Patch Current  Reboot Required  Release  State
-      ============ ============== ============= =============== ======= =====
-      worker-0     192.168.204.95  Yes            No               nn.nn    idle
-      worker-1     192.168.204.63  Yes            No               nn.nn    idle
-      worker-2     192.168.204.99  Yes            No               nn.nn    idle
-      worker-3     192.168.204.49  Yes            No               nn.nn    idle
-      controller-0 192.168.204.3   Yes            No               nn.nn    idle
-      controller-1 192.168.204.4   Yes            No               nn.nn    idle
-      storage-0    192.168.204.37  Yes            No               nn.nn    idle
-      storage-1    192.168.204.90  Yes            No               nn.nn    idle
-
-#. Install any pending updates for the worker or storage hosts.
-
-   .. note::
-      This step does not apply for |prod| Simplex or Duplex systems.
-
-   All hosted application pods currently running on a worker host are
-   re-located to another host.
-
-   If the **Patch Current** status for a worker or storage host is *No*,
-   apply the pending updates using the following commands:
-
-   .. code-block:: none
-
-      ~(keystone_admin)]$ system host-lock <hostname>
-
-   .. code-block:: none
-
-      ~(keystone_admin)]$ sudo sw-patch host-install-async <hostname>
-
-   .. code-block:: none
-
-      ~(keystone_admin)]$ system host-unlock <hostname>
-
-   where ``<hostname>`` is the name of the host (for example, ``worker-0``).
-
-   .. note::
-      Update installations can be triggered in parallel.
-
-      The :command:`sw-patch host-install-async` command (corresponding to
-      **Install Patches** on the Horizon Web interface) can be run on all
-      locked nodes, without waiting for one node to complete the install
-      before triggering the install on the next. If you can lock the nodes at
-      the same time, without impacting hosted application services, you can
-      update them at the same time.
-
-      Likewise, you can install an update to the standby controller and a
-      worker node at the same time. The only restrictions are those of the
-      lock:
-
-      * You cannot lock both controllers.
-
-      * You cannot lock a worker node if you do not have enough free resources
-        to relocate the hosted applications from it.
-
-      Also, in a Ceph configuration (with storage nodes), you cannot lock
-      more than one of controller-0/controller-1/storage-0 at the same time,
-      as these nodes are running Ceph monitors and you must have at least two
-      in service at all times.
-
-#. Confirm that all updates are installed and |prod| is up to date.
-
-   Use the :command:`sw-patch query` command to verify that all updates are
-   *Applied*.
-
-   .. parsed-literal::
-
-      ~(keystone_admin)]$ sudo sw-patch query
-
-      Patch ID                 Patch State
-      ======================== ===========
-      |pn|-nn.nn_PATCH_0001    Applied
-
-   where *nn.nn* in the update filename is the |prod| release number.
-
-   If the **Patch State** for any update is still shown as *Available* or
-   *Partial-Apply*, use the :command:`sw-patch query-hosts` command to identify
-   which hosts are not *Patch Current*, and then apply updates to them as
-   described in the preceding steps.
-
-.. rubric:: |result|
-
-|prod| is now up to date. All updates are installed.
diff --git a/doc/source/updates/kubernetes/installing-software-updates-before-initial-commissioning.rst b/doc/source/updates/kubernetes/installing-software-updates-before-initial-commissioning.rst
deleted file mode 100644
index d7802d576..000000000
--- a/doc/source/updates/kubernetes/installing-software-updates-before-initial-commissioning.rst
+++ /dev/null
@@ -1,105 +0,0 @@
-
-.. tla1552920677022
-.. _installing-software-updates-before-initial-commissioning:
_installing-software-updates-before-initial-commissioning: - -===================================================== -Install Software Updates Before Initial Commissioning -===================================================== - -This section describes installing software updates before you can commission -|prod-long|. - -.. rubric:: |context| - -This procedure assumes that the software updates to install are available on a -USB flash drive, or from a server reachable by controller-0. - -.. rubric:: |prereq| - -When initially installing the |prod-long| software, it is required that you -install the latest available updates on controller-0 before running Ansible -Bootstrap Playbook, and before installing the software on other hosts. This -ensures that: - -.. _installing-software-updates-before-initial-commissioning-ul-gsq-1ht-vp: - -- The software on controller-0, and all other hosts, is up to date when - the cluster comes alive. - -- You reduce installation time by avoiding updating the system right after an - out-of-date software installation is complete. - -.. rubric:: |proc| - -#. Install software on controller-0. - - Use the |prod-long| bootable ISO image to initialize controller-0. - - This step takes you to the point where you use the console port to log in - to controller-0 as user **sysadmin**. - -#. Populate the storage area. - - Upload the updates from the USB flash drive using the command - :command:`sw-patch upload` or :command:`sw-patch upload-dir` as described - in :ref:`Populating the Storage Area `. - -#. Delete the update files from the root drive. - - After the updates are uploaded to the storage area, the original files are - no longer required. You must delete them to ensure enough disk space to - complete the installation. - - .. caution:: - If the original files are not deleted before the updates are applied, - the installation may fail due to a full disk. - -#. Apply the updates. - - Apply the updates using the command :command:`sw-patch apply --all`. - - The updates are now in the repository, ready to be installed. - -#. Install the updates on the controller. - - .. code-block:: none - - $ sudo sw-patch install-local - Patch installation is complete. - Please reboot before continuing with configuration. - - This command installs all applied updates on controller-0. - -#. Reboot controller-0. - - You must reboot the controller to ensure that it is running with the - software fully updated. - - .. code-block:: none - - $ sudo reboot - -#. Bootstrap system on controller-0. - - #. Configure an IP interface. - - .. note:: - The |prod| software will automatically enable all interfaces and - send out a |DHCP| request, so this may happen automatically if a - |DHCP| Server is present on the network. Otherwise, you must - manually configure an IP interface. - - #. Run the Ansible Bootstrap Playbook. This can be run remotely or locally - on controller-0. - -.. include:: /_includes/installing-software-updates-before-initial-commissioning.rest - -.. rubric:: |result| - -Once all hosts in the cluster are initialized and they are all running fully -updated software. The |prod-long| cluster is up to date. - - -.. xbooklink From step 1 - For details, see :ref:`Install Software on controller-0 - ` for your system. 
\ No newline at end of file diff --git a/doc/source/updates/kubernetes/managing-software-updates.rst b/doc/source/updates/kubernetes/managing-software-updates.rst deleted file mode 100644 index ea887b4c5..000000000 --- a/doc/source/updates/kubernetes/managing-software-updates.rst +++ /dev/null @@ -1,112 +0,0 @@ - -.. kol1552920779041 -.. _managing-software-updates: - -======================= -Manage Software Updates -======================= - -Updates (also known as patches) to the system software become available as -needed to address issues associated with a current |prod-long| software release. -Software updates must be uploaded to the active controller and applied to all -required hosts in the cluster. - -.. note:: - Updating |prod-dc| is distinct from updating other |prod| configurations. - -.. xbooklink For information on updating |prod-dc|, see |distcloud-doc|: :ref:`Update - Management for Distributed Cloud - `. - -The following elements form part of the software update environment: - -**Reboot-Required Software Updates** - Reboot-required updates are typically major updates that require hosts to be - locked during the update process and rebooted to complete the process. - - .. note:: - When a |prod| host is locked and rebooted for updates, the hosted - application pods are re-located to an alternate host in order to - minimize the impact to the hosted application service. - -**In-Service Software Updates** - In-service (reboot-not-required), software updates are updates that do not - require the locking and rebooting of hosts. The required |prod| software is - updated and any required |prod| processes are re-started. Hosted - applications pods and services are completely unaffected. - -**Software Update Commands** - The :command:`sw-patch` command is available on both active controllers. It - must be run as root using :command:`sudo`. It provides the user interface to - process the updates, including querying the state of an update, listing - affected hosts, and applying, installing, and removing updates. - -**Software Update Storage Area** - A central storage area maintained by the update controller. Software updates - are initially uploaded to the storage area and remains there until they are - deleted. - -**Software Update Repository** - A central repository of software updates associated with any updates applied - to the system. This repository is used by all hosts in the cluster to - identify the software updates and rollbacks required on each host. - -**Software Update Logs** - The following logs are used to record software update activity: - - **patching.log** - This records software update agent activity on each host. - - **patching-api.log** - This records user actions that involve software updates, performed - using either the CLI or the REST API. - - **patching-insvc.log** - This records the execution of patch scripts while in-service patches are - applied. - -The overall flow for installing a software update from the command line -interface on a working |prod| cluster is the following: - -.. _managing-software-updates-ol-vgf-yzz-jp: - -#. Consult the |org| support personnel for details on the availability of new - software updates. - -#. Download the software update from the |org| servers to a workstation that - can reach the active controller through the |OAM| network. - -#. Copy the software update to the active controller using the cluster's |OAM| - floating IP address as the destination point. 
-
-   You can use a command such as :command:`scp` to copy the software update.
-   The software update workflows presented in this document assume that this
-   step is complete already, that is, they assume that the software update is
-   already available on the file system of the active controller.
-
-#. Upload the new software update to the storage area.
-
-   This step makes the new software update available within the system, but
-   does not install it to the cluster yet. For all purposes, the software
-   update is dormant.
-
-#. Apply the software update.
-
-   This step adds the updates to the repository, making it visible to all
-   hosts in the cluster.
-
-#. Install the software updates on each of the affected hosts in the cluster.
-   This can be done manually or by using upgrade orchestration. For more
-   information, see :ref:`Update Orchestration Overview
-   `.
-
-Updating software in the system can be done using the Horizon Web interface or
-the command line interface on the active controller. When using Horizon you
-upload the software update directly from your workstation using a file browser
-window provided by the software update upload facility.
-
-A special case occurs during the initial provisioning of a cluster when you
-want to update controller-0 before the system software is configured. This
-can only be done from the command line interface. See :ref:`Install Software
-Updates Before Initial Commissioning
-` for details.
diff --git a/doc/source/updates/kubernetes/manual-host-software-deployment-ee17ec6f71a4.rst b/doc/source/updates/kubernetes/manual-host-software-deployment-ee17ec6f71a4.rst
new file mode 100644
index 000000000..640c33fc7
--- /dev/null
+++ b/doc/source/updates/kubernetes/manual-host-software-deployment-ee17ec6f71a4.rst
@@ -0,0 +1,674 @@
+.. WARNING: Add no lines of text between the label immediately following
+.. and the title.
+
+.. _manual-host-software-deployment-ee17ec6f71a4:
+
+===============================
+Manual Host Software Deployment
+===============================
+
+|prod| software management enables you to upversion the |prod| software to a
+new patch release or a new major release using a manual procedure of
+step-by-step host-by-host commands.
+
+.. rubric:: |context|
+
+This procedure describes upversioning to either a new patch release (in-service
+or reboot-required) or a new major release.
+
+.. note::
+
+   Upversioning can also be performed to a new major release that has been
+   pre-patched with the current patches for that major release. This is
+   packaged and delivered as a pre-patched major release ISO. All statements
+   in this section that are specific to a major release also apply to a
+   pre-patched major release.
+
+This procedure covers all standalone configurations: |AIO-SX|, |AIO-DX| and
+standard configuration.
+
+.. note::
+
+   For a major release software deployment, the following procedure can be
+   aborted and rolled back at any time between the :command:`software deploy start` and
+   :command:`software deploy delete` steps. For the rollback procedure, see
+   :ref:`manual-rollback-host-software-deployment-9295ce1e6e29`.
+
+.. rubric:: |prereq|
+
+- A recent full backup is available. While it is not strictly required for
+  software deployment, it is good practice to have a recent full backup
+  available before making major changes to the system.
+
+- It is highly recommended that there be no active alarms on the system;
+  otherwise, the upgrade will not proceed. A quick check is shown below.
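+
+  For example, you can list any management-affecting alarms before you begin
+  (this is the same :command:`fm alarm-list --mgmt_affecting` check referenced
+  later for orchestration; an empty list means you are clear to proceed):
+
+  .. code-block::
+
+     ~(keystone_admin)]$ fm alarm-list --mgmt_affecting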
+
+- If you are using a private container image registry for
+  installs/updates/upgrades:
+
+  - The private registry must be populated with any new container images
+    required for the new software release.
+
+  - The list of container images required by the new software release can be
+    found on your software distribution site.
+
+- For a new major release software deployment only:
+
+  - All hosts are unlocked and enabled/available.
+
+  - The system should be patch current, that is, all the available patch
+    releases for the current major release should be deployed.
+
+.. rubric:: |proc|
+
+#. For a duplex (dual controller) system, switch the activity from
+   controller-1 such that controller-0 becomes active.
+
+   .. note::
+
+      This step is not required for an |AIO-SX| system.
+
+   .. code-block::
+
+      ~(keystone_admin)]$ system host-swact controller-1
+
+   Wait for the activity to switch to controller-0. This may take up to a
+   minute depending on the hardware.
+
+   Reconnect to the system.
+
+#. Transfer the new software release files to the active controller-0.
+
+   - For a major release, this includes the major release install ISO, the
+     software signature file, and the license file.
+
+   - For a patch release, this includes the patch release `.patch` archive
+     file.
+
+#. For a major release only, install the license file for the release you are
+   upgrading to.
+
+   .. note::
+
+      This step is not required for a patch release deployment.
+
+   .. code-block::
+
+      ~(keystone_admin)]$ system license-install 
+
+#. Upload the new software release into the system.
+
+   #. For a major release:
+
+      .. code-block::
+
+         ~(keystone_admin)]$ software upload [ --local ] .iso .sig
+          is now uploaded
+         +-------------------------------+-------------------+
+         | Uploaded File                 | Release           |
+         +-------------------------------+-------------------+
+         | .iso                          |                   |
+         +-------------------------------+-------------------+
+
+      where ``--local`` can be used when running this command in an |SSH|
+      session on the active controller to optimize performance. With this
+      option, the system will read files directly from the local disk rather
+      than transferring files over REST APIs backing the |CLI|.
+
+      This command may take 5-10 mins depending on the hardware.
+
+   #. For a patch release:
+
+      .. code-block::
+
+         ~(keystone_admin)]$ software upload .patch
+          is now uploaded
+         +-------------------------------+-------------------+
+         | Uploaded File                 | Release           |
+         +-------------------------------+-------------------+
+         | .patch                        |                   |
+         +-------------------------------+-------------------+
+
+   #. Ensure that the new software release was successfully uploaded.
+
+      .. code-block::
+
+         ~(keystone_admin)]$ software list
+         +--------------------------+-------+-----------+
+         | Release                  | RR    | State     |
+         +--------------------------+-------+-----------+
+         | starlingx-10.0.0         | True  | deployed  |
+         |                          | True  | available |
+         +--------------------------+-------+-----------+
+
+#. Run software deployment prechecks and confirm that the system is healthy.
+
+   .. code-block::
+
+      ~(keystone_admin)]$ software deploy precheck [ -f ]
+      System Health:
+      All hosts are provisioned: [OK]
+      All hosts are unlocked/enabled: [OK]
+      All hosts have current configurations: [OK]
+      All hosts are patch current: [OK]
+      Ceph Storage Healthy: [OK]
+      No alarms: [OK]
+      All kubernetes nodes are ready: [OK]
+      All kubernetes control plane pods are ready: [OK]
+      All PodSecurityPolicies are removed: [OK]
+      All kubernetes applications are in a valid state: [OK]
+      Installed license is valid: [OK]
+      Required patches are applied: [OK]
+
+   Resolve any checks that are not OK and re-run the
+   :command:`software deploy precheck` command. Use the ``-f`` option to
+   ignore non-management-affecting alarms.
+
+   .. note::
+
+      The failed prechecks must be cleared before software deployment is
+      allowed to proceed.
+
+#. Start the software deployment procedure.
+
+   .. note::
+
+      The :command:`software deploy start` command will automatically run
+      the prechecks of the previous step if the prechecks have not been run
+      or have not passed.
+
+      By default, the software deployment procedure cannot be started unless
+      the prechecks pass.
+
+   .. note::
+
+      Configuration cannot be changed during the software deployment process.
+
+   .. code-block::
+
+      ~(keystone_admin)]$ software deploy start [ -f ]
+      Deployment for started
+
+   Then, monitor the progress of :command:`software deploy start` using the
+   following commands:
+
+   .. code-block::
+
+      ~(keystone_admin)]$ software deploy show
+      +--------------+------------------+------+---------------+
+      | From Release | To Release       | RR   | State         |
+      +--------------+------------------+------+---------------+
+      | 10.0.0       |                  | True | deploy-start  |
+      +--------------+------------------+------+---------------+
+
+      ~(keystone_admin)]$ software deploy show
+      +--------------+------------------+------+-------------------+
+      | From Release | To Release       | RR   | State             |
+      +--------------+------------------+------+-------------------+
+      | 10.0.0       |                  | True | deploy-start-done |
+      +--------------+------------------+------+-------------------+
+
+   The :command:`software deploy start` command may take 5-10 mins to reach
+   the ``deploy-start-done`` state depending on hardware.
+
+   .. note::
+
+      If :command:`software deploy start` fails, that is, if the state is
+      ``deploy-start-failed``, review ``/var/log/software.log`` on the active
+      controller for failure details, address the issues, and run the
+      :command:`software deploy delete` command to delete the deploy and
+      re-execute the :command:`software deploy start` command.
+
+#. Deploy the new software release to all hosts.
+
+   - For an |AIO-SX| system
+
+     #. Deploy the new software release to controller-0.
+
+        #. Only if the software deployment is ``RR=True`` (reboot required),
+           lock controller-0.
+
+           .. code-block::
+
+              ~(keystone_admin)]$ system host-lock controller-0
+
+        #. Deploy the new software release to controller-0.
+
+           .. code-block::
+
+              ~(keystone_admin)]$ software deploy host controller-0
+              Host installation request sent to controller-0.
+              Host installation was successful on controller-0.
+
+           After this command completes:
+
+           - If ``RR=TRUE``, the host is still running the old software
+             release, however boot parameters have been updated to boot into
+             the new software release on the next host reboot, which will occur
+             in the next step which unlocks the host.
+ + - If ``RR=FALSE``, the host is running the new software release. + + #. Only if the software deployment is ``RR=True``, unlock controller-0. + + .. code-block:: + + ~(keystone_admin)]$ system host-unlock controller-0 + + The host will now reboot into the new software release. Wait for the + host to finish rebooting and become enabled. This may take 3-5 mins + depending on hardware. + + #. Proceed to step :ref:`8 ` (software + deploy activate) of the main procedure. + + - For an |AIO-DX| system or standard system + + #. Deploy the new software release to controller-1 (standby controller). + + #. Only if the software deployment is ``RR=True``, lock controller-1. + + .. code-block:: + + ~(keystone_admin)]$ system host-lock controller-1 + + #. Deploy the new software release to controller-1. + + .. code-block:: + + ~(keystone_admin)]$ software deploy host controller-1 + Host installation request sent to controller-1. + Host installation was successful on controller-1. + + After this command completes: + + - If ``RR=TRUE``, the host is still running the old software + release, however boot parameters have been updated to boot into + the new software release on the next host reboot, which will + occur in the next step which unlocks the host. + + - If ``RR=FALSE``, the host is running the new software release. + + #. Only if the software deployment is ``RR=True``, unlock controller-1. + + .. code-block:: + + ~(keystone_admin)]$ system host-unlock controller-1 + + The host will now reboot into the new software release. Wait for the + host to finish rebooting and become enabled. + + This may take 3-5 mins depending on hardware. + + #. Display state of software deployment. + + .. code-block:: + + ~(keystone_admin)]$ software deploy show + +--------------+------------------+------+-------------+ + | From Release | To Release | RR | State | + +--------------+------------------+------+-------------+ + | 10.0.0 | | True | deploy-host | + +--------------+------------------+------+-------------+ + + .. code-block:: + + ~(keystone_admin)]$ software deploy host-list + +--------------+--------------+-------------------+-------+----------------------+ + | Host | From Release | To Release | RR | State | + +--------------+--------------+-------------------+-------+----------------------+ + | controller-0 | 10.0.0 | | True | deploy-host-pending | + | controller-1 | 10.0.0 | | True | deploy-host-deployed | + | storage-0 | 10.0.0 | | True | deploy-host-pending | + | storage-1 | 10.0.0 | | True | deploy-host-pending | + | worker-0 | 10.0.0 | | True | deploy-host-pending | + | worker-1 | 10.0.0 | | True | deploy-host-pending | + +--------------+--------------+-------------------+-------+----------------------+ + + #. Switch the activity from controller-0 such that controller-1 becomes active. + + .. code-block:: + + ~(keystone_admin)]$ system host-swact controller-0 + + Wait for the activity to switch to controller-1. + + This may take up to a minute depending on hardware. + + Reconnect to system. + + #. Deploy the new software release to controller-0 (now the standby controller). + + #. Only if the software deployment is ``RR=True``, lock controller-0. + + .. code-block:: + + ~(keystone_admin)]$ system host-lock controller-0 + + #. Deploy the new software release to controller-0. + + .. code-block:: + + ~(keystone_admin)]$ software deploy host controller-0 + Host installation request sent to controller-0. + Host installation was successful on controller-0. 
+ + After this command completes: + + - If ``RR=TRUE``, the host is still running the old software + release, however boot parameters have been updated to boot into + the new software release on the next host reboot, which will occur + in the next step which unlocks the host. + + - If ``RR=FALSE``, the host is running the new software release. + + #. Only if the software deployment is ``RR=True``, unlock controller-0. + + .. code-block:: + + ~(keystone_admin)]$ system host-unlock controller-0 + + The host will now reboot into the new software release. Wait for the + host to finish rebooting and become enabled. + + This may take 3-5 mins depending on hardware. + + #. Display state of software deployment. + + .. code-block:: + + ~(keystone_admin)]$ software deploy show + +--------------+------------------+------+-------------+ + | From Release | To Release | RR | State | + +--------------+------------------+------+-------------+ + | 10.0.0 | | True | deploy-host | + +--------------+------------------+------+-------------+ + + .. code-block:: + + ~(keystone_admin)]$ software deploy host-list + +--------------+--------------+-------------------+-------+----------------------+ + | Host | From Release | To Release | RR | State | + +--------------+--------------+-------------------+-------+----------------------+ + | controller-0 | 10.0.0 | | True | deploy-host-deployed | + | controller-1 | 10.0.0 | | True | deploy-host-deployed | + | storage-0 | 10.0.0 | | True | deploy-host-pending | + | storage-1 | 10.0.0 | | True | deploy-host-pending | + | worker-0 | 10.0.0 | | True | deploy-host-pending | + | worker-1 | 10.0.0 | | True | deploy-host-pending | + +--------------+--------------+-------------------+-------+----------------------+ + + #. Check the system health to ensure that there are no unexpected alarms. + + .. code-block:: + + ~(keystone_admin)]$ fm alarm-list + + Clear all the alarms unrelated to the upgrade process. + + #. If storage hosts are present, deploy the new software release to the + storage hosts one at a time. + + #. Deploy the new software release to storage-0. + + #. Only if the software deployment is ``RR=True``, lock storage-0. + + .. code-block:: + + ~(keystone_admin)]$ system host-lock storage-0 + + #. Deploy the new software release to storage-0. + + .. code-block:: + + ~(keystone_admin)]$ software deploy host storage-0 + Host installation request sent to storage-0. + Host installation was successful on storage-0. + + After this command completes: + + - If ``RR=TRUE``, the host is still running the old software + release, however boot parameters have been updated to boot into + the new software release on the next host reboot, which will + occur in the next step which unlocks the host. + + - If ``RR=FALSE``, the host is running the new software release. + + #. Only if the software deployment is ``RR=True``, unlock storage-0. + + .. code-block:: + + ~(keystone_admin)]$ system host-unlock storage-0 + + The host will now reboot into the new software release. Wait for + the host to finish rebooting and become enabled. Wait for all the + alarms to clear after the unlock before proceeding to the next + storage host. + + This may take 3-5 mins depending on hardware. + + #. Display state of software deployment. + + .. 
code-block:: + + ~(keystone_admin)]$ software deploy show + +--------------+------------------+------+--------------+ + | From Release | To Release | RR | State | + +--------------+------------------+------+--------------+ + | 10.0.0 | | True | deploy-host | + +--------------+------------------+------+--------------+ + + .. code-block:: + + ~(keystone_admin)]$ software deploy host-list + +--------------+--------------+-------------------+-------+----------------------+ + | Host | From Release | To Release | RR | State | + +--------------+--------------+-------------------+-------+----------------------+ + | controller-0 | 10.0.0 | | True | deploy-host-deployed | + | controller-1 | 10.0.0 | | True | deploy-host-deployed | + | storage-0 | 10.0.0 | | True | deploy-host-deployed | + | storage-1 | 10.0.0 | | True | deploy-host-pending | + | worker-0 | 10.0.0 | | True | deploy-host-pending | + | worker-1 | 10.0.0 | | True | deploy-host-pending | + +--------------+--------------+-------------------+-------+----------------------+ + + #. Repeat the above steps for each storage host. + + .. note:: + + After upgrading the first storage host, you can expect + alarm 800.003. The alarm is cleared after all the storage hosts are + upgraded. + + #. If worker hosts are present, deploy the new software release to worker + hosts one at a time. + + #. Deploy the new software release to worker-0. + + #. Only if the software deployment is ``RR=True``, lock worker-0. + + .. code-block:: + + ~(keystone_admin)]$ system host-lock worker-0 + + #. Deploy the new software release to worker-0. + + .. code-block:: + + ~(keystone_admin)]$ software deploy host worker-0 + Host installation request sent to worker-0. + Host installation was successful on worker-0. + + After this command completes: + + - If ``RR=TRUE``, the host is still running the old software + release, however boot parameters have been updated to boot into + the new software release on the next host reboot, which will + occur in the next step which unlocks the host. + + - If ``RR=FALSE``, the host is running the new software release. + + #. Only if the software deployment is ``RR=True``, unlock worker-0. + + .. code-block:: + + ~(keystone_admin)]$ system host-unlock worker-0 + + The host will now reboot into the new software release. Wait for + the host to finish rebooting and become enabled. Wait for all the + alarms to clear after the unlock before proceeding to the next + worker host. + + This may take 3-5 mins depending on hardware. + + #. Display state of software deployment. + + .. code-block:: + + ~(keystone_admin)]$ software deploy show + +--------------+------------------+------+--------------+ + | From Release | To Release | RR | State | + +--------------+------------------+------+--------------+ + | 10.0.0 | | True | deploy-host | + +--------------+------------------+------+--------------+ + + .. 
code-block:: + + ~(keystone_admin)]$ software deploy host-list + +--------------+--------------+-------------------+-------+----------------------+ + | Host | From Release | To Release | RR | State | + +--------------+--------------+-------------------+-------+----------------------+ + | controller-0 | 10.0.0 | | True | deploy-host-deployed | + | controller-1 | 10.0.0 | | True | deploy-host-deployed | + | storage-0 | 10.0.0 | | True | deploy-host-deployed | + | storage-1 | 10.0.0 | | True | deploy-host-deployed | + | worker-0 | 10.0.0 | | True | deploy-host-deployed | + | worker-1 | 10.0.0 | | True | deploy-host-pending | + +--------------+--------------+-------------------+-------+----------------------+ + + #. Repeat the above steps for each worker host. + + #. Switch the activity from controller-1 such that controller-0 becomes active. + + .. code-block:: + + ~(keystone_admin)]$ system host-swact controller-1 + + Wait for the activity to switch to controller-0. + + This may take up to a minute depending on hardware. + + Reconnect to system. + +#. Activate the software deployment. + + .. _manual-host-software-deployment-ee17ec6f71a4-step: + + .. code-block:: + + ~(keystone_admin)]$ software deploy activate + Deploy activate has started + + When running the :command:`software deploy activate` command, new configurations are + applied to the controller. The 250.001 (Configuration is out-of-date) + alarms are raised and are cleared as the configurations are applied. + + The software deployment state goes from ``deploy-activate`` to + ``deploy-activate-done`` once deployment is activated. For a major release + software deployment, this may take up to 15-30 mins to complete depending on + system configuration and hardware. + + .. code-block:: + + ~(keystone_admin)]$ software deploy show + +--------------+------------------+------+---------------------+ + | From Release | To Release | RR | State | + +--------------+------------------+------+---------------------+ + | 10.0.0 | | True | deploy-activate-done| + +--------------+------------------+------+---------------------+ + + .. note:: + + If :command:`software deploy activate` fails, that is, if the state is + ``deploy-activate-failed``, review ``/var/log/software.log`` on the active + controller for failure details, address the issues, and re-execute the + :command:`software deploy activate` command. + +#. Complete the software deployment. + + .. code-block:: + + ~(keystone_admin)]$ software deploy complete + Deployment has been completed + + .. code-block:: + + ~(keystone_admin)]$ software deploy show + +--------------+------------------+------+-------------------+ + | From Release | To Release | RR | State | + +--------------+------------------+------+-------------------+ + | 10.0.0 | | True | deploy-completed | + +--------------+------------------+------+-------------------+ + + .. note:: + + After this command is executed, you can run the Kubernetes version + upgrade procedure, if desired to upversion to new Kubernetes versions + available in the new software release. + +#. Delete the software deployment. + + .. note:: + + If it is a system controller, the deployment should not be deleted + until the subclouds are up-to-date. + + .. note:: + + For a major release deployment, after this command is executed, the + major release software deployment cannot be rolled back. + + .. code-block:: + + ~(keystone_admin)]$ software deploy delete + Deployment has been deleted + + .. 
code-block:: + + ~(keystone_admin)]$ software deploy show + No deploy in progress + +.. note:: + + After the deploy delete, if there are previous release entries in the + unavailable state, the alarm 900.024 ``Obsolete release in system`` is + raised. + +#. Delete the old major release. + + In the case of software deployment of a new major release, you should remove + the old major release to reclaim disk space. + + .. code-block:: + + ~(keystone_admin)]$ software list + +--------------------------+-------+-------------+ + | Release | RR | State | + +--------------------------+-------+-------------+ + | starlingx-10.0.0 | True | unavailable | + | | True | deployed | + +--------------------------+-------+-------------+ + + .. code-block:: + + ~(keystone_admin)]$ software delete starlingx-10.0.0 + starlingx-10.0.0 has been deleted. + + ~(keystone_admin)]$ software list + +--------------------------+-------+-------------+ + | Release | RR | State | + +--------------------------+-------+-------------+ + | | True | deployed | + +--------------------------+-------+-------------+ + diff --git a/doc/source/updates/kubernetes/manual-removal-host-software-deployment-24f47e80e518.rst b/doc/source/updates/kubernetes/manual-removal-host-software-deployment-24f47e80e518.rst new file mode 100644 index 000000000..6955b6b85 --- /dev/null +++ b/doc/source/updates/kubernetes/manual-removal-host-software-deployment-24f47e80e518.rst @@ -0,0 +1,86 @@ +.. WARNING: Add no lines of text between the label immediately following +.. and the title. + +.. _manual-removal-host-software-deployment-24f47e80e518: + +======================================= +Manual Removal Host Software Deployment +======================================= + +A fully deployed patch release can be removed (un-deployed) by using the +the :ref:`manual-host-software-deployment-ee17ec6f71a4` procedure and deploying a +previous patch release or the major release. + +.. note:: + + A fully deployed major release cannot be removed (un-deployed). + +For example, the following shows the current software releases deployed: + +.. code-block:: + + ~(keystone_admin)]$ software list + +--------------------------+-------+-----------+ + | Release | RR | State | + +--------------------------+-------+-----------+ + | starlingx-10.0.0 | True | deployed | + | starlingx-10.0.1 | False | deployed | + | starlingx-10.0.2 | True | deployed | + | starlingx-10.0.3 | False | deployed | + +--------------------------+-------+-----------+ + +To remove patch releases `starlingx-10.0.3` and `starlingx-10.0.2`, follow the +:ref:`manual-host-software-deployment-ee17ec6f71a4` procedure, and deploy (or +go back to) the `starlingx-10.0.1` software release in the software deployment +start step. + +.. code-block:: + + ~(keystone_admin)]$ software deploy start starlingx-10.0.1 + Deployment for starlingx-10.0.1 started + +.. code-block:: + + ~(keystone_admin)]$ software deploy show + +--------------+------------------+------+-------+ + | From Release | To Release | RR | State | + +--------------+------------------+------+-------+ + | 10.0.3 | 10.0.1 | True | start | + +--------------+------------------+------+-------+ + +On the completion of the :ref:`manual-host-software-deployment-ee17ec6f71a4` +procedure, run the following command: + +.. 
code-block:: + + ~(keystone_admin)]$ software list + +--------------------------+-------+-----------+ + | Release | RR | State | + +--------------------------+-------+-----------+ + | starlingx-10.0.0 | True | deployed | + | starlingx-10.0.1 | False | deployed | + | starlingx-10.0.2 | True | available | + | starlingx-10.0.3 | False | available | + +--------------------------+-------+-----------+ + +If the removed patches are no longer required on the system, you can delete them. + +.. code-block:: + + ~(keystone_admin)]$ software delete starlingx-10.0.3 + starlingx-10.0.3 has been deleted. + +.. code-block:: + + ~(keystone_admin)]$ software delete starlingx-10.0.2 + starlingx-10.0.2 has been deleted. + +.. code-block:: + + ~(keystone_admin)]$ software list + +--------------------------+-------+-----------+ + | Release | RR | State | + +--------------------------+-------+-----------+ + | starlingx-10.0.0 | True | deployed | + | starlingx-10.0.1 | False | deployed | + +--------------------------+-------+-----------+ diff --git a/doc/source/updates/kubernetes/manual-rollback-host-software-deployment-9295ce1e6e29.rst b/doc/source/updates/kubernetes/manual-rollback-host-software-deployment-9295ce1e6e29.rst new file mode 100644 index 000000000..6d829fccc --- /dev/null +++ b/doc/source/updates/kubernetes/manual-rollback-host-software-deployment-9295ce1e6e29.rst @@ -0,0 +1,460 @@ +.. WARNING: Add no lines of text between the label immediately following +.. and the title. + +.. _manual-rollback-host-software-deployment-9295ce1e6e29: + +======================================== +Manual Rollback Host Software Deployment +======================================== + +For a major release software deployment, you can roll back the +:ref:`manual-host-software-deployment-ee17ec6f71a4` procedure at any time +between :command:`software deploy start` and :command:`software deploy delete`. +After the software deploy deletion step, aborting and rolling back of the major +release deployment is not possible. + +.. note:: + This section also covers the abort and rollback of a new patched major + release deployment. + +.. rubric:: |prereq| + +You are in the middle of the +:ref:`manual-host-software-deployment-ee17ec6f71a4` procedure for a major +release between :command:`software deploy start` and :command:`software deploy +delete`. + +.. rubric:: |proc| + +#. Abort the current in-progress major release software deployment. + + .. code-block:: + + ~(keystone_admin)]$ software deploy abort + Deployment has been aborted + +#. If the current deploy state is ``deploy-activate-rollback-pending``, then roll back the + activate of the aborted deployment, otherwise proceed to :ref:`3 `. + + .. code-block:: + + ~(keystone_admin)]$ software deploy show + +------------------+------------+------+----------------------------------+ + | From Release | To Release | RR | State | + +------------------+------------+------+----------------------------------+ + | | 10.0.0 | True | deploy-activate-rollback-pending | + +------------------+------------+------+----------------------------------+ + + .. code-block:: + + ~(keystone_admin)]$ software deploy activate-rollback + Deploy activate-rollback has started + + When running the :command:`software deploy activate-rollback` command, previous + configurations are applied to the controller. + + Alarm 250.001 (Configuration is out-of-date) is raised and cleared as the + configurations are applied. + + The software deployment state goes from ``activate-rollback-done`` to ``host-rollback``. 
+ + This may take up to 30 mins to complete depending on system configuration + and hardware. + + .. code-block:: + + ~(keystone_admin)]$ software deploy show + +------------------+------------+------+---------------+ + | From Release | To Release | RR | State | + +------------------+------------+------+---------------+ + | | 10.0.0 | True | host-rollback | + +------------------+------------+------+---------------+ + + .. note:: + + If :command:`software deploy activate-rollback` fails, that is, if the state is + ``activate-rollback-failed``, review ``/var/log/software.log`` on the + active controller for failure details, address the issues, and + re-execute the :command:`software deploy activate-rollback` command. + +#. If the current deploy state is ``host-rollback``, then roll back the + deployment of all the hosts. + + .. _manual-rollback-host-software-deployment-9295ce1e6e29-step: + + .. code-block:: + + ~(keystone_admin)]$ software deploy show + +------------------+------------+------+-------------------+ + | From Release | To Release | RR | State | + +------------------+------------+------+-------------------+ + | | 10.0.0 | True | host-rollback | + +------------------+------------+------+-------------------+ + + If the state is ``host-rollback``, then proceed with the rest of this step, + otherwise proceed to :ref:`4 `. + + - For an |AIO-SX| system + + #. Roll back the software release on controller-0. + + #. Lock controller-0. + + .. code-block:: + + ~(keystone_admin)]$ system host-lock controller-0 + + #. Roll back the software release on controller-0. + + .. code-block:: + + ~(keystone_admin)]$ software deploy host-rollback controller-0 + Host installation request sent to controller-0. + Host installation was successful on controller-0 + + The host is still running the new software release, however boot + parameters have been updated to boot into the previous software + release on the next host reboot, which will occur in the next step + which unlocks the host. + + #. Unlock controller-0. + + .. code-block:: + + ~(keystone_admin)]$ system host-unlock controller-0 + + The host will now reboot into the previous software release. Wait for + the host to finish rebooting and become available. + + This may take 3-5 mins depending on hardware. + + #. Proceed to step :ref:`4 + ` of + the main procedure. + + - For an |AIO-DX| system or standard system + + #. If worker hosts are present, and one or more are in the ``pending-rollback`` + state, then roll back the software release on all worker hosts in the + ``pending-rollback`` state one at a time. Otherwise, proceed to step :ref:`b `. + + .. 
code-block:: + + ~(keystone_admin)]$ software deploy host-list + +--------------+------------------+------------+-------+------------------------------+ + | Host | From Release | To Release | RR | State | + +--------------+------------------+------------+-------+------------------------------+ + | controller-0 | | 10.0.0 | True | deploy-host-rollback-pending | + | controller-1 | | 10.0.0 | True | deploy-host-rollback-pending | + | storage-0 | | 10.0.0 | True | deploy-host-rollback-pending | + | storage-1 | | 10.0.0 | True | deploy-host-rollback-pending | + | storage-2 | | 10.0.0 | True | deploy-host-rollback-pending | + | storage-3 | | 10.0.0 | True | deploy-host-rollback-pending | + | worker-0 | | 10.0.0 | True | deploy-host-rollback-pending | + | worker-1 | | 10.0.0 | True | deploy-host-rollback-pending | + | worker-2 | | 10.0.0 | True | deploy-host-rollback-pending | + | worker-3 | | 10.0.0 | True | deploy-host-rollback-deployed| + +--------------+------------------+------------+-------+------------------------------+ + + #. Roll back the software release on worker-0. + + #. Lock worker-0. + + .. code-block:: + + ~(keystone_admin)]$ system host-lock worker-0 + + #. Roll back the software release on worker-0. + + .. code-block:: + + ~(keystone_admin)]$ software deploy host-rollback worker-0 + Host installation request sent to worker-0 + Host installation was successful on worker-0. + + The host is still running the new software release, however boot parameters + have been updated to boot into the previous software release on the next + host reboot, which will occur in the next step which unlocks the host. + + #. Unlock worker-0. + + .. code-block:: + + ~(keystone_admin)]$ system host-unlock worker-0 + + The host will now reboot into the previous software release. Wait + for the host to finish rebooting and become available. Wait + for all the alarms to clear after the unlock before proceeding to the + next worker host. + + This may take 3-5 mins depending on hardware. + + #. Display the state of software deployment. + + .. code-block:: + + ~(keystone_admin)]$ software deploy host-list + +--------------+------------------+------------+-------+------------------+ + | Host | From Release | To Release | RR | State | + +--------------+------------------+------------+-------+------------------+ + | controller-0 | | 10.0.0 | True | pending-rollback | + | controller-1 | | 10.0.0 | True | pending-rollback | + | storage-0 | | 10.0.0 | True | pending-rollback | + | storage-1 | | 10.0.0 | True | pending-rollback | + | storage-2 | | 10.0.0 | True | pending-rollback | + | storage-3 | | 10.0.0 | True | pending-rollback | + | worker-0 | | 10.0.0 | True | rolled back | + | worker-1 | | 10.0.0 | True | pending-rollback | + | worker-2 | | 10.0.0 | True | rolled back | + | worker-3 | | 10.0.0 | True | rolled back | + +--------------+------------------+------------+-------+------------------+ + + #. Repeat the above steps for any remaining worker hosts in the ``pending-rollback`` state. + + #. If storage hosts are present, and one or more are in the ``pending-rollback`` state, + then roll back the software release on all storage hosts in the ``pending-rollback`` state, + one at a time. Otherwise, proceed to step :ref:`c `. + + .. _manual-rollback-host-software-deployment-9295ce1e6e29-storagehost: + + .. 
code-block:: + + ~(keystone_admin)]$ software deploy host-list + +--------------+------------------+------------+-------+------------------+ + | Host | From Release | To Release | RR | State | + +--------------+------------------+------------+-------+------------------+ + | controller-0 | | 10.0.0 | True | pending-rollback | + | controller-1 | | 10.0.0 | True | pending-rollback | + | storage-0 | | 10.0.0 | True | pending-rollback | + | storage-1 | | 10.0.0 | True | pending-rollback | + | storage-2 | | 10.0.0 | True | pending-rollback | + | storage-3 | | 10.0.0 | True | pending-rollback | + | worker-0 | | 10.0.0 | True | rolled back | + | worker-1 | | 10.0.0 | True | rolled back | + | worker-2 | | 10.0.0 | True | rolled back | + | worker-3 | | 10.0.0 | True | rolled back | + +--------------+------------------+------------+-------+------------------+ + + #. Roll back the software release on storage-0. + + #. Lock storage-0. + + .. code-block:: + + ~(keystone_admin)]$ system host-lock storage-0 + + #. Roll back the software release on storage-0. + + .. code-block:: + + ~(keystone_admin)]$ software deploy host-rollback storage-0 + Host installation request sent to storage-0 + Host installation was successful on storage-0. + + The host is still running the new software release, + however boot parameters have been updated to boot into + the previous software release on the next host reboot, which + will occur in the next step which unlocks the host. + + #. Unlock storage-0. + + .. code-block:: + + ~(keystone_admin)]$ system host-unlock storage-0 + + The host will now reboot into the previous software release. Wait + for the host to finish rebooting and become available. Wait for + all the alarms to clear after the unlock before proceeding to the next + storage host. + + This may take 3-5 mins depending on hardware. + + #. Display the state of software deployment. + + .. code-block:: + + ~(keystone_admin)]$ software deploy host-list + +--------------+------------------+------------+-------+------------------+ + | Host | From Release | To Release | RR | State | + +--------------+------------------+------------+-------+------------------+ + | controller-0 | | 10.0.0 | True | pending-rollback | + | controller-1 | | 10.0.0 | True | pending-rollback | + | storage-0 | | 10.0.0 | True | rolled back | + | storage-1 | | 10.0.0 | True | pending-rollback | + | storage-2 | | 10.0.0 | True | pending-rollback | + | storage-3 | | 10.0.0 | True | pending-rollback | + | worker-0 | | 10.0.0 | True | rolled back | + | worker-1 | | 10.0.0 | True | rolled back | + | worker-2 | | 10.0.0 | True | rolled back | + | worker-3 | | 10.0.0 | True | rolled back | + +--------------+------------------+------------+-------+------------------+ + + #. Repeat the above steps for any remaining storage hosts in the + ``pending-rollback`` state. + + .. note:: + + After rolling back the first storage host, you can expect alarm + 800.003. The alarm is cleared after all the storage hosts are rolled + back. + + #. If both the controllers are in the ``pending-rollback`` state, then roll back + controller-0 first. + + .. _manual-rollback-host-software-deployment-9295ce1e6e29-bothcontrollers: + + #. Ensure that controller-1 is active by switching activity from + controller-0. + + .. code-block:: + + ~(keystone_admin)]$ system host-swact controller-0 + + Wait for the activity to switch to controller-1. This may take up to a + minute depending on hardware. Reconnect to the system. + + #. 
Roll back the software release on controller-0 (the standby controller).
+
+         #. Lock controller-0.
+
+            .. code-block::
+
+               ~(keystone_admin)]$ system host-lock controller-0
+
+         #. Roll back the software release on controller-0.
+
+            .. code-block::
+
+               ~(keystone_admin)]$ software deploy host-rollback controller-0
+               Host installation request sent to controller-0.
+               Host installation was successful on controller-0.
+
+            The host is still running the new software release,
+            however boot parameters have been updated to boot into
+            the previous software release on the next host reboot, which
+            will occur in the next step which unlocks the host.
+
+         #. Unlock controller-0.
+
+            .. code-block::
+
+               ~(keystone_admin)]$ system host-unlock controller-0
+
+            The host will now reboot into the previous software release. Wait
+            for the host to finish rebooting and become available.
+
+            This may take 3-5 mins depending on hardware.
+
+      #. Display the state of software deployment.
+
+         .. code-block::
+
+            ~(keystone_admin)]$ software deploy host-list
+            +--------------+------------------+------------+-------+------------------+
+            | Host         | From Release     | To Release | RR    | State            |
+            +--------------+------------------+------------+-------+------------------+
+            | controller-0 |                  | 10.0.0     | True  | rolled back      |
+            | controller-1 |                  | 10.0.0     | True  | pending-rollback |
+            | storage-0    |                  | 10.0.0     | True  | rolled back      |
+            | storage-1    |                  | 10.0.0     | True  | rolled back      |
+            | storage-2    |                  | 10.0.0     | True  | rolled back      |
+            | storage-3    |                  | 10.0.0     | True  | rolled back      |
+            | worker-0    |                  | 10.0.0     | True  | rolled back      |
+            | worker-1    |                  | 10.0.0     | True  | rolled back      |
+            | worker-2    |                  | 10.0.0     | True  | rolled back      |
+            | worker-3    |                  | 10.0.0     | True  | rolled back      |
+            +--------------+------------------+------------+-------+------------------+
+
+   #. If only controller-1 is in the ``pending-rollback`` state, then roll
+      back controller-1.
+
+      #. Ensure that controller-0 is active by switching activity from
+         controller-1.
+
+         .. code-block::
+
+            ~(keystone_admin)]$ system host-swact controller-1
+
+         Wait for the activity to switch to controller-0.
+
+         This may take up to a minute depending on hardware.
+
+         Reconnect to the system.
+
+      #. Roll back the software release on controller-1 (the standby controller).
+
+         #. Lock controller-1.
+
+            .. code-block::
+
+               ~(keystone_admin)]$ system host-lock controller-1
+
+         #. Roll back the software release on controller-1.
+
+            .. code-block::
+
+               ~(keystone_admin)]$ software deploy host-rollback controller-1
+               Host installation request sent to controller-1.
+               Host installation was successful on controller-1.
+
+            The host is still running the new software release, however boot
+            parameters have been updated to boot into the previous software
+            release on the next host reboot, which will occur in the next step
+            which unlocks the host.
+
+         #. Unlock controller-1.
+
+            .. code-block::
+
+               ~(keystone_admin)]$ system host-unlock controller-1
+
+            The host will now reboot into the previous software release. Wait
+            for the host to finish rebooting and become available.
+
+            This may take 3-5 mins depending on hardware.
+
+      #. Display the state of software deployment.
+
+         .. 
code-block:: + + ~(keystone_admin)]$ software deploy host-list + +--------------+------------------+------------+-------+------------------+ + | Host | From Release | To Release | RR | State | + +--------------+------------------+------------+-------+------------------+ + | controller-0 | | 10.0.0 | True | rolled back | + | controller-1 | | 10.0.0 | True | rolled back | + | storage-0 | | 10.0.0 | True | rolled back | + | storage-1 | | 10.0.0 | True | rolled back | + | storage-2 | | 10.0.0 | True | rolled back | + | storage-3 | | 10.0.0 | True | rolled back | + | worker-0 | | 10.0.0 | True | rolled back | + | worker-1 | | 10.0.0 | True | rolled back | + | worker-2 | | 10.0.0 | True | rolled back | + | worker-3 | | 10.0.0 | True | rolled back | + +--------------+------------------+------------+-------+------------------+ + +#. Delete the software deployment to complete the rollback. + + .. _manual-rollback-host-software-deployment-9295ce1e6e29-deletestep: + + .. code-block:: + + ~(keystone_admin)]$ software deploy delete + Deployment has been deleted + + .. code-block:: + + ~(keystone_admin)]$ software deploy show + No deploy in progress + +#. Confirm that the previous software release is now deployed. + + .. code-block:: + + ~(keystone_admin)]$ software list + +--------------------------+-------+-----------+ + | Release | RR | State | + +--------------------------+-------+-----------+ + | starlingx-10.0.0 | True | deployed | + | | True | available | + +--------------------------+-------+-----------+ diff --git a/doc/source/updates/kubernetes/manual-upgrade-overview.rst b/doc/source/updates/kubernetes/manual-upgrade-overview.rst deleted file mode 100644 index e7603cb51..000000000 --- a/doc/source/updates/kubernetes/manual-upgrade-overview.rst +++ /dev/null @@ -1,47 +0,0 @@ - -.. mzg1592854560344 -.. _manual-upgrade-overview: - -======================= -Manual Upgrade Overview -======================= - -|prod-long| enables you to upgrade the software across your Simplex, Duplex, -Standard, |prod-dc|, and subcloud deployments. - -.. note:: - Upgrading |prod-dc| is distinct from upgrading other |prod| configurations. - -.. xbooklink For information on updating |prod-dc|, see |distcloud-doc|: :ref:`Upgrade - Management `. - -An upgrade can be performed manually or using the Upgrade Orchestrator, which -automates a rolling install of an update across all of the |prod-long| hosts. -This section describes the manual upgrade procedures. - -.. xbooklink For the orchestrated - procedure, see |distcloud-doc|: :ref:`Orchestration Upgrade Overview - `. - -Before starting the upgrade process, ensure that the following conditions are -met: - -- The system is patch current. - -- There are no management-affecting alarms and the :command:`system - health-query-upgrade` check passes. - -- The new software load has been imported. - -- A valid license file has been installed. - -The upgrade procedure is different for the All-in-One Simplex configuration -versus the All-in-One Duplex, and Standard configurations. For more -information, see: - -.. 
_manual-upgrade-overview-ul-bcp-ght-cmb: - -- :ref:`Upgrading All-in-One Simplex ` - -- :ref:`Upgrading All-in-One Duplex / Standard ` - diff --git a/doc/source/updates/kubernetes/orchestrated-deployment-host-software-deployment-d234754c7d20.rst b/doc/source/updates/kubernetes/orchestrated-deployment-host-software-deployment-d234754c7d20.rst new file mode 100644 index 000000000..07c10b3db --- /dev/null +++ b/doc/source/updates/kubernetes/orchestrated-deployment-host-software-deployment-d234754c7d20.rst @@ -0,0 +1,670 @@ +.. WARNING: Add no lines of text between the label immediately following +.. and the title. + +.. _orchestrated-deployment-host-software-deployment-d234754c7d20: + +================================================ +Orchestrated Deployment Host Software Deployment +================================================ + +Software deployment orchestration automates the process of upversioning the +|prod| software to a new major release or new patch release (In-Service or +Reboot Required (RR)). It automates the execution of all :command:`software deploy` +steps across all the hosts in a cluster, based on the configured policies. + +.. note:: + + Software deployment orchestration also covers the orchestrated upversioning + to a new patched major release, that is, all the comments in this section + that are specific to major release also apply to a patched major release. + +Software deployment Orchestration supports all standalone configurations: +|AIO-SX|, |AIO-DX| and standard configuration. + +.. note:: + + Orchestrating the software deployment of a |DC| system is different from + orchestrating the software deployment of standalone |prod| configurations. + +Software deployment orchestration automatically iterates through all the hosts +and deploys the new software load on each host: first the controller hosts, +then the storage hosts, and lastly the worker hosts, and finally activates and +completes the software deployment. During software deployment on a worker host +(and duplex |AIO| controllers), pods or |VMs| are automatically moved to the +alternate worker hosts. After software deployment orchestration has deployed +the new software on all hosts, it will activate, complete, and delete the new +software deployment. + +.. note:: + + Software deployment orchestration completes and deletes the new software + deployment only when the ``-delete`` option is selected by the user during + create strategy. In case of a Major Release, if the software deployment is + deleted, it can no longer be rolled back. + +To perform a software deployment orchestration, first create an upgrade +orchestration strategy for the automated software deployment procedure. This +provides polices to perform the software deployment orchestration using the following +parameters: + +- The host types to be software deployed. + +- Whether to deploy the software to hosts serially or in parallel. + + - The maximum number of hosts to deploy in parallel. + +- Maintenance action (stop-start or migrate) for hosted OpenStack |VMs| + on a host that is about to have its software updated. + +- Alarm restrictions, that is, options to specify how the orchestration behaves + when alarms occur. + +Based on these parameters and the state of the hosts, software deployment +orchestration creates a number of stages for the overall software deployment +strategy. Each stage generally consists of deploying software on hosts for a +subset of the hosts on the system. 
In the case of a reboot required (RR) +software release, each stage consists of moving pods or |VMs|, locking hosts, +deploying software on hosts, and unlocking hosts for a subset of the hosts on +the system. After creating the software deployment orchestration strategy, you +can either apply the entire strategy automatically or apply individual stages +to control and monitor their progress manually. + +.. rubric:: |prereq| + +- No other orchestration strategy exists. Firmware-upgrade, + kubernetes-version-upgrade, system-config-update-strategy, and + kube-rootca-update are other types of orchestration. A software deployment + cannot be orchestrated while another orchestration is in progress. + +- You have the administrator role privileges. + +- The system is clear of alarms except the software deployment in progress alarm. + +- All the hosts are unlocked, enabled, and available. + +- For Duplex systems, the system should be fully redundant. There should be two controller + nodes available, at least one complete storage replication group available + for systems with Ceph backend. + +- Sufficient free capacity or unused worker resources must be available + across the cluster. A rough calculation is: + + ``Required spare capacity ( %) = ( / ) * 100`` + +- For a major release deployment, the license for the new release has been installed using + :command:`system license-install `. + +- The software release to be deployed has been uploaded. + + - For a major release: + + .. code-block:: + + ~(keystone_admin)]$ software upload [ --local ] .iso + .sig is now uploaded + +-------------------------------+-------------------+ + | Uploaded File | Release | + +-------------------------------+-------------------+ + | .iso | | + +-------------------------------+-------------------+ + + This command may take 5-10 mins depending on hardware. + + where `--local` can be used when running this command in an |SSH| session + on the active controller to optimize performance. With this option, the + system will read files directly from the local disk rather than + transferring files over REST APIs backing the |CLI|. + + - For a patch release: + + .. code-block:: + + ~(keystone_admin)]$ software upload .patch + is now uploaded + +-------------------------------+-------------------+ + | Uploaded File | Release | + +-------------------------------+-------------------+ + | .patch | | + +-------------------------------+-------------------+ + + - Ensure that the new software release was successfully uploaded. + + .. code-block:: + + ~(keystone_admin)]$ software list + +--------------------------+-------+-----------+ + | Release | RR | State | + +--------------------------+-------+-----------+ + | starlingx-10.0.0 | True | deployed | + | | True | available | + +--------------------------+-------+-----------+ + +.. rubric:: |proc| + +#. Create a software deployment orchestration strategy for a specified software + release with desired policies. + + .. _orchestrated-deployment-host-software-deployment-d234754c7d20-step: + + .. 
+.. rubric:: |proc|
+
+#. Create a software deployment orchestration strategy for a specified
+   software release with the desired policies.
+
+   .. _orchestrated-deployment-host-software-deployment-d234754c7d20-step:
+
+   .. code-block::
+
+       ~(keystone_admin)]$ sw-manager sw-deploy-strategy create [--controller-apply-type {serial,ignore}]
+                           [--storage-apply-type {serial,parallel,ignore}]
+                           [--worker-apply-type {serial,parallel,ignore}]
+                           [--max-parallel-worker-hosts {2,3,4,5,6,7,8,9,10}]
+                           [--instance-action {stop-start,migrate}]
+                           [--alarm-restrictions {strict,relaxed}]
+                           [--delete]
+
+       strategy-uuid: 5435e049-7002-4403-acfb-7886f6da14af
+       release-id:
+       controller-apply-type: serial
+       storage-apply-type: serial
+       worker-apply-type: serial
+       default-instance-action: stop-start
+       alarm-restrictions: strict
+       current-phase: build
+       current-phase-completion: 0%
+       state: building
+       inprogress: true
+
+   where,
+
+   ````
+       Specifies the specific software release to deploy. This can be a patch
+       release or a major release.
+
+   ``[--controller-apply-type {serial,ignore}]``
+       (Optional) Specifies whether software should be deployed to controller
+       hosts in serial or ignored. The default is serial. ``ignore`` should
+       be used only when re-creating and applying a strategy after an abort
+       or failure.
+
+   ``[--storage-apply-type {serial,parallel,ignore}]``
+       (Optional) Specifies whether software should be deployed to storage
+       hosts in serial, in parallel, or ignored. The default is serial.
+       Software is deployed to storage hosts in parallel by software
+       deploying a storage host from each storage redundancy group.
+       ``ignore`` should be used only when re-creating and applying a
+       strategy after an abort or failure.
+
+       .. note::
+
+           If parallel apply is requested for storage, it is automatically
+           replaced with serial apply for ``--storage-apply-type``.
+
+   ``[--worker-apply-type {serial,parallel,ignore}]``
+       (Optional) Specifies whether software should be deployed to worker
+       hosts in serial, in parallel, or ignored. The default is serial. The
+       number of worker hosts that are software deployed in parallel is
+       specified by ``[--max-parallel-worker-hosts {2,3,4,5,6,7,8,9,10}]``.
+       The default is 2. ``ignore`` should be used only when re-creating and
+       applying a strategy after an abort or failure.
+
+   ``[--max-parallel-worker-hosts {2,3,4,5,6,7,8,9,10}]``
+       Specifies the maximum number of worker hosts to which software is
+       deployed in parallel. The default is 2.
+
+   ``[--instance-action {stop-start,migrate}]``
+       Applies only to OpenStack |VM| hosted guests. It specifies the action
+       performed on hosted OpenStack |VMs| on a worker host (or |AIO|
+       controller) prior to deploying the new software to the host. The
+       default is ``stop-start``.
+
+       - ``stop-start``
+
+         Before deploying the software release to the host, all the hosted
+         OpenStack |VMs| are stopped (shut down).
+
+         After deploying the software release to the host, all the hosted
+         OpenStack |VMs| are restarted.
+
+       - ``migrate``
+
+         Before deploying the software release to the host, all the hosted
+         OpenStack |VMs| are migrated to another host that is capable of
+         hosting the OpenStack |VM| and that is not part of the current
+         stage.
+
+         - Hosts whose software is already updated are preferred over hosts
+           whose software has not been updated yet.
+
+         - Live migration is attempted first. If live migration is not
+           possible for the OpenStack |VM|, cold migration is performed.
+   ``[--alarm-restrictions {strict,relaxed}]``
+       Lets you determine how to handle alarm restrictions based on the
+       management-affecting statuses of any existing alarms, which takes into
+       account the alarm type as well as the alarm's current severity. The
+       default is strict. If set to relaxed, orchestration is allowed to
+       proceed if there are no management-affecting alarms present.
+
+       Performing management actions without specifically relaxing the alarm
+       checks will still fail if there are any alarms present in the system
+       (except for a small list of basic alarms for the orchestration
+       actions, such as an upgrade operation in progress alarm not impeding
+       upgrade orchestration). You can use the CLI command
+       :command:`fm alarm-list --mgmt_affecting` to view the alarms that are
+       management affecting.
+
+       - ``Strict`` maintains alarm restrictions.
+
+       - ``Relaxed`` relaxes the usual alarm restrictions and allows the
+         action to proceed if there are no alarms present in the system with
+         a severity equal to or greater than its management-affecting
+         severity. That is, it will use the ``-f`` (force) option on the
+         precheck or start of the deployment.
+
+   ``[--delete]``
+       (Optional) Automatically completes and deletes the software deployment
+       after the strategy has been applied. For a major release, a deleted
+       software deployment can no longer be rolled back.
+
+#. Wait for the ``build`` phase of the software deployment orchestration
+   strategy creation to be 100% complete and its state to be
+   ``ready-to-apply``.
+
+   .. code-block::
+
+       ~(keystone_admin)]$ sw-manager sw-deploy-strategy show
+       Strategy Software Deploy Strategy:
+       strategy-uuid: 6282f049-bb9e-46f0-9ca8-97bf626884e0
+       release-id:
+       controller-apply-type: serial
+       storage-apply-type: serial
+       worker-apply-type: serial
+       default-instance-action: stop-start
+       alarm-restrictions: strict
+       current-phase: build
+       current-phase-completion: 100%
+       state: ready-to-apply
+       build-result: success
+       build-reason:
+
+   .. note::
+
+       If the build phase fails (``build-result: failed`` appears in the show
+       command output), determine the issue from the build error reason
+       (``build-reason:`` in the show command output) and/or from
+       ``/var/log/nfv-vim*.log`` on the active controller, address the
+       issues, delete the strategy, and retry the create.
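+   You can poll the build status until it reaches ``ready-to-apply``. One
+   convenient way, assuming the standard Linux :command:`watch` utility is
+   available on the controller, is:
+
+   .. code-block::
+
+       # Re-run the show command every 30 seconds until the state changes.
+       ~(keystone_admin)]$ watch -n 30 sw-manager sw-deploy-strategy show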
+#. (Optional) Display the details (phases and steps) of the built strategy
+   using the ``--details`` option.
+
+   The software deploy strategy consists of one or more stages, each of which
+   consists of one or more hosts to have the new software deployed at the
+   same time.
+
+   Each stage is split into steps (for example, query-alarms, lock-hosts,
+   upgrade-hosts).
+
+   The new software is deployed on the controller hosts first, followed by
+   the storage hosts, and then the worker hosts.
+
+   The new software is deployed on the worker hosts with no hosted guests
+   (Kubernetes pods or OpenStack |VMs|) before the worker hosts with hosted
+   guests (Kubernetes pods or OpenStack |VMs|).
+
+   Hosted Kubernetes pods will be relocated off each worker host
+   (AIO-Controller) if another worker host capable of hosting the Kubernetes
+   pods is available before the new software is deployed to the worker host
+   (AIO-Controller).
+
+   Hosted OpenStack |VMs| will be managed according to the requested
+   ``--instance-action`` on each worker host (AIO-Controller) before the new
+   software is deployed to the worker host (AIO-Controller).
+
+   The final step in each stage is one of the following:
+
+   ``system-stabilize``
+
+   This waits for a period of time (up to several minutes) and ensures that
+   the system is free of alarms. This ensures that we do not continue to
+   deploy the new software to more hosts if the software deployment has
+   caused an issue resulting in an alarm.
+
+   ``wait-data-sync``
+
+   This waits for a period of time (up to many hours) and ensures that data
+   synchronization has completed after the upgrade of a controller or
+   storage node.
+
+   .. code-block::
+
+       ~(keystone_admin)]$ sw-manager sw-deploy-strategy show --details
+
+       Strategy Software Deploy Strategy:
+       strategy-uuid: 6282f049-bb9e-46f0-9ca8-97bf626884e0
+       release-id:
+       controller-apply-type: serial
+       storage-apply-type: serial
+       worker-apply-type: serial
+       default-instance-action: stop-start
+       alarm-restrictions: strict
+       current-phase: build
+       current-phase-completion: 100%
+       state: ready-to-apply
+
+       build-phase:
+         ...
+         stages:
+         ...
+           steps:
+           ...
+
+       apply-phase:
+         ...
+         stages:
+         ...
+           steps:
+           ...
+
+#. Apply and monitor the software deployment orchestration.
+
+   You can either apply the entire strategy automatically or apply the
+   individual stages to control and monitor their progress manually.
+
+   #. Apply the entire strategy automatically and monitor its progress:
+
+      .. code-block::
+
+          ~(keystone_admin)]$ sw-manager sw-deploy-strategy apply
+          Strategy Software Deploy Strategy:
+          strategy-uuid: 52873771-fc1a-48cd-b322-ab921d34d01c
+          release-id:
+          controller-apply-type: serial
+          storage-apply-type: serial
+          worker-apply-type: serial
+          default-instance-action: stop-start
+          alarm-restrictions: strict
+          current-phase: apply
+          current-phase-completion: 0%
+          state: applying
+          inprogress: true
+
+      Show the high-level status of the apply:
+
+      .. code-block::
+
+          ~(keystone_admin)]$ sw-manager sw-deploy-strategy show
+          Strategy Software Deploy Strategy:
+          strategy-uuid: 35b48793-66f8-46be-8972-cc22117a93ff
+          release-id:
+          controller-apply-type: serial
+          storage-apply-type: serial
+          worker-apply-type: serial
+          default-instance-action: stop-start
+          alarm-restrictions: strict
+          current-phase: apply
+          current-phase-completion: 7%
+          state: applying
+          inprogress: true
+
+      Show the details of the active stage or step of the apply:
+
+      .. code-block::
+
+          ~(keystone_admin)]$ sw-manager sw-deploy-strategy show --active
+          Strategy Software Deploy Strategy:
+          strategy-uuid: 52873771-fc1a-48cd-b322-ab921d34d01c
+          release-id:
+          controller-apply-type: serial
+          storage-apply-type: serial
+          worker-apply-type: serial
+          default-instance-action: stop-start
+          alarm-restrictions: strict
+          current-phase: apply
+          current-phase-completion: 7%
+          state: applying
+          apply-phase:
+            total-stages: 3
+            current-stage: 0
+            stop-at-stage: 3
+            timeout: 12019 seconds
+            completion-percentage: 7%
+            start-date-time: 2024-06-11 12:19:51
+            inprogress: true
+            stages:
+              stage-id: 0
+              stage-name: sw-upgrade-start
+              total-steps: 3
+              current-step: 1
+              timeout: 1321 seconds
+              start-date-time: 2024-06-11 12:19:51
+              inprogress: true
+              steps:
+                step-id: 1
+                step-name: start-upgrade
+                timeout: 1200 seconds
+                start-date-time: 2024-06-11 12:19:51
+                result: wait
+                reason:
+
+   #. Apply individual stages.
+
+      .. code-block::
+
+          ~(keystone_admin)]$ sw-manager sw-deploy-strategy apply --stage-id
+          Strategy Software Deploy Strategy:
+          strategy-uuid: a0277e08-93cc-4964-ba39-ebab367a547c
+          release-id:
+          controller-apply-type: serial
+          storage-apply-type: serial
+          worker-apply-type: serial
+          default-instance-action: stop-start
+          alarm-restrictions: strict
+          current-phase: apply
+          current-phase-completion: 0%
+          state: applying
+          inprogress: true
+      .. code-block::
+
+          ~(keystone_admin)]$ sw-manager sw-deploy-strategy show
+          Strategy Software Deploy Strategy:
+          strategy-uuid: a0277e08-93cc-4964-ba39-ebab367a547c
+          release-id:
+          controller-apply-type: serial
+          storage-apply-type: serial
+          worker-apply-type: serial
+          default-instance-action: stop-start
+          alarm-restrictions: strict
+          current-phase: apply
+          current-phase-completion: 7%
+          state: applying
+          inprogress: true
+
+      .. code-block::
+
+          ~(keystone_admin)]$ sw-manager sw-deploy-strategy show --active
+          Strategy Software Deploy Strategy:
+          strategy-uuid: a0277e08-93cc-4964-ba39-ebab367a547c
+          release-id:
+          controller-apply-type: serial
+          storage-apply-type: serial
+          worker-apply-type: serial
+          default-instance-action: stop-start
+          alarm-restrictions: strict
+          current-phase: apply
+          current-phase-completion: 7%
+          state: applying
+          apply-phase:
+            total-stages: 3
+            current-stage: 0
+            stop-at-stage: 1
+            timeout: 1322 seconds
+            completion-percentage: 7%
+            start-date-time: 2024-06-11 14:40:23
+            inprogress: true
+            stages:
+              stage-id: 0
+              stage-name: sw-upgrade-start
+              total-steps: 3
+              current-step: 1
+              timeout: 1321 seconds
+              start-date-time: 2024-06-11 14:40:23
+              inprogress: true
+              steps:
+                step-id: 1
+                step-name: start-upgrade
+                timeout: 1200 seconds
+                start-date-time: 2024-06-11 14:40:23
+                result: wait
+                reason:
+
+#. While a software deployment orchestration strategy is being applied, it
+   can be aborted.
+
+   The current step is allowed to complete and, if necessary, an abort phase
+   is created and applied, which attempts to unlock any hosts that were
+   locked.
+
+   .. code-block::
+
+       ~(keystone_admin)]$ sw-manager sw-deploy-strategy abort
+       Strategy Software Deploy Strategy:
+       strategy-uuid: 63f48dfc-f833-479b-b597-d11f9219baf5
+       release-id:
+       controller-apply-type: serial
+       storage-apply-type: serial
+       worker-apply-type: serial
+       default-instance-action: stop-start
+       alarm-restrictions: strict
+       current-phase: apply
+       current-phase-completion: 7%
+       state: aborting
+       inprogress: true
+
+   Wait for the abort to complete.
+
+   .. code-block::
+
+       ~(keystone_admin)]$ sw-manager sw-deploy-strategy show
+       Strategy Software Deploy Strategy:
+       strategy-uuid: 63f48dfc-f833-479b-b597-d11f9219baf5
+       release-id:
+       controller-apply-type: serial
+       storage-apply-type: serial
+       worker-apply-type: serial
+       default-instance-action: stop-start
+       alarm-restrictions: strict
+       current-phase: abort
+       current-phase-completion: 100%
+       state: aborted
+       apply-result: failed
+       apply-reason:
+       abort-result: success
+       abort-reason:
+
+   .. note::
+
+       To view detailed errors, run the following commands:
+
+       .. code-block::
+
+           ~(keystone_admin)]$ sw-manager sw-deploy-strategy show --error-details
+
+       .. code-block::
+
+           ~(keystone_admin)]$ sw-manager sw-deploy-strategy show
+           Strategy Software Deploy Strategy:
+           strategy-uuid: <>
+           release-id:
+           controller-apply-type: serial
+           storage-apply-type: serial
+           worker-apply-type: serial
+           default-instance-action: stop-start
+           alarm-restrictions: strict
+           current-phase: abort
+           current-phase-completion: 100%
+           state: aborted
+           apply-result: failed
+           apply-error-response:
+           abort-result: success
+           abort-reason:
+           abort-error-response:
+
+   .. note::
+
+       After a software deployment strategy has been applied (or aborted), it
+       must be deleted before another software deployment strategy can be
+       created.
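+   For example, a typical recovery sequence after an aborted (or failed)
+   apply is to delete the strategy, address the reported issue, and then
+   re-create and re-apply it. The sequence below is illustrative only, and
+   ``<release-id>`` is a placeholder:
+
+   .. code-block::
+
+       ~(keystone_admin)]$ sw-manager sw-deploy-strategy delete
+       Strategy deleted
+
+       # Address the issue reported by --error-details, then:
+       ~(keystone_admin)]$ sw-manager sw-deploy-strategy create <release-id>
+       ~(keystone_admin)]$ sw-manager sw-deploy-strategy apply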
+#. Otherwise, wait for all the steps of all stages of the software deployment
+   orchestration strategy to complete.
+
+   .. code-block::
+
+       ~(keystone_admin)]$ sw-manager sw-deploy-strategy show
+       Strategy Software Deploy Strategy:
+       strategy-uuid: 6282f049-bb9e-46f0-9ca8-97bf626884e0
+       release-id:
+       controller-apply-type: serial
+       storage-apply-type: serial
+       worker-apply-type: serial
+       default-instance-action: stop-start
+       alarm-restrictions: strict
+       current-phase: applied
+       current-phase-completion: 100%
+       state: applied
+       apply-result: success
+       apply-reason:
+
+   If a software deployment strategy apply fails, you must address the issue
+   that caused the failure, then delete/re-create the strategy before
+   attempting to apply it again.
+
+   For additional details, run the :command:`sw-manager sw-deploy-strategy show --error-details` command.
+
+#. Delete the completed software deployment strategy.
+
+   .. code-block::
+
+       ~(keystone_admin)]$ sw-manager sw-deploy-strategy delete
+       Strategy deleted
+
+.. rubric:: |postreq|
+
+After a successful software deployment orchestration:
+
+- The Kubernetes Version Upgrade procedure can be executed, if desired, to
+  upversion to a new Kubernetes version available in the new software
+  release.
+
+- You should also validate that the system and hosted applications are
+  healthy.
+
+- In the case of a major release software deployment:
+
+  - If you do not need to roll back the major release software deployment,
+    delete the software deployment that was used by the software deployment
+    orchestration.
+
+    .. code-block::
+
+        ~(keystone_admin)]$ software deploy delete
+        Deployment has been deleted
+
+    .. code-block::
+
+        ~(keystone_admin)]$ software deploy show
+        No deploy in progress
+
+  - Remove the old major release to reclaim disk space.
+
+    .. code-block::
+
+        ~(keystone_admin)]$ software list
+        +--------------------------+-------+-------------+
+        | Release                  | RR    | State       |
+        +--------------------------+-------+-------------+
+        | starlingx-10.0.0         | True  | unavailable |
+        |                          | True  | deployed    |
+        +--------------------------+-------+-------------+
+
+    .. code-block::
+
+        ~(keystone_admin)]$ software delete starlingx-10.0.0
+        starlingx-10.0.0 has been deleted.
+
+    .. code-block::
+
+        ~(keystone_admin)]$ software list
+        +--------------------------+-------+-------------+
+        | Release                  | RR    | State       |
+        +--------------------------+-------+-------------+
+        |                          | True  | deployed    |
+        +--------------------------+-------+-------------+
diff --git a/doc/source/updates/kubernetes/orchestrated-removal-host-software-deployment-3f542895daf8.rst b/doc/source/updates/kubernetes/orchestrated-removal-host-software-deployment-3f542895daf8.rst
new file mode 100644
index 000000000..0cd28b380
--- /dev/null
+++ b/doc/source/updates/kubernetes/orchestrated-removal-host-software-deployment-3f542895daf8.rst
@@ -0,0 +1,135 @@
+.. WARNING: Add no lines of text between the label immediately following
+.. and the title.
+
+.. _orchestrated-removal-host-software-deployment-3f542895daf8:
+
+=============================================
+Orchestrated Removal Host Software Deployment
+=============================================
+
+A fully deployed patch release can be removed (or un-deployed) by using the
+:ref:`orchestrated-deployment-host-software-deployment-d234754c7d20`
+procedure and deploying a previous patch release or the major release.
+
+.. note::
+
+    A fully deployed major release cannot be removed (or un-deployed).
+
+For example, the following shows the current software releases deployed:
+.. code-block::
+
+    ~(keystone_admin)]$ software list
+    +--------------------------+-------+-----------+
+    | Release                  | RR    | State     |
+    +--------------------------+-------+-----------+
+    | starlingx-10.0.0         | True  | deployed  |
+    | starlingx-10.0.1         | False | deployed  |
+    | starlingx-10.0.2         | True  | deployed  |
+    | starlingx-10.0.3         | False | deployed  |
+    +--------------------------+-------+-----------+
+
+To remove patch releases starlingx-10.0.3 and starlingx-10.0.2, follow the
+:ref:`orchestrated-deployment-host-software-deployment-d234754c7d20`
+procedure, and deploy (or go back to) the starlingx-10.0.1 software release
+in :ref:`step 1 <orchestrated-deployment-host-software-deployment-d234754c7d20-step>`.
+
+.. code-block::
+
+    ~(keystone_admin)]$ sw-manager sw-deploy-strategy create [--controller-apply-type {serial,ignore}]
+                        [--storage-apply-type {serial,parallel,ignore}]
+                        [--worker-apply-type {serial,parallel,ignore}]
+                        [--max-parallel-worker-hosts {2,3,4,5,6,7,8,9,10}]
+                        [--instance-action {stop-start,migrate}]
+                        [--alarm-restrictions {strict,relaxed}]
+                        starlingx-10.0.1
+
+    strategy-uuid: 5435e049-7002-4403-acfb-7886f6da14af
+    release-id: starlingx-10.0.1
+    controller-apply-type: serial
+    storage-apply-type: serial
+    worker-apply-type: serial
+    default-instance-action: stop-start
+    alarm-restrictions: strict
+    current-phase: build
+    current-phase-completion: 0%
+    state: building
+    inprogress: true
+
+.. code-block::
+
+    ~(keystone_admin)]$ sw-manager sw-deploy-strategy show
+    Strategy Software Deploy Strategy:
+    strategy-uuid: 6282f049-bb9e-46f0-9ca8-97bf626884e0
+    release-id: starlingx-10.0.1
+    controller-apply-type: serial
+    storage-apply-type: serial
+    worker-apply-type: serial
+    default-instance-action: stop-start
+    alarm-restrictions: strict
+    current-phase: build
+    current-phase-completion: 100%
+    state: ready-to-apply
+    build-result: success
+    build-reason:
+
+.. note::
+
+    If parallel apply is requested for storage, it is automatically replaced
+    with serial apply for ``--storage-apply-type``.
+
+Apply the software deployment orchestration strategy.
+
+.. code-block::
+
+    ~(keystone_admin)]$ sw-manager sw-deploy-strategy apply
+    Strategy Software Deploy Strategy:
+    strategy-uuid: 92e69661-2ef2-4d7d-baf2-272b8886d95a
+    release-id: starlingx-10.0.1
+    controller-apply-type: serial
+    storage-apply-type: serial
+    worker-apply-type: serial
+    default-instance-action: stop-start
+    alarm-restrictions: relaxed
+    current-phase: apply
+    current-phase-completion: 0%
+    state: applying
+    inprogress: true
+
+On completion of the
+:ref:`orchestrated-deployment-host-software-deployment-d234754c7d20`
+procedure, run the following command:
+
+.. code-block::
+
+    ~(keystone_admin)]$ software list
+    +--------------------------+-------+-----------+
+    | Release                  | RR    | State     |
+    +--------------------------+-------+-----------+
+    | starlingx-10.0.0         | True  | deployed  |
+    | starlingx-10.0.1         | False | deployed  |
+    | starlingx-10.0.2         | True  | available |
+    | starlingx-10.0.3         | False | available |
+    +--------------------------+-------+-----------+
+
+If the removed patches are no longer required on the system, you can delete
+them.
+
+.. code-block::
+
+    ~(keystone_admin)]$ software delete starlingx-10.0.3
+    starlingx-10.0.3 has been deleted.
+
+.. code-block::
+
+    ~(keystone_admin)]$ software delete starlingx-10.0.2
+    starlingx-10.0.2 has been deleted.
+.. code-block::
+
+    ~(keystone_admin)]$ software list
+    +--------------------------+-------+-----------+
+    | Release                  | RR    | State     |
+    +--------------------------+-------+-----------+
+    | starlingx-10.0.0         | True  | deployed  |
+    | starlingx-10.0.1         | False | deployed  |
+    +--------------------------+-------+-----------+
+
diff --git a/doc/source/updates/kubernetes/orchestrated-rollback-host-software-deployment-c6b12f13a8a1.rst b/doc/source/updates/kubernetes/orchestrated-rollback-host-software-deployment-c6b12f13a8a1.rst
new file mode 100644
index 000000000..0e1a2d4db
--- /dev/null
+++ b/doc/source/updates/kubernetes/orchestrated-rollback-host-software-deployment-c6b12f13a8a1.rst
@@ -0,0 +1,255 @@
+.. WARNING: Add no lines of text between the label immediately following
+.. and the title.
+
+.. _orchestrated-rollback-host-software-deployment-c6b12f13a8a1:
+
+==============================================
+Orchestrated Rollback Host Software Deployment
+==============================================
+
+For an orchestrated software deployment, the
+:ref:`orchestrated-deployment-host-software-deployment-d234754c7d20` procedure
+can be aborted and rolled back as long as the postrequisite
+:command:`software deploy delete` step has not been executed, either manually
+or via orchestrated upgrade. After the postrequisite
+:command:`software deploy delete` step, an orchestrated abort and rollback of
+the deployment is not possible.
+
+.. note::
+
+    This section also covers the orchestrated abort and rollback of an
+    updated release deployment. A deployment cannot be rolled back after the
+    activation stage. Orchestrated rollback is currently supported only for
+    |AIO-SX|.
+
+.. rubric:: |prereq|
+
+You have done either of the following:
+
+- Aborted an :ref:`orchestrated-deployment-host-software-deployment-d234754c7d20` procedure.
+
+  .. code-block::
+
+      ~(keystone_admin)]$ sw-manager sw-deploy-strategy abort
+      Strategy Software Deploy Strategy:
+      strategy-uuid: 63f48dfc-f833-479b-b597-d11f9219baf5
+      release-id:
+      controller-apply-type: serial
+      storage-apply-type: serial
+      worker-apply-type: serial
+      default-instance-action: stop-start
+      alarm-restrictions: strict
+      current-phase: apply
+      current-phase-completion: 7%
+      state: aborting
+      inprogress: true
+
+  and waited for the orchestration to finish its current stage/step and then
+  abort.
+
+  .. code-block::
+
+      ~(keystone_admin)]$ sw-manager sw-deploy-strategy show
+      Strategy Software Deploy Strategy:
+      strategy-uuid: 63f48dfc-f833-479b-b597-d11f9219baf5
+      release-id:
+      controller-apply-type: serial
+      storage-apply-type: serial
+      worker-apply-type: serial
+      default-instance-action: stop-start
+      alarm-restrictions: strict
+      current-phase: abort
+      current-phase-completion: 100%
+      state: aborted
+      apply-result: aborted
+      apply-reason:
+      abort-result: success
+      abort-reason:
+
+- Or completed an :ref:`orchestrated-deployment-host-software-deployment-d234754c7d20` procedure:
+
+  .. code-block::
+
+      ~(keystone_admin)]$ sw-manager sw-deploy-strategy show
+      Strategy Software Deploy Strategy:
+      strategy-uuid: 6282f049-bb9e-46f0-9ca8-97bf626884e0
+      release-id:
+      controller-apply-type: serial
+      storage-apply-type: serial
+      worker-apply-type: serial
+      default-instance-action: stop-start
+      alarm-restrictions: strict
+      current-phase: apply
+      current-phase-completion: 100%
+      state: applied
+      apply-result: success
+      apply-reason:
+
+However, you have not yet executed the postrequisite
+:command:`software deploy delete` step.
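+You can confirm that the deployment still exists, and is therefore still
+eligible for rollback, with the :command:`software deploy show` command used
+elsewhere in this document. If it reports ``No deploy in progress``, the
+deployment has already been deleted and orchestrated rollback is no longer
+possible:
+
+.. code-block::
+
+    ~(keystone_admin)]$ software deploy show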
+.. rubric:: |proc|
+
+#. Delete any old software deployment strategy.
+
+   .. code-block::
+
+       ~(keystone_admin)]$ sw-manager sw-deploy-strategy delete
+       Strategy deleted
+
+#. Create a software deployment rollback orchestration strategy with the
+   desired policies to abort and roll back the current software deployment.
+
+   .. code-block::
+
+       ~(keystone_admin)]$ sw-manager sw-deploy-strategy create [--controller-apply-type {serial,ignore}]
+                           [--storage-apply-type {serial,parallel,ignore}]
+                           [--worker-apply-type {serial,parallel,ignore}]
+                           [--max-parallel-worker-hosts {2,3,4,5,6,7,8,9,10}]
+                           [--instance-action {stop-start,migrate}]
+                           [--alarm-restrictions {strict,relaxed}]
+                           --rollback
+
+       strategy-uuid: 5435e049-7002-4403-acfb-7886f6da14af
+       release-id:
+       controller-apply-type: serial
+       storage-apply-type: serial
+       worker-apply-type: serial
+       default-instance-action: migrate
+       alarm-restrictions: strict
+       current-phase: build
+       current-phase-completion: 0%
+       state: building
+       inprogress: true
+
+   where,
+
+   ``--rollback``
+       Specifies that this is a software deployment rollback orchestration
+       strategy for the current software deployment.
+
+   ``[--controller-apply-type {serial,ignore}]``
+       (Optional) Specifies whether software should be deployed to the
+       controller hosts in serial or ignored. The default is serial.
+       ``ignore`` should be used only when re-creating and applying a
+       strategy after an abort or failure.
+
+   ``[--storage-apply-type {serial,parallel,ignore}]``
+       (Optional) Specifies whether software should be deployed to the
+       storage hosts in serial, in parallel, or ignored. The default is
+       serial. Software is deployed to the storage hosts in parallel by
+       software deploying a storage host from each storage redundancy group
+       at the same time. ``ignore`` should be used only when re-creating and
+       applying a strategy after an abort or failure.
+
+       .. note::
+
+           If parallel apply is requested for storage, it is automatically
+           replaced with serial apply for ``--storage-apply-type``.
+
+   ``[--worker-apply-type {serial,parallel,ignore}]``
+       (Optional) Specifies whether software should be deployed to the worker
+       hosts in serial, in parallel, or ignored. The default is serial. The
+       number of worker hosts that are software deployed in parallel is
+       specified by ``[--max-parallel-worker-hosts {2,3,4,5,6,7,8,9,10}]``.
+       The default is 2. ``ignore`` should be used only when re-creating and
+       applying a strategy after an abort or failure.
+
+   ``[--max-parallel-worker-hosts {2,3,4,5,6,7,8,9,10}]``
+       Specifies the maximum number of worker hosts to which software is
+       deployed in parallel. The default is 2.
+
+   ``[--instance-action {stop-start,migrate}]``
+       This option applies only to OpenStack |VM| hosted guests. It specifies
+       the action performed on hosted OpenStack |VMs| on a worker host
+       (|AIO| controller) prior to deploying the new software to the host.
+       The default is ``stop-start``.
+
+       - ``stop-start``
+         Before deploying the software release to the host, all hosted
+         OpenStack |VMs| are stopped (shut down).
+
+         After deploying the software release to the host, all hosted
+         OpenStack |VMs| are restarted.
+
+       - ``migrate``
+         Before deploying the software release to the host, all hosted
+         OpenStack |VMs| are migrated to another host that is capable of
+         hosting the OpenStack |VM| and that is not part of the current
+         stage.
+
+         Hosts whose software is already updated are preferred over hosts
+         whose software is not yet updated.
+         Live migration is attempted first; if it is not possible for the
+         OpenStack |VM|, cold migration is performed.
+
+   ``[--alarm-restrictions {strict,relaxed}]``
+       This option lets you determine how to handle alarm restrictions based
+       on the management-affecting statuses of any existing alarms, which
+       takes into account the alarm type as well as the alarm's current
+       severity.
+
+       The default is strict.
+
+       If set to relaxed, orchestration is allowed to proceed if there are no
+       management-affecting alarms present.
+
+       Performing management actions without specifically relaxing the alarm
+       checks will still fail if there are any alarms present in the system
+       (except for a small list of basic alarms for the orchestration
+       actions, such as an upgrade operation in progress alarm not impeding
+       upgrade orchestration).
+
+       You can use the CLI command :command:`fm alarm-list --mgmt_affecting`
+       to view the alarms that are management affecting.
+
+       - ``Strict``
+
+         Maintains alarm restrictions.
+
+       - ``Relaxed``
+
+         Relaxes the usual alarm restrictions and allows the action to
+         proceed if there are no alarms present in the system with a severity
+         equal to or greater than its management affecting severity.
+
+#. Apply the software deployment rollback orchestration:
+
+   .. code-block::
+
+       ~(keystone_admin)]$ sw-manager sw-deploy-strategy apply
+       Strategy Software Deploy Strategy:
+       strategy-uuid: 52873771-fc1a-48cd-b322-ab921d34d01c
+       release-id:
+       controller-apply-type: serial
+       storage-apply-type: serial
+       worker-apply-type: serial
+       default-instance-action: stop-start
+       alarm-restrictions: strict
+       current-phase: apply
+       current-phase-completion: 0%
+       state: applying
+       inprogress: true
+
+#. Wait for all the steps of all stages of the software deployment rollback
+   orchestration strategy to complete.
+
+   .. code-block::
+
+       ~(keystone_admin)]$ sw-manager sw-deploy-strategy show
+       Strategy Software Deploy Strategy:
+       strategy-uuid: 6282f049-bb9e-46f0-9ca8-97bf626884e0
+       release-id:
+       controller-apply-type: serial
+       storage-apply-type: serial
+       worker-apply-type: serial
+       default-instance-action: stop-start
+       alarm-restrictions: strict
+       current-phase: apply
+       current-phase-completion: 100%
+       state: done
+       apply-result: success
+       apply-reason:
+
+#. Delete the completed software deployment rollback orchestration strategy.
+
+   .. code-block::
+
+       ~(keystone_admin)]$ sw-manager sw-deploy-strategy delete
+       Strategy deleted.
diff --git a/doc/source/updates/kubernetes/orchestration-upgrade-overview.rst b/doc/source/updates/kubernetes/orchestration-upgrade-overview.rst
deleted file mode 100644
index 473fa708d..000000000
--- a/doc/source/updates/kubernetes/orchestration-upgrade-overview.rst
+++ /dev/null
@@ -1,139 +0,0 @@
-
-.. bla1593031188931
-.. _orchestration-upgrade-overview:
-
-==============================
-Upgrade Orchestration Overview
-==============================
-
-Upgrade Orchestration automates much of the upgrade procedure, leaving a few
-manual steps for operator oversight.
-
-.. contents:: |minitoc|
-   :local:
-   :depth: 1
-
-.. note::
-    Upgrading of |prod-dc| is distinct from upgrading other |prod|
-    configurations.
-
-.. xbooklink For information on updating |prod-dc|, see |distcloud-doc|:
-   :ref:`Upgrade Management `.
-
-.. note::
-
-    The upgrade orchestration commands are prefixed with :command:`sw-manager`.
-    To use upgrade orchestration commands, you need administrator privileges.
- You must log in to the active controller as user **sysadmin** and source the - ``/etc/platform/openrc`` script to obtain administrator privileges. Do not use - :command:`sudo`. - -.. code-block:: none - - ~(keystone_admin)]$ sw-manager upgrade-strategy --help - usage: sw-manager upgrade-strategy [-h] ... - - optional arguments: - -h, --help show this help message and exit - - Software Upgrade Commands: - - create Create a strategy - delete Delete a strategy - apply Apply a strategy - abort Abort a strategy - show Show a strategy - -.. _orchestration-upgrade-overview-section-N10029-N10026-N10001: - ----------------------------------- -Upgrade Orchestration Requirements ----------------------------------- - -Upgrade orchestration can only be done on a system that meets the following -conditions: - -.. _orchestration-upgrade-overview-ul-blp-gcx-ry: - -- The system is clear of alarms (with the exception of the alarm upgrade in - progress). - -- All hosts must be unlocked, enabled, and available. - -- The system is fully redundant (two controller nodes available, at least - one complete storage replication group available for systems with Ceph - backend). - -- An upgrade has been started, and controller-1 has been upgraded and is - active. - -- No orchestration strategy exists. Patch, upgrade, firmware, kubernetes, - and kube rootca are all types of orchestration. An upgrade cannot be - orchestrated while another orchestration is in progress. - -- Sufficient free capacity or unused worker resources must be available - across the cluster. A rough calculation is: - - ``Required spare capacity ( %) = ( / ) * 100`` - -.. include:: /_includes/manual-image-cleanup-note-b43d78f2ba4a.rest - -.. _orchestration-upgrade-overview-section-N10081-N10026-N10001: - ---------------------------------- -The Upgrade Orchestration Process ---------------------------------- - -Upgrade orchestration can be initiated after the initial controller host has -been manual upgraded and returned to a stability state. Upgrade orchestration -automatically iterates through the remaining hosts, installing the new software -load on each one: first the other controller host, then the storage hosts, and -finally the worker hosts. During worker host upgrades, pods are automatically -moved to alternate worker hosts. - -You first create an upgrade orchestration strategy, or plan, for the automated -upgrade procedure. This customizes the upgrade orchestration, using parameters -to specify: - -.. _orchestration-upgrade-overview-ul-eyw-fyr-31b: - -- the host types to be upgraded - -- whether to upgrade hosts serially or in parallel - -Based on these parameters, and the state of the hosts, upgrade orchestration -creates a number of stages for the overall upgrade strategy. Each stage -generally consists of moving pods, locking hosts, installing upgrades, and -unlocking hosts for a subset of the hosts on the system. - -After creating the upgrade orchestration strategy, the you can either apply the -entire strategy automatically, or apply individual stages to control and monitor -their progress manually. - -Update and upgrade orchestration are mutually exclusive; they perform -conflicting operations. Only a single strategy (sw-patch or sw-upgrade) is -allowed to exist at a time. If you need to update during an upgrade, you can -abort/delete the sw-upgrade strategy, and then create and apply a sw-patch -strategy before going back to the upgrade. - -Some stages of the upgrade could take a significant amount of time (hours). 
-For example, after upgrading a storage host, re-syncing the OSD data could take -30 minutes per TB (assuming 500MB/s sync rate, which is about half of a 10G -infrastructure link). - -.. _orchestration-upgrade-overview-section-N10101-N10026-N10001: - ------------------------------- -Upgrade Orchestration Workflow ------------------------------- - -The Upgrade Orchestration procedure has several major parts: - -.. _orchestration-upgrade-overview-ul-r1k-wzj-wy: - -- Manually upgrade controller-1. - -- Orchestrate the automatic upgrade of the remaining controller, all the - storage nodes, and all the worker nodes. - -- Manually complete the upgrade. diff --git a/doc/source/updates/kubernetes/overview-of-upgrade-abort-procedure.rst b/doc/source/updates/kubernetes/overview-of-upgrade-abort-procedure.rst deleted file mode 100644 index f700fbd27..000000000 --- a/doc/source/updates/kubernetes/overview-of-upgrade-abort-procedure.rst +++ /dev/null @@ -1,29 +0,0 @@ - -.. yim1593277634652 -.. _overview-of-upgrade-abort-procedure: - -=================================== -Overview of Upgrade Abort Procedure -=================================== - -You can abort an upgrade procedure if necessary. - -There are two cases for aborting an upgrade: - - -.. _overview-of-upgrade-abort-procedure-ul-q5f-vmz-bx: - -- Before controller-0 has been upgraded (that is, only controller-1 has been - upgraded): In this case the upgrade can be aborted and the system will - remain in service during the abort. - -- After controller-0 has been upgraded (that is, both controllers have been - upgraded): In this case the upgrade can only be aborted with a complete - outage and a re-install of all hosts. This would only be done as a last - resort, if there was absolutely no other way to recover the system. - -- :ref:`Rolling Back a Software Upgrade Before the Second Controller Upgrade - ` - -- :ref:`Rolling Back a Software Upgrade After the Second Controller Upgrade - ` diff --git a/doc/source/updates/kubernetes/performing-an-orchestrated-upgrade-using-the-cli.rst b/doc/source/updates/kubernetes/performing-an-orchestrated-upgrade-using-the-cli.rst deleted file mode 100644 index 00bc02e8e..000000000 --- a/doc/source/updates/kubernetes/performing-an-orchestrated-upgrade-using-the-cli.rst +++ /dev/null @@ -1,340 +0,0 @@ - -.. kad1593196868935 -.. _performing-an-orchestrated-upgrade-using-the-cli: - -============================================= -Perform an Orchestrated Upgrade Using the CLI -============================================= - -The upgrade orchestration CLI is :command:`sw-manager`. - -.. rubric:: |context| - -.. note:: - To use upgrade orchestration commands, you need administrator privileges. - You must log in to the active controller as user **sysadmin** and source the - ``/etc/platform/openrc`` script to obtain administrator privileges. Do not use - :command:`sudo`. - -The upgrade strategy options are shown in the following output: - -.. code-block:: none - - ~(keystone_admin)]$ sw-manager upgrade-strategy --help - usage: sw-manager upgrade-strategy [-h] ... - - optional arguments: - -h, --help show this help message and exit - - Software Upgrade Commands: - - create Create a strategy - delete Delete a strategy - apply Apply a strategy - abort Abort a strategy - show Show a strategy - -You can perform a partially orchestrated upgrade using the |CLI|. Upgrade -orchestration of other |prod| nodes can be initiated after the initial -controller host has been manually upgraded and returned to a stability state. - -.. 
note:: - Management-affecting alarms cannot be ignored at the indicated severity - level or higher by using relaxed alarm rules during an orchestrated upgrade - operation. For a list of management-affecting alarms, see |fault-doc|: - :ref:`alarm-messages-overview-19c242d3d151`. To display - management-affecting active alarms, use the following command: - - .. code-block:: none - - ~(keystone_admin)]$ fm alarm-list --mgmt_affecting - - During an orchestrated upgrade, the following alarms are ignored even when - strict restrictions are selected: - - - 900.005, Upgrade in progress - - - 900.201, Software upgrade auto apply in progress - -.. _performing-an-orchestrated-upgrade-using-the-cli-ul-qhy-q1p-v1b: - -.. rubric:: |prereq| - -See :ref:`Upgrading All-in-One Duplex / Standard -`, and perform Steps 1-8, to manually -upgrade the initial controller node before doing the upgrade orchestration -described below to upgrade the remaining nodes of the |prod|. - -- The subclouds must use the Redfish platform management service if it is an - All-in-one Simplex subcloud. - -- Duplex (AIODX/Standard) upgrades are supported, and they do not require - remote install via Redfish. - -.. rubric:: |proc| - -.. _performing-an-orchestrated-upgrade-using-the-cli-steps-e45-kh5-sy: - -#. Create a update strategy using the :command:`sw-manager upgrade-strategy create` - command. - - .. code-block:: none - - ~(keystone_admin)]$ sw-manager upgrade-strategy create - - strategy-uuid: 5435e049-7002-4403-acfb-7886f6da14af - controller-apply-type: serial - storage-apply-type: serial - worker-apply-type: serial - default-instance-action: migrate - alarm-restrictions: strict - current-phase: build - current-phase-completion: 0% - state: building - inprogress: true - - Create an upgrade strategy, specifying the following parameters: - - - storage-apply-type: - - - ``serial`` (default): storage hosts will be upgraded one at a time - - - ``parallel``: storage hosts will be upgraded in parallel, ensuring that - only one storage node in each replication group is patched at a - time. - - - ``ignore``: storage hosts will not be upgraded - - - worker-apply-type: - - ``serial`` (default) - Worker hosts will be upgraded one at a time. - - ``ignore`` - Worker hosts will not be upgraded. - - - Alarm Restrictions - - This option lets you determine how to handle alarm restrictions based - on the management affecting statuses of any existing alarms, which - takes into account the alarm type as well as the alarm's current - severity. If set to relaxed, orchestration will be allowed to proceed - if there are no management affecting alarms present. - - Performing management actions without specifically relaxing the alarm - checks will still fail if there are any alarms present in the system - (except for a small list of basic alarms for the orchestration actions - such as an upgrade operation in progress alarm not impeding upgrade - orchestration). - - You can use the CLI command :command:`fm alarm-list --mgmt_affecting` - to view the alarms that are management affecting. - - **Strict** - Maintains alarm restrictions. - - **Relaxed** - Relaxes the usual alarm restrictions and allows the action to - proceed if there are no alarms present in the system with a severity - equal to or greater than its management affecting severity. - - The upgrade strategy consists of one or more stages, which consist of one - or more hosts to be upgraded at the same time. 
Each stage will be split - into steps (for example, query-alarms, lock-hosts, upgrade-hosts). - Following are some notes about stages: - - - Controller-0 is upgraded first, followed by storage hosts and then - worker hosts. - - - Worker hosts with no instances are upgraded before worker hosts with - application pods. - - - Pods will be relocated off each worker host before it is upgraded. - - - The final step in each stage is one of: - - **system-stabilize** - This waits for a period of time (up to several minutes) and - ensures that the system is free of alarms. This ensures that we do - not continue to upgrade more hosts if the upgrade has caused an - issue resulting in an alarm. - - **wait-data-sync** - This waits for a period of time (up to many hours) and ensures - that data synchronization has completed after the upgrade of a - controller or storage node. - - Examine the upgrade strategy. Pay careful attention to: - - - The sets of hosts that will be upgraded together in each stage. - - - The sets of pods that will be impacted in each stage. - - .. note:: - It is likely that as each stage is applied, pods will be relocated - to worker hosts that have not yet been upgraded. That means that - later stages will be relocating more pods than those originally - listed in the upgrade strategy. The upgrade strategy is NOT - updated, but any additional pods on each worker host will be - relocated before it is upgraded. - -#. Run :command:`sw-manager upgrade-strategy show` command, to display the - current-phase-completion percentage progress indicator in various - increments. Once at 100%, it returns: - - .. code-block:: none - - ~(keystone_admin)]$ sw-manager upgrade-strategy show - - strategy-uuid: 5435e049-7002-4403-acfb-7886f6da14af - controller-apply-type: serial - storage-apply-type: serial - worker-apply-type: serial - default-instance-action: migrate - alarm-restrictions: strict - current-phase: build - current-phase-completion: 100% - state: ready-to-apply - build-result: success - build-reason: - -#. Apply the upgrade strategy. You can optionally apply a single stage at a - time. - - .. code-block:: none - - ~(keystone_admin)]$ sw-manager upgrade-strategy apply - - strategy-uuid: 5435e049-7002-4403-acfb-7886f6da14af - controller-apply-type: serial - storage-apply-type: serial - worker-apply-type: serial - default-instance-action: migrate - alarm-restrictions: strict - current-phase: apply - current-phase-completion: 0% - state: applying - inprogress: true - - While an upgrade strategy is being applied, it can be aborted. This results - in: - - - The current step will be allowed to complete. - - - If necessary an abort phase will be created and applied, which will - attempt to unlock any hosts that were locked. - - After an upgrade strategy has been applied (or aborted) it must be - deleted before another upgrade strategy can be created. If an - upgrade strategy application fails, you must address the issue that caused - the failure, then delete/re-create the strategy before attempting to apply - it again. - -#. Run :command:`sw-manager upgrade-strategy show` command, to display the - current-phase-completion displaying the field goes from 0% to 100% in - various increments. Once at 100%, it returns: - - .. 
code-block:: none - - ~(keystone_admin)]$ sw-manager upgrade-strategy show - - strategy-uuid: b91d8332-9ece-4578-b4dd-e9cf87b73f18 - controller-apply-type: serial - storage-apply-type: serial - worker-apply-type: serial - default-instance-action: migrate - alarm-restrictions: strict - current-phase: apply - current-phase-completion: 100% - state: applied - apply-result: success - apply-reason: - -#. Activate the upgrade. - - During the running of the :command:`upgrade-activate` command, new - configurations are applied to the controller. 250.001 (**hostname - Configuration is out-of-date**) alarms are raised and are cleared as the - configuration is applied. The upgrade state goes from **activating** to - **activation-complete** once this is done. - - .. only:: partner - - .. include:: /_includes/performing-an-orchestrated-upgrade-using-the-cli.rest - :start-after: deploymentmanager-begin - :end-before: deploymentmanager-end - - .. code-block:: none - - ~(keystone_admin)]$ system upgrade-activate - +--------------+--------------------------------------+ - | Property | Value | - +--------------+--------------------------------------+ - | uuid | 61e5fcd7-a38d-40b0-ab83-8be55b87fee2 | - | state | activating | - | from_release | nn.nn | - | to_release | nn.nn | - +--------------+--------------------------------------+ - - The following states apply when this command is executed. - - **activation-requested** - State entered when :command:`system upgrade-activate` is executed. - - **activating** - State entered when we have started activating the upgrade by applying - new configurations to the controller and compute hosts. - - **activation-complete** - State entered when new configurations have been applied to all - controller and compute hosts. - -#. Check the status of the upgrade again to see it has reached - **activation-complete** - - .. code-block:: none - - ~(keystone_admin)]$ system upgrade-show - +--------------+--------------------------------------+ - | Property | Value | - +--------------+--------------------------------------+ - | uuid | 61e5fcd7-a38d-40b0-ab83-8be55b87fee2 | - | state | activation-complete | - | from_release | nn.nn | - | to_release | nn.nn | - +--------------+--------------------------------------+ - -#. Complete the upgrade. - - .. code-block:: none - - ~(keystone_admin)]$ system upgrade-complete - +--------------+--------------------------------------+ - | Property | Value | - +--------------+--------------------------------------+ - | uuid | 61e5fcd7-a38d-40b0-ab83-8be55b87fee2 | - | state | completing | - | from_release | nn.nn | - | to_release | nn.nn | - +--------------+--------------------------------------+ - -#. Delete the imported load. - - .. code-block:: none - - ~(keystone_admin)]$ system load-list - +----+----------+------------------+ - | id | state | software_version | - +----+----------+------------------+ - | 1 | imported | nn.nn | - | 2 | active | nn.nn | - +----+----------+------------------+ - - ~(keystone_admin)]$ system load-delete 1 - Deleted load: load 1 - -.. only:: partner - - .. include:: /_includes/performing-an-orchestrated-upgrade-using-the-cli.rest - :start-after: Orchupgradecli-begin - :end-before: Orchupgradecli-end diff --git a/doc/source/updates/kubernetes/performing-an-orchestrated-upgrade.rst b/doc/source/updates/kubernetes/performing-an-orchestrated-upgrade.rst deleted file mode 100644 index 671eccec7..000000000 --- a/doc/source/updates/kubernetes/performing-an-orchestrated-upgrade.rst +++ /dev/null @@ -1,184 +0,0 @@ - -.. 
sab1593196680415 -.. _performing-an-orchestrated-upgrade: - -=============================== -Perform an Orchestrated Upgrade -=============================== - -You can perform a partially orchestrated Upgrade of a |prod| system using the -CLI and Horizon Web interface. Upgrade and stability of the initial controller -node must be done manually before using upgrade orchestration to orchestrate the -remaining nodes of the |prod|. - -.. rubric:: |context| - -.. note:: - Management-affecting alarms cannot be ignored at the indicated severity - level or higher by using relaxed alarm rules during an orchestrated upgrade - operation. For a list of management-affecting alarms, see |fault-doc|: - :ref:`alarm-messages-overview-19c242d3d151`. To display - management-affecting active alarms, use the following command: - - .. code-block:: none - - ~(keystone_admin)]$ fm alarm-list --mgmt_affecting - - During an orchestrated upgrade, the following alarms are ignored even when - strict restrictions are selected: - - - 750.006, Generic alarm for any platform-managed applications as they are auto-applied - - - 900.005, Upgrade in progress - - - 900.201, Software upgrade auto apply in progress - -.. _performing-an-orchestrated-upgrade-ul-qhy-q1p-v1b: - -.. rubric:: |prereq| - -See :ref:`Upgrading All-in-One Duplex / Standard -`, and perform Steps 1-8, to manually -upgrade the initial controller node before doing the upgrade orchestration -described below to upgrade the remaining nodes of the |prod| system. - -.. rubric:: |proc| - -.. _performing-an-orchestrated-upgrade-steps-e45-kh5-sy: - -#. Select **Platform** \> **Software Management**, then select the **Upgrade - Orchestration** tab. - -#. Click the **Create Strategy** button. - - The **Create Strategy** dialog appears. - -#. Create an upgrade strategy by specifying settings for the parameters in the - **Create Strategy** dialog box. - - Create an upgrade strategy, specifying the following parameters: - - - storage-apply-type: - - ``serial`` (default) - Storage hosts will be upgraded one at a time. - - ``parallel`` - Storage hosts will be upgraded in parallel, ensuring that only one - storage node in each replication group is upgraded at a time. - - ``ignore`` - Storage hosts will not be upgraded. - - - worker-apply-type: - - ``serial`` (default): - Worker hosts will be upgraded one at a time. - - ``parallel`` - Worker hosts will be upgraded in parallel, ensuring that: - - - At most max-parallel-worker-hosts (see below) worker hosts - will be upgraded at the same time. - - - At most half of the hosts in a host aggregate will be upgraded - at the same time. - - - Worker hosts with no application pods are upgraded before - worker hosts with application pods. - - ``ignore`` - Worker hosts will not be upgraded. - - ``max-parallel-worker-hosts`` - Specify the maximum worker hosts to upgrade in parallel (minimum: - 2, maximum: 10). - - .. note:: - For a maximum worker hosts condition in a Standard configuration - (50), the value shall be at the maximum 2, which represents the - minimum value. - - ``alarm-restrictions`` - This option lets you specify how upgrade orchestration behaves when - alarms are present. - - You can use the CLI command :command:`fm alarm-list - --mgmt_affecting` to view the alarms that are management affecting. - - ``Strict`` - The default strict option will result in upgrade orchestration - failing if there are any alarms present in the system (except - for a small list of alarms). 
- - ``Relaxed`` - This option allows orchestration to proceed if alarms are - present, as long as none of these alarms are management - affecting. - -#. Click **Create Strategy** to save the upgrade orchestration strategy. - - The upgrade strategy consists of one or more stages, which consist of one - or more hosts to be upgraded at the same time. Each stage will be split - into steps (for example, query-alarms, lock-hosts, upgrade-hosts). - Following are some notes about stages: - - - Controller-0 is upgraded first, followed by storage hosts and then - worker hosts. - - - Worker hosts with no application pods are upgraded before worker hosts - with application pods. - - - Pods will be moved off each worker host before it is upgraded. - - - The final step in each stage is one of: - - **system-stabilize** - This waits for a period of time (up to several minutes) and - ensures that the system is free of alarms. This ensures that we do - not continue to upgrade more hosts if the upgrade has caused an - issue resulting in an alarm. - - **wait-data-sync** - This waits for a period of time (up to many hours) and ensures - that data synchronization has completed after the upgrade of a - controller or storage node. - - Examine the upgrade strategy. Pay careful attention to: - - - The sets of hosts that will be upgraded together in each stage. - - - The sets of pods that will be impacted in each stage. - - .. note:: - It is likely that as each stage is applied, application pods will - be relocated to worker hosts that have not yet been upgraded. That - means that later stages will be migrating more pods than those - originally listed in the upgrade strategy. The upgrade strategy is - NOT updated, but any additional pods on each worker host will be - relocated before it is upgraded. - -#. Apply the upgrade strategy. You can optionally apply a single stage at a - time. - - While an upgrade strategy is being applied, it can be aborted. This results - in: - - - The current step will be allowed to complete. - - - If necessary an abort phase will be created and applied, which will - attempt to unlock any hosts that were locked. - - After an upgrade strategy has been applied (or aborted) it must be - deleted before another upgrade strategy can be created. If an - upgrade strategy application fails, you must address the issue that caused - the failure, then delete/re-create the strategy before attempting to apply - it again. - -For more information, see: :ref:`Perform an Orchestrated Upgrade Using the CLI ` - -.. only:: partner - - .. include:: /_includes/performing-an-orchestrated-upgrade.rest - :start-after: Orchupgrade-begin - :end-before: Orchupgrade-end diff --git a/doc/source/updates/kubernetes/populating-the-storage-area.rst b/doc/source/updates/kubernetes/populating-the-storage-area.rst deleted file mode 100644 index 0747e0354..000000000 --- a/doc/source/updates/kubernetes/populating-the-storage-area.rst +++ /dev/null @@ -1,82 +0,0 @@ - -.. fek1552920702618 -.. _populating-the-storage-area: - -========================= -Populate the Storage Area -========================= - -Software updates (patches) have to be uploaded to the |prod| storage area -before they can be applied. - -.. rubric:: |proc| - -#. Log in as **sysadmin** to the active controller. - -#. Upload the update file to the storage area. - - .. 
parsed-literal:: - - $ sudo sw-patch upload /home/sysadmin/patches/|pn|-CONTROLLER__PATCH_0001.patch - Cloud_Platform__CONTROLLER__PATCH_0001 is now available - - where in the update file name is the |prod| release number. - - This example uploads a single update to the storage area. You can specify - multiple update files on the same command separating their names with - spaces. - - Alternatively, you can upload all update files stored in a directory using - a single command, as illustrated in the following example: - - .. code-block:: none - - $ sudo sw-patch upload-dir /home/sysadmin/patches - - The update is now available in the storage area, but has not been applied - to the update repository or installed to the nodes in the cluster. - -#. Verify the status of the update. - - .. code-block:: none - - $ sudo sw-patch query - - The update state displays *Available*, indicating that it is included in the - storage area. Further details about the updates can be retrieved as - follows: - - .. code-block:: none - - $ sudo sw-patch show - - The :command:`sudo sw-patch query` command returns a list of patch IDs. - The :command:`sudo sw-patch show` command provides further detail about - the specified . - -#. Delete the update files from the root drive. - - After the updates are uploaded to the storage area, the original files are - no longer required. You must delete them to ensure there is enough disk - space to complete the installation. - - .. code-block:: none - - $ rm /home/sysadmin/patches/* - - .. caution:: - If the original files are not deleted before the updates are applied, - the installation may fail due to a full disk. - -.. rubric:: |postreq| - -When an update in the *Available* state is no longer required, you can delete -it using the following command: - -.. parsed-literal:: - - $ sudo sw-patch delete |pn|-|pvr|-PATCH_0001 - -The update to delete from the storage area is identified by the update -\(patch) ID reported by the :command:`sw-patch query` command. You can provide -multiple patch IDs to the delete command, separating their names by spaces. diff --git a/doc/source/updates/kubernetes/reclaiming-disk-space.rst b/doc/source/updates/kubernetes/reclaiming-disk-space.rst deleted file mode 100644 index 45a99975a..000000000 --- a/doc/source/updates/kubernetes/reclaiming-disk-space.rst +++ /dev/null @@ -1,95 +0,0 @@ - -.. ngk1552920570137 -.. _reclaiming-disk-space: - -================== -Reclaim Disk Space -================== - -You can free up and reclaim disk space taken by previous updates once a newer -version of an update has been committed to the system. - -.. rubric:: |proc| - -#. Run the :command:`query-dependencies` command to show a list of updates - that are required by the specified update (patch), including itself. - - .. code-block:: none - - sw-patch query-dependences [ --recursive ] - - The :command:`query-dependencies` command will show a list of updates that - are required by the specified update (including itself). The - ``--recursive`` option will crawl through those dependencies to return a - list of all the updates in the specified update's dependency tree. This - query is used by the :command:`commit` command in calculating the set of - updates to be committed. For example, - - .. 
parsed-literal:: - - controller-0:/home/sysadmin# sw-patch query-dependencies |pn|-|pvr|-PATCH_0004 - |pn|-|pvr|-PATCH_0002 - |pn|-|pvr|-PATCH_0003 - |pn|-|pvr|-PATCH_0004 - - controller-0:/home/sysadmin# sw-patch query-dependencies |pn|-|pvr|-PATCH_0004 --recursive - |pn|-|pvr|-PATCH_0001 - |pn|-|pvr|-PATCH_0002 - |pn|-|pvr|-PATCH_0003 - |pn|-|pvr|-PATCH_0004 - -#. Run the :command:`sw-patch commit` command. - - .. code-block:: none - - sw-patch commit [ --dry-run ] [ --all ] [ --release ] [ … ] - - The :command:`sw-patch commit` command allows you to specify a set of - updates to be committed. The commit set is calculated by querying the - dependencies of each specified update. - - The ``--all`` option, without the ``--release`` option, commits all updates - of the currently running release. When two releases are on the system use - the ``--release`` option to specify a particular release's updates if - committing all updates for the non-running release. The ``--dry-run`` - option shows the list of updates to be committed and how much disk space - will be freed up. This information is also shown without the ``--dry-run`` - option, before prompting to continue with the operation. An update can only - be committed once it has been fully applied to the system, and cannot be - removed after. - - Following are examples that show the command usage. - - The following command lists the status of all updates that are in an - *Applied* state. - - .. code-block:: none - - controller-0:/home/sysadmin# sw-patch query - - The following command commits the updates. - - .. parsed-literal:: - - controller-0:/home/sysadmin# sw-patch commit |pvr|-PATCH_0001 |pvr|-PATCH_0002 - The following patches will be committed: - |pvr|-PATCH_0001 - |pvr|-PATCH_0002 - - This commit operation would free 2186.31 MiB - - WARNING: Committing a patch is an irreversible operation. Committed patches - cannot be removed. - - Would you like to continue? [y/N]: y - The patches have been committed. - - The following command shows the updates now in the *Committed* state. - - .. parsed-literal:: - - controller-0:/home/sysadmin# sw-patch query - Patch ID RR Release Patch State - ================ ===== ======== ========= - |pvr|-PATCH_0001 N |pvr| Committed - |pvr|-PATCH_0002 Y |pvr| Committed diff --git a/doc/source/updates/kubernetes/removing-reboot-required-software-updates.rst b/doc/source/updates/kubernetes/removing-reboot-required-software-updates.rst deleted file mode 100644 index a42157f2b..000000000 --- a/doc/source/updates/kubernetes/removing-reboot-required-software-updates.rst +++ /dev/null @@ -1,117 +0,0 @@ - -.. scm1552920603294 -.. _removing-reboot-required-software-updates: - -======================================= -Remove Reboot-Required Software Updates -======================================= - -Updates in the *Applied* or *Partial-Apply* states can be removed if necessary, -for example, when they trigger undesired or unplanned effects on the cluster. - -.. rubric:: |context| - -Rolling back updates is conceptually identical to installing updates. A -roll-back operation can be commanded for an update in either the *Applied* or -the *Partial-Apply* states. As the update is removed, it goes through the -following state transitions: - -**Applied or Partial-Apply to Partial-Remove** - An update in the *Partial-Remove* state indicates that it has been removed - from zero or more, but not from all, the applicable hosts. - - Use the command :command:`sw-patch remove` to trigger this transition. 
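-
-    As a quick check, you can confirm the transition with
-    :command:`sw-patch query`; the patch ID and output below are
-    illustrative:
-
-    .. parsed-literal::
-
-       ~(keystone_admin)]$ sudo sw-patch query
-       Patch ID                  Patch State
-       ========================= ==============
-       |pn|-|pvr|-PATCH_0001     Partial-Remove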
- -**Partial-Remove to Available** - Use the command :command:`sudo sw-patch host-install-async ` - repeatedly targeting each one of the applicable hosts in the cluster. The - transition to the *Available* state is complete when the update is removed - from all target hosts. The update remains in the update storage area as if - it had just been uploaded. - - .. note:: - The command :command:`sudo sw-patch host-install-async ` both - installs and removes updates as necessary. - -The following example describes removing an update that applies only to the -controllers. Update removal can be done using the Horizon Web interface as -discussed in :ref:`Install Reboot-Required Software Updates Using Horizon -`. - -.. rubric:: |proc| - -#. Log in as Keystone user **admin** to the active controller. - -#. Verify the state of the update. - - .. parsed-literal:: - - ~(keystone_admin)]$ sudo sw-patch query - Patch ID Patch State - ========================= =========== - |pn|-|pvr|-PATCH_0001 Applied - - In this example the update is listed in the *Applied* state, but it could - also be in the *Partial-Apply* state. - -#. Remove the update. - - .. parsed-literal:: - - ~(keystone_admin)]$ sudo sw-patch remove |pn|-|pvr|-PATCH_0001 - |pn|-|pvr|-PATCH_0001 has been removed from the repo - - The update is now in the *Partial-Remove* state, ready to be removed from - the impacted hosts where it was currently installed. - -#. Query the updating status of all hosts in the cluster. - - .. code-block:: none - - ~(keystone_admin)]$ sudo sw-patch query-hosts - - Hostname IP Address Patch Current Reboot Required Release State - ============ =============== ============= =============== ======= ===== - worker-0 192.168.204.179 Yes No nn.nn idle - worker-1 192.168.204.173 Yes No nn.nn idle - controller-0 192.168.204.3 No No nn.nn idle - controller-1 192.168.204.4 No No nn.nn idle - storage-0 192.168.204.213 Yes No nn.nn idle - storage-1 192.168.204.181 Yes No nn.nn idle - - - In this example, the controllers have updates ready to be removed, and - therefore must be rebooted. - -#. Remove all pending-for-removal updates from controller-0. - - #. Swact controller services away from controller-0. - - #. Lock controller-0. - - #. Run the updating (patching) sequence. - - .. code-block:: none - - ~(keystone_admin)]$ sudo sw-patch host-install-async controller-0 - - #. Unlock controller-0. - -#. Remove all pending-for-removal updates from controller-1. - - #. Swact controller services away from controller-1. - - #. Lock controller-1. - - #. Run the updating sequence. - - #. Unlock controller-1. - - .. code-block:: none - - ~(keystone_admin)]$ sudo sw-patch host-install-async controller-1 - -.. rubric:: |result| - -The cluster is up to date now. All updates have been removed, and the update -|pn|-|pvr|-PATCH_0001 can be deleted from the storage area if necessary. diff --git a/doc/source/updates/kubernetes/rolling-back-a-software-upgrade-after-the-second-controller-upgrade.rst b/doc/source/updates/kubernetes/rolling-back-a-software-upgrade-after-the-second-controller-upgrade.rst deleted file mode 100644 index c0d9cb514..000000000 --- a/doc/source/updates/kubernetes/rolling-back-a-software-upgrade-after-the-second-controller-upgrade.rst +++ /dev/null @@ -1,160 +0,0 @@ - -.. eiu1593277809293 -.. 
_rolling-back-a-software-upgrade-after-the-second-controller-upgrade: - -================================================================ -Roll Back a Software Upgrade After the Second Controller Upgrade -================================================================ - -After the second controller is upgraded, you can still roll back a software -upgrade, however, the rollback will impact the hosting of applications. - -The upgrade abort procedure can only be applied before the -:command:`upgrade-complete` command is issued. Once this command is issued -the upgrade cannot be aborted. If you must revert to the previous release, -then restore the system using the backup data taken prior to the upgrade. - -In some scenarios additional actions will be required to complete the upgrade -abort. It may be necessary to restore the system from a backup. - -.. rubric:: |proc| - -#. Run the :command:`upgrade-abort` command to abort the upgrade (running from - controller-1). - - .. code-block:: none - - ~(keystone_admin)]$ system upgrade-abort - - Once this is done there is no going back; the upgrade must be completely - aborted. - - The following state applies when you run this command. - - - aborting-reinstall: - - - State entered when :command:`system upgrade-abort` is executed - after upgrading controller-0. - - - Remain in this state until the abort is completed. - -#. Lock controller-0. - - .. code-block:: none - - ~(keystone_admin)]$ system host-lock controller-0 - -#. Lock all storage and worker nodes that don't have ceph-mon configured - (ceph-mon usually on worker-0 or storage-0). Execute the - :command:`system ceph-mon-list` command to determine which hosts are running - ceph-mon. - - .. code-block:: none - - ~(keystone_admin)]$ system ceph-mon-list - - .. note:: - - Skip this step if doing this procedure on a |prod| Duplex - system. - -#. Use wipedisk on all worker and storage nodes, except on storage-0 - or on the worker node that has ceph-mon configured (worker-0 usually). - - .. note:: - Skip this step if doing this procedure on a |prod| Duplex system. - - #. Execute :command:`wipedisk` from the shell on each storage or worker - host. - - #. Power down each host. - -#. Power off all storage and worker nodes except the node with ceph-mon. - - .. note:: - Skip this step if doing this procedure on a |prod| Duplex system. - - .. code-block:: none - - ~(keystone_admin)]$ system host-lock - -#. Downgrade controller-0. - - .. code-block:: none - - ~(keystone_admin)]$ system host-downgrade controller-0 - - The host is re-installed with the previous release load. - -#. Unlock controller-0. - - .. code-block:: none - - ~(keystone_admin)]$ system host-unlock controller-0 - - .. note:: - Wait for controller-0 to become unlocked-enabled. Wait for the - |DRBD| sync 400.001 Services-related alarm to be raised and then cleared. - -#. Swact to controller-0. - - .. code-block:: none - - ~(keystone_admin)]$ system host-swact controller-1 - - Swacting back to controller-0 will switch back to using the previous - release databases, which were frozen at the time of the swact to - controller-1. This is essentially the same result as a system restore. - -#. Lock controller-1. - - .. code-block:: none - - ~(keystone_admin)]$ system host-lock controller-1 - - The host is re-installed with the previous release load. - -#. Downgrade controller-1. - - .. code-block:: none - - ~(keystone_admin)]$ system host-downgrade controller-1 - -#. Unlock controller-1. - - .. 
code-block:: none - - ~(keystone_admin)]$ system host-unlock controller-1 - -#. Run wipedisk on the worker node that was online (or the storage-0 node) and - power off the host. - - .. note:: - Skip this step if doing this procedure on a |prod| Duplex system. - -#. Power up and unlock storage, then worker hosts one at a time. - - .. note:: - Skip this step if doing this procedure on a |prod| Duplex system. - - .. code-block:: none - - ~(keystone_admin)]$ system host-unlock - - The hosts are re-installed with the previous release load. As each worker - host goes online, application pods will be automatically recovered by the - system. - -#. Complete the upgrade. - - .. code-block:: none - - ~(keystone_admin)]$ system upgrade-abort-complete - - This cleans up the upgrade release, configuration, databases, and so forth. - -#. Delete the upgrade release load. - - .. code-block:: none - - ~(keystone_admin)]$ system load-delete diff --git a/doc/source/updates/kubernetes/rolling-back-a-software-upgrade-before-the-second-controller-upgrade.rst b/doc/source/updates/kubernetes/rolling-back-a-software-upgrade-before-the-second-controller-upgrade.rst deleted file mode 100644 index 8d2495072..000000000 --- a/doc/source/updates/kubernetes/rolling-back-a-software-upgrade-before-the-second-controller-upgrade.rst +++ /dev/null @@ -1,78 +0,0 @@ - -.. wyr1593277734184 -.. _rolling-back-a-software-upgrade-before-the-second-controller-upgrade: - -================================================================= -Roll Back a Software Upgrade Before the Second Controller Upgrade -================================================================= - -After the first controller is upgraded, you can still perform an in-service -abort of an upgrade before the second Controller (controller-0 in the examples -of this procedure) has been upgraded. The :command:`system upgrade-abort` -command can be run from the node that is updated with the latest release and -has upgraded successfully. - -.. rubric:: |proc| - -#. Abort the upgrade with the :command:`upgrade-abort` command. - - .. code-block:: none - - ~(keystone_admin)]$ system upgrade-abort - - The upgrade state is set to aborting. Once this is executed, there is no - canceling; the upgrade must be completely aborted. - - The following states apply when you execute this command. - - - aborting: - - - State entered when :command:`system upgrade-abort` is executed - before upgrading controller-0. - - - Remain in this state until the abort is completed. - -#. Make controller-0 active. - - .. code-block:: none - - ~(keystone_admin)]$ system host-swact controller-1 - - If controller-1 was active with the new upgrade release, swacting back to - controller-0 will switch back to using the previous release databases, - which were frozen at the time of the swact to controller-1. Any changes to - the system that were made while controller-1 was active will be lost. - -#. Lock and downgrade controller-1. - - .. code-block:: none - - ~(keystone_admin)]$ system host-lock controller-1 - ~(keystone_admin)]$ system host-downgrade controller-1 - - The host is re-installed with the previous release load. - - .. note:: - The downgrade process will take a minimum of 20 to 30 minutes to - complete. - - You can view the downgrade progress on controller-1 using the - serial console. - -#. Unlock controller-1. - - .. code-block:: none - - ~(keystone_admin)]$ system host-unlock controller-1 - -#. Complete the upgrade. - - .. 
code-block:: none - - ~(keystone_admin)]$ system upgrade-complete - -#. Delete the newer upgrade release that has been aborted. - - .. code-block:: none - - ~(keystone_admin)]$ system load-delete diff --git a/doc/source/updates/kubernetes/software-update-space-reclamation.rst b/doc/source/updates/kubernetes/software-update-space-reclamation.rst deleted file mode 100644 index 514138458..000000000 --- a/doc/source/updates/kubernetes/software-update-space-reclamation.rst +++ /dev/null @@ -1,18 +0,0 @@ - -.. qbz1552920585263 -.. _software-update-space-reclamation: - -================================= -Software Update Space Reclamation -================================= - -|prod-long| provides functionality for reclaiming disk space used by older -versions of software updates once newer versions have been committed. - -The :command:`sw-patch commit` command allows you to “commit” a set of software -updates, which effectively locks down those updates and makes them unremovable. -In doing so, |prod-long| is able to free up the disk space in patch storage and -the software repo. - -.. caution:: - This action is irreversible. diff --git a/doc/source/updates/kubernetes/software-updates-and-upgrades-software-updates.rst b/doc/source/updates/kubernetes/software-updates-and-upgrades-software-updates.rst index 0b9b7bd0e..d845cd1e6 100644 --- a/doc/source/updates/kubernetes/software-updates-and-upgrades-software-updates.rst +++ b/doc/source/updates/kubernetes/software-updates-and-upgrades-software-updates.rst @@ -27,9 +27,6 @@ following items: Software updates can be installed manually or by the Update Orchestrator, which automates a rolling install of an update across all of the |prod-long| hosts. -For more information on manual updates, see :ref:`Manage Software Updates -`. For more information on upgrade orchestration, -see :ref:`Orchestrated Software Update `. .. warning:: Do NOT use the |updates-doc| guide for |prod-dc| orchestrated @@ -42,8 +39,10 @@ see :ref:`Orchestrated Software Update `. |prod| handles multiple updates being applied and removed at once. Software updates can modify and update any area of |prod| software, including the kernel -itself. For information on populating, installing and removing software -updates, see :ref:`Manage Software Updates `. +itself. + +.. For information on populating, installing and removing software +.. updates, see :ref:`Manage Software Updates `. There are two different kinds of Software updates that you can use to update the |prod| software: @@ -59,8 +58,7 @@ the |prod| software: which automates a rolling install of an update across all of the |prod-long| hosts. - For information on populating, installing and removing software updates, - see :ref:`Manage Software Updates `. + .. For information on populating, installing and removing software updates, see :ref:`Manage Software Updates `. .. note:: A 10 GB internal management network is required for reboot-required diff --git a/doc/source/updates/kubernetes/software-upgrades.rst b/doc/source/updates/kubernetes/software-upgrades.rst index 09565260b..4e9a48df6 100644 --- a/doc/source/updates/kubernetes/software-upgrades.rst +++ b/doc/source/updates/kubernetes/software-upgrades.rst @@ -17,11 +17,11 @@ Software Upgrades hosts are upgraded one at time while continuing to provide its hosting services to its hosted applications. An upgrade can be performed manually or using Upgrade Orchestration, which automates much of the upgrade procedure, leaving a -few manual steps to prevent operator oversight. 
For more information on manual -upgrades, see :ref:`Manual Platform Components Upgrade -`. For more information on upgrade orchestration, see -:ref:`Orchestrated Platform Component Upgrade -`. +few manual steps to prevent operator oversight. + +.. For more information on manual upgrades, see :ref:`Manual Platform Components Upgrade `. + +.. For more information on upgrade orchestration, see :ref:`Orchestrated Platform Component Upgrade `. .. warning:: Do NOT use information in the |updates-doc| guide for |prod-dc| @@ -59,9 +59,9 @@ running in a 'compatibility' mode where all inter-node messages are using message formats from the old release of software. Prior to upgrading the second controller, you reach a "point-of-no-return for an in-service abort" of the upgrades process. The second controller is loaded with the new release of -software and becomes the new Standby controller. For more information on manual -upgrades, see :ref:`Manual Platform Components Upgrade -` . +software and becomes the new Standby controller. + +.. For more information on manual upgrades, see :ref:`Manual Platform Components Upgrade ` . If present, storage nodes are locked, upgraded and unlocked one at a time in order to respect the redundancy model of |prod| storage nodes. Storage nodes @@ -107,13 +107,13 @@ abort: - Before controller-0 has been upgraded (that is, only controller-1 has been upgraded): In this case the upgrade can be aborted and the system will - remain in service during the abort, see, :ref:`Rolling Back a Software - Upgrade Before the Second Controller Upgrade - `. + remain in service during the abort. + + .. See, :ref:`Rolling Back a Software Upgrade Before the Second Controller Upgrade `. - After controller-0 has been upgraded (that is, both controllers have been upgraded): In this case the upgrade can only be aborted with a complete outage and a reinstall of all hosts. This would only be done as a last - resort, if there was absolutely no other way to recover the system, see, - :ref:`Rolling Back a Software Upgrade After the Second Controller Upgrade - `. + resort, if there was absolutely no other way to recover the system. + + .. See, :ref:`Rolling Back a Software Upgrade After the Second Controller Upgrade `. diff --git a/doc/source/updates/kubernetes/update-orchestration-cli.rst b/doc/source/updates/kubernetes/update-orchestration-cli.rst deleted file mode 100644 index d18dcfa19..000000000 --- a/doc/source/updates/kubernetes/update-orchestration-cli.rst +++ /dev/null @@ -1,69 +0,0 @@ - -.. agv1552920520258 -.. _update-orchestration-cli: - -======================== -Update Orchestration CLI -======================== - -The update orchestration CLI is :command:`sw-manager`. Use this to create your -update strategy. - -The commands and options map directly to the parameter descriptions in the web -interface dialog, described in :ref:`Configuring Update Orchestration -`. - -.. note:: - To use update orchestration commands, you need administrator privileges. - You must log in to the active controller as user **sysadmin** and source - the ``/etc/platform/openrc`` script to obtain administrator privileges. Do not - use :command:`sudo`. - -.. note:: - Management-affecting alarms cannot be ignored at the indicated severity - level or higher by using relaxed alarm rules during an orchestrated update - operation. For a list of management-affecting alarms, see |fault-doc|: - :ref:`Alarm Messages <100-series-alarm-messages-starlingx>`. 
To display - management-affecting active alarms, use the following command: - - .. code-block:: none - - ~(keystone_admin)$ fm alarm-list --mgmt_affecting - - During an orchestrated update operation, the following alarms are ignored - even when strict restrictions are selected: - - - 200.001, Maintenance host lock alarm - - - 900.001, Patch in progress - - - 900.005, Upgrade in progress - - - 900.101, Software patch auto apply in progress - -.. _update-orchestration-cli-ul-qhy-q1p-v1b: - -Help is available for the overall command and also for each sub-command. For -example: - -.. code-block:: none - - ~(keystone_admin)]$ sw-manager patch-strategy --help - usage: sw-manager patch-strategy [-h] ... - - optional arguments: - -h, --help show this help message and exit - -Update orchestration commands include: - -.. _update-orchestration-cli-ul-cvv-gdd-nx: - -- :command:`create` - Create a strategy - -- :command:`delete` - Delete a strategy - -- :command:`apply` - Apply a strategy - -- :command:`abort` - Abort a strategy - -- :command:`show` - Show a strategy diff --git a/doc/source/updates/kubernetes/update-orchestration-overview.rst b/doc/source/updates/kubernetes/update-orchestration-overview.rst deleted file mode 100644 index 4f2793a91..000000000 --- a/doc/source/updates/kubernetes/update-orchestration-overview.rst +++ /dev/null @@ -1,95 +0,0 @@ - -.. kzb1552920557323 -.. _update-orchestration-overview: - -============================= -Update Orchestration Overview -============================= - -Update orchestration allows an entire |prod| system to be updated with a single -operation. - -.. contents:: |minitoc| - :local: - :depth: 1 - -You can configure and run update orchestration using the CLI, the Horizon Web -interface, or the ``stx-nfv`` REST API. - -.. note:: - Updating of |prod-dc| is distinct from updating of other |prod| - configurations. - -.. xbooklink For information on updating |prod-dc|, see |distcloud-doc|: - :ref:`Update Management for Distributed Cloud - `. - -.. _update-orchestration-overview-section-N10031-N10023-N10001: - ---------------------------------- -Update Orchestration Requirements ---------------------------------- - -Update orchestration can only be done on a system that meets the following -conditions: - -.. _update-orchestration-overview-ul-e1y-t4c-nx: - -- The system is clear of alarms (with the exception of alarms for locked - hosts, and update applications in progress). - - .. note:: - When configuring update orchestration, you have the option to ignore - alarms with a severity less than management-affecting severity. For - more information, see :ref:`Configuring Update Orchestration - `. - -- All hosts must be unlocked-enabled-available. - -- Two controller hosts must be available. - -- All storage hosts must be available. - -- When installing reboot required updates, there must be spare worker - capacity to move hosted application pods off the worker host\(s) being - updated such that hosted application services are not impacted. - -.. _update-orchestration-overview-section-N1009D-N10023-N10001: - --------------------------------- -The Update Orchestration Process --------------------------------- - -Update orchestration automatically iterates through all hosts on the system and -installs the applied updates to each host: first the controller hosts, then the -storage hosts, and finally the worker hosts. During the worker host updating, -hosted application pod re-locations are managed automatically. The controller -hosts are always updated serially. 
The storage hosts and worker hosts can be -configured to be updated in parallel in order to reduce the overall update -installation time. - -Update orchestration can install one or more applied updates at the same time. -It can also install reboot-required updates or in-service updates or both at -the same time. Update orchestration only locks and unlocks (that is, reboots) -a host to install an update if at least one reboot-required update has been -applied. - -You first create an update orchestration strategy, or plan, for the automated -updating procedure. This customizes the update orchestration, using parameters -to specify: - -.. _update-orchestration-overview-ul-eyw-fyr-31b: - -- the host types to be updated - -- whether to update hosts serially or in parallel - -Based on these parameters, and the state of the hosts, update orchestration -creates a number of stages for the overall update strategy. Each stage -generally consists of re-locating hosted application pods, locking hosts, -installing updates, and unlocking hosts for a subset of the hosts on the -system. - -After creating the update orchestration strategy, the user can either apply the -entire strategy automatically, or manually apply individual stages to control -and monitor the update progress. diff --git a/doc/source/updates/kubernetes/update-status-and-lifecycle.rst b/doc/source/updates/kubernetes/update-status-and-lifecycle.rst deleted file mode 100644 index d3561c20e..000000000 --- a/doc/source/updates/kubernetes/update-status-and-lifecycle.rst +++ /dev/null @@ -1,76 +0,0 @@ - -.. utq1552920689344 -.. _update-status-and-lifecycle: - -=========================== -Update Status and Lifecycle -=========================== - -|prod| software updates move through different status levels as the updates are -being applied. - -.. rubric:: |context| - -After adding an update (patch) to the storage area you must move it to the -repository, which manages distribution for the cluster. From there, you can -install the updates to the hosts that require them. - -Some of the available updates may be required on controller hosts only, while -others may be required on worker or storage hosts. Use :command:`sw-patch -query-hosts` to see which hosts are impacted by the newly applied (or -removed) updates. You can then use :command:`sw-patch host-install` to update -the software on individual hosts. - -To keep track of software update installation, you can use the -:command:`sw-patch query` command. - -.. parsed-literal:: - - ~(keystone_admin)]$ sudo sw-patch query - Patch ID Patch State - =========== ============ - |pvr|-._PATCH_0001 Applied - -where . in the update filename is the |prod| release number. - -This shows the 'Patch State' for each of the updates in the storage area: - -``Available`` - An update in the *Available* state has been added to the storage area, but - is not currently in the repository or installed on the hosts. - -``Partial-Apply`` - An update in the *Partial-Apply* state has been added to the software - updates repository using the :command:`sw-patch apply` command, but has not - been installed on all hosts that require it. It may have been installed on - some but not others, or it may not have been installed on any hosts. If any - reboot-required update is in a partial state (Partial-Apply or - Partial-Remove), you cannot update the software on any given host without - first locking it. 
If, for example, you had one reboot-required update and
-    one in-service update, both in a Partial-Apply state and both applicable
-    to node X, you cannot just install the non-reboot-required update to the
-    unlocked node X.
-
-``Applied``
-    An update in the *Applied* state has been installed on all hosts that
-    require it.
-
-You can use the :command:`sw-patch query-hosts` command to see which hosts are
-fully updated (Patch Current). This also shows which hosts require
-reboot, either because they are not fully updated, or because they are fully
-updated but not yet rebooted.
-
-.. code-block:: none
-
-   ~(keystone_admin)]$ sudo sw-patch query-hosts
-
-   Hostname     IP Address     Patch Current Reboot Required Release State
-   ============ ============== ============= =============== ======= =====
-   worker-0     192.168.204.95 Yes           No              nn.nn   idle
-   worker-1     192.168.204.63 Yes           No              nn.nn   idle
-   worker-2     192.168.204.99 Yes           No              nn.nn   idle
-   worker-3     192.168.204.49 Yes           No              nn.nn   idle
-   controller-0 192.168.204.3  Yes           No              nn.nn   idle
-   controller-1 192.168.204.4  Yes           No              nn.nn   idle
-   storage-0    192.168.204.37 Yes           No              nn.nn   idle
-   storage-1    192.168.204.90 Yes           No              nn.nn   idle
diff --git a/doc/source/updates/kubernetes/upgrading-all-in-one-duplex-or-standard.rst b/doc/source/updates/kubernetes/upgrading-all-in-one-duplex-or-standard.rst
deleted file mode 100644
index 517ee21aa..000000000
--- a/doc/source/updates/kubernetes/upgrading-all-in-one-duplex-or-standard.rst
+++ /dev/null
@@ -1,566 +0,0 @@
-
-.. btn1592861794542
-.. _upgrading-all-in-one-duplex-or-standard:
-
-====================================
-Upgrade All-in-One Duplex / Standard
-====================================
-
-You can upgrade the |prod| Duplex or Standard configurations with a new release
-of |prod| software.
-
-.. rubric:: |prereq|
-
-.. _upgrading-all-in-one-duplex-or-standard-ul-ezb-b11-cx:
-
-- Validate the list of new images with the target release. If you are using a
-  private registry for installs/upgrades, you must populate your private
-  registry with the new images prior to bootstrap and/or patch application.
-
-- Perform a full backup to allow recovery.
-
-  .. note::
-     Back up files in the ``/home/sysadmin`` and ``/root`` directories prior
-     to doing an upgrade. Home directories are not preserved during backup or
-     restore operations, blade replacement, or upgrades.
-
-- The system must be 'patch current'. All updates available for the current
-  release running on the system must be applied, and all patches must be
-  committed. To find and download applicable updates, visit the |dnload-loc|.
-
-- Transfer the new release software load to controller-0 (or onto a USB
-  stick); controller-0 must be active.
-
-  .. note::
-     Make sure that the filesystem containing the ``/home/sysadmin``
-     directory has enough space (at least 2GB of free space),
-     otherwise the upgrade may fail. If more space is needed, it is
-     recommended to delete the ``.iso`` bootimage imported earlier, after
-     running the :command:`load-import` command.
-
-- Transfer the new release software license file to controller-0 (or onto a
-  USB stick).
-
-- Transfer the new release software signature to controller-0 (or onto a USB
-  stick).
-
-- Unlock all hosts.
-
-  - All nodes must be unlocked, as the health check prevents the upgrade
-    from proceeding if there are locked nodes.
-
-.. include:: /_includes/manual-image-cleanup-note-b43d78f2ba4a.rest
-
-.. note::
-
-   The upgrade procedure includes steps to resolve system health issues.
-
-.. 
note:: - - Upgrading hosts should be completed within 24 hours to avoid a kubeadm token - timeout. - -.. rubric:: |proc| - -#. Ensure that controller-0 is the active controller. - -#. Install the license file for the release you are upgrading. - - .. code-block:: none - - ~(keystone_admin)]$ system license-install - - For example, - - .. code-block:: none - - ~(keystone_admin)]$ system license-install license.lic - -#. Import the new release. - - #. Run the :command:`load-import` command on controller-0 to import - the new release. - - Source ``/etc/platform/openrc``. Also, you can specify either the full - file path or relative paths to the ``*.iso`` bootimage file and to the - ``*.sig`` bootimage signature file. - - .. code-block:: none - - $ source /etc/platform/openrc - ~(keystone_admin)]$ system load-import [--local] /home/sysadmin/.iso .sig - - +--------------------+-----------+ - | Property | Value | - +--------------------+-----------+ - | id | 2 | - | state | importing | - | software_version | nn.nn | - | compatible_version | nn.nn | - | required_patches | | - +--------------------+-----------+ - - The :command:`load-import` must be done on controller-0. - - (Optional) If ``--local`` is specified, the ISO and sig files are - transferred directly from the active controller filesystem to the - load directory, otherwise, they are transferred via the API. - - .. note:: - - If ``--local`` is specified, the ISO and sig files are transferred - directly from the active controller filesystem to the load directory, - if it is not specified, the files are transferred via the API. - - .. note:: - This will take a few minutes to complete. - - #. Check to ensure the load was successfully imported. - - .. code-block:: none - - ~(keystone_admin)]$ system load-list - +----+----------+------------------+ - | id | state | software_version | - +----+----------+------------------+ - | 1 | active | nn.nn | - | 2 | imported | nn.nn | - +----+----------+------------------+ - -#. Apply any required software updates. - - The system must be 'patch current'. All software updates related to your - current |prod| software release must be uploaded, applied, and installed. - - All software updates to the new |prod| release only need to be uploaded - and applied. The install of these software updates will occur automatically - during the software upgrade procedure as the hosts are reset to load the - new release of software. - - To find and download applicable updates, visit the |dnload-loc|. - - For more information, see :ref:`Manage Software Updates - `. - -#. Confirm that the system is healthy. - - Check the current system health status, resolve any alarms and other issues - reported by the :command:`system health-query-upgrade` command, then - recheck the system health status to confirm that all **System Health** - fields are set to *OK*. For example: - - .. 
code-block:: none - - ~(keystone_admin)]$ system health-query-upgrade - - System Health: - All hosts are provisioned: [OK] - All hosts are unlocked/enabled: [OK] - All hosts have current configurations: [OK] - All hosts are patch current: [OK] - Ceph Storage Healthy: [OK] - No alarms: [OK] - All kubernetes nodes are ready: [OK] - All kubernetes control plane pods are ready: [OK] - All PodSecurityPolicies are removed: [OK] - Required patches are applied: [OK] - License valid for upgrade: [OK] - No instances running on controller-1: [OK] - All kubernetes applications are in a valid state: [OK] - Active controller is controller-0: [OK] - Disk space requirement: [OK] - Boot Device and Root file system Device: [OK] - - By default, the upgrade process cannot be run with active alarms present. - Use the command :command:`system upgrade-start --force` to force the upgrade - process to start and ignore non-management-affecting alarms. - - .. note:: - It is strongly recommended that you clear your system of any and all - alarms before doing an upgrade. While the :command:`--force` option is - available to run the upgrade, it is a best practice to clear any - alarms. - -#. Start the upgrade from controller-0. - - Make sure that controller-0 is the active controller, and you are logged - into controller-0 as **sysadmin** and your present working directory is - your home directory. - - .. code-block:: none - - ~(keystone_admin)]$ system upgrade-start - +--------------+--------------------------------------+ - | Property | Value | - +--------------+--------------------------------------+ - | uuid | 61e5fcd7-a38d-40b0-ab83-8be55b87fee2 | - | state | starting | - | from_release | nn.nn | - | to_release | nn.nn | - +--------------+--------------------------------------+ - - - This will make a copy of the upgrade data onto a |DRBD| file system to be - used in the upgrade. Configuration changes are not allowed after this point - until the swact to controller-1 is completed. - - The following upgrade state applies once this command is executed: - - - ``started``: - - - State entered after :command:`system upgrade-start` completes. - - - Release . system data (for example, postgres databases) has - been exported to be used in the upgrade. - - - Configuration changes must not be made after this point, until the - upgrade is completed. - - As part of the upgrade, the upgrade process checks the health of the system - and validates that the system is ready for an upgrade. - - The upgrade process checks that no alarms are active before starting an - upgrade. - - .. note:: - - Use the command :command:`system upgrade-start --force` to force the - upgrade process to start and ignore non-management-affecting alarms. - This should **ONLY** be done if you ascertain that these alarms will - interfere with the upgrades process. - - On systems with Ceph storage, the process also checks that the Ceph cluster - is healthy. - -#. Upgrade controller-1. - - #. Lock controller-1. - - .. code-block:: none - - ~(keystone_admin)]$ system host-lock controller-1 - - #. Upgrade controller-1. - - Controller-1 installs the update and reboots, then performs data - migration. - - .. code-block:: none - - ~(keystone_admin)]$ system host-upgrade controller-1 - - Wait for controller-1 to reinstall with the load N+1 and becomes - **locked-disabled-online** state. 
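-
-      One way to watch for this state from controller-0 is to filter the
-      relevant host fields; the output below is illustrative:
-
-      .. code-block:: none
-
-         ~(keystone_admin)]$ system host-show controller-1 | grep -E 'administrative|operational|availability'
-         | administrative | locked   |
-         | availability   | online   |
-         | operational    | disabled |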
- - The following data migration states apply when this command is - executed: - - - ``data-migration``: - - - State entered when :command:`system host-upgrade controller-1` - is executed. - - - System data is being migrated from release N to release N+1. - - .. note:: - The upgrade process will take a minimum of 20 to 30 minutes to - complete. - - You can view the upgrade progress on controller-1 using the - serial console. - - - ``data-migration-complete or upgrading-controllers``: - - - State entered when controller-1 upgrade is complete. - - - System data has been successfully migrated from release . - to the newer Version. - - - ``data-migration-failed``: - - - State entered if data migration on controller-1 fails. - - - Upgrade must be aborted. - - .. note:: - Review the ``/var/log/sysinv.log`` on the active controller for - more details on data migration failure. - - #. Check the upgrade state. - - .. code-block:: none - - ~(keystone_admin)]$ system upgrade-show - +--------------+--------------------------------------+ - | Property | Value | - +--------------+--------------------------------------+ - | uuid | e7c8f6bc-518c-46d4-ab81-7a59f8f8e64b | - | state | data-migration-complete | - | from_release | nn.nn | - | to_release | nn.nn | - +--------------+--------------------------------------+ - - If the :command:`upgrade-show` status indicates - *data-migration-failed*, then there is an issue with the data - migration. Check the issue before proceeding to the next step. - - #. Unlock controller-1. - - .. code-block:: none - - ~(keystone_admin)]$ system host-unlock controller-1 - - Wait for controller-1 to enter the state *unlocked-enabled*. Wait for - the |DRBD| sync **400.001** Services-related alarm to be raised and then - cleared. - - The **upgrading-controllers** state applies when this command is executed. - This state is entered after controller-1 has been upgraded to release nn.nn - and data migration is successfully completed. - - If the controller transitions to **unlocked-disabled-failed**, check the - issue before proceeding to the next step. The alarms may indicate a - configuration error. Check the result of the configuration logs on - controller-1, (for example, Error logs in - controller1:``/var/log/puppet``). - -#. Set controller-1 as the active controller. Swact to controller-1. - - .. code-block:: none - - ~(keystone_admin)]$ system host-swact controller-0 - - Wait until services have become active on the new active controller-1 before - proceeding to the next step. The swact is complete when all services on - controller-1 are in the state ``enabled-active``. Use the command ``system - servicegroup-list`` to monitor progress. - -#. Upgrade controller-0. - - #. Lock controller-0. - - .. code-block:: none - - ~(keystone_admin)]$ system host-lock controller-0 - - #. Upgrade controller-0. - - .. code-block:: none - - ~(keystone_admin)]$ system host-upgrade controller-0 - - - #. Unlock controller-0. - - .. code-block:: none - - ~(keystone_admin)]$ system host-unlock controller-0 - - Wait until the |DRBD| sync **400.001** Services-related alarm is raised - and then cleared before proceeding to the next step. - - - upgrading-hosts: - - - State entered when both controllers are running release nn.nn - software. - - .. note:: - |AIO-DX| or Controllers of Standard configurations can be - upgraded, using steps 1-9 above. - -#. Check the system health to ensure that there are no unexpected alarms. - - .. 
code-block:: none - - ~(keystone_admin)]$ fm alarm-list - - Clear all alarms unrelated to the upgrade process. - -#. If using Ceph a storage backend, upgrade the storage nodes one at a time. - - .. note:: - Proceed to step 13 if no storage/worker node is present. - - The storage node must be locked and all OSDs must be down in order to do - the upgrade. - - #. Lock storage-0. - - .. code-block:: none - - ~(keystone_admin)]$ system host-lock storage-0 - - #. Verify that the |OSDs| are down after the storage node is locked. - - In the Horizon interface, navigate to **Admin** \> **Platform** \> - **Storage Overview** to view the status of the |OSDs|. - - #. Upgrade storage-0. - - .. code-block:: none - - ~(keystone_admin)]$ system host-upgrade storage-0 - - The upgrade is complete when the node comes online. At that point - you can safely unlock the node. - - After upgrading a storage node, but before unlocking, there are Ceph - synchronization alarms (that appear to be making progress in - synching), and there are infrastructure network interface alarms - (since the infrastructure network interface configuration has not been - applied to the storage node yet, as it has not been unlocked). - - Unlock the node as soon as the upgraded storage node comes online. - - #. Unlock storage-0. - - .. code-block:: none - - ~(keystone_admin)]$ system host-unlock storage-0 - - Wait for all alarms to clear after the unlock before proceeding to - upgrade the next storage host. - - #. Repeat the above steps for each storage host. - - .. note:: - After upgrading the first storage node you can expect alarm - **800.003**. The alarm is cleared after all storage nodes are - upgraded. - -#. Upgrade worker hosts, if any, one at a time. - - #. Lock worker-0. - - .. code-block:: none - - ~(keystone_admin)]$ system host-lock worker-0 - - #. Upgrade worker-0. - - .. code-block:: none - - ~(keystone_admin)]$ system host-upgrade worker-0 - - Wait for the host to run the installer, reboot, and go online before - unlocking it in the next step. - - #. Unlock worker-0. - - .. code-block:: none - - ~(keystone_admin)]$ system host-unlock worker-0 - - After the unlock wait for all alarms to clear before proceeding to the - next worker host. - - #. Repeat the above steps for each worker host. - -#. Set controller-0 as the active controller. Swact to controller-0. - - .. code-block:: none - - ~(keystone_admin)]$ system host-swact controller-1 - - Wait until services have become available on the active controller-0 before - proceeding to the next step. When all services on controller-0 are in the - ``enabled-active`` state, the swact is complete. - -#. Activate the upgrade. - - .. code-block:: none - - ~(keystone_admin)]$ system upgrade-activate - +--------------+--------------------------------------+ - | Property | Value | - +--------------+--------------------------------------+ - | uuid | 61e5fcd7-a38d-40b0-ab83-8be55b87fee2 | - | state | activating | - | from_release | nn.nn | - | to_release | nn.nn | - +--------------+--------------------------------------+ - - When running the :command:`upgrade-activate` command, new - configurations are applied to the controller. 250.001 (**hostname - Configuration is out-of-date**) alarms are raised and are cleared as the - configuration is applied. The upgrade state goes from ``activating`` to - ``activation-complete`` once this is done. - - .. only:: partner - - .. 
include:: /_includes/upgrading-all-in-one-duplex-or-standard.rest - :start-after: deploymentmanager-begin - :end-before: deploymentmanager-end - - The following states apply when this command is executed. - - ``activation-requested`` - State entered when :command:`system upgrade-activate` is executed. - - ``activating`` - State entered when the system has started activating the upgrade by applying - new configurations to the controller and compute hosts. - - ``activating-hosts`` - State entered when applying host-specific configurations. This state is - entered only if needed. - - ``activation-complete`` - State entered when new configurations have been applied to all - controller and compute hosts. - - #. Check the status of the upgrade again to see it has reached - ``activation-complete``. - - .. code-block:: none - - ~(keystone_admin)]$ system upgrade-show - +--------------+--------------------------------------+ - | Property | Value | - +--------------+--------------------------------------+ - | uuid | 61e5fcd7-a38d-40b0-ab83-8be55b87fee2 | - | state | activation-complete | - | from_release | nn.nn | - | to_release | nn.nn | - +--------------+--------------------------------------+ - - .. note:: - This can take more than half an hour to complete. - - **activation-failed** - Check ``/var/log/sysinv.log`` for further information. - -#. Complete the upgrade. - - .. code-block:: none - - ~(keystone_admin)]$ system upgrade-complete - +--------------+--------------------------------------+ - | Property | Value | - +--------------+--------------------------------------+ - | uuid | 61e5fcd7-a38d-40b0-ab83-8be55b87fee2 | - | state | completing | - | from_release | nn.nn | - | to_release | nn.nn | - +--------------+--------------------------------------+ - -#. Delete the imported load. - - .. code-block:: none - - ~(keystone_admin)]$ system load-list - +----+----------+------------------+ - | id | state | software_version | - +----+----------+------------------+ - | 1 | imported | nn.nn | - | 2 | active | nn.nn | - +----+----------+------------------+ - - ~(keystone_admin)]$ system load-delete 1 - Deleted load: load 1 - -.. only:: partner - - .. include:: /_includes/upgrading-all-in-one-duplex-or-standard.rest - :start-after: upgradeDX-begin - :end-before: upgradeDX-end diff --git a/doc/source/updates/kubernetes/upgrading-all-in-one-simplex.rst b/doc/source/updates/kubernetes/upgrading-all-in-one-simplex.rst deleted file mode 100644 index efa477b1a..000000000 --- a/doc/source/updates/kubernetes/upgrading-all-in-one-simplex.rst +++ /dev/null @@ -1,483 +0,0 @@ - -.. nfq1592854955302 -.. _upgrading-all-in-one-simplex: - -========================== -Upgrade All-in-One Simplex -========================== - -You can upgrade a |prod| Simplex configuration with a new release of |prod| -software. - -.. rubric:: |prereq| - - -.. _upgrading-all-in-one-simplex-ul-ezb-b11-cx: - -- Validate the list of new images with the target release. If you are using a - private registry for installs/upgrades, you must populate your private - registry with the new images prior to bootstrap and/or patch application. - -- Perform a full backup to allow recovery. - - .. note:: - - Back up files in the ``/root`` directory prior to doing an upgrade, the - ``/home`` size limit is 2000 MB. Although container image sizes are - preserved, ``/root`` directory is not preserved during backup or restore - operations, server replacement, or upgrades. - - For more details on backup and what is included see :ref:`Back Up System - Data `. 
- - .. note:: - - The backup and restore function of an upgrade is specific to an |AIO-SX| - deployment only. - -- Ensure that the following conditions are met: - - - The system is patch current. - - - There should be sufficient free space in ``/opt/platform-backup.``. - Remove any unused files if necessary. - - - The new software load has been imported. - - - A valid license file has been installed. - -- Transfer the new release software load to controller-0 (or onto a USB - stick); controller-0 must be active. - - .. note:: - - Make sure that the ``/home/sysadmin`` directory has enough space (at - least 2GB of free space), otherwise the upgrade may fail once it starts. - If more space is needed, it is recommended to delete the ``.iso`` - bootimage previously imported after the :command:`load-import` - command. - -- Transfer the new release software license file to controller-0 (or onto a - USB stick). - -- Transfer the new release software signature to controller-0 (or onto a USB - stick). - -.. include:: /_includes/manual-image-cleanup-note-b43d78f2ba4a.rest - -.. note:: - The upgrade procedure includes steps to resolve system health issues. - -End user container images in ``registry.local`` will be backed up during the -upgrade process. This only includes images other than |prod| system and -application images. These images are limited to 5 GB in total size. If the -system contains more than 5 GB of these images, the upgrade start will fail. -For more details, see :ref:`Contents of System Backup -`. - -.. rubric:: |proc| - -#. Source the platform environment. - - .. code-block:: none - - $ source /etc/platform/openrc - ~(keystone_admin)]$ - -#. Install the license file for the release you are upgrading to. - - .. code-block:: none - - ~(keystone_admin)]$ system license-install - - For example, - - .. code-block:: none - - ~(keystone_admin)]$ system license-install license.lic - -#. Import the new release. - - #. Run the :command:`load-import` command on controller-0 to import - the new release. - - Source ``/etc/platform/openrc``. Also, you can specify either the - full file path or relative paths to the ``*.iso`` bootimage file and to - the ``*.sig`` bootimage signature file. - - .. code-block:: none - - $ source /etc/platform/openrc - ~(keystone_admin)]$ system load-import [--local] /home/sysadmin/.iso .sig - - +--------------------+-----------+ - | Property | Value | - +--------------------+-----------+ - | id | 2 | - | state | importing | - | software_version | nn.nn | - | compatible_version | nn.nn | - | required_patches | | - +--------------------+-----------+ - - The :command:`load-import` must be done on controller-0. - - (Optional) If ``--local`` is specified, the ISO and sig files are - uploaded directly from the active controller, where `` - and `` are paths on the active controller to load - ISO files and sig files respectively. - - .. note:: - - If ``--local`` is specified, the ISO and sig files are transferred - directly from the active controller filesystem to the load directory, - if it is not specified, the files are transferred via the API. - - .. note:: - This will take a few minutes to complete. - - #. Check to ensure the load was successfully imported. - - .. code-block:: none - - ~(keystone_admin)]$ system load-list - +----+----------+------------------+ - | id | state | software_version | - +----+----------+------------------+ - | 1 | active | nn.nn | - | 2 | imported | nn.nn | - +----+----------+------------------+ - -#. Apply any required software updates. 
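-
-   As a starting point, you can review which updates are currently uploaded
-   and applied; this is a minimal check using the same query command used
-   elsewhere in this procedure:
-
-   .. code-block:: none
-
-      ~(keystone_admin)]$ sudo sw-patch query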
- - The system must be 'patch current'. All software updates related to your - current |prod| software release must be uploaded, applied, and installed. - - All software updates to the new |prod| release only need to be uploaded - and applied. The install of these software updates will occur automatically - during the software upgrade procedure as the hosts are reset to load the - new release of software. - - To find and download applicable updates, visit the |dnload-loc|. - - For more information, see :ref:`Manage Software Updates - `. - -#. Confirm that the system is healthy. - - .. note:: - Do not modify protected filesystem directories before backup. - - Check the current system health status, resolve any alarms and other issues - reported by the :command:`system health-query-upgrade` command, then - recheck the system health status to confirm that all **System Health** - fields are set to *OK*. - - .. code-block:: none - - ~(keystone_admin)]$ system health-query-upgrade - System Health: - All hosts are provisioned: [OK] - All hosts are unlocked/enabled: [OK] - All hosts have current configurations: [OK] - All hosts are patch current: [OK] - Ceph Storage Healthy: [OK] - No alarms: [OK] - All kubernetes nodes are ready: [OK] - All kubernetes control plane pods are ready: [OK] - All PodSecurityPolicies are removed: [OK] - Required patches are applied: [OK] - License valid for upgrade: [OK] - No instances running on controller-1: [OK] - All kubernetes applications are in a valid state: [OK] - Active controller is controller-0: [OK] - Disk space requirement: [OK] - Boot Device and Root file system Device: [OK] - - By default, the upgrade process cannot be run with Active Alarms present. - However, management affecting alarms can be ignored with the - :command:`--force` option with the :command:`system upgrade-start` command - to force the upgrade process to start. - - .. note:: - It is strongly recommended that you clear your system of all - alarms before doing an upgrade. While the :command:`--force` option is - available to run the upgrade, it is a best practice to clear any - alarms. - -#. Start the upgrade. - - .. code-block:: none - - ~(keystone_admin)]$ system upgrade-start - +--------------+--------------------------------------+ - | Property | Value | - +--------------+--------------------------------------+ - | uuid | 61e5fcd7-a38d-40b0-ab83-8be55b87fee2 | - | state | starting | - | from_release | nn.nn | - | to_release | nn.nn | - +--------------+--------------------------------------+ - - This will back up the system data and images to ``/opt/platform-backup.``. - ``/opt/platform-backup.`` is preserved when the host is reinstalled. With the - platform backup, the size of ``/home/sysadmin`` must be less than 2GB. - - This process may take several minutes. - - When the upgrade state is upgraded to *started* the process is complete. - - Any changes made to the system after this point will be lost when the data - is restored. - - The following upgrade state applies once this command is executed: - - - ``started``: - - - State entered after :command:`system upgrade-start` completes. - - - Release . system data (for example, postgres databases) has - been exported to be used in the upgrade. - - - Configuration changes must not be made after this point, until the - upgrade is completed. - - The upgrade process checks the health of the system and validates that the - system is ready for an upgrade. - - The upgrade process checks that no alarms are active before starting an - upgrade. 
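-
-   To review the alarms that this check considers blocking, you can list the
-   management-affecting alarms beforehand, for example:
-
-   .. code-block:: none
-
-      ~(keystone_admin)]$ fm alarm-list --mgmt_affecting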
- - .. note:: - - Use the command :command:`system upgrade-start --force` to force the - upgrades process to start and to ignore management affecting alarms. - This should **ONLY** be done if you have ascertained that these alarms - will not interfere with the upgrades process. - -#. Check the upgrade state. - - .. code-block:: none - - ~(keystone_admin)]$ system upgrade-show - +--------------+--------------------------------------+ - | Property | Value | - +--------------+--------------------------------------+ - | uuid | 61e5fcd7-a38d-40b0-ab83-8be55b87fee2 | - | state | started | - | from_release | nn.nn | - | to_release | nn.nn | - +--------------+--------------------------------------+ - - Ensure the upgrade state is *started*. It will take several minutes to - transition to the *started* state. - -#. (Optional) Copy the upgrade data from the system to an alternate safe - location (such as a USB drive or remote server). - - The upgrade data is located under ``/opt/platform-backup``. Example file names - are: - - **lost+found upgrade_data_2020-06-23T033950\_61e5fcd7-a38d-40b0-ab83-8be55b87fee2.tgz** - - .. code-block:: none - - ~(keystone_admin)]$ ls /opt/platform-backup/ - -#. Lock controller-0. - - .. code-block:: none - - ~(keystone_admin)]$ system host-lock controller-0 - -#. Upgrade controller-0. - - This is the point of no return. All data except ``/opt/platform-backup/`` - will be erased from the system. This will wipe the ``rootfs`` and reboot the - host. The new release must then be manually installed (via network or - USB). - - .. code-block:: none - - ~(keystone_admin)]$ system host-upgrade controller-0 - WARNING: THIS OPERATION WILL COMPLETELY ERASE ALL DATA FROM THE SYSTEM. - Only proceed once the system data has been copied to another system. - Are you absolutely sure you want to continue? [yes/N]: yes - -#. Install the new release of |prod-long| Simplex software via network or USB. - -#. Verify and configure IP connectivity. External connectivity is required to - run the Ansible upgrade playbook. The |prod-long| boot image will |DHCP| out - all interfaces so the server may have obtained an IP address and have - external IP connectivity if a |DHCP| server is present in your environment. - Verify this using the :command:`ip addr` command. Otherwise, manually - configure an IP address and default IP route. - -#. Restore the upgrade data. - - .. code-block:: none - - ~(keystone_admin)]$ ansible-playbook /usr/share/ansible/stx-ansible/playbooks/upgrade_platform.yml - - .. only:: starlingx - - .. important:: - - If you are upgrading from r7.0 to r8.0 use the command below instead: - - .. code-block:: none - - ansible-playbook /usr/share/ansible/stx-ansible/playbooks/upgrade_platform.yml -e "upgrade_mode=old" - - .. only:: partner - - .. include:: /_includes/upgrading-all-in-one-simplex.rest - :start-after: note-upgrade-begin - :end-before: note-upgrade-end - - Once the host has installed the new load, this will restore the upgrade - data and migrate it to the new load. - - The playbook can be run locally or remotely and must be provided with the - following parameter: - - ``ansible_become_pass`` - The ansible playbook will check ``/home/sysadmin/.yml`` for - these user configuration override files for hosts. For example, if - running ansible locally, ``/home/sysadmin/localhost.yml``. - - By default the playbook will search for the upgrade data file under - ``/opt/platform-backup``. 
-
-   .. note::
-       This playbook does not support replay.
-
-   .. note::
-       This can take more than one hour to complete.
-
-   Once the data restoration is complete, the upgrade state will be set to
-   *upgrading-hosts*.
-
-#. Check the status of the upgrade.
-
-   .. code-block:: none
-
-       ~(keystone_admin)]$ system upgrade-show
-       +--------------+--------------------------------------+
-       | Property     | Value                                |
-       +--------------+--------------------------------------+
-       | uuid         | 61e5fcd7-a38d-40b0-ab83-8be55b87fee2 |
-       | state        | upgrading-hosts                      |
-       | from_release | nn.nn                                |
-       | to_release   | nn.nn                                |
-       +--------------+--------------------------------------+
-
-#. Unlock controller-0.
-
-   .. code-block:: none
-
-       ~(keystone_admin)]$ system host-unlock controller-0
-
-   This step is required only for Simplex systems that are not a subcloud.
-
-#. Activate the upgrade.
-
-   During the running of the :command:`upgrade-activate` command, new
-   configurations are applied to the controller. 250.001 (**hostname
-   Configuration is out-of-date**) alarms are raised and then cleared as the
-   configuration is applied. The upgrade state goes from *activating* to
-   *activation-complete* once this is done.
-
-   .. only:: partner
-
-      .. include:: /_includes/upgrading-all-in-one-simplex.rest
-         :start-after: deploymentmanager-begin
-         :end-before: deploymentmanager-end
-
-   .. code-block:: none
-
-       ~(keystone_admin)]$ system upgrade-activate
-       +--------------+--------------------------------------+
-       | Property     | Value                                |
-       +--------------+--------------------------------------+
-       | uuid         | 61e5fcd7-a38d-40b0-ab83-8be55b87fee2 |
-       | state        | activating                           |
-       | from_release | nn.nn                                |
-       | to_release   | nn.nn                                |
-       +--------------+--------------------------------------+
-
-   The following states apply when this command is executed.
-
-   ``activation-requested``
-      State entered when :command:`system upgrade-activate` is executed.
-
-   ``activating``
-      State entered when upgrade activation has started and new
-      configurations are being applied to the controller and compute hosts.
-
-   ``activating-hosts``
-      State entered when applying host-specific configurations. This state is
-      entered only if needed.
-
-   ``activation-complete``
-      State entered when new configurations have been applied to all
-      controller and compute hosts.
-
-   ``activation-failed``
-      State entered if the activation fails. Check ``/var/log/sysinv.log``
-      for further information.
-
-#. Check the status of the upgrade again to see that it has reached
-   *activation-complete*.
-
-   .. code-block:: none
-
-       ~(keystone_admin)]$ system upgrade-show
-       +--------------+--------------------------------------+
-       | Property     | Value                                |
-       +--------------+--------------------------------------+
-       | uuid         | 61e5fcd7-a38d-40b0-ab83-8be55b87fee2 |
-       | state        | activation-complete                  |
-       | from_release | nn.nn                                |
-       | to_release   | nn.nn                                |
-       +--------------+--------------------------------------+
-
-   .. note::
-       This can take more than half an hour to complete.
-
-#. Complete the upgrade.
-
-   .. code-block:: none
-
-       ~(keystone_admin)]$ system upgrade-complete
-       +--------------+--------------------------------------+
-       | Property     | Value                                |
-       +--------------+--------------------------------------+
-       | uuid         | 61e5fcd7-a38d-40b0-ab83-8be55b87fee2 |
-       | state        | completing                           |
-       | from_release | nn.nn                                |
-       | to_release   | nn.nn                                |
-       +--------------+--------------------------------------+
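-   To confirm that the completion step has finished before cleaning up, you
-   can query the upgrade once more; this is an optional illustrative check,
-   and the exact output reported once no upgrade remains in progress varies
-   by release:
-
-   .. code-block:: none
-
-       ~(keystone_admin)]$ system upgrade-show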
-#. Delete the imported load.
-
-   .. code-block:: none
-
-       ~(keystone_admin)]$ system load-list
-       +----+----------+------------------+
-       | id | state    | software_version |
-       +----+----------+------------------+
-       | 1  | imported | nn.nn            |
-       | 2  | active   | nn.nn            |
-       +----+----------+------------------+
-
-       ~(keystone_admin)]$ system load-delete 1
-       Deleted load: load 1
-
-   You can verify the result as shown in the sketch after this procedure.
-
-.. only:: partner
-
-   .. include:: /_includes/upgrading-all-in-one-simplex.rest
-      :start-after: upgradeAIO-begin
-      :end-before: upgradeAIO-end
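-After the deletion, you can list the loads again to verify that only the
-active load remains; the output below is an illustrative sketch, and the ids
-and versions vary by system:
-
-.. code-block:: none
-
-    ~(keystone_admin)]$ system load-list
-    +----+--------+------------------+
-    | id | state  | software_version |
-    +----+--------+------------------+
-    | 2  | active | nn.nn            |
-    +----+--------+------------------+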