Enable enroll-init: post factory-install reconfiguration

These changes ensure that a standalone node is in the correct state
for reconfiguration (for DC enrollment) after a successful factory-install.
This is achieved by ensuring:
- Factory install services are cleaned up.
- Cloud-init services remain enabled and active.

To facilitate enrollment initialization, two new scripts
are being introduced in the platform-util package:
1. enroll-init-cleanup: Removes the cloud-init preset set by factory-install
   and sets the cloud-init disabled flag.
2. enroll-init-reconfigure: A newly introduced script that allows OAM
   and password reconfiguration, ensuring:
     - The password change is done after system services are active.
     - OAM reconfiguration is done via system commands.
    This is required as outlined in
    https://review.opendev.org/c/starlingx/distcloud/+/921719

These scripts must be independent of the factory-install services
and available on the platform for DCManager operations. Hence, they
are not part of factory-install services but are included
in the platform-util package.

Additionally, a minor restructuring of factory-install services is done as part of these changes:
- Moved the host config folder to the parent folder.
  This is more appropriate as the top-level folder already holds
  the cloud-init configurable files. The factory-install folder
  is meant for the static service files.
- Introduced a utils folder/scripts for the factory-install services.

Test plan:
- PASS: Validate factory-install services:
        - config files copied correctly to home dir
        - factory install utils copied to
          /var/lib/factory-install dir
        - cloud-init preset after successful install
        - factory install services cleaned up
- PASS: Validate full factory install
- PASS: Build iso and install load to ensure
        platform-util package is installed
        with enroll-init-cleanup and enroll-init-reconfigure
        scripts in /usr/local/bin
- PASS: Validate enroll-init-cleanup and enroll-init-reconfigure scripts:
        - Password persisted and OAM reconfiguration verified with
          system oam-show + endpoints updated
        - cloud-init services disabled (not restarted on reboot)

Story: 2011100
Task: 50164

Change-Id: I9a99c53c6fe6590716ad3d5d59663c8e6c475db5
Signed-off-by: Salman Rana <salman.rana@windriver.com>
This commit is contained in:
Salman Rana 2024-06-10 22:57:54 -04:00
parent 8e99b25f76
commit a3f8b7adda
14 changed files with 232 additions and 13 deletions

View File

@ -35,13 +35,15 @@ check_rc_die $? "mkdir failed"
cp -r "${NOCLOUD}"/factory-install/scripts "${FACTORY_INSTALL}"/scripts && \
cp -r "${NOCLOUD}"/factory-install/setup "${FACTORY_INSTALL}"/setup && \
cp -r "${NOCLOUD}"/factory-install/tests "${FACTORY_INSTALL}"/tests && \
cp -r "${NOCLOUD}"/factory-install/config "${FACTORY_INSTALL}"/config
cp -r "${NOCLOUD}"/factory-install/systemd/utils "${FACTORY_INSTALL}"/utils && \
cp -r "${NOCLOUD}"/config "${FACTORY_INSTALL}"/config
check_rc_die $? "copy failed"
# Ensure files are executable for run-parts
chmod a+x "${FACTORY_INSTALL}"/scripts/* && \
chmod a+x "${FACTORY_INSTALL}"/setup/* && \
chmod a+x "${FACTORY_INSTALL}"/tests/*
chmod a+x "${FACTORY_INSTALL}"/tests/* && \
chmod a+x "${FACTORY_INSTALL}"/utils/*
check_rc_die $? "chmod failed"
# Copy configuration files required for running bootstrap and deployment configuration services
@ -60,8 +62,7 @@ check_rc_die $? "mkdir failed (factory-install.target.wants)"
cp "${NOCLOUD}"/factory-install/systemd/*.{path,service,target} /etc/systemd/system/
check_rc_die $? "Copy failed (systemd path,service,target)"
cp "${NOCLOUD}"/factory-install/systemd/20-factory-install.preset /etc/systemd/system-preset/ && \
chmod a+x /etc/systemd/system-preset/20-factory-install.preset
cp "${FACTORY_INSTALL}"/utils/*.preset /etc/systemd/system-preset/
check_rc_die $? "Copy failed (systemd preset)"
echo "Factory Install Setup - Complete"

View File

@ -5,7 +5,9 @@
# SPDX-License-Identifier: Apache-2.0
#
# cloud-init script to Perform hardware and firmware checks
# TODO: Sample only. Replace with real hardware checks
#
# SAMPLE ONLY - REPLACE WITH REAL HARDWARE CHECKS
#
echo "Hardware Check - Start"

View File

@ -4,8 +4,8 @@
#
# SPDX-License-Identifier: Apache-2.0
#
# cloud-init script to Perform hardware and firmware checks
# TODO: Sample only. Replace with real hardware checks
# cloud-init script to finish the factory install setup and
# trigger the first stage (bootstrap)
#
FACTORY_INSTALL=/var/lib/factory-install

View File

@ -11,7 +11,8 @@ User=sysadmin
ExecStart=/usr/bin/run-parts --verbose --exit-on-error /var/lib/factory-install/tests
ExecStartPost=+/usr/bin/touch /var/lib/factory-install/state/tests
ExecStartPost=+/usr/bin/touch /var/lib/factory-install/stage/final
ExecStartPost=+/usr/bin/rm /var/lib/factory-install/enabled
ExecStartPost=+/var/lib/factory-install/utils/disable-factory-install
StandardOutput=append:/var/log/factory-install.log
RemainAfterExit=yes
Restart=no

View File

@ -0,0 +1,6 @@
enable cloud-init.target
enable cloud-init-local.service
enable cloud-init.service
enable cloud-config.service
enable cloud-final.service

View File

@ -0,0 +1,14 @@
#!/bin/bash
#
# Copyright (c) 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# script to disable the factory install services after the installation is complete
#
# The steps below are order dependent: the preset file has to be gone
# before 'preset-all' runs, otherwise its policy would be applied again.
# No step is checked for failure; the exit status of this script is the
# exit status of the final 'systemctl preset-all'.
#

# Drop the factory-install "enabled" flag file.
# NOTE(review): presumably this flag gates the factory-install units - confirm.
rm -f /var/lib/factory-install/enabled
# Remove the factory-install systemd preset policy.
rm -f /etc/systemd/system-preset/20-factory-install.preset
# Have systemd re-read its configuration, then reset the enablement of all
# installed units to what the remaining preset files specify.
systemctl daemon-reload
systemctl preset-all

View File

@ -6,20 +6,24 @@
#
# Factory install system health checks triggered during the tests stage
#
# SAMPLE ONLY - REPLACE WITH REAL SYSTEM HEALTH CHECKS
#
echo "System Health Checks - Start"
fail () {
echo "FAIL: $1" && exit 1
log_failure () {
echo "FAIL: $1"
exit ${2}
}
# check for service impacting alarms
# TODO update alarm check - currently checks for any alarms
source /etc/platform/openrc
fm --timeout 10 alarm-list --nowrap|grep -e "major\|minor\|warning\|critical"
if [ $? == 0 ]; then
fail "service impacting alarms present"
# Log the health check failure and exit 0 to allow factory-install to finish up.
# Modify to exit 1 if factory-install should retry check until success.
log_failure "service impacting alarms present" 0
fi
echo "System Health Checks - Complete"

View File

@ -10,3 +10,5 @@ scripts/update_docker_registry_auth.sh usr/local/bin
scripts/change_system_private_registry.sh usr/local/bin
scripts/local_starlingxrc usr/local/bin
scripts/kubeconfig-setup usr/local/bin
scripts/enroll-init-cleanup usr/local/bin
scripts/enroll-init-reconfigure usr/local/bin

View File

@ -10,3 +10,5 @@
/usr/local/bin/change_system_private_registry.sh
/usr/local/bin/local_starlingxrc
/usr/local/bin/kubeconfig-setup
/usr/local/bin/enroll-init-cleanup
/usr/local/bin/enroll-init-reconfigure

View File

@ -44,6 +44,8 @@ override_dh_auto_install:
install -m 555 scripts/kubeconfig-setup $(DEBIAN_BUILDDIR)/usr/local/bin/
install -m 755 scripts/connectivity_test $(DEBIAN_BUILDDIR)/usr/local/bin/
install -m 750 scripts/set_keystone_user_option.sh $(DEBIAN_BUILDDIR)/usr/local/bin/
install -m 750 scripts/enroll-init-cleanup $(DEBIAN_BUILDDIR)/usr/local/bin/
install -m 750 scripts/enroll-init-reconfigure $(DEBIAN_BUILDDIR)/usr/local/bin/
install -d $(DEBIAN_BUILDDIR)/usr/local/sbin/
install -m 700 -p -D scripts/patch-restart-mtce $(DEBIAN_BUILDDIR)/usr/local/sbin/
@ -56,4 +58,4 @@ override_dh_fixperms:
dh_fixperms -Xupdate-iso.sh -Xpatch-dm.sh -Xgen-bootloader-iso.sh -Xstx-iso-utils.sh \
-Xshow-certs.sh -Xupdate_docker_registry_auth.sh -Xchange_system_private_registry.sh \
-Xis-rootdisk-device.sh -Xlocal_starlingxrc -Xkubeconfig-setup -Xpatch-restart-* \
-Xconnectivity_test -Xset_keystone_user_option.sh
-Xconnectivity_test -Xset_keystone_user_option.sh -Xenroll-init-*

View File

@ -0,0 +1,14 @@
#!/bin/bash
#
# Copyright (c) 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# script to cleanup the cloud-init services
#
# The steps below are order dependent: the preset file has to be gone
# before 'preset-all' runs, otherwise its policy would be applied again.
# No step is checked for failure; the exit status of this script is the
# exit status of the final 'systemctl preset-all'.
#

# Create the cloud-init "disabled" flag file.
# NOTE(review): expected to keep cloud-init from running on later boots -
# confirm against the cloud-init version shipped on the platform.
touch /etc/cloud/cloud-init.disabled
# Remove the cloud-init systemd preset policy.
rm -f /etc/systemd/system-preset/20-cloud-init.preset
# Have systemd re-read its configuration, then reset the enablement of all
# installed units to what the remaining preset files specify.
systemctl daemon-reload
systemctl preset-all

View File

@ -0,0 +1,171 @@
#!/bin/bash
#
# Copyright (c) 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# Utility to reconfigure OAM and update sysadmin password
# by first ensuring sys-inv and mtc services are active
# and ready to accept the password change.
#
#######################################
# Abort through log_fatal when a previously captured return code is non-zero.
# Arguments:
#   $1 - return code to check (evaluated as an integer)
#   $2 - message to report on failure; " [rc=<code>]" is appended
# Returns:
#   0 when the return code is zero; otherwise control goes to log_fatal.
#######################################
function check_rc_die {
    local -i rc=${1}
    # Keep the message local so a caller's global 'msg' is never clobbered.
    local msg=${2}
    if [ ${rc} -ne 0 ]; then
        log_fatal "${msg} [rc=${rc}]"
    fi
}
#######################################
# Print a timestamped FATAL message in red on stderr and terminate the script.
# Arguments:
#   $* - message text
# Outputs:
#   "<date> <time> FATAL: <message>" on stderr
# Returns:
#   never returns; exits with status 1
#######################################
function log_fatal {
    local red reset
    # tput complains on stderr when TERM is unset or unknown (typical for
    # non-interactive runs); silence it so the log only carries the message.
    # The colour sequences are simply empty in that case.
    red=$(tput setaf 1 2>/dev/null)
    reset=$(tput sgr0 2>/dev/null)
    echo "${red}$(date "+%F %H:%M:%S") FATAL: ${*}${reset}" >&2
    exit 1
}
#######################################
# Print a timestamped WARN message in yellow on stderr.
# Arguments:
#   $* - message text
# Outputs:
#   "<date> <time>: WARN: <message>" on stderr
#######################################
function log_warn {
    local yellow reset
    # tput complains on stderr when TERM is unset or unknown (typical for
    # non-interactive runs); silence it so the log only carries the message.
    # The colour sequences are simply empty in that case.
    yellow=$(tput setaf 3 2>/dev/null)
    reset=$(tput sgr0 2>/dev/null)
    echo "${yellow}$(date "+%F %H:%M:%S"): WARN: ${*}${reset}" >&2
}
#######################################
# Print a timestamped INFO message on stderr.
# Arguments:
#   $* - message text
# Outputs:
#   "<date> <time>: INFO: <message>" on stderr
#######################################
function log_info {
    local stamp
    stamp=$(date "+%F %H:%M:%S")
    printf '%s\n' "${stamp}: INFO: $*" >&2
}
#######################################
# Print the command line help text.
# Outputs:
#   usage text on stdout; the script name is taken from $0
#######################################
function usage {
    local script_name
    script_name=$(basename "$0")
    printf '%s\n' \
        'Utility to reconfigure OAM and update sysadmin password.' \
        'Usage:' \
        "${script_name} --oam_subnet <oam_subnet>" \
        '--oam_gateway_ip <oam_gateway_ip>' \
        '--oam_ip <oam_ip>' \
        '--new_password <new_password>' \
        '--oam_subnet <subnet>: Specify OAM subnet' \
        '--oam_gateway_ip <ip>: Specify OAM gateway IP' \
        '--oam_ip <ip>: Specify OAM IP' \
        '--new_password <password>: Specify new password for sysadmin user'
}
#######################################
# Ensure factory-install reached its final stage before reconfiguring.
# Arguments:
#   $1 - (optional) path of the final-stage marker file;
#        defaults to /var/lib/factory-install/stage/final
# Returns:
#   0 when the marker is a regular file; otherwise control goes to log_fatal.
#######################################
function verify_factory_install {
    local final_marker=${1:-/var/lib/factory-install/stage/final}

    log_info "Checking factory-install..."
    if [ ! -f "${final_marker}" ]; then
        log_fatal "${final_marker} does not exist. Ensure factory-install was successful."
    fi
    log_info "factory-install check successful."
}
#######################################
# Wait until the sysinv-inv and mtc-agent services report enabled-active.
# Polls 'sm-dump' up to 10 times, pausing 30 seconds between attempts.
# Returns:
#   0 as soon as both services are enabled-active; after the last failed
#   attempt control goes to log_fatal.
#######################################
function check_services_status {
    # Loop state is local so it cannot leak into, or collide with, globals.
    local -i max_retries=10
    local -i retries=0
    local sm_output sysinv_status mtc_status

    log_info "Checking services status..."
    while [ ${retries} -lt ${max_retries} ]; do
        sm_output=$(sm-dump)
        # NOTE(review): the second column is taken as the service state -
        # confirm against the sm-dump output format.
        sysinv_status=$(echo "$sm_output" | awk '/sysinv-inv/ {print $2}')
        mtc_status=$(echo "$sm_output" | awk '/mtc-agent/ {print $2}')
        if [ "$sysinv_status" == "enabled-active" ] && [ "$mtc_status" == "enabled-active" ]; then
            log_info "Required services are now enabled-active."
            return 0
        fi
        log_warn "Required services are not yet enabled-active. Retrying... "
        retries=$((retries + 1))
        # Only pause when another attempt follows; sleeping after the final
        # attempt would just delay the failure report.
        if [ ${retries} -lt ${max_retries} ]; then
            sleep 30
        fi
    done
    log_fatal "Required services are not enabled-active after $max_retries attempts."
}
#######################################
# Source the platform credentials from /etc/platform/openrc.
# Sourcing is attempted up to 10 times, pausing 30 seconds between attempts.
# Returns:
#   0 once the file was sourced successfully; control goes to log_fatal when
#   the file is missing or every attempt failed.
#######################################
function load_credentials {
    # Loop state is local so it cannot leak into, or collide with, globals.
    local -i max_retries=10
    local -i retries=0

    log_info "Loading credentials..."
    if [ ! -f /etc/platform/openrc ]; then
        log_fatal "/etc/platform/openrc does not exist."
    fi
    while [ ${retries} -lt ${max_retries} ]; do
        if source /etc/platform/openrc; then
            log_info "Credentials loaded successfully."
            return 0
        fi
        log_warn "Failed to load credentials (waiting for the host to become active). Retrying..."
        retries=$((retries + 1))
        # Only pause when another attempt follows; sleeping after the final
        # attempt would just delay the failure report.
        if [ ${retries} -lt ${max_retries} ]; then
            sleep 30
        fi
    done
    log_fatal "Failed to load credentials."
}
#######################################
# Apply the requested OAM network settings through 'system oam-modify'.
# Globals:
#   OAM_SUBNET, OAM_GATEWAY_IP, OAM_IP (read)
# Returns:
#   whatever check_rc_die returns; a failing 'system' call is handed to
#   check_rc_die together with its return code.
#######################################
function reconfigure_OAM {
    local -a oam_settings=(
        oam_subnet="$OAM_SUBNET"
        oam_gateway_ip="$OAM_GATEWAY_IP"
        oam_ip="$OAM_IP"
    )
    local -i status=0

    log_info "Reconfiguring OAM with subnet: $OAM_SUBNET, gateway IP: $OAM_GATEWAY_IP, OAM IP: $OAM_IP..."
    system oam-modify "${oam_settings[@]}" || status=$?
    check_rc_die "${status}" "system oam-modify failed"
}
#######################################
# Set the sysadmin password through chpasswd.
# Globals:
#   NEW_PASSWORD (read)
# Returns:
#   whatever check_rc_die returns; a failing 'sudo chpasswd' is handed to
#   check_rc_die together with its return code.
#######################################
function reconfigure_password {
    local -i status=0

    log_info "Reconfiguring sysadmin password..."
    # chpasswd reads "user:password" lines from stdin; with -e the supplied
    # password is taken as already encrypted, so NEW_PASSWORD has to be a
    # hash rather than clear text.
    printf '%s\n' "sysadmin:$NEW_PASSWORD" | sudo chpasswd -e || status=$?
    check_rc_die "${status}" "chpasswd failed"
}
#######################################
# Abort when a value-taking option is not followed by a value.
# Arguments:
#   $@ - the remaining command line, starting at the option being parsed
#######################################
function require_option_value {
    if [[ "$#" -lt 2 ]]; then
        log_fatal "Missing value for option: $1"
    fi
}

#######################################
# Parse the command line into the OAM_* and NEW_PASSWORD globals.
# Globals:
#   OAM_SUBNET, OAM_GATEWAY_IP, OAM_IP, NEW_PASSWORD (written)
# Arguments:
#   $@ - command line options as described by usage
# Returns:
#   0 when every option was consumed; --help prints the usage and exits 0;
#   an unknown option or a missing option value goes to log_fatal.
#######################################
function parse_args {
    while [[ "$#" -gt 0 ]]; do
        # Every value-taking arm checks that a value follows: 'shift 2' fails
        # without shifting when only one argument is left, which would
        # otherwise keep this loop spinning on the same option forever.
        case $1 in
            --help)
                usage
                exit 0
                ;;
            --oam_subnet)
                require_option_value "$@"
                OAM_SUBNET="$2"
                shift 2
                ;;
            --oam_gateway_ip)
                require_option_value "$@"
                OAM_GATEWAY_IP="$2"
                shift 2
                ;;
            --oam_ip)
                require_option_value "$@"
                OAM_IP="$2"
                shift 2
                ;;
            --new_password)
                require_option_value "$@"
                NEW_PASSWORD="$2"
                shift 2
                ;;
            *)
                log_fatal "Unexpected option: $1"
                ;;
        esac
    done
}

#######################################
# Entry point: parse and validate the options, then run the reconfiguration
# steps in order (factory-install check, credentials, service readiness,
# OAM update, password update).
# Arguments:
#   $@ - command line options as described by usage
#######################################
function main {
    log_info "Starting enroll-init reconfiguration..."

    # Parse command line arguments
    parse_args "$@"

    # Ensure all required arguments are provided
    if [ -z "$OAM_SUBNET" ] || [ -z "$OAM_GATEWAY_IP" ] || [ -z "$OAM_IP" ] || [ -z "$NEW_PASSWORD" ]; then
        usage
        log_fatal "Missing required arguments"
    fi

    # Main execution flow
    verify_factory_install
    load_credentials
    check_services_status
    reconfigure_OAM
    reconfigure_password
    log_info "Successfully reconfigured OAM network and system password."
}

# Declare required variables
OAM_SUBNET=""
OAM_GATEWAY_IP=""
OAM_IP=""
NEW_PASSWORD=""

main "$@"