diff --git a/SYSTEMD.rst b/SYSTEMD.rst
new file mode 100644
index 0000000000..b6ed19335d
--- /dev/null
+++ b/SYSTEMD.rst
@@ -0,0 +1,177 @@
+===========================
+ Using Systemd in DevStack
+===========================
+
+.. note::
+
+  This is an in progress document as we work out the way forward here
+  with DevStack and systemd.
+
+DevStack can be run with all the services as systemd unit
+files. Systemd is now the default init system for nearly every Linux
+distro, and systemd encodes and solves many of the problems related to
+poorly running processes.
+
+Why this instead of screen?
+===========================
+
+The screen model for DevStack was invented when the number of services
+that a DevStack user was going to run was typically < 10. This made
+jumping around with screen hot keys very easy. However, the landscape
+has changed: not all services are stoppable in screen, as some run
+under Apache, and there are typically at least 20 items.
+
+There is also a common developer workflow of changing code in more
+than one service, and needing to restart a bunch of services for that
+to take effect.
+
+To enable systemd mode, add the following to your local.conf::
+
+  USE_SYSTEMD=True
+
+
+
+Unit Structure
+==============
+
+.. note::
+
+  Originally we actually wanted to do this as user units, however
+  there are issues with running this under non-interactive
+  shells. For now, we'll be running as system units. Some user unit
+  code is left in place in case we can switch back later.
+
+All DevStack units are created as a part of the DevStack slice
+given the name ``devstack@$servicename.service``. This lets us do
+certain operations at the slice level.
+
+Manipulating Units
+==================
+
+The examples below assume the unit ``n-cpu``, to make them more concrete.
+
+Enable a unit (allows it to be started)::
+
+  sudo systemctl enable devstack@n-cpu.service
+
+Disable a unit::
+
+  sudo systemctl disable devstack@n-cpu.service
+
+Start a unit::
+
+  sudo systemctl start devstack@n-cpu.service
+
+Stop a unit::
+
+  sudo systemctl stop devstack@n-cpu.service
+
+Restart a unit::
+
+  sudo systemctl restart devstack@n-cpu.service
+
+See status of a unit::
+
+  sudo systemctl status devstack@n-cpu.service
+
+
+Querying Logs
+=============
+
+One of the other major things that comes with systemd is journald, a
+consolidated way to access logs (including querying through structured
+metadata). This is accessed by the user via the ``journalctl`` command.
+
+
+Logs can be accessed through ``journalctl``, which has powerful
+query facilities. We'll start with some common options.
+
+Follow logs for a specific service::
+
+  journalctl -f --unit devstack@n-cpu.service
+
+Follow logs for multiple services simultaneously::
+
+  journalctl -f --unit devstack@n-cpu.service \
+      --unit devstack@n-cond.service
+
+Use higher precision time stamps::
+
+  journalctl -f -o short-precise --unit devstack@n-cpu.service
+
+
+Known Issues
+============
+
+Be careful about systemd python libraries. There are 3 of them on
+pypi, and they are all very different. They unfortunately all install
+into the ``systemd`` namespace, which can cause some issues.
+
+- ``systemd-python`` - this is the upstream maintained library, it has
+  a version number like systemd itself (currently ``233``). This is
+  the one you want.
+- ``systemd`` - a python 3 only library, not what you want.
+- ``python-systemd`` - another library you don't want. Installing it
+  on a system will break ansible's ability to run.
+
+
+If we were using user units, the ``[Service]`` - ``Group=`` parameter
+doesn't seem to work with user units, even though the documentation
+says that it should. This means that we would need to do an explicit
+``/usr/bin/sg``. This has the downside of making the SYSLOG_IDENTIFIER
+be ``sg``. We can explicitly set that with ``SyslogIdentifier=``, but
+it's really unfortunate that we're going to need this
+workaround. This is currently not a problem because we're only using
+system units.
+
+Future Work
+===========
+
+oslo.log journald
+-----------------
+
+Journald has an extremely rich mechanism for direct logging including
+structured metadata. We should enhance oslo.log to take advantage of
+that. It would let us do things like::
+
+  journalctl REQUEST_ID=......
+
+  journalctl INSTANCE_ID=......
+
+And get all lines related to the request id or instance id.
+
+sub targets/slices
+------------------
+
+We might want to create per project slices so that it's easy to
+follow or restart all services of a single project (like swift) without
+impacting other services.
+
+log colorizing
+--------------
+
+We lose log colorization through this process. We might want to build
+a custom colorizer that we could run journalctl output through
+optionally for people.
+
+user units
+----------
+
+It would be great if we could do services as user units, so that there
+is a clear separation of code being run as non-root, to ensure running
+as root never accidentally gets baked in as an assumption to
+services. However, user units interact poorly with devstack-gate and
+the way that commands are run as users with ansible and su.
+
+Maybe someday we can figure that out.
+
+References
+==========
+
+- Arch Linux Wiki - https://wiki.archlinux.org/index.php/Systemd/User
+- Python interface to journald -
+  https://www.freedesktop.org/software/systemd/python-systemd/journal.html
+- Systemd documentation on service files -
+  https://www.freedesktop.org/software/systemd/man/systemd.service.html
+- Systemd documentation on exec (can be used to impact service runs) -
+  https://www.freedesktop.org/software/systemd/man/systemd.exec.html
diff --git a/functions b/functions
index 872f21691a..f6679fdebe 100644
--- a/functions
+++ b/functions
@@ -575,7 +575,9 @@ function vercmp {
 function setup_logging {
     local conf_file=$1
     local other_cond=${2:-"False"}
-    if [ "$LOG_COLOR" == "True" ] && [ "$SYSLOG" == "False" ] && [ "$other_cond" == "False" ]; then
+    if [[ "$USE_SYSTEMD" == "True" ]]; then
+        setup_systemd_logging $conf_file
+    elif [ "$LOG_COLOR" == "True" ] && [ "$SYSLOG" == "False" ] && [ "$other_cond" == "False" ]; then
         setup_colorized_logging $conf_file
     else
         setup_standard_logging_identity $conf_file
@@ -601,6 +603,20 @@ function setup_colorized_logging {
     iniset $conf_file $conf_section logging_exception_prefix "%(color)s%(asctime)s.%(msecs)03d TRACE %(name)s %(instance)s"
 }
 
+# Set the log format strings used when services run under systemd
+# (``USE_SYSTEMD=True``): no timestamp and no colour codes in the prefixes.
+# setup_systemd_logging conf_file
+function setup_systemd_logging {
+    local conf_file=$1
+    local conf_section="DEFAULT"
+    local project_var="project_name"
+    local user_var="user_name"
+    iniset $conf_file $conf_section logging_context_format_string "%(levelname)s %(name)s [%(request_id)s %("$project_var")s %("$user_var")s] %(instance)s%(message)s"
+    iniset $conf_file $conf_section logging_default_format_string "%(levelname)s %(name)s [-] %(instance)s%(message)s"
+    iniset $conf_file $conf_section logging_debug_format_suffix "from (pid=%(process)d) %(funcName)s %(pathname)s:%(lineno)d"
+    iniset $conf_file $conf_section logging_exception_prefix "ERROR %(name)s %(instance)s"
+}
+
 function setup_standard_logging_identity {
     local conf_file=$1
     iniset $conf_file DEFAULT logging_user_identity_format
"%(project_name)s %(user_name)s"
diff --git a/functions-common b/functions-common
index a86cfd8a63..ec68644757 100644
--- a/functions-common
+++ b/functions-common
@@ -1443,6 +1443,66 @@ function _run_process {
     exit 0
 }
 
+# Write the systemd unit file for a DevStack service, then reload
+# systemd so that changes to an existing unit take effect.
+# Uses globals ``SYSTEMD_DIR``, ``SYSTEMCTL``
+# write_user_unit_file unit-name "command-line" [group] user
+function write_user_unit_file {
+    local service=$1
+    local command="$2"
+    local group=$3
+    local user=$4
+    local unitfile="$SYSTEMD_DIR/$service"
+    mkdir -p $SYSTEMD_DIR
+
+    iniset -sudo $unitfile "Unit" "Description" "Devstack $service"
+    iniset -sudo $unitfile "Service" "User" "$user"
+    iniset -sudo $unitfile "Service" "ExecStart" "$command"
+    if [[ -n "$group" ]]; then
+        iniset -sudo $unitfile "Service" "Group" "$group"
+    fi
+    iniset -sudo $unitfile "Install" "WantedBy" "multi-user.target"
+
+    # changes to existing units sometimes need a refresh
+    $SYSTEMCTL daemon-reload
+}
+
+# Create, enable and start the ``devstack@<service>.service`` unit for a
+# command, then start capturing its journal via ``_journal_log``.
+# ``user`` defaults to ``$STACK_USER``.
+# _run_under_systemd service "command-line" [group] [user]
+function _run_under_systemd {
+    local service=$1
+    local command="$2"
+    local systemd_service="devstack@$service.service"
+    local group=$3
+    local user=${4:-$STACK_USER}
+    write_user_unit_file $systemd_service "$command" "$group" "$user"
+
+    $SYSTEMCTL enable $systemd_service
+    $SYSTEMCTL start $systemd_service
+    _journal_log $service $systemd_service
+}
+
+# Follow a unit's journal in the background, writing it to
+# ``$LOGDIR/<service>.log.$CURRENT_LOG_TIME`` with a ``<service>.log``
+# symlink next to it. Does nothing when ``LOGDIR`` is unset.
+# _journal_log service unit-name
+function _journal_log {
+    local service=$1
+    local unit=$2
+    local logfile="${service}.log.${CURRENT_LOG_TIME}"
+    local real_logfile="${LOGDIR}/${logfile}"
+    if [[ -n ${LOGDIR} ]]; then
+        $JOURNALCTL_F $unit > "$real_logfile" &
+        bash -c "cd '$LOGDIR' && ln -sf '$logfile' ${service}.log"
+        if [[ -n ${SCREEN_LOGDIR} ]]; then
+            # Drop the backward-compat symlink
+            ln -sf "$real_logfile" ${SCREEN_LOGDIR}/screen-${service}.log
+        fi
+    fi
+}
+
 # Helper to remove the ``*.failure`` files under ``$SERVICE_DIR/$SCREEN_NAME``.
 # This is used for ``service_check`` when all the ``screen_it`` are called finished
 # Uses globals ``SCREEN_NAME``, ``SERVICE_DIR``
@@ -1478,16 +1538,26 @@ function run_process {
     local service=$1
     local command="$2"
     local group=$3
-    local subservice=$4
+    local user=$4
 
-    local name=${subservice:-$service}
+    local name=$service
 
     time_start "run_process"
     if is_service_enabled $service; then
-        if [[ "$USE_SCREEN" = "True" ]]; then
+        # Under systemd the unit file carries the user; the screen and
+        # direct launchers only special-case root, by prefixing sudo.
+        if [[ "$USE_SYSTEMD" = "True" ]]; then
+            _run_under_systemd "$name" "$command" "$group" "$user"
+        elif [[ "$USE_SCREEN" = "True" ]]; then
+            if [[ "$user" == "root" ]]; then
+                command="sudo $command"
+            fi
             screen_process "$name" "$command" "$group"
         else
             # Spawn directly without screen
+            if [[ "$user" == "root" ]]; then
+                command="sudo $command"
+            fi
             _run_process "$name" "$command" "$group" &
         fi
     fi
@@ -1618,6 +1688,16 @@ function stop_process {
 
     if is_service_enabled $service; then
         # Kill via pid if we have one available
+        if [[ "$USE_SYSTEMD" == "True" ]]; then
+            # Only do this for units which appear enabled, this also
+            # catches units that don't really exist for cases like
+            # keystone without a failure.
+            if $SYSTEMCTL is-enabled --quiet devstack@$service.service; then
+                $SYSTEMCTL stop devstack@$service.service
+                $SYSTEMCTL disable devstack@$service.service
+            fi
+        fi
+
         if [[ -r $SERVICE_DIR/$SCREEN_NAME/$service.pid ]]; then
             pkill -g $(cat $SERVICE_DIR/$SCREEN_NAME/$service.pid)
             # oslo.service tends to stop actually shutting down
diff --git a/lib/dstat b/lib/dstat
index 517e4237ac..982b70387e 100644
--- a/lib/dstat
+++ b/lib/dstat
@@ -24,12 +24,12 @@ function start_dstat {
     # To enable memory_tracker add:
     #    enable_service memory_tracker
     # to your localrc
-    run_process memory_tracker "sudo $TOP_DIR/tools/memory_tracker.sh"
+    run_process memory_tracker "$TOP_DIR/tools/memory_tracker.sh" "" "root"
 
     # remove support for the old name when it's no longer used (sometime in Queens)
     if is_service_enabled peakmem_tracker; then
         deprecated "Use of peakmem_tracker in devstack is deprecated, use memory_tracker instead"
-        run_process peakmem_tracker "sudo $TOP_DIR/tools/memory_tracker.sh"
+        run_process peakmem_tracker "$TOP_DIR/tools/memory_tracker.sh" "" "root"
     fi
 }
 
diff --git a/lib/keystone b/lib/keystone
index 530f3b42d9..af607c344b 100644
--- a/lib/keystone
+++ b/lib/keystone
@@ -602,8 +602,11 @@ function start_keystone {
         tail_log key /var/log/$APACHE_NAME/keystone.log
         tail_log key-access /var/log/$APACHE_NAME/keystone_access.log
     else # uwsgi
-        run_process key "$KEYSTONE_BIN_DIR/uwsgi $KEYSTONE_PUBLIC_UWSGI_FILE" "" "key-p"
-        run_process key "$KEYSTONE_BIN_DIR/uwsgi $KEYSTONE_ADMIN_UWSGI_FILE" "" "key-a"
+        # TODO(sdague): we should really get down to a single keystone here
+        enable_service key-p
+        enable_service key-a
+        run_process key-p "$KEYSTONE_BIN_DIR/uwsgi $KEYSTONE_PUBLIC_UWSGI_FILE" ""
+        run_process key-a "$KEYSTONE_BIN_DIR/uwsgi $KEYSTONE_ADMIN_UWSGI_FILE" ""
     fi
 
     echo "Waiting for keystone to start..."
diff --git a/lib/swift b/lib/swift
index 5b510e5930..6c2af61551 100644
--- a/lib/swift
+++ b/lib/swift
@@ -38,6 +38,15 @@ fi
 
 # Set up default directories
 GITDIR["python-swiftclient"]=$DEST/python-swiftclient
 SWIFT_DIR=$DEST/swift
+
+# Swift virtual environment (path derives from SWIFT_DIR, so SWIFT_DIR must be set first)
+if [[ ${USE_VENV} = True ]]; then
+    PROJECT_VENV["swift"]=${SWIFT_DIR}.venv
+    SWIFT_BIN_DIR=${PROJECT_VENV["swift"]}/bin
+else
+    SWIFT_BIN_DIR=$(get_python_exec_prefix)
+fi
+
 SWIFT_AUTH_CACHE_DIR=${SWIFT_AUTH_CACHE_DIR:-/var/cache/swift}
 SWIFT_APACHE_WSGI_DIR=${SWIFT_APACHE_WSGI_DIR:-/var/www/swift}
@@ -807,10 +816,10 @@ function start_swift {
         local proxy_port=${SWIFT_DEFAULT_BIND_PORT}
         start_tls_proxy swift '*' $proxy_port $SERVICE_HOST $SWIFT_DEFAULT_BIND_PORT_INT
     fi
-    run_process s-proxy "swift-proxy-server ${SWIFT_CONF_DIR}/proxy-server.conf -v"
+    run_process s-proxy "$SWIFT_BIN_DIR/swift-proxy-server ${SWIFT_CONF_DIR}/proxy-server.conf -v"
     if [[ ${SWIFT_REPLICAS} == 1 ]]; then
         for type in object container account; do
-            run_process s-${type} "swift-${type}-server ${SWIFT_CONF_DIR}/${type}-server/1.conf -v"
+            run_process s-${type} "$SWIFT_BIN_DIR/swift-${type}-server ${SWIFT_CONF_DIR}/${type}-server/1.conf -v"
         done
     fi
 
diff --git a/stackrc b/stackrc
index c3b94d02f8..61501b5696 100644
--- a/stackrc
+++ b/stackrc
@@ -87,6 +87,23 @@ HORIZON_APACHE_ROOT="/dashboard"
 # be disabled for automated testing by setting this value to False.
 USE_SCREEN=$(trueorfalse True USE_SCREEN)
 
+# Whether to use SYSTEMD to manage services
+USE_SYSTEMD=$(trueorfalse False USE_SYSTEMD)
+USER_UNITS=$(trueorfalse False USER_UNITS)
+if [[ "$USER_UNITS" == "True" ]]; then
+    SYSTEMD_DIR="$HOME/.local/share/systemd/user"
+    SYSTEMCTL="systemctl --user"
+    JOURNALCTL_F="journalctl -f -o short-precise --user-unit"
+else
+    SYSTEMD_DIR="/etc/systemd/system"
+    SYSTEMCTL="sudo systemctl"
+    JOURNALCTL_F="journalctl -f -o short-precise --unit"
+fi
+
+if [[ "$USE_SYSTEMD" == "True" ]]; then
+    USE_SCREEN=False
+fi
+
 # When using screen, should we keep a log file on disk?  You might
 # want this False if you have a long-running setup where verbose logs
 # can fill-up the host.