0799782ce8
There are cases when a multinode deployment ends up with an unusable keystone public wsgi on some nodes. The root cause is that the keystone public wsgi does not find the fernet keys on startup - and then keeps returning 500 errors to every request - because of a race condition between fernet_setup/fernet-push.sh and keystone startup. Depends-On: https://review.opendev.org/703742/ Change-Id: I63709c2e3f6a893db82a05640da78f492bf8440f Closes-Bug: #1846789
36 lines
1.3 KiB
Django/Jinja
36 lines
1.3 KiB
Django/Jinja
#!/bin/bash
|
|
|
|
set -o errexit
|
|
set -o pipefail
|
|
|
|
# Get data on the fernet tokens
|
|
# NOTE(mnasiadka): Check for existence of at least two tokens (should exist after bootstrap)
|
|
TOKEN_CHECK=$(/usr/bin/python{{ distro_python_version }} /usr/bin/fetch_fernet_tokens.py -t {{ fernet_token_expiry }} -n 2)
|
|
|
|
# Ensure tokens are populated
|
|
n=0
|
|
while /usr/bin/python{{ distro_python_version }} /usr/bin/fetch_fernet_tokens.py -t 86400 -n 1 | grep -q '"populated": false'; do
|
|
if [ $n -lt 10 ]; then
|
|
n=$(( n + 1 ))
|
|
echo "ERROR: Fernet tokens have not been populated, rechecking in 1 minute"
|
|
echo "DEBUG: /etc/keystone/fernet-keys contents:"
|
|
ls -l /etc/keystone/fernet-keys/
|
|
sleep 60
|
|
else
|
|
echo "CRITICAL: Waited for 10 minutes - failing"
|
|
exit 1
|
|
fi
|
|
done
|
|
|
|
# Ensure the primary token exists and is not stale
|
|
if $(echo "$TOKEN_CHECK" | grep -q '"update_required": false'); then
|
|
exit 0;
|
|
fi
|
|
|
|
# For each host node sync tokens
|
|
{% for host in groups['keystone'] %}
|
|
{% if inventory_hostname != host %}
|
|
/usr/bin/rsync -azu --delete -e 'ssh -i /var/lib/keystone/.ssh/id_rsa -p {{ hostvars[host]['keystone_ssh_port'] }} -F /var/lib/keystone/.ssh/config' keystone@{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:/etc/keystone/fernet-keys/ /etc/keystone/fernet-keys
|
|
{% endif %}
|
|
{% endfor %}
|