openstack-helm-infra/postgresql/templates/bin/_patroni_conversion.sh.tpl
Doug Aaser 9efb353b83 Patroni inclusion work for HA Postgres
This patchset aims to add HA Clustering support for Postgres. HA Clustering
provides automatic failover in the event of the database going down in addition
to keeping replicas of the database for rebuilding in the event of a node
going down. To achieve this clustering we use
[Patroni](https://github.com/zalando/patroni) which offers HA clustering
support for Postgres.

Patroni is a daemon that runs in the background and keeps track of which
node in your cluster is currently the leader node and routes all traffic
on the Postgresql endpoint to that node. If the leader node goes down,
Patroni holds an election to chose a new leader and updates the endpoint
to route traffic accordingly. All communication between nodes is done by
a Patroni created endpoint, seperate from the externally facing Postgres
endpoint.

Note that, although the postgresql helm chart can be upgraded from
non-patroni to patroni clustering, the previous `postgresql`
endpoints object (which is not directly managed by helm) must be
deleted via an out-of-band mechanism so that it may be replaced by the
patroni-managed endpoints.  If Postgres itself is leveraged for the
deployment process, this must be done with careful timing.  Note that
the old endpoints had a port named "db", and the new endpoints has
a port named "postgresql".

- Picking up patchset: https://review.openstack.org/#/c/591663

Co-authored-by: Tony Sorrentino <as1413@att.com>
Co-authored-by: Randeep Jalli <rj2083@att.com>
Co-authored-by: Pete Birley <pete@port.direct>
Co-authored-by: Matt McEuen <mm9745@att.com>

Change-Id: I721b745017dc1ea7ae05dfd9f8d5dd08d0965985
2019-05-28 19:13:13 +00:00

122 lines
3.9 KiB
Smarty

#!/bin/bash
{{/*
Copyright 2019 The Openstack-Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
# This script creates the patroni replication user if it doesn't exist.
# This is only needed for brownfield upgrade scenarios, on top of sites that
# were greenfield-deployed with a pre-patroni version of postgres.
#
# For greenfield deployments, the patroni-enabled postgresql chart will
# create this user automatically.
#
# If any additional conversion steps are found to be needed, they can go here.
set -e
function patroni_started() {
HOST=$1
PORT=$2
STATUS=$(timeout 10 bash -c "exec 3<>/dev/tcp/${HOST}/${PORT};
echo -e \"GET / HTTP/1.1\r\nConnection: close\r\n\" >&3;
cat <&3 | tail -n1 | grep -o \"running\"")
[[ x${STATUS} == "xrunning" ]]
}
PGDATABASE=${PGDATABASE:-'postgres'}
PGHOST=${PGHOST:-'127.0.0.1'}
PGPORT={{- tuple "postgresql" "internal" "postgresql" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
PSQL="psql -h ${PGHOST} -p ${PGPORT} -d ${PGDATABASE}"
PVC_MNT={{- .Values.storage.mount.path }}
FILE_MADE_BY_POSTGRES=${PVC_MNT}/pgdata/pg_xlog
FILE_MADE_BY_PATRONI=${PVC_MNT}/pgdata/patroni.dynamic.json
TIMEOUT=0
# Only need to add the user once, on the first replica
if [ "x${POD_NAME}" != "xpostgresql-0" ]; then
echo "Nothing to do on ${POD_NAME}"
exit 0
fi
# Look for a file-based clue that we're migrating from vanilla pg to patroni.
# This is lighter-weight than checking in the database for the user, since
# we have to fire up the database at this point to do the check.
if [[ -e "${FILE_MADE_BY_POSTGRES}" && ! -e "${FILE_MADE_BY_PATRONI}" ]]
then
echo "We are upgrading to Patroni -- checking for replication user"
# Fire up a temporary postgres
/docker-entrypoint.sh postgres &
while ! $PSQL -c "select 1;"; do
sleep 1
if [[ $TIMEOUT -gt 120 ]]; then
exit 1
fi
TIMEOUT=$((TIMEOUT+1))
done
TIMEOUT=0
# Add the replication user if it doesn't exist
USER_COUNT=$(${PSQL} -qt -c \
"SELECT COUNT(*) FROM pg_roles \
WHERE rolname='${PATRONI_REPLICATION_USERNAME}'")
if [ ${USER_COUNT} -eq 0 ]; then
echo "The patroni replication user ${PATRONI_REPLICATION_USERNAME} doesn't exist yet; creating:"
${PSQL} -c "CREATE USER ${PATRONI_REPLICATION_USERNAME} \
WITH REPLICATION ENCRYPTED PASSWORD '${PATRONI_REPLICATION_PASSWORD}';"
echo "done."
else
echo "The patroni replication user ${PATRONI_REPLICATION_USERNAME} already exists: nothing to do."
fi
# Start Patroni to assimilate the postgres
sed "s/POD_IP_PATTERN/${PATRONI_KUBERNETES_POD_IP}/g" \
/tmp/patroni-templated.yaml > /tmp/patroni.yaml
READY_FLAG="i am the leader with the lock"
PATRONI_LOG=/tmp/patroni_conversion.log
/usr/bin/python3 /usr/local/bin/patroni /tmp/patroni-templated.yaml &> ${PATRONI_LOG} &
# Sleep until patroni is running
while ! grep -q "${READY_FLAG}" ${PATRONI_LOG}; do
sleep 5
if [[ $TIMEOUT -gt 24 ]]; then
echo "A timeout occurred. Patroni logs:"
cat ${PATRONI_LOG}
exit 1
fi
TIMEOUT=$((TIMEOUT+1))
done
TIMEOUT=0
# Gracefully stop postgres and patroni
while pkill INT --uid postgres; do
sleep 5
if [[ $TIMEOUT -gt 24 ]]; then
echo "A timeout occurred. Patroni logs:"
cat ${PATRONI_LOG}
exit 1
fi
TIMEOUT=$((TIMEOUT+1))
done
else
echo "Patroni is already in place: nothing to do."
fi