diff --git a/playbooks/group_vars/graphite.yaml b/playbooks/group_vars/graphite.yaml
index e8a5c57513..639d3988cf 100644
--- a/playbooks/group_vars/graphite.yaml
+++ b/playbooks/group_vars/graphite.yaml
@@ -1,4 +1,7 @@
 iptables_extra_allowed_hosts:
+  - hostname: bridge.openstack.org
+    port: 8125
+    protocol: udp
   - hostname: git.openstack.org
     port: 8125
     protocol: udp
diff --git a/playbooks/roles/ansible-cron/tasks/main.yaml b/playbooks/roles/ansible-cron/tasks/main.yaml
index c25dbea5f6..4e65e12b67 100644
--- a/playbooks/roles/ansible-cron/tasks/main.yaml
+++ b/playbooks/roles/ansible-cron/tasks/main.yaml
@@ -13,7 +13,7 @@
   cron:
     name: run_all.sh
     state: present
-    job: 'flock -n /var/run/ansible/run_all.lock bash /opt/system-config/run_all.sh >> /var/log/ansible/run_all_cron.log 2>&1'
+    job: 'flock -n /var/run/ansible/run_all.lock bash /opt/system-config/run_all.sh -c >> /var/log/ansible/run_all_cron.log 2>&1'
     minute: "{{ update_cron_interval.minute }}"
     hour: "{{ update_cron_interval.hour }}"
     day: "{{ update_cron_interval.day }}"
diff --git a/run_all.sh b/run_all.sh
index eadf347af0..2f4fc11598 100755
--- a/run_all.sh
+++ b/run_all.sh
@@ -22,6 +22,41 @@ set -e
 SYSTEM_CONFIG=/opt/system-config
 ANSIBLE_PLAYBOOKS=$SYSTEM_CONFIG/playbooks
 
+# Stats are only sent when running under cron, i.e. when the -c flag is passed
+UNDER_CRON=0
+
+while getopts ":c" arg; do
+    case $arg in
+        c)
+            UNDER_CRON=1
+            ;;
+    esac
+done
+
+GLOBAL_START_TIME=$(date '+%s')
+
+# Send a timer stat (elapsed milliseconds) to statsd; does nothing unless UNDER_CRON is 1
+#  usage: send_timer metric [start_time]
+#  * reported as timer metric bridge.ansible.run_all.<$1>, sent over UDP to graphite.openstack.org:8125
+#  * elapsed time is measured from the last call of start_timer, or from $2 (epoch seconds) if set
+function send_timer {
+    # Only send stats under cron conditions
+    if [[ ${UNDER_CRON} != 1 ]]; then
+        return
+    fi
+
+    local current=$(date '+%s')
+    local name=$1
+    local start=${2-$_START_TIME}
+    local elapsed_ms=$(( (current - start) * 1000 ))
+
+    echo "bridge.ansible.run_all.${name}:${elapsed_ms}|ms" | nc -w 1 -u graphite.openstack.org 8125
+}
+# Record the current time (epoch seconds) in _START_TIME for the next send_timer call
+function start_timer {
+    _START_TIME=$(date '+%s')
+}
+
 echo "--- begin run @ $(date -Is) ---"
 
 # It's possible for connectivity to a server or manifest application to break
@@ -33,24 +68,43 @@ set +e
 # stuck if they are oomkilled
 
 # Clone system-config and install modules and roles
+start_timer
 timeout -k 2m 120m ansible-playbook ${ANSIBLE_PLAYBOOKS}/update-system-config.yaml
+send_timer update_system_config
 
 # Update the code on bridge
+start_timer
 timeout -k 2m 120m ansible-playbook ${ANSIBLE_PLAYBOOKS}/bridge.yaml
+send_timer bridge
 
 # Run the base playbook everywhere
+start_timer
 timeout -k 2m 120m ansible-playbook -f 50 ${ANSIBLE_PLAYBOOKS}/base.yaml
+send_timer base
 
 # Update the puppet version
+start_timer
 timeout -k 2m 120m ansible-playbook -f 50 ${ANSIBLE_PLAYBOOKS}/update_puppet_version.yaml
+send_timer update_puppet_version
 
 # Run the git/gerrit/zuul sequence, since it's important that they all work together
+start_timer
 timeout -k 2m 120m ansible-playbook -f 50 ${ANSIBLE_PLAYBOOKS}/remote_puppet_git.yaml
+send_timer git
+
 # Run AFS changes separately so we can make sure to only do one at a time
 # (turns out quorum is nice to have)
+start_timer
 timeout -k 2m 120m ansible-playbook -f 1 ${ANSIBLE_PLAYBOOKS}/remote_puppet_afs.yaml
+send_timer afs
+
 # Run everything else. We do not care if the other things worked
+start_timer
 timeout -k 2m 120m ansible-playbook -f 50 ${ANSIBLE_PLAYBOOKS}/remote_puppet_else.yaml
+send_timer else
+
+# Send the combined time for the whole run, measured from GLOBAL_START_TIME
+send_timer total $GLOBAL_START_TIME
 
 echo "--- end run @ $(date -Is) ---"
 echo