Work around CHILD_MAX bash limitation for async
Apparently bash (via POSIX) only guarantees that a small number (32ish) of children can be started and have their statuses retrieved at any given point. On larger jobs with lots of plugins and additional work, we may go over that limit, especially for long-lived children, such as the install_tempest task.

This works around that issue by creating a fifo for each child at spawn time. When the child is complete, it will block on a read against that fifo (and thus not exit). When the parent goes to wait on the child, it first writes to that fifo, unblocking the child so that it can exit near the time we go to wait.

Closes-Bug: #1923728
Change-Id: Id755bdb1e7f1664ec08742d034c174e87a3d2902
This commit is contained in:
parent
ef1e9ada9b
commit
aa5c38727b
14
inc/async
14
inc/async
@@ -57,6 +57,7 @@ function async_log {
|
|||||||
function async_inner {
|
function async_inner {
|
||||||
local name="$1"
|
local name="$1"
|
||||||
local rc
|
local rc
|
||||||
|
local fifo=${DEST}/async/${name}.fifo
|
||||||
shift
|
shift
|
||||||
set -o xtrace
|
set -o xtrace
|
||||||
if $* >${DEST}/async/${name}.log 2>&1; then
|
if $* >${DEST}/async/${name}.log 2>&1; then
|
||||||
@@ -69,6 +70,8 @@ function async_inner {
|
|||||||
async_log "$name" "FAILED with rc $rc"
|
async_log "$name" "FAILED with rc $rc"
|
||||||
fi
|
fi
|
||||||
iniset ${DEST}/async/${name}.ini job end_time $(date "+%s%3N")
|
iniset ${DEST}/async/${name}.ini job end_time $(date "+%s%3N")
|
||||||
|
# Block on the fifo until we are signaled to exit by the main process
|
||||||
|
cat $fifo
|
||||||
return $rc
|
return $rc
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -86,12 +89,14 @@ function async_run {
|
|||||||
local name="$1"
|
local name="$1"
|
||||||
shift
|
shift
|
||||||
local inifile=${DEST}/async/${name}.ini
|
local inifile=${DEST}/async/${name}.ini
|
||||||
|
local fifo=${DEST}/async/${name}.fifo
|
||||||
|
|
||||||
touch $inifile
|
touch $inifile
|
||||||
iniset $inifile job command "$*"
|
iniset $inifile job command "$*"
|
||||||
iniset $inifile job start_time $(date +%s%3N)
|
iniset $inifile job start_time $(date +%s%3N)
|
||||||
|
|
||||||
if [[ "$DEVSTACK_PARALLEL" = "True" ]]; then
|
if [[ "$DEVSTACK_PARALLEL" = "True" ]]; then
|
||||||
|
mkfifo $fifo
|
||||||
async_inner $name $* &
|
async_inner $name $* &
|
||||||
iniset $inifile job pid $!
|
iniset $inifile job pid $!
|
||||||
async_log "$name" "running: %command"
|
async_log "$name" "running: %command"
|
||||||
@@ -119,17 +124,23 @@ function async_wait {
|
|||||||
xtrace=$(set +o | grep xtrace)
|
xtrace=$(set +o | grep xtrace)
|
||||||
set +o xtrace
|
set +o xtrace
|
||||||
|
|
||||||
local pid rc running inifile runtime
|
local pid rc running inifile runtime fifo
|
||||||
rc=0
|
rc=0
|
||||||
for name in $*; do
|
for name in $*; do
|
||||||
running=$(ls ${DEST}/async/*.ini 2>/dev/null | wc -l)
|
running=$(ls ${DEST}/async/*.ini 2>/dev/null | wc -l)
|
||||||
inifile="${DEST}/async/${name}.ini"
|
inifile="${DEST}/async/${name}.ini"
|
||||||
|
fifo=${DEST}/async/${name}.fifo
|
||||||
|
|
||||||
if pid=$(async_pidof "$name"); then
|
if pid=$(async_pidof "$name"); then
|
||||||
async_log "$name" "Waiting for completion of %command" \
|
async_log "$name" "Waiting for completion of %command" \
|
||||||
"($running other jobs running)"
|
"($running other jobs running)"
|
||||||
time_start async_wait
|
time_start async_wait
|
||||||
if [[ "$pid" != "self" ]]; then
|
if [[ "$pid" != "self" ]]; then
|
||||||
|
# Signal the child to go ahead and exit since we are about to
|
||||||
|
# wait for it to collect its status.
|
||||||
|
echo "Signaling exit"
|
||||||
|
echo WAKEUP > $fifo
|
||||||
|
echo "Signaled"
|
||||||
# Do not actually call wait if we ran synchronously
|
# Do not actually call wait if we ran synchronously
|
||||||
if wait $pid; then
|
if wait $pid; then
|
||||||
rc=0
|
rc=0
|
||||||
@@ -137,6 +148,7 @@ function async_wait {
|
|||||||
rc=$?
|
rc=$?
|
||||||
fi
|
fi
|
||||||
cat ${DEST}/async/${name}.log
|
cat ${DEST}/async/${name}.log
|
||||||
|
rm -f $fifo
|
||||||
fi
|
fi
|
||||||
time_stop async_wait
|
time_stop async_wait
|
||||||
local start_time
|
local start_time
|
||||||
|
Loading…
Reference in New Issue
Block a user