Merge "Implement CNI cache file cleanup for stale files"
This commit is contained in:
commit
4bc0c7dd3d
@ -170,6 +170,7 @@ kubernetes-1.21.3-kubeadm
|
||||
kubernetes-1.21.3-client
|
||||
containerd
|
||||
k8s-pod-recovery
|
||||
k8s-cni-cache-cleanup
|
||||
containernetworking-plugins
|
||||
|
||||
# resource-agents
|
||||
|
@ -66,6 +66,7 @@ kubernetes/chartmuseum
|
||||
kubernetes/armada-helm-toolkit
|
||||
kubernetes/armada
|
||||
kubernetes/k8s-pod-recovery
|
||||
kubernetes/k8s-cni-cache-cleanup
|
||||
kubernetes/plugins/isolcpus-device-plugin
|
||||
python/python-kubernetes
|
||||
grub/grubby
|
||||
|
4
kubernetes/k8s-cni-cache-cleanup/centos/build_srpm.data
Normal file
4
kubernetes/k8s-cni-cache-cleanup/centos/build_srpm.data
Normal file
@ -0,0 +1,4 @@
|
||||
SRC_DIR="."
|
||||
COPY_LIST="$FILES_BASE/*"
|
||||
|
||||
TIS_PATCH_VER=PKG_GITREVCOUNT
|
@ -0,0 +1,214 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2021 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
# Script name; used as the syslog tag by LOG and ERROR.
NAME=$(basename -- "$0")

# Directories whose CNI cache files are examined by this script.
RESULTSDIR="/var/lib/cni/results"
MULTUSDIR="/var/lib/cni/multus"

# Snapshot of the "PodID" and "pod.name" lines printed by 'crictl ps -v'.
# crictl's stderr is discarded; if it fails, PODS is simply empty.
PODS=$(crictl ps -v 2> /dev/null | grep -w -E 'PodID|pod.name')

# Parallel arrays built from that snapshot: PODIDS holds the second field of
# every PodID line, PODNAMES the third field of every pod.name line.
# mapfile reads one entry per line, so values are never word-split or
# glob-expanded; empty fields are dropped.
PODIDS=()
PODNAMES=()
mapfile -t PODIDS < <(awk '/PodID/ && $2 != "" {print $2}' <<< "${PODS}")
mapfile -t PODNAMES < <(grep -w 'pod.name' <<< "${PODS}" | awk '$3 != "" {print $3}')

# Minimum time (minutes) kubelet must have been up before cleanup runs.
KUBELET_UPTIME_MINUTES=5

# Expected length of a pod id extracted from a cache file name.
POD_ID_LENGTH=64

# Defaults; overridden by the -d and -o command line options.
DELETE="no"
OLDERTHANHOURS=1
|
||||
|
||||
# Send an informational message to syslog (priority daemon.info).
# The tag carries the script name and the PID of this shell.
# Arguments: the message words, passed through to logger unchanged.
LOG() {
    local tag="${NAME}($$): "
    logger -p daemon.info -t "${tag}" "$@"
}
|
||||
|
||||
# Send an error message to syslog (priority daemon.error).
# The tag carries the script name and the PID of this shell.
# Arguments: the message words, passed through to logger unchanged.
ERROR() {
    local tag="${NAME}($$): "
    logger -p daemon.error -t "${tag}" "$@"
}
|
||||
|
||||
# Determine the age of a file in whole hours.
#
# Arguments: $1 - path of the file to inspect
# Outputs:   hours (rounded down) since the file's last status change
#            (stat %Z), written to stdout
# Returns:   0 on success; 1 with no output if the file cannot be stat'ed
function file_age {
    local file=${1}
    local SECONDSPERHOUR=3600
    local now old

    # Assign separately from 'local' so a stat failure is not masked, and
    # bail out instead of feeding an empty value into the arithmetic below.
    old=$(stat -c %Z -- "${file}") || return 1
    now=$(date +%s)

    echo $(( (now - old) / SECONDSPERHOUR ))
}
|
||||
|
||||
# Determine the pod id associated with a result CNI cache file.
#
# Arguments: $1 - path of a file under the results cache directory
# Outputs:   the 64-character pod id embedded in the file name, or an empty
#            line if the name does not match the expected pattern
function results_cni_cache_file_to_pod_id {
    local path=${1}
    local ret=""
    local file

    # A valid CNI cache results file looks like:
    # type-pod_id-interface_name
    local RESULTS_REGEX='^.*-([0-9a-zA-Z]{64})-[0-9a-zA-Z]+$'

    # Quote the path so names containing whitespace reach basename intact.
    file=$(basename -- "${path}")

    if [[ ${file} =~ ${RESULTS_REGEX} ]]; then
        ret=${BASH_REMATCH[1]}
    fi

    echo "${ret}"
}
|
||||
|
||||
# Determine the pod id associated with a multus CNI cache file.
#
# Arguments: $1 - path of a file under the multus cache directory
# Outputs:   the file name itself when it is a 64-character alphanumeric
#            pod id, otherwise an empty line
function multus_cni_cache_file_to_pod_id {
    local path=${1}
    local ret=""
    local file

    # A valid CNI cache multus file is simply the pod id
    local MULTUS_REGEX='^([0-9a-zA-Z]{64})$'

    # Quote the path so names containing whitespace reach basename intact.
    file=$(basename -- "${path}")

    if [[ ${file} =~ ${MULTUS_REGEX} ]]; then
        ret=${BASH_REMATCH[1]}
    fi

    echo "${ret}"
}
|
||||
|
||||
# Determine the pod id associated with a CNI cache file.
#
# Dispatches on the file's directory: files directly under RESULTSDIR are
# parsed as results cache files, files directly under MULTUSDIR as multus
# cache files. Anything else yields an empty result.
#
# Globals:   RESULTSDIR, MULTUSDIR (read)
# Arguments: $1 - path of the cache file
# Outputs:   the pod id, or an empty line if none could be determined
function cni_cache_file_to_pod_id {
    local path=${1}
    local ret=""
    local dir

    # Quote the path so names containing whitespace are handled as one word.
    dir=$(dirname -- "${path}")

    if [[ "${dir}" == "${RESULTSDIR}" ]]; then
        ret=$(results_cni_cache_file_to_pod_id "${path}")
    elif [[ "${dir}" == "${MULTUSDIR}" ]]; then
        ret=$(multus_cni_cache_file_to_pod_id "${path}")
    fi

    echo "${ret}"
}
|
||||
|
||||
# Determine the original pod name from a CNI cache file (if any).
#
# Looks for the text  K8S_POD_NAME","<name>"  in the file and extracts
# <name>. Only the first matching line is used, and on a line with several
# occurrences the last one wins.
#
# Arguments: $1 - path of the cache file
# Outputs:   the pod name, or "unknown" when the file is unreadable or
#            contains no non-empty K8S_POD_NAME value
function cache_file_to_pod_name {
    local path=${1}
    local ret="unknown"
    local name

    # Match on the full 'K8S_POD_NAME","' token so that K8S_POD_NAMESPACE
    # entries are not mistaken for the pod name. sed's stderr is discarded
    # because a missing file simply means the name is unknown.
    name=$(sed -n 's/.*K8S_POD_NAME","\([^"]*\).*/\1/p' -- "${path}" 2> /dev/null | head -n 1)

    if [[ -n "${name}" ]]; then
        ret=${name}
    fi

    echo "${ret}"
}
|
||||
|
||||
# Given a CNI cache id, return the existing pod name (if any).
#
# Scans PODIDS for entries equal to the given id and reports the PODNAMES
# entry at the same index. If several entries match, the last one wins.
#
# Globals:   PODIDS, PODNAMES (read)
# Arguments: $1 - pod id extracted from a cache file name
# Outputs:   the matching pod name, or an empty line if no entry matches
function get_pod {
    local cacheid=${1}
    local ret=""
    # Keep the loop index local so callers' variables are not clobbered.
    local i

    for i in "${!PODIDS[@]}"; do
        if [[ "${PODIDS[${i}]}" == "${cacheid}" ]]; then
            ret=${PODNAMES[${i}]}
        fi
    done

    echo "${ret}"
}
|
||||
|
||||
# Determine if the CNI cache file is old enough to process.
#
# Globals:   OLDERTHANHOURS (read) - minimum age in hours
# Arguments: $1 - age of the file in hours
# Outputs:   the age when it is at least OLDERTHANHOURS, otherwise an empty
#            line. An empty line is also printed when either value is
#            missing or not a non-negative integer, so a bad threshold can
#            never make every file look old enough.
function check_cache_file_age {
    local age=${1}
    local ret=""
    local number='^[0-9]+$'

    if [[ "${OLDERTHANHOURS}" =~ ${number} && "${age}" =~ ${number} ]]; then
        # Force base 10 so values with leading zeros (e.g. 08) compare
        # numerically instead of failing as invalid octal.
        if (( 10#${age} >= 10#${OLDERTHANHOURS} )); then
            ret=${age}
        fi
    fi

    echo "${ret}"
}
|
||||
|
||||
# Determine how long kubelet has been up in minutes
#
# Reads the WatchdogTimestamp property of the kubelet systemd unit and
# converts the time elapsed since then into whole minutes.
#
# Outputs:   the number of minutes on stdout; 0 if the timestamp is
#            missing or cannot be parsed (an error is logged via ERROR)
function kubelet_uptime {
    local SECONDSPERMINUTE=60
    local started started_epoch now
    local minutes=0

    started=$(systemctl show kubelet --property WatchdogTimestamp | awk -F= '{print $2}')

    if [[ -z "${started}" ]]; then
        ERROR "Failed to get kubelet uptime."
    elif ! started_epoch=$(date --date="${started}" +%s 2> /dev/null); then
        # Without this check an unparsable timestamp would leave the start
        # time empty and the arithmetic below would report a huge uptime.
        ERROR "Failed to parse kubelet timestamp '${started}'."
    else
        now=$(date +%s)
        minutes=$(( (now - started_epoch) / SECONDSPERMINUTE ))
    fi

    echo "${minutes}"
}
|
||||
|
||||
# Wait for kubelet to be up for long enough to process CNI cache files.
#
# Polls kubelet_uptime up to 31 times, sleeping 30 seconds after each
# unsuccessful check.
#
# Globals:   KUBELET_UPTIME_MINUTES (read) - required uptime in minutes
# Returns:   0 as soon as the reported uptime reaches the requirement,
#            1 if it never does within the retry budget
function check_kubelet {
    local retries=0
    local uptime remaining

    while (( retries <= 30 )); do
        uptime=$(kubelet_uptime)
        # An empty or non-numeric reading would break the comparison below;
        # treat it as "no uptime yet" and keep waiting.
        [[ "${uptime}" =~ ^-?[0-9]+$ ]] || uptime=0

        if (( uptime >= KUBELET_UPTIME_MINUTES )); then
            return 0
        fi

        remaining=$(( KUBELET_UPTIME_MINUTES - uptime ))
        LOG "Waiting for kubelet to be up for ${remaining} minutes ..."
        retries=$(( retries + 1 ))
        sleep 30
    done

    return 1
}
|
||||
|
||||
# Parse the command line options.
#
#   -d                    delete stale cache files (sets DELETE="yes")
#   -o older_than_hours   minimum file age in hours (sets OLDERTHANHOURS)
#
# Globals:   DELETE, OLDERTHANHOURS (written)
# Arguments: the script's command line
# Exits with status 2 and a usage message on stderr for an unknown option,
# a missing option argument, or a non-numeric -o value.
function parse_args {
    local OPT OPTARG
    # Reset getopts state so the function parses its own arguments from the
    # first one regardless of any earlier getopts use.
    local OPTIND=1

    while getopts ':o:d' OPT; do
        case "${OPT}" in
            o)
                if [[ ! "${OPTARG}" =~ ^[0-9]+$ ]]; then
                    echo "${0##*/}: -o requires a non-negative integer, got '${OPTARG}'" >&2
                    echo "usage: ${0##*/} [-d] [-o older_than_hours]" >&2
                    exit 2
                fi
                # Normalise to base 10 so a value such as 08 does not later
                # fail as invalid octal in numeric comparisons.
                OLDERTHANHOURS=$(( 10#${OPTARG} ))
                ;;
            d)
                DELETE="yes"
                ;;
            *)
                # Unknown option, or -o given without its argument.
                echo "usage: ${0##*/} [-d] [-o older_than_hours]" >&2
                exit 2
                ;;
        esac
    done
}

parse_args "$@"
|
||||
|
||||
# Stop here (exit status 1) when check_kubelet reports that kubelet has not
# reached the required uptime.
if ! check_kubelet; then
    LOG "Kubelet must be up for a minimum of ${KUBELET_UPTIME_MINUTES} minutes. Not running CNI cache cleanup."
    exit 1
fi
|
||||
|
||||
# Walk every file in the results and multus cache directories and report
# (or, with DELETE="yes", remove) those that belong to no listed pod and
# are at least OLDERTHANHOURS old.
for f in "${RESULTSDIR}"/* "${MULTUSDIR}"/*; do
    # An unmatched glob is left as the literal pattern; skip it.
    [[ -e "${f}" ]] || continue

    cacheid=$(cni_cache_file_to_pod_id "${f}")
    if [[ ${#cacheid} -ne ${POD_ID_LENGTH} ]]; then
        # Unrecognized file pattern, skip.
        continue
    fi

    existing_podname=$(get_pod "${cacheid}")
    if [[ -n "${existing_podname}" ]]; then
        LOG "Pod ${existing_podname} exists. Not cleaning up CNI cache file(s)."
        continue
    fi

    age=$(file_age "${f}")
    if [[ -z "${age}" ]]; then
        # Without an age neither the threshold check nor the arithmetic in
        # the log message below can be evaluated.
        ERROR "Unable to determine age of CNI cache file ${f}. Skipping."
        continue
    fi

    if [[ -z "$(check_cache_file_age "${age}")" ]]; then
        LOG "Stale CNI cache file ${f} detected. Cleanup to occur after $(( OLDERTHANHOURS - age )) hour(s)."
        continue
    fi

    # Read the pod name before the file is removed; afterwards there is
    # nothing left to read and the name would always be "unknown".
    cache_podname=$(cache_file_to_pod_name "${f}")

    if [[ "${DELETE}" == "yes" ]]; then
        if ! rm -f -- "${f}"; then
            # Do not claim a deletion that did not happen.
            ERROR "Failed to delete stale CNI cache file ${f}."
            continue
        fi
        action="Deleted"
    else
        action="Detected"
    fi

    LOG "${action} stale CNI cache file ${f}: [age: ${age} hours old, podname: ${cache_podname}]."
done
|
||||
|
@ -0,0 +1,27 @@
|
||||
Name: k8s-cni-cache-cleanup
|
||||
Version: 1.0
|
||||
Release: 0%{?_tis_dist}.%{tis_patch_ver}
|
||||
Summary: Kubernetes CNI Cache Cleanup Utility
|
||||
License: Apache-2.0
|
||||
Group: base
|
||||
Packager: Wind River <info@windriver.com>
|
||||
URL: unknown
|
||||
Source0: k8s-cni-cache-cleanup
|
||||
|
||||
Requires: /bin/bash
|
||||
|
||||
%description
|
||||
%{summary}
|
||||
|
||||
%define local_dir /usr/local
|
||||
%define local_sbindir %{local_dir}/sbin
|
||||
|
||||
%prep
|
||||
|
||||
%install
|
||||
install -d %{buildroot}%{local_sbindir}
|
||||
install -m 755 %{SOURCE0} %{buildroot}%{local_sbindir}/k8s-cni-cache-cleanup
|
||||
|
||||
%files
|
||||
%defattr(-,root,root,-)
|
||||
%{local_sbindir}/k8s-cni-cache-cleanup
|
@ -19,7 +19,7 @@
|
||||
|
||||
. /etc/platform/platform.conf
|
||||
|
||||
export PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin
|
||||
export PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin:/usr/local/sbin
|
||||
export KUBECONFIG=/etc/kubernetes/admin.conf
|
||||
CONF_DIR=/etc/k8s-post-recovery.d
|
||||
SLEEP_DELAY_SEC=15
|
||||
@ -74,6 +74,16 @@ function _wait_for_systemd {
|
||||
done
|
||||
}
|
||||
|
||||
function _do_cni_cache_cleanup {
    # Invoke k8s-cni-cache-cleanup with "-o 1 -d": delete stale CNI cache
    # files (those not associated with any running pod) that are at least
    # one hour old. A non-zero exit is logged as an error.
    LOG "Starting CNI cache cleanup..."
    k8s-cni-cache-cleanup -o 1 -d || ERROR "Failed to run CNI cache cleanup."
}
|
||||
|
||||
function _wait_for_pod_stabilization {
|
||||
|
||||
local extra_args=$1
|
||||
@ -298,6 +308,7 @@ function start {
|
||||
_wait_for_systemd
|
||||
_examine_pods 'recover'
|
||||
_examine_pods 'verify'
|
||||
_do_cni_cache_cleanup
|
||||
}
|
||||
|
||||
function stop {
|
||||
|
Loading…
Reference in New Issue
Block a user