vmware-nsx/neutron/agent/linux/external_process.py
Miguel Angel Ajo 96685da7bd Fix race condition in ProcessMonitor
If any new external process was enabled/disabled during
the process monitoring loop time, a RuntimeError: dictionary
changed size during iteration was thrown. This is fixed
by pre-building the service list from the dictionary
for the iteration.

Closes-Bug: #1401042

Change-Id: Id0a3af22fb85992bde35d8c691bee3ddf435639d
2014-12-11 09:05:16 +01:00

259 lines
10 KiB
Python

# Copyright 2012 New Dream Network, LLC (DreamHost)
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import collections
import eventlet
from oslo.config import cfg
from neutron.agent.linux import ip_lib
from neutron.agent.linux import utils
from neutron.i18n import _LE
from neutron.openstack.common import lockutils
from neutron.openstack.common import log as logging
LOG = logging.getLogger(__name__)
OPTS = [
cfg.StrOpt('external_pids',
default='$state_path/external/pids',
help=_('Location to store child pid files')),
cfg.StrOpt('check_child_processes_action', default='respawn',
choices=['respawn', 'exit'],
help=_('Action to be executed when a child process dies')),
cfg.IntOpt('check_child_processes_interval', default=0,
help=_('Interval between checks of child process liveness '
'(seconds), use 0 to disable')),
]
cfg.CONF.register_opts(OPTS)
class ProcessManager(object):
"""An external process manager for Neutron spawned processes.
Note: The manager expects uuid to be in cmdline.
"""
def __init__(self, conf, uuid, root_helper='sudo',
namespace=None, service=None, pids_path=None,
default_cmd_callback=None,
cmd_addl_env=None):
self.conf = conf
self.uuid = uuid
self.root_helper = root_helper
self.namespace = namespace
self.default_cmd_callback = default_cmd_callback
self.cmd_addl_env = cmd_addl_env
self.pids_path = pids_path or self.conf.external_pids
if service:
self.service_pid_fname = 'pid.' + service
self.service = service
else:
self.service_pid_fname = 'pid'
self.service = 'default-service'
def enable(self, cmd_callback=None, reload_cfg=False):
if not self.active:
if not cmd_callback:
cmd_callback = self.default_cmd_callback
cmd = cmd_callback(self.get_pid_file_name(ensure_pids_dir=True))
ip_wrapper = ip_lib.IPWrapper(self.root_helper, self.namespace)
ip_wrapper.netns.execute(cmd, addl_env=self.cmd_addl_env)
elif reload_cfg:
self.reload_cfg()
def reload_cfg(self):
self.disable('HUP')
def disable(self, sig='9'):
pid = self.pid
if self.active:
cmd = ['kill', '-%s' % (sig), pid]
utils.execute(cmd, self.root_helper)
# In the case of shutting down, remove the pid file
if sig == '9':
utils.remove_conf_file(self.pids_path,
self.uuid,
self.service_pid_fname)
elif pid:
LOG.debug('Process for %(uuid)s pid %(pid)d is stale, ignoring '
'signal %(signal)s', {'uuid': self.uuid, 'pid': pid,
'signal': sig})
else:
LOG.debug('No process started for %s', self.uuid)
def get_pid_file_name(self, ensure_pids_dir=False):
"""Returns the file name for a given kind of config file."""
return utils.get_conf_file_name(self.pids_path,
self.uuid,
self.service_pid_fname,
ensure_pids_dir)
@property
def pid(self):
"""Last known pid for this external process spawned for this uuid."""
return utils.get_value_from_conf_file(self.pids_path,
self.uuid,
self.service_pid_fname,
int)
@property
def active(self):
pid = self.pid
if pid is None:
return False
cmdline = '/proc/%s/cmdline' % pid
try:
with open(cmdline, "r") as f:
return self.uuid in f.readline()
except IOError:
return False
ServiceId = collections.namedtuple('ServiceId', ['uuid', 'service'])
class ProcessMonitor(object):
def __init__(self, config, root_helper, resource_type, exit_handler):
"""Handle multiple process managers and watch over all of them.
:param config: oslo config object with the agent configuration.
:type config: oslo.config.ConfigOpts
:param root_helper: root helper to be used with new ProcessManagers
:type root_helper: str
:param resource_type: can be dhcp, router, load_balancer, etc.
:type resource_type: str
:param exit_handler: function to execute when agent exit has to
be executed, it should take care of actual
exit
:type exit_hanlder: function
"""
self._config = config
self._root_helper = root_helper
self._resource_type = resource_type
self._exit_handler = exit_handler
self._process_managers = {}
if self._config.check_child_processes_interval:
self._spawn_checking_thread()
def enable(self, uuid, cmd_callback, namespace=None, service=None,
reload_cfg=False, cmd_addl_env=None):
"""Creates a process and ensures that it is monitored.
It will create a new ProcessManager and tie it to the uuid/service.
"""
process_manager = ProcessManager(conf=self._config,
uuid=uuid,
root_helper=self._root_helper,
namespace=namespace,
service=service,
default_cmd_callback=cmd_callback,
cmd_addl_env=cmd_addl_env)
process_manager.enable(reload_cfg=reload_cfg)
service_id = ServiceId(uuid, service)
self._process_managers[service_id] = process_manager
def disable(self, uuid, namespace=None, service=None):
"""Disables the process and stops monitoring it."""
service_id = ServiceId(uuid, service)
process_manager = self._process_managers.pop(service_id, None)
# we could be trying to disable a process_manager which was
# started on a separate run of this agent, or during netns-cleanup
# therefore we won't know about such uuid and we need to
# build the process_manager to kill it
if not process_manager:
process_manager = ProcessManager(conf=self._config,
uuid=uuid,
root_helper=self._root_helper,
namespace=namespace,
service=service)
process_manager.disable()
def disable_all(self):
for service_id in self._process_managers.keys():
self.disable(uuid=service_id.uuid, service=service_id.service)
def get_process_manager(self, uuid, service=None):
"""Returns a process manager for manipulation"""
service_id = ServiceId(uuid, service)
return self._process_managers.get(service_id)
def _get_process_manager_attribute(self, attribute, uuid, service=None):
process_manager = self.get_process_manager(uuid, service)
if process_manager:
return getattr(process_manager, attribute)
else:
return False
def is_active(self, uuid, service=None):
return self._get_process_manager_attribute('active', uuid, service)
def get_pid(self, uuid, service=None):
return self._get_process_manager_attribute('pid', uuid, service)
def _spawn_checking_thread(self):
eventlet.spawn(self._periodic_checking_thread)
@lockutils.synchronized("_check_child_processes")
def _check_child_processes(self):
# we build the list of keys before iterating in the loop to cover
# the case where other threads add or remove items from the
# dictionary which otherwise will cause a RuntimeError
for service_id in list(self._process_managers):
pm = self._process_managers.get(service_id)
if pm and not pm.active:
LOG.error(_LE("%(service)s for %(resource_type)s "
"with uuid %(uuid)s not found. "
"The process should not have died"),
{'service': pm.service,
'resource_type': self._resource_type,
'uuid': service_id.uuid})
self._execute_action(service_id)
eventlet.sleep(0)
def _periodic_checking_thread(self):
while True:
eventlet.sleep(self._config.check_child_processes_interval)
eventlet.spawn(self._check_child_processes)
def _execute_action(self, service_id):
action_function = getattr(
self, "_%s_action" % self._config.check_child_processes_action)
action_function(service_id)
def _respawn_action(self, service_id):
LOG.error(_LE("respawning %(service)s for uuid %(uuid)s"),
{'service': service_id.service,
'uuid': service_id.uuid})
self._process_managers[service_id].enable()
def _exit_action(self, service_id):
LOG.error(_LE("Exiting agent as programmed in check_child_processes_"
"actions"))
self._exit_handler(service_id.uuid, service_id.service)