diff --git a/reddwarf/common/remote.py b/reddwarf/common/remote.py index 5dc5aafccb..35bcb5bb70 100644 --- a/reddwarf/common/remote.py +++ b/reddwarf/common/remote.py @@ -17,13 +17,14 @@ from reddwarf.common import config from novaclient.v1_1.client import Client -from reddwarf.guestagent.api import API + CONFIG = config.Config def create_guest_client(context, id): + from reddwarf.guestagent.api import API return API(context, id) diff --git a/reddwarf/common/utils.py b/reddwarf/common/utils.py index e058ef71ec..235a427933 100644 --- a/reddwarf/common/utils.py +++ b/reddwarf/common/utils.py @@ -20,6 +20,7 @@ import datetime import inspect import logging import re +import signal import sys import uuid @@ -27,9 +28,10 @@ from eventlet import event from eventlet import greenthread from eventlet import semaphore from eventlet.green import subprocess +from eventlet.timeout import Timeout from reddwarf.openstack.common import utils as openstack_utils - +from reddwarf.common import exception LOG = logging.getLogger(__name__) import_class = openstack_utils.import_class @@ -202,3 +204,24 @@ def get_id_from_href(href): """ return urlparse.urlsplit("%s" % href).path.split('/')[-1] + + +def execute_with_timeout(*args, **kwargs): + time = kwargs.get('timeout', 30) + def cb_timeout(): + raise exception.ProcessExecutionError("Time out after waiting " + + str(time) + " seconds when running proc: " + str(args) + + str(kwargs)) + + timeout = Timeout(time) + try: + return execute(*args, **kwargs) + except Timeout as t: + if t is not timeout: + raise + else: + raise exception.ProcessExecutionError("Time out after waiting " + + str(time) + " seconds when running proc: " + str(args) + + str(kwargs)) + finally: + timeout.cancel() diff --git a/reddwarf/guestagent/api.py b/reddwarf/guestagent/api.py index aabc96c82e..dd2f0c1c9d 100644 --- a/reddwarf/guestagent/api.py +++ b/reddwarf/guestagent/api.py @@ -89,8 +89,8 @@ class API(object): self._cast("create_database", databases=databases) def 
list_databases(self): - """Make an asynchronous call to list database users""" - LOG.debug(_("Listing Users for Instance %s"), self.id) + """Make an asynchronous call to list databases""" + LOG.debug(_("Listing databases for Instance %s"), self.id) return self._call("list_databases") def delete_database(self, database): diff --git a/reddwarf/guestagent/dbaas.py b/reddwarf/guestagent/dbaas.py index cd2f09675d..7a83555a68 100644 --- a/reddwarf/guestagent/dbaas.py +++ b/reddwarf/guestagent/dbaas.py @@ -67,8 +67,8 @@ def generate_random_password(): def get_auth_password(): - pwd, err = utils.execute("sudo", "awk", "/password\\t=/{print $3}", - "/etc/mysql/my.cnf") + pwd, err = utils.execute_with_timeout("sudo", "awk", + "/password\\t=/{print $3}", "/etc/mysql/my.cnf") if err: LOG.err(err) raise RuntimeError("Problem reading my.cnf! : %s" % err) @@ -83,8 +83,8 @@ def get_engine(): if ENGINE: return ENGINE #ENGINE = create_engine(name_or_url=url) - pwd, err = utils.execute("sudo", "awk", "/password\\t=/{print $3}", - "/etc/mysql/my.cnf") + pwd, err = utils.execute_with_timeout("sudo", "awk", + "/password\\t=/{print $3}", "/etc/mysql/my.cnf") if not err: ENGINE = create_engine("mysql://%s:%s@localhost:3306" % (ADMIN_USER_NAME, pwd.strip()), @@ -155,6 +155,7 @@ class MySqlAppStatus(object): Updates the database with the actual MySQL status. 
""" LOG.info("Ending install or restart.") + self.restart_mode = False real_status = self._get_actual_db_status() LOG.info("Updating status to %s" % real_status) self.set_status(real_status) @@ -168,14 +169,15 @@ class MySqlAppStatus(object): def _get_actual_db_status(self): global MYSQLD_ARGS try: - out, err = utils.execute("/usr/bin/mysqladmin", "ping", - run_as_root=True) + out, err = utils.execute_with_timeout("/usr/bin/mysqladmin", + "ping", run_as_root=True) LOG.info("Service Status is RUNNING.") return rd_models.ServiceStatuses.RUNNING except ProcessExecutionError as e: LOG.error("Process execution ") try: - out, err = utils.execute("ps", "-C", "mysqld", "h") + out, err = utils.execute_with_timeout("/bin/ps", "-C", "mysqld", + "h") pid = out.split()[0] # TODO(rnirmal): Need to create new statuses for instances # where the mysql service is up, but unresponsive @@ -256,7 +258,7 @@ class MySqlAppStatus(object): LOG.info("Waiting for MySQL status to change to %s..." % status) actual_status = self._get_actual_db_status() LOG.info("MySQL status was %s after %d seconds." 
- % (status, waited_time)) + % (actual_status, waited_time)) if actual_status == status: if update_db: self.set_status(actual_status) @@ -472,7 +474,7 @@ class DBaaSAgent(object): return MySqlAdmin().delete_database(database) def delete_user(self, user): - MySqlAdmin().delete_user() + MySqlAdmin().delete_user(user) def list_databases(self): return MySqlAdmin().list_databases() @@ -600,7 +602,7 @@ class MySqlApp(object): def _internal_stop_mysql(self, update_db=False): LOG.info(_("Stopping mysql...")) - utils.execute("sudo", "service", "mysql", "stop") + utils.execute_with_timeout("sudo", "/etc/init.d/mysql", "stop") if not self.status.wait_for_real_status_to_change_to( rd_models.ServiceStatuses.SHUTDOWN, self.state_change_wait_time, update_db): @@ -638,10 +640,11 @@ class MySqlApp(object): def _replace_mycnf_with_template(self, template_path, original_path): if os.path.isfile(template_path): - utils.execute("sudo", "mv", original_path, "%(name)s.%(date)s" - % {'name': original_path, - 'date': date.today().isoformat()}) - utils.execute("sudo", "cp", template_path, original_path) + utils.execute_with_timeout("sudo", "mv", original_path, + "%(name)s.%(date)s" % {'name': original_path, + 'date': date.today().isoformat()}) + utils.execute_with_timeout("sudo", "cp", template_path, + original_path) def _write_temp_mycnf_with_admin_account(self, original_file_path, temp_file_path, password): @@ -657,8 +660,10 @@ class MySqlApp(object): def wipe_ib_logfiles(self): LOG.info(_("Wiping ib_logfiles...")) - utils.execute("sudo", "rm", "%s/ib_logfile0" % MYSQL_BASE_DIR) - utils.execute("sudo", "rm", "%s/ib_logfile1" % MYSQL_BASE_DIR) + utils.execute_with_timeout("sudo", "rm", "%s/ib_logfile0" + % MYSQL_BASE_DIR) + utils.execute_with_timeout("sudo", "rm", "%s/ib_logfile1" + % MYSQL_BASE_DIR) def _write_mycnf(self, pkg, update_memory_mb, admin_password): """ @@ -690,17 +695,33 @@ class MySqlApp(object): admin_password) # permissions work-around LOG.info(_("Moving tmp into final.")) 
- utils.execute("sudo", "mv", TMP_MYCNF, FINAL_MYCNF) + utils.execute_with_timeout("sudo", "mv", TMP_MYCNF, FINAL_MYCNF) LOG.info(_("Removing original my.cnf.")) - utils.execute("sudo", "rm", ORIG_MYCNF) + utils.execute_with_timeout("sudo", "rm", ORIG_MYCNF) LOG.info(_("Symlinking final my.cnf.")) - utils.execute("sudo", "ln", "-s", FINAL_MYCNF, ORIG_MYCNF) + utils.execute_with_timeout("sudo", "ln", "-s", FINAL_MYCNF, ORIG_MYCNF) self.wipe_ib_logfiles() def _start_mysql(self, update_db=False): LOG.info(_("Starting mysql...")) - utils.execute("sudo", "service", "mysql", "start") + # This is the site of all the trouble in the restart tests. + # Essentially what happens is that mysql start fails, but does not + # die. It is then impossible to kill the original, so we pkill it below. + + try: + utils.execute_with_timeout("sudo", "/etc/init.d/mysql", "start") + except ProcessExecutionError: + # If it won't start, but won't die either, kill it by hand so we + # don't let a rogue process wander around. + try: + utils.execute_with_timeout("sudo", "pkill", "-9", "mysql") + except ProcessExecutionError, p: + LOG.error("Error killing stalled mysql start command.") + LOG.error(p) + # There's nothing more we can do... + raise RuntimeError("Can't start MySQL!") + if not self.status.wait_for_real_status_to_change_to( rd_models.ServiceStatuses.RUNNING, self.state_change_wait_time, update_db): diff --git a/reddwarf/guestagent/manager.py b/reddwarf/guestagent/manager.py index 0875aa9435..3b6bb4822f 100644 --- a/reddwarf/guestagent/manager.py +++ b/reddwarf/guestagent/manager.py @@ -26,6 +26,7 @@ handles RPC calls relating to Platform specific operations. 
import functools import logging +import traceback from reddwarf.common import exception from reddwarf.common import utils @@ -75,7 +76,11 @@ class GuestManager(service.Manager): self.driver) if raise_on_error: raise ae - method() + try: + method() + except Exception as e: + LOG.error("Got an error during periodic tasks!") + LOG.debug(traceback.format_exc()) def upgrade(self, context): """Upgrade the guest agent and restart the agent""" @@ -88,10 +93,14 @@ class GuestManager(service.Manager): def _mapper(self, method, context, *args, **kwargs): """ Tries to call the respective driver method """ try: - method = getattr(self.driver, method) + func = getattr(self.driver, method) except AttributeError: LOG.error(_("Method %s not found for driver %s"), method, self.driver) raise exception.NotFound("Method %s is not available for the " "chosen driver.") - method(*args, **kwargs) + try: + return func(*args, **kwargs) + except Exception as e: + LOG.error("Got an error running %s!" % method) + LOG.debug(traceback.format_exc()) diff --git a/reddwarf/instance/models.py b/reddwarf/instance/models.py index 7b66721871..9c023b2cd4 100644 --- a/reddwarf/instance/models.py +++ b/reddwarf/instance/models.py @@ -23,6 +23,7 @@ import netaddr from reddwarf import db from reddwarf.common import config +#from reddwarf.guestagent import api as guest_api from reddwarf.common import exception as rd_exceptions from reddwarf.common import utils from reddwarf.instance.tasks import InstanceTask @@ -33,7 +34,6 @@ from novaclient import exceptions as nova_exceptions from reddwarf.common.models import NovaRemoteModelBase from reddwarf.common.remote import create_nova_client from reddwarf.common.remote import create_guest_client -from reddwarf.guestagent.db import models as guest_models CONFIG = config.Config @@ -60,6 +60,7 @@ def populate_databases(dbs): Create a serializable request with user provided data for creating new databases. 
""" + from reddwarf.guestagent.db import models as guest_models try: databases = [] for database in dbs: @@ -283,7 +284,19 @@ class Instance(object): self.db_info.task_status = InstanceTasks.NONE self.db_info.save() - def validate_can_perform_action_on_instance(self): + def validate_can_perform_restart_or_reboot(self): + """ + Raises exception if an instance action cannot currently be performed. + """ + if self.db_info.task_status != InstanceTasks.NONE or \ + not self.service_status.status.restart_is_allowed: + msg = "Instance is not currently available for an action to be " \ + "performed (task status was %s, service status was %s)." \ + % (self.db_info.task_status, self.service_status.status) + LOG.error(msg) + raise rd_exceptions.UnprocessableEntity(msg) + + def validate_can_perform_resize(self): """ Raises exception if an instance action cannot currently be performed. """ @@ -524,6 +537,12 @@ class ServiceStatus(object): def is_valid_code(code): return code in ServiceStatus._lookup + @property + def restart_is_allowed(self): + return self._code in [ServiceStatuses.RUNNING._code, + ServiceStatuses.SHUTDOWN._code, ServiceStatuses.CRASHED._code, + ServiceStatuses.BLOCKED._code] + def __str__(self): return self._description diff --git a/reddwarf/instance/service.py b/reddwarf/instance/service.py index c01affd48d..1ade375b2a 100644 --- a/reddwarf/instance/service.py +++ b/reddwarf/instance/service.py @@ -92,7 +92,6 @@ class InstanceController(BaseController): % (id, tenant_id)) context = req.environ[wsgi.CONTEXT_KEY] instance = models.Instance.load(context, id) - instance.validate_can_perform_action_on_instance() _actions = { 'restart': self._action_restart, 'resize': self._action_resize @@ -114,6 +113,7 @@ class InstanceController(BaseController): raise rd_exceptions.BadRequest(_("Invalid request body.")) def _action_restart(self, instance, body): + instance.validate_can_perform_restart_or_reboot() instance.restart() return webob.exc.HTTPAccepted() @@ -127,6 +127,7 
@@ class InstanceController(BaseController): If the body has both we will throw back an error. """ + instance.validate_can_perform_resize() options = { 'volume': self._action_resize_volume, 'flavorRef': self._action_resize_flavor diff --git a/reddwarf/instance/tasks.py b/reddwarf/instance/tasks.py index f7b1b1f4c4..9f8c60c227 100644 --- a/reddwarf/instance/tasks.py +++ b/reddwarf/instance/tasks.py @@ -57,7 +57,7 @@ class InstanceTask(object): class InstanceTasks(object): NONE = InstanceTask(0x01, 'NONE') DELETING = InstanceTask(0x02, 'DELETING') - REBOOTING = InstanceTask(0x02, 'REBOOTING') + REBOOTING = InstanceTask(0x03, 'REBOOTING') # Dissuade further additions at run-time. diff --git a/reddwarf/tests/fakes/guestagent.py b/reddwarf/tests/fakes/guestagent.py index 752d619ec0..4f1afb8886 100644 --- a/reddwarf/tests/fakes/guestagent.py +++ b/reddwarf/tests/fakes/guestagent.py @@ -65,7 +65,7 @@ class FakeGuest(object): def list_users(self): return [self.users[name] for name in self.users] - def prepare(self, memory_mb, databases): + def prepare(self, memory_mb, databases, users): from reddwarf.instance.models import InstanceServiceStatus from reddwarf.instance.models import ServiceStatuses def update_db():