diff --git a/etc/valet/api/app.apache2 b/etc/valet/api/app.apache2 index 8cc8d13..185e510 100644 --- a/etc/valet/api/app.apache2 +++ b/etc/valet/api/app.apache2 @@ -1,7 +1,7 @@ # valet user/group required (or substitute as needed). # Place in /opt/apache2/sites-available, symlink from # /opt/apache2/sites-enabled, and run 'apachectl restart' as root. -# Optional: Append python-path=PATH_TO_VENV_PACKAGES to WSGIDaemonProcess +# Optional: Append python-path=PATH_TO_VENV_PACKAGES to WSGIDaemonProcess Listen 8090 ServerName valet @@ -9,7 +9,7 @@ ServerName valet ServerName valet - WSGIDaemonProcess valet user=m04060 group=m04060 threads=5 + WSGIDaemonProcess valet user=m04060 group=m04060 threads=20 WSGIScriptAlias / /var/www/valet/app.wsgi SetEnv APACHE_RUN_USER m04060 diff --git a/etc/valet/valet.conf b/etc/valet/valet.conf index 9f0edf3..70c827e 100644 --- a/etc/valet/valet.conf +++ b/etc/valet/valet.conf @@ -69,7 +69,7 @@ pid = /var/run/valet/ostro-daemon.pid # Management configuration #------------------------------------------------------------------------------------------------------------ # Inform the name of datacenter (region name), where Valet/Ostro is deployed. -# datacenter_name = bigsite +# datacenter_name = aic # Set the naming convention rules. # Currently, 3 chars of CLLI + region number + 'r' + rack id number + 1 char of node type + node id number. @@ -90,11 +90,11 @@ pid = /var/run/valet/ostro-daemon.pid # Set trigger time or frequency for checking compute hosting server status (i.e., call Nova) # Note that currently, compute (Nova) should be triggered first then trigger topology. # compute_trigger_time = 01:00 -# compute_trigger_frequency = 3600 +# compute_trigger_frequency = 14400 # Set trigger time or frequency for checking datacenter topology -# topology_trigger_time = 02:00 -# topology_trigger_frequency = 3600 +# topology_trigger_time = 01:40 +# topology_trigger_frequency = 28800 # Set default overbooking ratios. Note that each compute node can have its own ratios. # default_cpu_allocation_ratio = 16 @@ -102,9 +102,9 @@ pid = /var/run/valet/ostro-daemon.pid # default_disk_allocation_ratio = 1 # Set static unused percentages of resources (i.e., standby) that are set aside for applications's workload spikes. 
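Note on the configuration retuning above: the WSGI daemon goes from 5 to 20 threads, the Nova compute check stretches from hourly to every 4 hours (14400s), the topology check to every 8 hours (28800s) staggered at 01:40, and the static standby ratios just below drop from 20 to 0, so no capacity is held back for workload spikes. A minimal sketch of how a standby ratio enters the capacity math, assuming compute_avail_vCPUs() applies the overbooking ratio and standby fraction the way compute_avail_resources() in resource.py (later in this diff) passes them; the helper below is hypothetical:

    def avail_vcpus(physical_vcpus, cpu_allocation_ratio=16.0,
                    static_cpu_standby_ratio=0.0):
        # Overbook physical cores first, then hold back the standby share.
        overbooked = physical_vcpus * cpu_allocation_ratio
        return overbooked * (1.0 - static_cpu_standby_ratio / 100.0)

    # Old defaults (ratio 16, standby 20): a 32-core host exposes
    # 32 * 16 * 0.8 = 409.6 vCPUs; with the standby ratio zeroed, 512.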
-# static_cpu_standby_ratio = 20 -# static_mem_standby_ratio = 20 -# static_local_disk_standby_ratio = 20 +# static_cpu_standby_ratio = 0 +# static_mem_standby_ratio = 0 +# static_local_disk_standby_ratio = 0 # Set Ostro execution mode # mode = [live|sim], sim will let Ostro simulate datacenter, while live will let it handle a real datacenter diff --git a/requirements.txt b/requirements.txt index f4293c6..dc7e812 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,6 +9,6 @@ simplejson<=3.3.1 #pymysql #sqlalchemy pika<=0.10.0 -python-daemon<=2.1.1 +python-daemon #oslo.messaging!=1.17.0,!=1.17.1,!=2.6.0,!=2.6.1,!=2.7.0,!=2.8.0,!=2.8.1,!=2.9.0,!=3.1.0,>=1.16.0 # Apache-2.0 #oslo.messaging==1.8.3 diff --git a/valet/engine/optimizer/app_manager/app_handler.py b/valet/engine/optimizer/app_manager/app_handler.py index 8a729ab..6947231 100755 --- a/valet/engine/optimizer/app_manager/app_handler.py +++ b/valet/engine/optimizer/app_manager/app_handler.py @@ -15,12 +15,9 @@ """App Handler.""" -import json - from valet.engine.optimizer.app_manager.app_topology import AppTopology from valet.engine.optimizer.app_manager.app_topology_base import VM from valet.engine.optimizer.app_manager.application import App -from valet.engine.optimizer.util import util as util class AppHandler(object): @@ -51,57 +48,6 @@ class AppHandler(object): app_topology = AppTopology(self.resource, self.logger) - for app in _app_data: - self.logger.debug("AppHandler: parse app") - - stack_id = None - if "stack_id" in app.keys(): - stack_id = app["stack_id"] - else: - stack_id = "none" - - application_name = None - if "application_name" in app.keys(): - application_name = app["application_name"] - else: - application_name = "none" - - action = app["action"] - if action == "ping": - self.logger.debug("AppHandler: got ping") - elif action == "replan" or action == "migrate": - re_app = self._regenerate_app_topology(stack_id, app, - app_topology, action) - if re_app is None: - self.apps[stack_id] = None - self.status = "cannot locate the original plan for " \ - "stack = " + stack_id - return None - - if action == "replan": - self.logger.debug("AppHandler: got replan: " + stack_id) - elif action == "migrate": - self.logger.debug("AppHandler: got migration: " + stack_id) - - app_id = app_topology.set_app_topology(re_app) - - if app_id is None: - self.logger.error("AppHandler: " + app_topology.status) - self.status = app_topology.status - self.apps[stack_id] = None - return None - else: - app_id = app_topology.set_app_topology(app) - - if app_id is None: - self.logger.error("AppHandler: " + app_topology.status) - self.status = app_topology.status - self.apps[stack_id] = None - return None - - new_app = App(stack_id, application_name, action) - self.apps[stack_id] = new_app -======= self.logger.debug("AppHandler: parse app") stack_id = None @@ -118,23 +64,24 @@ class AppHandler(object): action = _app["action"] if action == "ping": - self.logger.debug("AppHandler: got ping") + self.logger.debug("got ping") elif action == "replan" or action == "migrate": - re_app = self._regenerate_app_topology(stack_id, _app, app_topology, action) + re_app = self._regenerate_app_topology(stack_id, _app, + app_topology, action) if re_app is None: self.apps[stack_id] = None self.status = "cannot locate the original plan for stack = " + stack_id return None if action == "replan": - self.logger.debug("AppHandler: got replan: " + stack_id) + self.logger.debug("got replan: " + stack_id) elif action == "migrate": - self.logger.debug("AppHandler: got 
migration: " + stack_id) + self.logger.debug("got migration: " + stack_id) app_id = app_topology.set_app_topology(re_app) if app_id is None: - self.logger.error("AppHandler: " + app_topology.status) + self.logger.error(app_topology.status) self.status = app_topology.status self.apps[stack_id] = None return None @@ -142,14 +89,13 @@ class AppHandler(object): app_id = app_topology.set_app_topology(_app) if app_id is None: - self.logger.error("AppHandler: " + app_topology.status) + self.logger.error(app_topology.status) self.status = app_topology.status self.apps[stack_id] = None return None new_app = App(stack_id, application_name, action) self.apps[stack_id] = new_app ->>>>>>> c095458... Improve delay with fine-grained locking return app_topology @@ -173,41 +119,15 @@ class AppHandler(object): self.apps[v.app_uuid].add_vgroup(v, hg.name) if self._store_app_placements() is False: - # NOTE: ignore? pass def _store_app_placements(self): - (app_logfile, last_index, mode) = util.get_last_logfile( - self.config.app_log_loc, self.config.max_log_size, - self.config.max_num_of_logs, self.resource.datacenter.name, - self.last_log_index) - self.last_log_index = last_index - - # TODO(UNKNOWN): error handling - - logging = open(self.config.app_log_loc + app_logfile, mode) - - for appk, app in self.apps.iteritems(): - json_log = app.log_in_info() - log_data = json.dumps(json_log) - - logging.write(log_data) - logging.write("\n") - - logging.close() - - self.logger.info("AppHandler: log app in " + app_logfile) if self.db is not None: for appk, app in self.apps.iteritems(): json_info = app.get_json_info() if self.db.add_app(appk, json_info) is False: return False - - if self.db.update_app_log_index(self.resource.datacenter.name, - self.last_log_index) is False: - return False - return True def remove_placement(self): @@ -217,7 +137,6 @@ class AppHandler(object): if self.db.add_app(appk, None) is False: self.logger.error("AppHandler: error while adding app " "info to MUSIC") - # NOTE: ignore? 
def get_vm_info(self, _s_uuid, _h_uuid, _host): """Return vm_info from database.""" @@ -241,11 +160,11 @@ class AppHandler(object): old_app = self.db.get_app_info(_stack_id) if old_app is None: self.status = "error while getting old_app from MUSIC" - self.logger.error("AppHandler: " + self.status) + self.logger.error(self.status) return None elif len(old_app) == 0: self.status = "cannot find the old app in MUSIC" - self.logger.error("AppHandler: " + self.status) + self.logger.error(self.status) return None re_app["action"] = "create" diff --git a/valet/engine/optimizer/app_manager/app_topology_parser.py b/valet/engine/optimizer/app_manager/app_topology_parser.py index 9601b61..56ff359 100755 --- a/valet/engine/optimizer/app_manager/app_topology_parser.py +++ b/valet/engine/optimizer/app_manager/app_topology_parser.py @@ -101,7 +101,7 @@ class Parser(object): vms[vm.uuid] = vm - self.logger.debug("Parser: get a vm = " + vm.name) + self.logger.debug("vm = " + vm.uuid) elif r["type"] == "OS::Cinder::Volume": self.logger.warn("Parser: do nothing for volume at this " @@ -149,7 +149,7 @@ class Parser(object): vgroups[vgroup.uuid] = vgroup - self.logger.debug("Parser: get a group = " + vgroup.name) + self.logger.debug("group = " + vgroup.name) vgroup_captured = True self._set_vm_links(_elements, vms) @@ -159,7 +159,7 @@ class Parser(object): self._set_total_link_capacities(vms, volumes) - self.logger.debug("Parser: all vms parsed") + self.logger.debug("all vms parsed") if self._merge_diversity_groups(_elements, vgroups, vms, volumes) \ is False: @@ -184,7 +184,7 @@ class Parser(object): self._set_vgroup_links(vgroup, vgroups, vms, volumes) if vgroup_captured is True: - self.logger.debug("Parser: all groups resolved") + self.logger.debug("all groups resolved") return vgroups, vms, volumes diff --git a/valet/engine/optimizer/db_connect/music_handler.py b/valet/engine/optimizer/db_connect/music_handler.py index ad76eeb..eedfecf 100644 --- a/valet/engine/optimizer/db_connect/music_handler.py +++ b/valet/engine/optimizer/db_connect/music_handler.py @@ -39,8 +39,6 @@ class MusicHandler(object): self.music = None - self.logger.debug("MusicHandler.__init__: mode = " + self.config.mode) - if self.config.mode.startswith("sim"): self.music = Music() elif self.config.mode.startswith("live"): @@ -60,11 +58,9 @@ class MusicHandler(object): try: self.music.create_keyspace(self.config.db_keyspace) except Exception as e: - self.logger.error("MUSIC error: " + str(e)) + self.logger.error("DB: " + str(e)) return False - self.logger.info("MusicHandler.init_db: create table") - schema = { 'stack_id': 'text', 'request': 'text', @@ -74,7 +70,7 @@ class MusicHandler(object): self.music.create_table(self.config.db_keyspace, self.config.db_request_table, schema) except Exception as e: - self.logger.error("MUSIC error: " + str(e)) + self.logger.error("DB: " + str(e)) return False schema = { @@ -86,7 +82,7 @@ class MusicHandler(object): self.music.create_table(self.config.db_keyspace, self.config.db_response_table, schema) except Exception as e: - self.logger.error("MUSIC error: " + str(e)) + self.logger.error("DB: " + str(e)) return False schema = { @@ -100,7 +96,7 @@ class MusicHandler(object): self.music.create_table(self.config.db_keyspace, self.config.db_event_table, schema) except Exception as e: - self.logger.error("MUSIC error: " + str(e)) + self.logger.error("DB: " + str(e)) return False schema = { @@ -112,7 +108,7 @@ class MusicHandler(object): self.music.create_table(self.config.db_keyspace, 
self.config.db_resource_table, schema) except Exception as e: - self.logger.error("MUSIC error: " + str(e)) + self.logger.error("DB: " + str(e)) return False schema = { @@ -124,7 +120,7 @@ class MusicHandler(object): self.music.create_table(self.config.db_keyspace, self.config.db_app_table, schema) except Exception as e: - self.logger.error("MUSIC error: " + str(e)) + self.logger.error("DB: " + str(e)) return False schema = { @@ -136,7 +132,7 @@ class MusicHandler(object): self.music.create_table(self.config.db_keyspace, self.config.db_app_index_table, schema) except Exception as e: - self.logger.error("MUSIC error: " + str(e)) + self.logger.error("DB: " + str(e)) return False schema = { @@ -148,7 +144,7 @@ class MusicHandler(object): self.music.create_table(self.config.db_keyspace, self.config.db_resource_index_table, schema) except Exception as e: - self.logger.error("MUSIC error: " + str(e)) + self.logger.error("DB: " + str(e)) return False schema = { @@ -161,7 +157,7 @@ class MusicHandler(object): self.music.create_table(self.config.db_keyspace, self.config.db_uuid_table, schema) except Exception as e: - self.logger.error("MUSIC error: " + str(e)) + self.logger.error("DB: " + str(e)) return False return True @@ -180,7 +176,7 @@ class MusicHandler(object): events = self.music.read_all_rows(self.config.db_keyspace, self.config.db_event_table) except Exception as e: - self.logger.error("MUSIC error while reading events: " + str(e)) + self.logger.error("DB:event: " + str(e)) return None if len(events) > 0: @@ -303,9 +299,6 @@ class MusicHandler(object): for e in event_list: e.set_data() - self.logger.debug("MusicHandler.get_events: event (" + - e.event_id + ") is parsed") - if e.method == "object_action": if e.object_name == 'Instance': if e.uuid is None or e.uuid == "none" or \ @@ -342,7 +335,7 @@ class MusicHandler(object): self.config.db_event_table, 'timestamp', _event_id) except Exception as e: - self.logger.error("MUSIC error while deleting event: " + str(e)) + self.logger.error("DB: while deleting event: " + str(e)) return False return True @@ -357,18 +350,13 @@ class MusicHandler(object): row = self.music.read_row(self.config.db_keyspace, self.config.db_uuid_table, 'uuid', _uuid) except Exception as e: - self.logger.error("MUSIC error while reading uuid: " + str(e)) + self.logger.error("DB: while reading uuid: " + str(e)) return None if len(row) > 0: h_uuid = row[row.keys()[0]]['h_uuid'] s_uuid = row[row.keys()[0]]['s_uuid'] - self.logger.info("MusicHandler.get_uuid: get heat uuid (" + - h_uuid + ") for uuid = " + _uuid) - else: - self.logger.debug("MusicHandler.get_uuid: heat uuid not found") - return h_uuid, s_uuid def put_uuid(self, _e): @@ -396,11 +384,9 @@ class MusicHandler(object): self.music.create_row(self.config.db_keyspace, self.config.db_uuid_table, data) except Exception as e: - self.logger.error("MUSIC error while inserting uuid: " + str(e)) + self.logger.error("DB: while inserting uuid: " + str(e)) return False - self.logger.info("MusicHandler.put_uuid: uuid (" + _e.uuid + ") added") - return True def delete_uuid(self, _k): @@ -410,7 +396,7 @@ class MusicHandler(object): self.config.db_uuid_table, 'uuid', _k) except Exception as e: - self.logger.error("MUSIC error while deleting uuid: " + str(e)) + self.logger.error("DB: while deleting uuid: " + str(e)) return False return True @@ -424,7 +410,7 @@ class MusicHandler(object): requests = self.music.read_all_rows(self.config.db_keyspace, self.config.db_request_table) except Exception as e: - self.logger.error("MUSIC 
error while reading requests: " + str(e)) + self.logger.error("DB: while reading requests: " + str(e)) return None if len(requests) > 0: @@ -456,9 +442,6 @@ class MusicHandler(object): "result: " + str(e)) return False - self.logger.info("MusicHandler.put_result: " + appk + - " placement result added") - for appk in _result.keys(): try: self.music.delete_row_eventually(self.config.db_keyspace, @@ -469,9 +452,6 @@ class MusicHandler(object): "request: " + str(e)) return False - self.logger.info("MusicHandler.put_result: " + - appk + " placement request deleted") - return True def get_resource_status(self, _k): @@ -492,9 +472,6 @@ class MusicHandler(object): str_resource = row[row.keys()[0]]['resource'] json_resource = json.loads(str_resource) - self.logger.info("MusicHandler.get_resource_status: get resource " - "status") - return json_resource def update_resource_status(self, _k, _status): @@ -584,12 +561,9 @@ class MusicHandler(object): self.music.create_row(self.config.db_keyspace, self.config.db_resource_table, data) except Exception as e: - self.logger.error("MUSIC error: " + str(e)) + self.logger.error("DB: " + str(e)) return False - self.logger.info("MusicHandler.update_resource_status: resource status " - "updated") - return True def update_resource_log_index(self, _k, _index): @@ -608,9 +582,6 @@ class MusicHandler(object): "index: " + str(e)) return False - self.logger.info("MusicHandler.update_resource_log_index: resource log " - "index updated") - return True def update_app_log_index(self, _k, _index): @@ -629,9 +600,6 @@ class MusicHandler(object): str(e)) return False - self.logger.info("MusicHandler.update_app_log_index: app log index " - "updated") - return True def add_app(self, _k, _app_data): @@ -641,10 +609,10 @@ class MusicHandler(object): self.config.db_keyspace, self.config.db_app_table, 'stack_id', _k) except Exception as e: - self.logger.error("MUSIC error while deleting app: " + str(e)) + self.logger.error("DB: while deleting app: " + str(e)) return False - self.logger.info("MusicHandler.add_app: app deleted") + # self.logger.info("DB: app deleted") if _app_data is not None: data = { @@ -656,10 +624,10 @@ class MusicHandler(object): self.music.create_row(self.config.db_keyspace, self.config.db_app_table, data) except Exception as e: - self.logger.error("MUSIC error while inserting app: " + str(e)) + self.logger.error("DB: while inserting app: " + str(e)) return False - self.logger.info("MusicHandler.add_app: app added") + # self.logger.info("DB: app added") return True @@ -673,7 +641,7 @@ class MusicHandler(object): self.config.db_app_table, 'stack_id', _s_uuid) except Exception as e: - self.logger.error("MUSIC error while reading app info: " + str(e)) + self.logger.error("DB: while reading app info: " + str(e)) return None if len(row) > 0: @@ -696,7 +664,7 @@ class MusicHandler(object): self.config.db_app_table, 'stack_id', _s_uuid) except Exception as e: - self.logger.error("MUSIC error: " + str(e)) + self.logger.error("DB: " + str(e)) return None if len(row) > 0: @@ -712,14 +680,11 @@ class MusicHandler(object): vm["host"] = _host self.logger.warn("db: conflicted placement " "decision from Ostro") - # TODO(UNKOWN): affinity, diversity, - # exclusivity check + # TODO(GY): affinity, diversity, exclusivity validation check updated = True - else: - self.logger.debug("db: placement as expected") else: vm["status"] = "scheduled" - self.logger.warn("db: vm was deleted") + self.logger.warn("DB: vm was deleted") updated = True vm_info = vm @@ -749,7 +714,7 @@ class 
MusicHandler(object): self.config.db_app_table, 'stack_id', _s_uuid) except Exception as e: - self.logger.error("MUSIC error: " + str(e)) + self.logger.error("DB: " + str(e)) return False if len(row) > 0: @@ -761,16 +726,15 @@ class MusicHandler(object): if vmk == _h_uuid: if vm["status"] != "deleted": vm["status"] = "deleted" - self.logger.debug("db: deleted marked") + self.logger.debug("DB: deleted marked") updated = True else: - self.logger.warn("db: vm was already deleted") + self.logger.warn("DB: vm was already deleted") break else: self.logger.error("MusicHandler.update_vm_info: vm is missing " "from stack") - else: self.logger.warn("MusicHandler.update_vm_info: not found stack for " "update = " + _s_uuid) diff --git a/valet/engine/optimizer/ostro/constraint_solver.py b/valet/engine/optimizer/ostro/constraint_solver.py index 019cb03..e4f28ec 100755 --- a/valet/engine/optimizer/ostro/constraint_solver.py +++ b/valet/engine/optimizer/ostro/constraint_solver.py @@ -60,7 +60,7 @@ class ConstraintSolver(object): candidate_list.append(r) if len(candidate_list) == 0: self.status = "no candidate for node = " + _n.node.name - self.logger.warn("ConstraintSolver: " + self.status) + self.logger.warn(self.status) return candidate_list else: self.logger.debug("ConstraintSolver: num of candidates = " + @@ -79,7 +79,7 @@ class ConstraintSolver(object): return candidate_list else: self.logger.debug("ConstraintSolver: done availability_" - "zone constraint") + "zone constraint " + str(len(candidate_list))) """Host aggregate constraint.""" if isinstance(_n.node, VGroup) or isinstance(_n.node, VM): @@ -92,7 +92,7 @@ class ConstraintSolver(object): return candidate_list else: self.logger.debug("ConstraintSolver: done host_aggregate " - "constraint") + "constraint " + str(len(candidate_list))) """CPU capacity constraint.""" if isinstance(_n.node, VGroup) or isinstance(_n.node, VM): @@ -104,7 +104,7 @@ class ConstraintSolver(object): return candidate_list else: self.logger.debug("ConstraintSolver: done cpu capacity " - "constraint") + "constraint " + str(len(candidate_list))) """Memory capacity constraint.""" if isinstance(_n.node, VGroup) or isinstance(_n.node, VM): @@ -116,7 +116,7 @@ class ConstraintSolver(object): return candidate_list else: self.logger.debug("ConstraintSolver: done memory capacity " - "constraint") + "constraint " + str(len(candidate_list))) """Local disk capacity constraint.""" if isinstance(_n.node, VGroup) or isinstance(_n.node, VM): @@ -128,19 +128,8 @@ class ConstraintSolver(object): return candidate_list else: self.logger.debug("ConstraintSolver: done local disk capacity " - "constraint") + "constraint " + str(len(candidate_list))) - """Network bandwidth constraint.""" - self._constrain_nw_bandwidth_capacity(_level, _n, _node_placements, - candidate_list) - if len(candidate_list) == 0: - self.status = "violate nw bandwidth capacity constraint for " \ - "node = " + _n.node.name - self.logger.error("ConstraintSolver: " + self.status) - return candidate_list - else: - self.logger.debug("ConstraintSolver: done bandwidth capacity " - "constraint") """Diversity constraint.""" if len(_n.node.diversity_groups) > 0: @@ -167,7 +156,7 @@ class ConstraintSolver(object): return candidate_list else: self.logger.debug("ConstraintSolver: done diversity_group " - "constraint") + "constraint " + str(len(candidate_list))) """Exclusivity constraint.""" exclusivities = self.get_exclusivities(_n.node.exclusivity_groups, @@ -190,7 +179,7 @@ class ConstraintSolver(object): return candidate_list else: 
self.logger.debug("ConstraintSolver: done exclusivity " - "group constraint") + "group constraint " + str(len(candidate_list))) else: self._constrain_non_exclusivity(_level, candidate_list) if len(candidate_list) == 0: @@ -200,7 +189,7 @@ class ConstraintSolver(object): return candidate_list else: self.logger.debug("ConstraintSolver: done non-exclusivity_" - "group constraint") + "group constraint " + str(len(candidate_list))) """Affinity constraint.""" affinity_id = _n.get_affinity_id() # level:name, except name == "any" @@ -216,7 +205,7 @@ class ConstraintSolver(object): return candidate_list else: self.logger.debug("ConstraintSolver: done affinity_" - "group constraint") + "group constraintt " + str(len(candidate_list))) return candidate_list @@ -232,10 +221,6 @@ class ConstraintSolver(object): if r not in conflict_list: conflict_list.append(r) - debug_resource_name = r.get_resource_name(_level) - self.logger.debug("ConstraintSolver: not exist affinity " - "in resource = " + debug_resource_name) - _candidate_list[:] = [c for c in _candidate_list if c not in conflict_list] @@ -248,9 +233,6 @@ class ConstraintSolver(object): if r not in conflict_list: conflict_list.append(r) - debug_resource_name = r.get_resource_name(_level) - self.logger.debug("ConstraintSolver: conflict diversity " - "in resource = " + debug_resource_name) _candidate_list[:] = [c for c in _candidate_list if c not in conflict_list] @@ -279,11 +261,6 @@ class ConstraintSolver(object): if r not in conflict_list: conflict_list.append(r) - resource_name = r.get_resource_name(_level) - self.logger.debug("ConstraintSolver: conflict the " - "diversity in resource = " + - resource_name) - _candidate_list[:] = [c for c in _candidate_list if c not in conflict_list] @@ -424,10 +401,6 @@ class ConstraintSolver(object): if r not in conflict_list: conflict_list.append(r) - debug_resource_name = r.get_resource_name(_level) - self.logger.debug("ConstraintSolver: not meet aggregate " - "in resource = " + debug_resource_name) - _candidate_list[:] = [c for c in _candidate_list if c not in conflict_list] @@ -443,10 +416,6 @@ class ConstraintSolver(object): if r not in conflict_list: conflict_list.append(r) - debug_resource_name = r.get_resource_name(_level) - self.logger.debug("ConstraintSolver: not meet az in " - "resource = " + debug_resource_name) - _candidate_list[:] = [c for c in _candidate_list if c not in conflict_list] @@ -461,10 +430,6 @@ class ConstraintSolver(object): if self.check_cpu_capacity(_level, _n.node, ch) is False: conflict_list.append(ch) - debug_resource_name = ch.get_resource_name(_level) - self.logger.debug("ConstraintSolver: lack of cpu in " + - debug_resource_name) - _candidate_list[:] = [c for c in _candidate_list if c not in conflict_list] @@ -479,10 +444,6 @@ class ConstraintSolver(object): if self.check_mem_capacity(_level, _n.node, ch) is False: conflict_list.append(ch) - debug_resource_name = ch.get_resource_name(_level) - self.logger.debug("ConstraintSolver: lack of mem in " + - debug_resource_name) - _candidate_list[:] = [c for c in _candidate_list if c not in conflict_list] @@ -497,10 +458,6 @@ class ConstraintSolver(object): if self.check_local_disk_capacity(_level, _n.node, ch) is False: conflict_list.append(ch) - debug_resource_name = ch.get_resource_name(_level) - self.logger.debug("ConstraintSolver: lack of local disk in " + - debug_resource_name) - _candidate_list[:] = [c for c in _candidate_list if c not in conflict_list] @@ -515,7 +472,7 @@ class ConstraintSolver(object): if 
self.check_storage_availability(_level, _n.node, ch) is False: conflict_list.append(ch) - debug_resource_name = ch.get_resource_name(_level) + # debug_resource_name = ch.get_resource_name(_level) avail_storages = ch.get_avail_storages(_level) avail_disks = [] volume_classes = [] @@ -531,10 +488,6 @@ class ConstraintSolver(object): for vc in volume_classes: for _, s in avail_storages.iteritems(): if vc == "any" or s.storage_class == vc: - avail_disks.append(s.storage_avail_disk) - - self.logger.debug("ConstraintSolver: storage constrained in" - "resource = " + debug_resource_name) _candidate_list[:] = [c for c in _candidate_list if c not in conflict_list] @@ -574,10 +527,6 @@ class ConstraintSolver(object): if cr not in conflict_list: conflict_list.append(cr) - debug_resource_name = cr.get_resource_name(_level) - self.logger.debug("ConstraintSolver: bw constrained in " - "resource = " + debug_resource_name) - _candidate_list[:] = [c for c in _candidate_list if c not in conflict_list] diff --git a/valet/engine/optimizer/ostro/optimizer.py b/valet/engine/optimizer/ostro/optimizer.py index 462fa91..e140365 100755 --- a/valet/engine/optimizer/ostro/optimizer.py +++ b/valet/engine/optimizer/ostro/optimizer.py @@ -77,8 +77,6 @@ class Optimizer(object): self.logger.debug("Optimizer: search running time = " + str(end_ts - start_ts) + " sec") - self.logger.debug("Optimizer: total bandwidth = " + - str(self.search.bandwidth_usage)) self.logger.debug("Optimizer: total number of hosts = " + str(self.search.num_of_hosts)) diff --git a/valet/engine/optimizer/ostro/ostro.py b/valet/engine/optimizer/ostro/ostro.py index 973ed71..6ee4f96 100755 --- a/valet/engine/optimizer/ostro/ostro.py +++ b/valet/engine/optimizer/ostro/ostro.py @@ -89,21 +89,23 @@ class Ostro(object): self.thread_list.append(self.listener) while self.end_of_process is False: - time.sleep(1) - - event_list = self.db.get_events() - if event_list is None: - break - if len(event_list) > 0: - if self.handle_events(event_list) is False: - break + time.sleep(0.1) request_list = self.db.get_requests() if request_list is None: break + if len(request_list) > 0: if self.place_app(request_list) is False: break + else: + event_list = self.db.get_events() + if event_list is None: + break + + if len(event_list) > 0: + if self.handle_events(event_list) is False: + break self.topology.end_of_process = True self.compute.end_of_process = True @@ -164,7 +166,7 @@ class Ostro(object): def _set_topology(self): if not self.topology.set_topology(): - self.status = "datacenter configuration error" + # self.status = "datacenter configuration error" self.logger.error("failed to read datacenter topology") return False @@ -173,7 +175,7 @@ class Ostro(object): def _set_hosts(self): if not self.compute.set_hosts(): - self.status = "OpenStack (Nova) internal error" + # self.status = "OpenStack (Nova) internal error" self.logger.error("failed to read hosts from OpenStack (Nova)") return False @@ -182,7 +184,7 @@ class Ostro(object): def _set_flavors(self): if not self.compute.set_flavors(): - self.status = "OpenStack (Nova) internal error" + # self.status = "OpenStack (Nova) internal error" self.logger.error("failed to read flavors from OpenStack (Nova)") return False @@ -319,8 +321,6 @@ class Ostro(object): placement_map = self.optimizer.place(app_topology) if placement_map is None: self.status = self.optimizer.status - self.logger.debug("Ostro._place_app: error while optimizing app " - "placement: " + self.status) self.data_lock.release() return None @@ -499,6 +499,7 
@@ class Ostro(object): elif e.object_name == 'ComputeNode': # Host resource is updated self.logger.debug("Ostro.handle_events: got compute event") + # NOTE: what if host is disabled? if self.resource.update_host_resources( e.host, e.status, e.vcpus, e.vcpus_used, e.mem, diff --git a/valet/engine/optimizer/ostro/search.py b/valet/engine/optimizer/ostro/search.py index 7dd1952..1d9076e 100755 --- a/valet/engine/optimizer/ostro/search.py +++ b/valet/engine/optimizer/ostro/search.py @@ -110,7 +110,7 @@ class Search(object): self.constraint_solver = ConstraintSolver(self.logger) - self.logger.info("Search: start search") + self.logger.info("start search") self._create_avail_logical_groups() self._create_avail_storage_hosts() @@ -140,7 +140,7 @@ class Search(object): self.constraint_solver = ConstraintSolver(self.logger) - self.logger.info("Search: start search for replan") + self.logger.info("start search for replan") self._create_avail_logical_groups() self._create_avail_storage_hosts() @@ -154,7 +154,7 @@ class Search(object): self._compute_resource_weights() - self.logger.debug("Search: first, place already-planned nodes") + self.logger.debug("first, place already-planned nodes") """Reconsider all vms to be migrated together.""" if len(_app_topology.exclusion_list_map) > 0: @@ -162,10 +162,10 @@ class Search(object): if self._place_planned_nodes() is False: self.status = "cannot replan VMs that was planned" - self.logger.error("Search: " + self.status) + self.logger.error(self.status) return False - self.logger.debug("Search: second, re-place not-planned nodes") + self.logger.debug("second, re-place not-planned nodes") init_level = LEVELS[len(LEVELS) - 1] (open_node_list, level) = self._open_list(self.app_topology.vms, @@ -173,7 +173,7 @@ class Search(object): self.app_topology.vgroups, init_level) if open_node_list is None: - self.logger.error("Search: fail to replan") + self.logger.error("fail to replan") return False for v, ah in self.planned_placements.iteritems(): @@ -323,16 +323,12 @@ class Search(object): avail_resources = _avail_hosts _node_list.sort(key=operator.attrgetter("sort_base"), reverse=True) - self.logger.debug("Search: level = " + _level) + self.logger.debug("level = " + _level) for on in _node_list: - self.logger.debug(" node = {}, value = {}".format(on.node.name, - on.sort_base)) - + self.logger.debug("node = {}, value = {}".format(on.node.name, + on.sort_base)) while len(_node_list) > 0: n = _node_list.pop(0) - self.logger.debug("Search: level = " + _level + - ", placing node = " + n.node.name) - best_resource = self._get_best_resource_for_planned(n, _level, avail_resources) if best_resource is not None: @@ -344,7 +340,7 @@ class Search(object): self._deduct_reservation(_level, best_resource, n) self._close_planned_placement(_level, best_resource, n.node) else: - self.logger.error("Search: fail to place already-planned VMs") + self.logger.error("fail to place already-planned VMs") return False return True @@ -384,7 +380,7 @@ class Search(object): host_name = self._get_host_of_level(_n, _level) if host_name is None: - self.logger.warn("Search: cannot find host while replanning") + self.logger.warn("cannot find host while replanning") return None avail_hosts = {} @@ -539,7 +535,7 @@ class Search(object): for lgk, lg in self.resource.logical_groups.iteritems(): if lg.status != "enabled": - self.logger.debug("Search: group (" + lg.name + ") disabled") + self.logger.debug("group (" + lg.name + ") disabled") continue lgr = LogicalGroupResource() @@ -733,21 +729,6 @@ class 
Search(object): elif t == "vol": self.disk_weight = float(w / denominator) - self.logger.debug("Search: placement priority weights") - for (r, w) in self.app_topology.optimization_priority: - if r == "bw": - self.logger.debug(" nw weight = " + - str(self.nw_bandwidth_weight)) - elif r == "cpu": - self.logger.debug(" cpu weight = " + str(self.CPU_weight)) - elif r == "mem": - self.logger.debug(" mem weight = " + str(self.mem_weight)) - elif r == "lvol": - self.logger.debug(" local disk weight = " + - str(self.local_disk_weight)) - elif r == "vol": - self.logger.debug(" disk weight = " + str(self.disk_weight)) - def _open_list(self, _vms, _volumes, _vgroups, _current_level): open_node_list = [] next_level = None @@ -891,9 +872,9 @@ class Search(object): self.status = self.constraint_solver.status return None - self.logger.debug("Search: candidate list") - for c in candidate_list: - self.logger.debug(" candidate = " + c.get_resource_name(_level)) + # self.logger.debug("Search: candidate list") + # for c in candidate_list: + # self.logger.debug(" candidate = " + c.get_resource_name(_level)) (target, _) = self.app_topology.optimization_priority[0] top_candidate_list = None @@ -952,7 +933,7 @@ class Search(object): if len(top_candidate_list) == 0: self.status = "no available network bandwidth left" - self.logger.error("Search: " + self.status) + self.logger.error(self.status) return None best_resource = None @@ -968,9 +949,6 @@ class Search(object): while len(top_candidate_list) > 0: cr = top_candidate_list.pop(0) - self.logger.debug("Search: try candidate = " + - cr.get_resource_name(_level)) - vms = {} volumes = {} vgroups = {} @@ -1034,7 +1012,7 @@ class Search(object): else: if len(candidate_list) == 0: self.status = "no available hosts" - self.logger.warn("Search: " + self.status) + self.logger.warn(self.status) break else: if target == "bw": @@ -1653,7 +1631,7 @@ class Search(object): lgr.group_type = "AFF" self.avail_logical_groups[lgr.name] = lgr - self.logger.debug("Search: add new affinity (" + _affinity_id + ")") + self.logger.debug("add new affinity (" + _affinity_id + ")") else: lgr = self.avail_logical_groups[_affinity_id] @@ -1860,7 +1838,7 @@ class Search(object): self._rollback_reservation(v) if _v in self.node_placements.keys(): - self.logger.debug("Search: node (" + _v.name + ") rollbacked") + self.logger.debug("node (" + _v.name + ") rollbacked") chosen_host = self.avail_hosts[self.node_placements[_v].host_name] level = self.node_placements[_v].level diff --git a/valet/engine/resource_manager/compute_manager.py b/valet/engine/resource_manager/compute_manager.py index 514df07..dee45f7 100755 --- a/valet/engine/resource_manager/compute_manager.py +++ b/valet/engine/resource_manager/compute_manager.py @@ -92,7 +92,7 @@ class ComputeManager(threading.Thread): last_trigger_mon = now.tm_mon last_trigger_mday = now.tm_mday - self.logger.info("ComputeManager: exit " + self.thread_name) + self.logger.info("exit compute_manager " + self.thread_name) def _run(self): self.logger.info("ComputeManager: --- start compute_nodes " @@ -132,7 +132,7 @@ class ComputeManager(threading.Thread): status = compute.set_hosts(hosts, logical_groups) if status != "success": - self.logger.error("ComputeManager: " + status) + # self.logger.error("ComputeManager: " + status) return False self._compute_avail_host_resources(hosts) diff --git a/valet/engine/resource_manager/resource.py b/valet/engine/resource_manager/resource.py index 88d2b31..af41456 100755 --- a/valet/engine/resource_manager/resource.py +++ 
b/valet/engine/resource_manager/resource.py @@ -15,15 +15,13 @@ """Resource - Handles data, metadata, status of resources.""" -import json import sys import time import traceback from valet.engine.optimizer.app_manager.app_topology_base import LEVELS -from valet.engine.optimizer.util import util as util from valet.engine.resource_manager.resource_base \ - import Datacenter, HostGroup, Host, LogicalGroup + import Datacenter, HostGroup, Host, LogicalGroup from valet.engine.resource_manager.resource_base import Flavor, Switch, Link @@ -64,6 +62,33 @@ class Resource(object): self.disk_avail = 0 self.nw_bandwidth_avail = 0 + def show_current_logical_groups(self): + for lgk, lg in self.logical_groups.iteritems(): + if lg.status == "enabled": + self.logger.debug("Resource: lg name = " + lgk) + self.logger.debug(" type = " + lg.group_type) + if lg.group_type == "AGGR": + for k in lg.metadata.keys(): + self.logger.debug(" key = " + k) + self.logger.debug(" vms") + for v in lg.vm_list: + self.logger.debug(" orch_id = " + v[0] + " uuid = " + v[2]) + self.logger.debug(" hosts") + for h, v in lg.vms_per_host.iteritems(): + self.logger.debug(" host = " + h) + self.logger.debug(" vms = " + str(len(lg.vms_per_host[h]))) + host = None + if h in self.hosts.keys(): + host = self.hosts[h] + elif h in self.host_groups.keys(): + host = self.host_groups[h] + else: + self.logger.error("Resource: lg member not exist") + if host is not None: + self.logger.debug(" status = " + host.status) + if lgk not in host.memberships.keys(): + self.logger.error("membership missing") + def bootstrap_from_db(self, _resource_status): """Return True if bootsrap resource from database successful.""" try: @@ -80,12 +105,8 @@ class Resource(object): self.logical_groups[lgk] = logical_group - if len(self.logical_groups) > 0: - self.logger.debug("Resource.bootstrap_from_db: logical_groups " - "loaded") - else: - self.logger.warn("Resource.bootstrap_from_db: no " - "logical_groups") + if len(self.logical_groups) == 0: + self.logger.warn("no logical_groups") flavors = _resource_status.get("flavors") if flavors: @@ -100,11 +121,8 @@ class Resource(object): self.flavors[fk] = flavor - if len(self.flavors) > 0: - self.logger.debug("Resource.bootstrap_from_db: flavors loaded") - else: - self.logger.error("Resource.bootstrap_from_db: fail loading " - "flavors") + if len(self.flavors) == 0: + self.logger.error("fail loading flavors") switches = _resource_status.get("switches") if switches: @@ -116,7 +134,6 @@ class Resource(object): self.switches[sk] = switch if len(self.switches) > 0: - self.logger.debug("Resource.bootstrap_from_db: switches loaded") for sk, s in switches.iteritems(): switch = self.switches[sk] @@ -143,14 +160,11 @@ class Resource(object): peer_links[plk] = plink switch.peer_links = peer_links - - self.logger.debug("Resource.bootstrap_from_db: switch links " - "loaded") else: - self.logger.error("Resource.bootstrap_from_db: fail loading " - "switches") + self.logger.error("fail loading switches") # storage_hosts + hosts = _resource_status.get("hosts") if hosts: for hk, h in hosts.iteritems(): @@ -184,12 +198,8 @@ class Resource(object): self.hosts[hk] = host - if len(self.hosts) > 0: - self.logger.debug("Resource.bootstrap_from_db: hosts " - "loaded") - else: - self.logger.error("Resource.bootstrap_from_db: fail " - "loading hosts") + if len(self.hosts) == 0: + self.logger.error("fail loading hosts") host_groups = _resource_status.get("host_groups") if host_groups: @@ -220,12 +230,8 @@ class Resource(object): 
self.host_groups[hgk] = host_group - if len(self.host_groups) > 0: - self.logger.debug("Resource.bootstrap_from_db: host_groups " - "loaded") - else: - self.logger.error("Resource.bootstrap_from_db: fail " - "loading host_groups") + if len(self.host_groups) == 0: + self.logger.error("fail loading host_groups") dc = _resource_status.get("datacenter") if dc: @@ -259,12 +265,8 @@ class Resource(object): elif ck in self.hosts.keys(): self.datacenter.resources[ck] = self.hosts[ck] - if len(self.datacenter.resources) > 0: - self.logger.debug("Resource.bootstrap_from_db: datacenter " - "loaded") - else: - self.logger.error("Resource.bootstrap_from_db: fail " - "loading datacenter") + if len(self.datacenter.resources) == 0: + self.logger.error("fail loading datacenter") hgs = _resource_status.get("host_groups") if hgs: @@ -283,9 +285,6 @@ class Resource(object): elif ck in self.host_groups.keys(): host_group.child_resources[ck] = self.host_groups[ck] - self.logger.debug("Resource.bootstrap_from_db: " - "host_groups'layout loaded") - hs = _resource_status.get("hosts") if hs: for hk, h in hs.iteritems(): @@ -297,19 +296,12 @@ class Resource(object): elif pk in self.host_groups.keys(): host.host_group = self.host_groups[pk] - self.logger.debug("Resource.bootstrap_from_db: " - "hosts'layout loaded") - self._update_compute_avail() self._update_storage_avail() self._update_nw_bandwidth_avail() - self.logger.debug("Resource.bootstrap_from_db: " - "resource availability updated") - except Exception: - self.logger.error("Resource.bootstrap_from_db - " - "FAILED:" + traceback.format_exc()) + self.logger.error("Resource: bootstrap_from_db:" + traceback.format_exc()) return True @@ -321,6 +313,9 @@ class Resource(object): self._update_storage_avail() self._update_nw_bandwidth_avail() + # for test + # self.show_current_logical_groups() + if store is False: return True @@ -530,14 +525,6 @@ class Resource(object): if self.datacenter.last_link_update > self.current_timestamp: last_update_time = self.datacenter.last_link_update - (resource_logfile, last_index, mode) = util.get_last_logfile( - self.config.resource_log_loc, self.config.max_log_size, - self.config.max_num_of_logs, self.datacenter.name, - self.last_log_index) - self.last_log_index = last_index - - logging = open(self.config.resource_log_loc + resource_logfile, mode) - json_logging = {} json_logging['timestamp'] = last_update_time @@ -556,23 +543,10 @@ class Resource(object): if datacenter_update is not None: json_logging['datacenter'] = datacenter_update - logged_data = json.dumps(json_logging) - - logging.write(logged_data) - logging.write("\n") - - logging.close() - - self.logger.info("Resource._store_topology_updates: log resource " - "status in " + resource_logfile) - if self.db is not None: if self.db.update_resource_status(self.datacenter.name, json_logging) is False: return None - if self.db.update_resource_log_index(self.datacenter.name, - self.last_log_index) is False: - return None return last_update_time @@ -980,17 +954,9 @@ class Resource(object): ram_allocation_ratio = self.config.default_ram_allocation_ratio static_ram_standby_ratio = 0 - if self.config.static_mem_standby_ratio > 0: - static_ram_standby_ratio = \ - float(self.config.static_mem_standby_ratio) / float(100) host.compute_avail_mem(ram_allocation_ratio, static_ram_standby_ratio) - self.logger.debug("Resource.compute_avail_resources: host (" + - hk + ")'s total_mem = " + - str(host.mem_cap) + ", avail_mem = " + - str(host.avail_mem_cap)) - cpu_allocation_ratio = 1.0 if 
len(cpu_allocation_ratio_list) > 0: cpu_allocation_ratio = min(cpu_allocation_ratio_list) @@ -999,17 +965,9 @@ class Resource(object): cpu_allocation_ratio = self.config.default_cpu_allocation_ratio static_cpu_standby_ratio = 0 - if self.config.static_cpu_standby_ratio > 0: - static_cpu_standby_ratio = \ - float(self.config.static_cpu_standby_ratio) / float(100) host.compute_avail_vCPUs(cpu_allocation_ratio, static_cpu_standby_ratio) - self.logger.debug("Resource.compute_avail_resources: host (" + - hk + ")'s total_vCPUs = " + - str(host.vCPUs) + ", avail_vCPUs = " + - str(host.avail_vCPUs)) - disk_allocation_ratio = 1.0 if len(disk_allocation_ratio_list) > 0: disk_allocation_ratio = min(disk_allocation_ratio_list) @@ -1019,18 +977,10 @@ class Resource(object): self.config.default_disk_allocation_ratio static_disk_standby_ratio = 0 - if self.config.static_local_disk_standby_ratio > 0: - static_disk_standby_ratio = \ - float(self.config.static_local_disk_standby_ratio) / float(100) host.compute_avail_disk(disk_allocation_ratio, static_disk_standby_ratio) - self.logger.debug("Resource.compute_avail_resources: host (" + - hk + ")'s total_local_disk = " + - str(host.local_disk_cap) + ", avail_local_disk = " + - str(host.avail_local_disk_cap)) - def get_flavor(self, _id): """Return flavor according to name passed in.""" flavor = None diff --git a/valet/engine/resource_manager/topology_manager.py b/valet/engine/resource_manager/topology_manager.py index 559211a..eb81fca 100755 --- a/valet/engine/resource_manager/topology_manager.py +++ b/valet/engine/resource_manager/topology_manager.py @@ -88,7 +88,7 @@ class TopologyManager(threading.Thread): last_trigger_mon = now.tm_mon last_trigger_mday = now.tm_mday - self.logger.info("TopologyManager: exit " + self.thread_name) + self.logger.info("exit topology_manager " + self.thread_name) def _run(self): @@ -104,7 +104,7 @@ class TopologyManager(threading.Thread): # TODO(GY): ignore? pass - self.logger.info("TopologyManager: --- done topology status update ---") + self.logger.info("--- done topology status update ---") def set_topology(self): """Return True if datacenter topology successfully setup.""" @@ -125,7 +125,7 @@ class TopologyManager(threading.Thread): status = topology.set_topology(datacenter, host_groups, hosts, self.resource.hosts, switches) if status != "success": - self.logger.error("TopologyManager: " + status) + # self.logger.error("TopologyManager: " + status) return False self.data_lock.acquire() @@ -421,9 +421,6 @@ class TopologyManager(threading.Thread): self.logger.warn("TopologyManager: host_group (" + _rhg.name + ") updated (enabled)") - if _rhg.parent_resource is None or \ - _hg.parent_resource.name != _rhg.parent_resource.name: - if _hg.parent_resource.name in self.resource.host_groups.keys(): _rhg.parent_resource = \ self.resource.host_groups[_hg.parent_resource.name]
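Closing note: the behavioral core of this change is the run loop in ostro.py, where the poll interval tightens from 1s to 0.1s and placement requests now take priority over Nova events, which are drained only on idle cycles; together with the fine-grained locking named in the removed conflict marker (commit c095458), this is what improves placement latency. A condensed, runnable restatement of the new loop shape, assuming a free function with callback parameters in place of the real Ostro class methods:

    import time

    def run_loop(db, place_app, handle_events, stopped):
        # Placement requests are latency-sensitive; events are bookkeeping.
        while not stopped():
            time.sleep(0.1)              # was 1s: tighter polling
            requests = db.get_requests()
            if requests is None:         # DB error: shut the daemon down
                break
            if len(requests) > 0:
                if place_app(requests) is False:
                    break
            else:                        # drain events only when idle
                events = db.get_events()
                if events is None:
                    break
                if len(events) > 0 and handle_events(events) is False:
                    break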