Enhanced Drydock Health Check

- Added a database connection check to the health check.
- Added a MaaS connection check to the health check.

Change-Id: I6c771a9ed6a278eb75dbc0f2e503088ff9046149
This commit is contained in:
Aaron Sheffield 2018-03-12 12:21:01 -05:00
parent b8e5d932d3
commit cff99e4d1c
5 changed files with 72 additions and 6 deletions

View File

@ -55,7 +55,8 @@ def start_api(state_manager=None, ingester=None, orchestrator=None):
# v1.0 of Drydock API
v1_0_routes = [
# API for managing orchestrator tasks
('/health', HealthResource()),
('/health', HealthResource(state_manager=state_manager,
orchestrator=orchestrator)),
('/tasks',
TasksResource(state_manager=state_manager,
orchestrator=orchestrator)),

View File

@ -12,8 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import falcon
import json
from drydock_provisioner.control.base import BaseResource
from drydock_provisioner.drivers.node.maasdriver.actions.node import ValidateNodeServices
from drydock_provisioner.objects.fields import ActionResult
import drydock_provisioner.objects.fields as hd_fields
class HealthResource(BaseResource):
@ -21,9 +25,49 @@ class HealthResource(BaseResource):
Return an empty response body (HTTP 204) when Drydock is healthy;
a failed check returns HTTP 500 with a JSON error body.
"""
def __init__(self, state_manager=None, orchestrator=None, **kwargs):
    """Set up the health resource with the collaborators it probes.

    :param state_manager: instance of Drydock state_manager
    :param orchestrator: orchestrator instance, stored for use by the
                         request handlers
    :param kwargs: remaining keyword arguments, forwarded untouched to
                   the parent initializer
    """
    super().__init__(**kwargs)
    # The two collaborators are independent; both default to None.
    self.orchestrator = orchestrator
    self.state_manager = state_manager
def on_get(self, req, resp):
    """Report Drydock health by probing the database and MaaS.

    Both probes always run, so a single request reports every failing
    dependency instead of only the last one checked.

    Returns 204 on success, otherwise 500 with a response body. The body
    is a JSON object with ``type``, ``message`` and ``retry`` keys;
    ``message`` names each failed probe, joined by ``'; '``.

    :param req: falcon request object (not read by this handler)
    :param resp: falcon response object; ``status`` is always set and
                 ``body`` is set only on failure
    """
    # Function-scope import keeps this handler self-contained.
    import logging

    logger = logging.getLogger(__name__)
    failures = []

    # Test database connection
    try:
        if self.state_manager.get_now() is None:
            raise Exception('None received from database for now()')
    except Exception:
        # Best-effort probe: record the failure and keep going, but leave
        # a traceback in the log so the root cause is not lost.
        logger.exception('Health check: database probe failed')
        failures.append('Database error')

    # Test MaaS connection
    try:
        task = self.orchestrator.create_task(
            action=hd_fields.OrchestratorAction.Noop)
        maas_validation = ValidateNodeServices(task, self.orchestrator,
                                               self.state_manager)
        maas_validation.start()
        # NOTE(review): this compares a task *status* against an
        # ActionResult value - confirm get_status() is the accessor that
        # actually yields ActionResult members.
        if maas_validation.task.get_status() == ActionResult.Failure:
            raise Exception('MaaS task failure')
    except Exception:
        logger.exception('Health check: MaaS probe failed')
        failures.append('MaaS error')

    if failures:
        # Single-failure bodies are unchanged ('Database error' or
        # 'MaaS error'); when both probes fail, both are reported.
        resp.body = json.dumps({
            'type': 'error',
            'message': '; '.join(failures),
            'retry': True
        })
        resp.status = falcon.HTTP_500
    else:
        resp.status = falcon.HTTP_204

View File

@ -66,7 +66,7 @@ class ValidateNodeServices(BaseMaasAction):
ctx_type='NA')
self.task.success()
if self.maas_client.test_authentication():
self.logger.info("Able to authenitcate with MaaS API.")
self.logger.info("Able to authenticate with MaaS API.")
self.task.add_status_msg(
msg='Able to authenticate with MaaS API.',
error=False,

View File

@ -667,3 +667,24 @@ class DrydockState(object):
except Exception as ex:
self.logger.error("Error selecting build data.", exc_info=ex)
raise errors.BuildDataError("Error selecting build data.")
def get_now(self):
    """Query the database for its current timestamp.

    Executes ``SELECT now()`` on a connection taken from
    ``self.db_engine`` and reads the ``now`` column of the first row.

    :returns: the ``now`` value of the first row, or None when no row
              came back, the value is falsy, or any exception was raised
              while connecting or querying (the exception is logged, not
              propagated)
    """
    try:
        with self.db_engine.connect() as conn:
            row = conn.execute(sql.text("SELECT now()")).first()
            return row.now if row is not None and row.now else None
    except Exception:
        # Deliberately best-effort: callers get None instead of an
        # exception. One log record with the traceback replaces the
        # earlier pair of error() calls for the same failure.
        self.logger.error("Error querying for now()", exc_info=True)
        return None

View File

@ -18,8 +18,8 @@ from drydock_provisioner.control.health import HealthResource
import falcon
def test_get_health(mocker):
api = HealthResource()
def test_get_health(mocker, deckhand_orchestrator, drydock_state):
api = HealthResource(state_manager=drydock_state, orchestrator=deckhand_orchestrator)
# Configure mocked request and response
req = mocker.MagicMock(spec=falcon.Request)