Prevent downtime when client cert is regenerated
When the client certificate is regenerated, the keepalive connection to the NSX endpoint is broken. This patch detects that condition and invokes a callback to give the nsxlib user a chance to reload the certificate, then regenerates the connection pool to restore connectivity. Change-Id: I0a334df4dd05feb784b9ff8bdc988ac41878863c
This commit is contained in:
parent
2ac012456d
commit
2b36887f5c
@ -94,6 +94,9 @@ class NsxLib(object):
|
|||||||
def reinitialize_cluster(self, resource, event, trigger, **kwargs):
|
def reinitialize_cluster(self, resource, event, trigger, **kwargs):
|
||||||
self.cluster._reinit_cluster()
|
self.cluster._reinit_cluster()
|
||||||
|
|
||||||
|
def subscribe(self, callback, event):
    """Register ``callback`` for ``event`` on the underlying cluster.

    This is a pass-through to ``self.cluster.subscribe``; the arguments
    are forwarded unchanged and nothing is returned.

    :param callback: callable to register.
    :param event: event identifier the callback is registered for.
    """
    cluster = self.cluster
    cluster.subscribe(callback, event)
|
||||||
|
|
||||||
|
|
||||||
class NsxLibPortMirror(utils.NsxLibApiBase):
|
class NsxLibPortMirror(utils.NsxLibApiBase):
|
||||||
|
|
||||||
|
@ -24,12 +24,27 @@ from vmware_nsxlib.v3 import utils
|
|||||||
|
|
||||||
LOG = log.getLogger(__name__)
|
LOG = log.getLogger(__name__)
|
||||||
|
|
||||||
ERRORS = {requests.codes.NOT_FOUND: exceptions.ResourceNotFound,
|
|
||||||
requests.codes.PRECONDITION_FAILED: exceptions.StaleRevision}
|
|
||||||
DEFAULT_ERROR = exceptions.ManagerError
|
|
||||||
NULL_CURSOR_PREFIX = '0000'
|
NULL_CURSOR_PREFIX = '0000'
|
||||||
|
|
||||||
|
|
||||||
|
def http_error_to_exception(status_code, error_code):
    """Map an HTTP status and optional error code to an exception class.

    :param status_code: HTTP status code of the failed response.
    :param error_code: error code reported with the failure, or None.
        It may be an int or a string; it is converted to a string
        before lookup because the per-status tables are keyed by
        strings.
    :returns: the exception class to raise. Falls back to
        ``exceptions.ManagerError`` when the status code is unknown, or
        when the status has a per-error-code table that does not list
        ``error_code``.
    """
    errors = {requests.codes.NOT_FOUND: exceptions.ResourceNotFound,
              requests.codes.PRECONDITION_FAILED: exceptions.StaleRevision,
              requests.codes.INTERNAL_SERVER_ERROR:
              {'99': exceptions.ClientCertificateNotTrusted}}

    mapped = errors.get(status_code)
    if isinstance(mapped, dict):
        # Choose based on error code. Normalize to str so that an int
        # code (e.g. 99) matches the string keys of the table.
        if error_code is None:
            mapped = None
        else:
            mapped = mapped.get(str(error_code))

    if mapped is not None:
        return mapped

    # default exception
    return exceptions.ManagerError
|
||||||
|
|
||||||
|
|
||||||
class RESTClient(object):
|
class RESTClient(object):
|
||||||
|
|
||||||
_VERB_RESP_CODES = {
|
_VERB_RESP_CODES = {
|
||||||
@ -96,7 +111,7 @@ class RESTClient(object):
|
|||||||
|
|
||||||
def _raise_error(self, status_code, operation, result_msg,
|
def _raise_error(self, status_code, operation, result_msg,
|
||||||
error_code=None):
|
error_code=None):
|
||||||
error = ERRORS.get(status_code, DEFAULT_ERROR)
|
error = http_error_to_exception(status_code, error_code)
|
||||||
raise error(manager='', operation=operation, details=result_msg,
|
raise error(manager='', operation=operation, details=result_msg,
|
||||||
error_code=error_code)
|
error_code=error_code)
|
||||||
|
|
||||||
@ -223,7 +238,7 @@ class NSX3Client(JSONRESTClient):
|
|||||||
def _raise_error(self, status_code, operation, result_msg,
|
def _raise_error(self, status_code, operation, result_msg,
|
||||||
error_code=None):
|
error_code=None):
|
||||||
"""Override the Rest client errors to add the manager IPs"""
|
"""Override the Rest client errors to add the manager IPs"""
|
||||||
error = ERRORS.get(status_code, DEFAULT_ERROR)
|
error = http_error_to_exception(status_code, error_code)
|
||||||
raise error(manager=self.nsx_api_managers,
|
raise error(manager=self.nsx_api_managers,
|
||||||
operation=operation,
|
operation=operation,
|
||||||
details=result_msg,
|
details=result_msg,
|
||||||
|
@ -33,6 +33,7 @@ from requests import exceptions as requests_exceptions
|
|||||||
from vmware_nsxlib._i18n import _, _LI, _LW
|
from vmware_nsxlib._i18n import _, _LI, _LW
|
||||||
from vmware_nsxlib.v3 import client as nsx_client
|
from vmware_nsxlib.v3 import client as nsx_client
|
||||||
from vmware_nsxlib.v3 import exceptions
|
from vmware_nsxlib.v3 import exceptions
|
||||||
|
from vmware_nsxlib.v3 import nsx_constants
|
||||||
|
|
||||||
|
|
||||||
LOG = log.getLogger(__name__)
|
LOG = log.getLogger(__name__)
|
||||||
@ -206,6 +207,12 @@ class Endpoint(object):
|
|||||||
self._state = EndpointState.INITIALIZED
|
self._state = EndpointState.INITIALIZED
|
||||||
self._last_updated = datetime.datetime.now()
|
self._last_updated = datetime.datetime.now()
|
||||||
|
|
||||||
|
def regenerate_pool(self):
    """Replace this endpoint's connection pool with a newly built one.

    The new pool copies ``min_size``, ``max_size`` and the ``create``
    factory from the current pool and is always ordered as a stack.
    The previous pool object is simply dropped; nothing here closes
    the connections it holds.
    """
    previous = self.pool
    fresh = pools.Pool(min_size=previous.min_size,
                       max_size=previous.max_size,
                       order_as_stack=True,
                       create=previous.create)
    self.pool = fresh
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def last_updated(self):
|
def last_updated(self):
|
||||||
return self._last_updated
|
return self._last_updated
|
||||||
@ -260,6 +267,7 @@ class ClusteredAPI(object):
|
|||||||
|
|
||||||
self._http_provider = http_provider
|
self._http_provider = http_provider
|
||||||
self._keepalive_interval = keepalive_interval
|
self._keepalive_interval = keepalive_interval
|
||||||
|
self._callbacks = {}
|
||||||
|
|
||||||
def _init_cluster(*args, **kwargs):
|
def _init_cluster(*args, **kwargs):
|
||||||
self._init_endpoints(providers,
|
self._init_endpoints(providers,
|
||||||
@ -358,11 +366,30 @@ class ClusteredAPI(object):
|
|||||||
if up == len(self._endpoints)
|
if up == len(self._endpoints)
|
||||||
else ClusterHealth.ORANGE)
|
else ClusterHealth.ORANGE)
|
||||||
|
|
||||||
|
def subscribe(self, callback, event):
    """Add ``callback`` to the list of callbacks kept for ``event``.

    The first subscription for an event creates its list; later ones
    are appended, so callbacks are stored in subscription order.

    :param callback: callable to store for the event.
    :param event: key under which the callback is stored.
    """
    self._callbacks.setdefault(event, []).append(callback)
|
||||||
|
|
||||||
|
def _notify(self, event):
    """Invoke every callback subscribed to ``event``.

    Callbacks are called with no arguments, in the order they were
    subscribed. An event with no subscribers is silently ignored.
    Exceptions raised by a callback are not caught here.

    :param event: key identifying which callbacks to invoke.
    """
    # Iterate over a snapshot: a callback that subscribes to the same
    # event while being notified must not extend the running loop
    # (a self-resubscribing callback would otherwise never terminate).
    for callback in list(self._callbacks.get(event, ())):
        callback()
|
||||||
|
|
||||||
def _validate(self, endpoint):
|
def _validate(self, endpoint):
|
||||||
try:
|
try:
|
||||||
with endpoint.pool.item() as conn:
|
with endpoint.pool.item() as conn:
|
||||||
self._http_provider.validate_connection(self, endpoint, conn)
|
self._http_provider.validate_connection(self, endpoint, conn)
|
||||||
endpoint.set_state(EndpointState.UP)
|
endpoint.set_state(EndpointState.UP)
|
||||||
|
except exceptions.ClientCertificateNotTrusted:
|
||||||
|
LOG.warning(_LW("Failed to validate API cluster endpoint "
|
||||||
|
"'%(ep)s' due to untrusted client certificate"),
|
||||||
|
{'ep': endpoint})
|
||||||
|
# allow nsxlib user to reload certificate that possibly changed
|
||||||
|
self._notify(nsx_constants.ON_CLIENT_CERT_UNTRUSTED)
|
||||||
|
# regenerate connection pool based on new certificate
|
||||||
|
endpoint.regenerate_pool()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
endpoint.set_state(EndpointState.DOWN)
|
endpoint.set_state(EndpointState.DOWN)
|
||||||
LOG.warning(_LW("Failed to validate API cluster endpoint "
|
LOG.warning(_LW("Failed to validate API cluster endpoint "
|
||||||
|
@ -92,6 +92,10 @@ class StaleRevision(ManagerError):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class ClientCertificateNotTrusted(ManagerError):
    """Manager error signalling that the certificate is not trusted."""

    message = _("Certificate not trusted")
|
||||||
|
|
||||||
|
|
||||||
class ServiceClusterUnavailable(ManagerError):
|
class ServiceClusterUnavailable(ManagerError):
|
||||||
message = _("Service cluster: '%(cluster_id)s' is unavailable. Please, "
|
message = _("Service cluster: '%(cluster_id)s' is unavailable. Please, "
|
||||||
"check NSX setup and/or configuration")
|
"check NSX setup and/or configuration")
|
||||||
|
@ -110,3 +110,6 @@ ERR_CODE_IPAM_IP_NOT_IN_POOL = 5110
|
|||||||
ERR_CODE_IPAM_RANGE_MODIFY = 5602
|
ERR_CODE_IPAM_RANGE_MODIFY = 5602
|
||||||
ERR_CODE_IPAM_RANGE_DELETE = 5015
|
ERR_CODE_IPAM_RANGE_DELETE = 5015
|
||||||
ERR_CODE_IPAM_RANGE_SHRUNK = 5016
|
ERR_CODE_IPAM_RANGE_SHRUNK = 5016
|
||||||
|
|
||||||
|
# NsxLib events
|
||||||
|
ON_CLIENT_CERT_UNTRUSTED = 'on_client_cert_untrusted'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user