Prevent downtime when client cert is regenerated
When the client certificate is regenerated, the keepalive connection to the NSX endpoint is broken. This patch detects that condition, invokes a callback to give the nsxlib user a chance to reload the certificate, and then regenerates the connection pool to restore connectivity. Change-Id: I0a334df4dd05feb784b9ff8bdc988ac41878863c
This commit is contained in:
parent
2ac012456d
commit
2b36887f5c
@ -94,6 +94,9 @@ class NsxLib(object):
|
||||
def reinitialize_cluster(self, resource, event, trigger, **kwargs):
    """Re-initialize the underlying cluster.

    The ``resource``, ``event``, ``trigger`` and keyword arguments are
    accepted but not used; only the cluster's ``_reinit_cluster`` is
    invoked.
    """
    cluster = self.cluster
    cluster._reinit_cluster()
|
||||
|
||||
def subscribe(self, callback, event):
    """Register ``callback`` for ``event`` by delegating to the cluster.

    :param callback: callable forwarded to the cluster's ``subscribe``.
    :param event: event identifier forwarded to the cluster's ``subscribe``.
    """
    cluster = self.cluster
    cluster.subscribe(callback, event)
|
||||
|
||||
|
||||
class NsxLibPortMirror(utils.NsxLibApiBase):
|
||||
|
||||
|
@ -24,12 +24,27 @@ from vmware_nsxlib.v3 import utils
|
||||
|
||||
LOG = log.getLogger(__name__)
|
||||
|
||||
ERRORS = {requests.codes.NOT_FOUND: exceptions.ResourceNotFound,
|
||||
requests.codes.PRECONDITION_FAILED: exceptions.StaleRevision}
|
||||
DEFAULT_ERROR = exceptions.ManagerError
|
||||
NULL_CURSOR_PREFIX = '0000'
|
||||
|
||||
|
||||
def http_error_to_exception(status_code, error_code):
    """Map an HTTP status and optional error code to an exception class.

    :param status_code: HTTP status code of the failed response.
    :param error_code: error code reported with the response, or None.
        It may be an int or a string; it is matched by its string form.
    :returns: the exception class to raise. ``exceptions.ManagerError`` is
        returned when no specific mapping exists for the given status
        (or for the given error code under a status with sub-mappings).
    """
    errors = {requests.codes.NOT_FOUND: exceptions.ResourceNotFound,
              requests.codes.PRECONDITION_FAILED: exceptions.StaleRevision,
              requests.codes.INTERNAL_SERVER_ERROR:
              {'99': exceptions.ClientCertificateNotTrusted}}

    mapped = errors.get(status_code)
    if isinstance(mapped, dict):
        # Several errors share this status: choose based on error code.
        # The sub-mapping is keyed by string, so normalize the incoming
        # code; otherwise an integer code (e.g. 99) would never match.
        mapped = mapped.get(str(error_code))

    if mapped is None:
        # default exception
        return exceptions.ManagerError
    return mapped
|
||||
|
||||
|
||||
class RESTClient(object):
|
||||
|
||||
_VERB_RESP_CODES = {
|
||||
@ -96,7 +111,7 @@ class RESTClient(object):
|
||||
|
||||
def _raise_error(self, status_code, operation, result_msg,
                 error_code=None):
    """Raise the exception that corresponds to a failed request.

    :param status_code: HTTP status code of the failed response.
    :param operation: description of the operation that failed.
    :param result_msg: details to attach to the raised exception.
    :param error_code: optional error code reported with the response.
    :raises: the exception class chosen by ``http_error_to_exception``.
    """
    # The former ERRORS.get(...) lookup was immediately overwritten by the
    # call below, so only the effective lookup is kept.
    error = http_error_to_exception(status_code, error_code)
    raise error(manager='', operation=operation, details=result_msg,
                error_code=error_code)
|
||||
|
||||
@ -223,7 +238,7 @@ class NSX3Client(JSONRESTClient):
|
||||
def _raise_error(self, status_code, operation, result_msg,
|
||||
error_code=None):
|
||||
"""Override the Rest client errors to add the manager IPs"""
|
||||
error = ERRORS.get(status_code, DEFAULT_ERROR)
|
||||
error = http_error_to_exception(status_code, error_code)
|
||||
raise error(manager=self.nsx_api_managers,
|
||||
operation=operation,
|
||||
details=result_msg,
|
||||
|
@ -33,6 +33,7 @@ from requests import exceptions as requests_exceptions
|
||||
from vmware_nsxlib._i18n import _, _LI, _LW
|
||||
from vmware_nsxlib.v3 import client as nsx_client
|
||||
from vmware_nsxlib.v3 import exceptions
|
||||
from vmware_nsxlib.v3 import nsx_constants
|
||||
|
||||
|
||||
LOG = log.getLogger(__name__)
|
||||
@ -206,6 +207,12 @@ class Endpoint(object):
|
||||
self._state = EndpointState.INITIALIZED
|
||||
self._last_updated = datetime.datetime.now()
|
||||
|
||||
def regenerate_pool(self):
    """Replace ``self.pool`` with a newly constructed pool.

    The new pool reuses the current pool's ``min_size``, ``max_size`` and
    ``create`` callable, and is always built with ``order_as_stack=True``.
    """
    current = self.pool
    replacement = pools.Pool(min_size=current.min_size,
                             max_size=current.max_size,
                             order_as_stack=True,
                             create=current.create)
    self.pool = replacement
|
||||
|
||||
@property
def last_updated(self):
    """Return the value stored in ``_last_updated`` (read-only access)."""
    return self._last_updated
|
||||
@ -260,6 +267,7 @@ class ClusteredAPI(object):
|
||||
|
||||
self._http_provider = http_provider
|
||||
self._keepalive_interval = keepalive_interval
|
||||
self._callbacks = {}
|
||||
|
||||
def _init_cluster(*args, **kwargs):
|
||||
self._init_endpoints(providers,
|
||||
@ -358,11 +366,30 @@ class ClusteredAPI(object):
|
||||
if up == len(self._endpoints)
|
||||
else ClusterHealth.ORANGE)
|
||||
|
||||
def subscribe(self, callback, event):
    """Append ``callback`` to the list of callbacks kept for ``event``.

    :param callback: callable to store; ``_notify`` calls it with no
        arguments.
    :param event: key under which the callback is stored in
        ``self._callbacks``; a new list is created on first use.
    """
    self._callbacks.setdefault(event, []).append(callback)
|
||||
|
||||
def _notify(self, event):
|
||||
if event in self._callbacks:
|
||||
for callback in self._callbacks[event]:
|
||||
callback()
|
||||
|
||||
def _validate(self, endpoint):
|
||||
try:
|
||||
with endpoint.pool.item() as conn:
|
||||
self._http_provider.validate_connection(self, endpoint, conn)
|
||||
endpoint.set_state(EndpointState.UP)
|
||||
except exceptions.ClientCertificateNotTrusted:
|
||||
LOG.warning(_LW("Failed to validate API cluster endpoint "
|
||||
"'%(ep)s' due to untrusted client certificate"),
|
||||
{'ep': endpoint})
|
||||
# allow nsxlib user to reload certificate that possibly changed
|
||||
self._notify(nsx_constants.ON_CLIENT_CERT_UNTRUSTED)
|
||||
# regenerate connection pool based on new certificate
|
||||
endpoint.regenerate_pool()
|
||||
except Exception as e:
|
||||
endpoint.set_state(EndpointState.DOWN)
|
||||
LOG.warning(_LW("Failed to validate API cluster endpoint "
|
||||
|
@ -92,6 +92,10 @@ class StaleRevision(ManagerError):
|
||||
pass
|
||||
|
||||
|
||||
class ClientCertificateNotTrusted(ManagerError):
    """Manager error carrying the "Certificate not trusted" message."""

    message = _("Certificate not trusted")
|
||||
|
||||
|
||||
class ServiceClusterUnavailable(ManagerError):
|
||||
message = _("Service cluster: '%(cluster_id)s' is unavailable. Please, "
|
||||
"check NSX setup and/or configuration")
|
||||
|
@ -110,3 +110,6 @@ ERR_CODE_IPAM_IP_NOT_IN_POOL = 5110
|
||||
ERR_CODE_IPAM_RANGE_MODIFY = 5602
|
||||
ERR_CODE_IPAM_RANGE_DELETE = 5015
|
||||
ERR_CODE_IPAM_RANGE_SHRUNK = 5016
|
||||
|
||||
# NsxLib events
|
||||
ON_CLIENT_CERT_UNTRUSTED = 'on_client_cert_untrusted'
|
||||
|
Loading…
Reference in New Issue
Block a user