Use IP address over FQDN for dcmanager rabbit/db connections
In the past few months, _most_ StarlingX services have moved from static IP addressing to FQDN resolution, in support of the management network reconfig feature. While doing DC scalability testing, a transient domain resolution (controller.internal) issue was found after adding approximately 250 subclouds to the system; it involved the rabbitmq/RPC subsystem. The error message returned was similar to: "OSError: failed to resolve broker hostname". The rabbitmq/amqp library calls a _connect() function, which in turn calls the Python socket getaddrinfo(). Multiple attempts were made to reproduce the scenario in a non-scaled lab by stressing getaddrinfo(), getting dnsmasq up to ~40% CPU usage, but the same error was not returned. Testing was done on the DC scale lab by manually changing the rabbit and DB config files, and this confirmed that using the static floating IP (avoiding domain name resolution altogether) resolved the issue. It was decided to revert the FQDN aspect of the dcmanager and dcorch modules for now, as the management network reconfiguration feature would not even apply to an AIO-DX system controller at this time. This may be re-evaluated in the future, at which point a deeper dive into the rabbit/RPC usage should be considered. Testing: - Install an AIO-DX system controller and install a subcloud. Ensure the subcloud is managed and online. - Ensure the dcmanager.conf and dcorch.conf files use an IP address in their transport_url and database connection parameters. Depends-On: https://review.opendev.org/c/starlingx/config/+/932013 Story: 2010722 Task: 48447 Change-Id: Icd067441dd08321936eb03498ff65241fac0010e
This commit is contained in:
parent
97dde7d666
commit
d74a25a7e7
@ -73,7 +73,7 @@ class dcmanager (
|
||||
}
|
||||
|
||||
dcmanager_config {
|
||||
'DEFAULT/transport_url': value => $::platform::amqp::params::transport_url;
|
||||
'DEFAULT/transport_url': value=> "rabbit://${rabbit_userid}:${rabbit_password}@${rabbit_host}:${rabbit_port}"
|
||||
}
|
||||
|
||||
dcmanager_config {
|
||||
|
@ -93,7 +93,7 @@ class dcorch (
|
||||
}
|
||||
|
||||
dcorch_config {
|
||||
'DEFAULT/transport_url': value => $::platform::amqp::params::transport_url;
|
||||
'DEFAULT/transport_url': value=> "rabbit://${rabbit_userid}:${rabbit_password}@${rabbit_host}:${rabbit_port}"
|
||||
}
|
||||
|
||||
dcorch_config {
|
||||
|
@ -12,6 +12,7 @@ class platform::dcmanager::params (
|
||||
$iso_base_dir_target = '/var/www/pages/iso',
|
||||
$state_workers = undef,
|
||||
$audit_worker_workers = undef,
|
||||
$rabbit_host = 'localhost',
|
||||
) {
|
||||
include ::platform::params
|
||||
|
||||
@ -63,29 +64,6 @@ class platform::dcmanager
|
||||
|
||||
$system_mode = $::platform::params::system_mode
|
||||
|
||||
# FQDN can be used after:
|
||||
# - after the bootstrap for any installation
|
||||
# - mate controller uses FQDN if mgmt::params::fqdn_ready is present
|
||||
# mate controller can use FQDN before the bootstrap flag
|
||||
# - just AIO-SX can use FQDN during the an upgrade. For other installs
|
||||
# the active controller in older release can resolve the .internal FQDN
|
||||
# when the mate controller is updated to N+1 version
|
||||
if (!str2bool($::is_upgrade_do_not_use_fqdn) or $system_mode == 'simplex') {
|
||||
if (str2bool($::is_bootstrap_completed)) {
|
||||
$fqdn_ready = true
|
||||
} else {
|
||||
if ($::platform::network::mgmt::params::fqdn_ready != undef) {
|
||||
$fqdn_ready = $::platform::network::mgmt::params::fqdn_ready
|
||||
}
|
||||
else {
|
||||
$fqdn_ready = false
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
$fqdn_ready = false
|
||||
}
|
||||
|
||||
# If not defined, worker values can vary from 4 to 8 depending
|
||||
# on the number of physical cores and memory available
|
||||
if $::platform::dcmanager::params::state_workers == undef {
|
||||
@ -101,10 +79,7 @@ class platform::dcmanager
|
||||
}
|
||||
|
||||
class { '::dcmanager':
|
||||
rabbit_host => (str2bool($fqdn_ready)) ? {
|
||||
true => $::platform::amqp::params::host,
|
||||
default => $::platform::amqp::params::host_url,
|
||||
},
|
||||
rabbit_host => $::platform::dcmanager::params::rabbit_host,
|
||||
rabbit_port => $::platform::amqp::params::port,
|
||||
rabbit_userid => $::platform::amqp::params::auth_user,
|
||||
rabbit_password => $::platform::amqp::params::auth_password,
|
||||
|
@ -18,6 +18,7 @@ class platform::dcorch::params (
|
||||
$sysinv_api_proxy_client_timeout = '600s',
|
||||
$sysinv_api_proxy_server_timeout = '600s',
|
||||
$engine_workers = undef,
|
||||
$rabbit_host = 'localhost',
|
||||
) {
|
||||
include ::platform::params
|
||||
|
||||
@ -69,29 +70,6 @@ class platform::dcorch
|
||||
|
||||
$system_mode = $::platform::params::system_mode
|
||||
|
||||
# FQDN can be used after:
|
||||
# - after the bootstrap for any installation
|
||||
# - mate controller uses FQDN if mgmt::params::fqdn_ready is present
|
||||
# mate controller can use FQDN before the bootstrap flag
|
||||
# - just AIO-SX can use FQDN during the an upgrade. For other installs
|
||||
# the active controller in older release can resolve the .internal FQDN
|
||||
# when the mate controller is updated to N+1 version
|
||||
if (!str2bool($::is_upgrade_do_not_use_fqdn) or $system_mode == 'simplex') {
|
||||
if (str2bool($::is_bootstrap_completed)) {
|
||||
$fqdn_ready = true
|
||||
} else {
|
||||
if ($::platform::network::mgmt::params::fqdn_ready != undef) {
|
||||
$fqdn_ready = $::platform::network::mgmt::params::fqdn_ready
|
||||
}
|
||||
else {
|
||||
$fqdn_ready = false
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
$fqdn_ready = false
|
||||
}
|
||||
|
||||
# If not defined, worker values can vary from 4 to 6 depending
|
||||
# on the number of physical cores and memory available
|
||||
if $::platform::dcorch::params::engine_workers == undef {
|
||||
@ -101,10 +79,7 @@ class platform::dcorch
|
||||
}
|
||||
|
||||
class { '::dcorch':
|
||||
rabbit_host => (str2bool($fqdn_ready)) ? {
|
||||
true => $::platform::amqp::params::host,
|
||||
default => $::platform::amqp::params::host_url,
|
||||
},
|
||||
rabbit_host => $::platform::dcorch::params::rabbit_host,
|
||||
rabbit_port => $::platform::amqp::params::port,
|
||||
rabbit_userid => $::platform::amqp::params::auth_user,
|
||||
rabbit_password => $::platform::amqp::params::auth_password,
|
||||
|
Loading…
x
Reference in New Issue
Block a user