From d277323b3e9b8ad7d84bbad0208cd9d1f02171a3 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Mon, 30 Oct 2023 14:59:47 +0000 Subject: [PATCH] prometheus: Support overriding address of scrape targets In many cases we use the kolla_address filter to look up the IP address of the current host or another host on a particular network interface. This filter uses the host's facts to determine the IP, meaning that we must have gathered facts for the host, even if it is outside of a requested --limit. This is a limitation, since it requires that all hosts be reachable, even if we are not directly configuring them. Most instances of this cross-host fact referencing involve controllers, since they host clustered services. The only instance found to affect compute nodes is in the prometheus role, where Prometheus server needs to know the IP address of all targets in its scrape configs. If we are able to specify the address of the scrape targets as a static variable such as a host variable, then facts would not be required for compute nodes outside of the --limit. Removing the requirement to have facts for all compute nodes has benefits for performance (gathering facts for all hosts can take a long time) and fault tolerance (we can operate when some compute hosts are unreachable). This change modifies the kolla_address filter to accept an optional override_var argument which can be used to specify the name of a host variable that may override the returned IP address. This is used in the Prometheus server configuration to allow specifying the IP address used by Prometheus server when collecting metrics from exporters using a 'prometheus_target_address' host variable. If specified, this takes precedence over the API interface address currently used. This makes it possible to statically set the target address via prometheus_target_address and avoid the cross-host fact reference. This is not a complete solution because it is not yet possible to skip the cross-host fact gathering step. 
Partial-Bug: #2041860 Change-Id: I207ca56362de00d8ec578333eab9e1a72e7bcd19 --- .../prometheus/templates/prometheus.yml.j2 | 31 ++++----- .../prometheus-guide.rst | 13 ++++ kolla_ansible/kolla_address.py | 25 ++++++- .../tests/unit/test_address_filters.py | 65 +++++++++++++++++++ ...theus-target-address-4d2d5624ee6ae5a0.yaml | 6 ++ 5 files changed, 124 insertions(+), 16 deletions(-) create mode 100644 releasenotes/notes/prometheus-target-address-4d2d5624ee6ae5a0.yaml diff --git a/ansible/roles/prometheus/templates/prometheus.yml.j2 b/ansible/roles/prometheus/templates/prometheus.yml.j2 index 99d1298a61..f0e43f13c6 100644 --- a/ansible/roles/prometheus/templates/prometheus.yml.j2 +++ b/ansible/roles/prometheus/templates/prometheus.yml.j2 @@ -24,7 +24,7 @@ scrape_configs: static_configs: {% for host in groups['prometheus'] %} - targets: - - '{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ prometheus_port }}' + - '{{ 'api' | kolla_address(host, override_var='prometheus_target_address') | put_address_in_context('url') }}:{{ prometheus_port }}' {% if hostvars[host].prometheus_instance_label | default(false, true) %} labels: instance: "{{ hostvars[host].prometheus_instance_label }}" @@ -36,7 +36,7 @@ scrape_configs: static_configs: {% for host in groups['prometheus-node-exporter'] %} - targets: - - '{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ hostvars[host]['prometheus_node_exporter_port'] }}' + - '{{ 'api' | kolla_address(host, override_var='prometheus_target_address') | put_address_in_context('url') }}:{{ hostvars[host]['prometheus_node_exporter_port'] }}' {% if hostvars[host].prometheus_instance_label | default(false, true) %} labels: instance: "{{ hostvars[host].prometheus_instance_label }}" @@ -56,7 +56,7 @@ scrape_configs: static_configs: {% for host in groups['prometheus-mysqld-exporter'] %} - targets: - - '{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ 
hostvars[host]['prometheus_mysqld_exporter_port'] }}' + - '{{ 'api' | kolla_address(host, override_var='prometheus_target_address') | put_address_in_context('url') }}:{{ hostvars[host]['prometheus_mysqld_exporter_port'] }}' {% if hostvars[host].prometheus_instance_label | default(false, true) %} labels: instance: "{{ hostvars[host].prometheus_instance_label }}" @@ -69,7 +69,7 @@ scrape_configs: static_configs: {% for host in groups['loadbalancer'] %} - targets: - - '{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ prometheus_haproxy_exporter_port }}' + - '{{ 'api' | kolla_address(host, override_var='prometheus_target_address') | put_address_in_context('url') }}:{{ prometheus_haproxy_exporter_port }}' {% if hostvars[host].prometheus_instance_label | default(false, true) %} labels: instance: "{{ hostvars[host].prometheus_instance_label }}" @@ -82,7 +82,7 @@ scrape_configs: static_configs: {% for host in groups['rabbitmq'] %} - targets: - - '{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ hostvars[host]['prometheus_rabbitmq_exporter_port'] }}' + - '{{ 'api' | kolla_address(host, override_var='prometheus_target_address') | put_address_in_context('url') }}:{{ hostvars[host]['prometheus_rabbitmq_exporter_port'] }}' {% if hostvars[host].prometheus_instance_label | default(false, true) %} labels: instance: "{{ hostvars[host].prometheus_instance_label }}" @@ -95,7 +95,7 @@ scrape_configs: static_configs: {% for host in groups['prometheus-memcached-exporter'] %} - targets: - - '{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ hostvars[host]['prometheus_memcached_exporter_port'] }}' + - '{{ 'api' | kolla_address(host, override_var='prometheus_target_address') | put_address_in_context('url') }}:{{ hostvars[host]['prometheus_memcached_exporter_port'] }}' {% if hostvars[host].prometheus_instance_label | default(false, true) %} labels: instance: "{{ hostvars[host].prometheus_instance_label }}" @@ -108,7 +108,7 @@ 
scrape_configs: static_configs: {% for host in groups["prometheus-cadvisor"] %} - targets: - - '{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ hostvars[host]['prometheus_cadvisor_port'] }}' + - '{{ 'api' | kolla_address(host, override_var='prometheus_target_address') | put_address_in_context('url') }}:{{ hostvars[host]['prometheus_cadvisor_port'] }}' {% if hostvars[host].prometheus_instance_label | default(false, true) %} labels: instance: "{{ hostvars[host].prometheus_instance_label }}" @@ -121,7 +121,7 @@ scrape_configs: static_configs: {% for host in groups['fluentd'] %} - targets: - - '{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ hostvars[host]['prometheus_fluentd_integration_port'] }}' + - '{{ 'api' | kolla_address(host, override_var='prometheus_target_address') | put_address_in_context('url') }}:{{ hostvars[host]['prometheus_fluentd_integration_port'] }}' {% if hostvars[host].prometheus_instance_label | default(false, true) %} labels: instance: "{{ hostvars[host].prometheus_instance_label }}" @@ -159,7 +159,7 @@ scrape_configs: static_configs: {% for host in groups["prometheus-elasticsearch-exporter"] %} - targets: - - '{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ hostvars[host]['prometheus_elasticsearch_exporter_port'] }}' + - '{{ 'api' | kolla_address(host, override_var='prometheus_target_address') | put_address_in_context('url') }}:{{ hostvars[host]['prometheus_elasticsearch_exporter_port'] }}' {% if hostvars[host].prometheus_instance_label | default(false, true) %} labels: instance: "{{ hostvars[host].prometheus_instance_label }}" @@ -204,7 +204,7 @@ scrape_configs: static_configs: {% for host in groups["prometheus-libvirt-exporter"] %} - targets: - - '{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ hostvars[host]['prometheus_libvirt_exporter_port'] }}' + - '{{ 'api' | kolla_address(host, override_var='prometheus_target_address') | put_address_in_context('url') 
}}:{{ hostvars[host]['prometheus_libvirt_exporter_port'] }}' {% if hostvars[host].prometheus_instance_label | default(false, true) %} labels: instance: "{{ hostvars[host].prometheus_instance_label }}" @@ -220,7 +220,7 @@ scrape_configs: static_configs: {% for host in groups["etcd"] %} - targets: - - '{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ hostvars[host]['prometheus_etcd_integration_port'] }}' + - '{{ 'api' | kolla_address(host, override_var='prometheus_target_address') | put_address_in_context('url') }}:{{ hostvars[host]['prometheus_etcd_integration_port'] }}' {% if hostvars[host].prometheus_instance_label | default(false, true) %} labels: instance: "{{ hostvars[host].prometheus_instance_label }}" @@ -232,7 +232,8 @@ scrape_configs: - job_name: ironic_prometheus_exporter static_configs: {% for host in groups['ironic-conductor'] %} - - targets: ["{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ hostvars[host]['ironic_prometheus_exporter_port'] }}"] + - targets: + - '{{ 'api' | kolla_address(host, override_var='prometheus_target_address') | put_address_in_context('url') }}:{{ hostvars[host]['ironic_prometheus_exporter_port'] }}' {% if hostvars[host].prometheus_instance_label | default(false, true) %} labels: instance: "{{ hostvars[host].prometheus_instance_label }}" @@ -245,7 +246,7 @@ scrape_configs: static_configs: {% for host in groups['prometheus-alertmanager'] %} - targets: - - '{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ hostvars[host]['prometheus_alertmanager_port'] }}' + - '{{ 'api' | kolla_address(host, override_var='prometheus_target_address') | put_address_in_context('url') }}:{{ hostvars[host]['prometheus_alertmanager_port'] }}' {% if hostvars[host].prometheus_instance_label | default(false, true) %} labels: instance: "{{ hostvars[host].prometheus_instance_label }}" @@ -257,7 +258,7 @@ scrape_configs: static_configs: {% for host in groups["loadbalancer"] %} - targets: - - '{{ 'api' 
| kolla_address(host) | put_address_in_context('url') }}:{{ hostvars[host]['proxysql_prometheus_exporter_port'] }}' + - '{{ 'api' | kolla_address(host, override_var='prometheus_target_address') | put_address_in_context('url') }}:{{ hostvars[host]['proxysql_prometheus_exporter_port'] }}' {% if hostvars[host].prometheus_instance_label | default(false, true) %} labels: instance: "{{ hostvars[host].prometheus_instance_label }}" @@ -270,7 +271,7 @@ alerting: - static_configs: {% for host in groups["prometheus-alertmanager"] %} - targets: - - '{{ 'api' | kolla_address(host) | put_address_in_context('url') }}:{{ hostvars[host]['prometheus_alertmanager_port'] }}' + - '{{ 'api' | kolla_address(host, override_var='prometheus_target_address') | put_address_in_context('url') }}:{{ hostvars[host]['prometheus_alertmanager_port'] }}' {% if hostvars[host].prometheus_instance_label | default(false, true) %} labels: instance: "{{ hostvars[host].prometheus_instance_label }}" diff --git a/doc/source/reference/logging-and-monitoring/prometheus-guide.rst b/doc/source/reference/logging-and-monitoring/prometheus-guide.rst index 30966c1cdc..c8fc066ece 100644 --- a/doc/source/reference/logging-and-monitoring/prometheus-guide.rst +++ b/doc/source/reference/logging-and-monitoring/prometheus-guide.rst @@ -243,3 +243,16 @@ For example: labels: instance: host1 +Target address +~~~~~~~~~~~~~~ + +By default, Prometheus server uses the IP of the API interface of scrape +targets when collecting metrics. This may be overridden by setting +``prometheus_target_address`` as a host variable. The value of this host +variable must be a valid IPv4 or IPv6 address. + +Prometheus server is one of the few instances where we need to know IP +addresses of all other hosts in the cloud. Being able to specify these via +``prometheus_target_address`` allows us to operate when facts are not available +for all hosts. This could be due to some hosts being unreachable or having +previously failed. 
diff --git a/kolla_ansible/kolla_address.py b/kolla_ansible/kolla_address.py index 4538e75a2c..e83037d576 100644 --- a/kolla_ansible/kolla_address.py +++ b/kolla_ansible/kolla_address.py @@ -14,6 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import ipaddress + from jinja2.filters import pass_context from jinja2.runtime import Undefined @@ -22,7 +24,7 @@ from kolla_ansible.helpers import _call_bool_filter @pass_context -def kolla_address(context, network_name, hostname=None): +def kolla_address(context, network_name, hostname=None, override_var=None): """returns IP address on the requested network The output is affected by '_*' variables: @@ -33,6 +35,8 @@ def kolla_address(context, network_name, hostname=None): :param network_name: string denoting the name of the network to get IP address for, e.g. 'api' :param hostname: to override host which address is retrieved for + :param override_var: optional name of a host variable that can be used + to override the IP address :returns: string with IP address """ @@ -58,6 +62,25 @@ def kolla_address(context, network_name, hostname=None): del hostvars # remove for clarity (no need for other hosts) + if override_var is not None: + if override_var in host: + try: + # Use ipaddress to test IPv4/6 validity of + # the string returned from override_var and + # return the string-formatted, valid IP address + ip = ipaddress.ip_address(host[override_var]) + return format(ip) + except ValueError: + # Catch ValueError from ipaddress and make the + # output more useful for operators + raise FilterError("variable '{override_var}' for " + "host '{hostname}' is set to " + "'{value}', which is not a valid " + "IPv4 or IPv6 address" + .format(override_var=override_var, + hostname=hostname, + value=host[override_var])) + # NOTE(yoctozepto): variable "host" will *not* return Undefined # same applies to all its children (act like plain dictionary) diff --git 
a/kolla_ansible/tests/unit/test_address_filters.py b/kolla_ansible/tests/unit/test_address_filters.py index 589531639a..4be75528f0 100644 --- a/kolla_ansible/tests/unit/test_address_filters.py +++ b/kolla_ansible/tests/unit/test_address_filters.py @@ -325,6 +325,71 @@ class TestKollaAddressFilter(unittest.TestCase): }) self.assertEqual(addr, kolla_address(context, 'api')) + def test_override_var_valid_ipv4(self): + addr = '192.0.2.1' + override_var = 'my_ip_address' + context = self._make_context({ + 'inventory_hostname': 'primary', + 'hostvars': { + 'primary': { + override_var: addr, + }, + }, + }) + self.assertEqual( + addr, kolla_address(context, 'api', override_var=override_var)) + + def test_override_var_valid_ipv6(self): + addr = 'fd::' + override_var = 'my_ip_address' + context = self._make_context({ + 'inventory_hostname': 'primary', + 'hostvars': { + 'primary': { + override_var: addr, + }, + }, + }) + self.assertEqual( + addr, kolla_address(context, 'api', override_var=override_var)) + + def test_override_var_invalid(self): + addr = 'this-is-an-fqdn.example.com' + override_var = 'my_ip_address' + context = self._make_context({ + 'inventory_hostname': 'primary', + 'hostvars': { + 'primary': { + override_var: addr, + }, + }, + }) + self.assertRaises( + FilterError, kolla_address, context, 'api', + override_var=override_var) + + def test_override_var_missing(self): + addr = '192.0.2.1' + override_var = 'my_ip_address' + context = self._make_context({ + 'inventory_hostname': 'primary', + 'hostvars': { + 'primary': { + 'api_address_family': 'ipv4', + 'api_interface': 'fake-interface', + 'ansible_facts': { + 'fake_interface': { + 'ipv4': { + 'address': addr, + }, + }, + }, + }, + }, + }) + self.assertEqual( + addr, kolla_address(context, 'api', None, override_var)) + class TestKollaUrlFilter(unittest.TestCase): diff --git a/releasenotes/notes/prometheus-target-address-4d2d5624ee6ae5a0.yaml b/releasenotes/notes/prometheus-target-address-4d2d5624ee6ae5a0.yaml new 
file mode 100644 index 0000000000..09e2e7f225 --- /dev/null +++ b/releasenotes/notes/prometheus-target-address-4d2d5624ee6ae5a0.yaml @@ -0,0 +1,6 @@ +--- +features: + - | + Adds support for overriding the IP address used by Prometheus server to + collect metrics from exporters. This is done by specifying + ``prometheus_target_address`` as a host variable.