diff --git a/ansible/check/group_vars/all.yml b/ansible/check/group_vars/all.yml index dbfe9b5ba..9548241e6 100644 --- a/ansible/check/group_vars/all.yml +++ b/ansible/check/group_vars/all.yml @@ -1,4 +1,4 @@ --- -result_dir: "{{inventory_dir}}/" +result_dir: "{{inventory_dir}}/../results" mysql_tuner_script: https://raw.githubusercontent.com/major/MySQLTuner-perl/master/mysqltuner.pl diff --git a/ansible/check/group_vars/compute.yml b/ansible/check/group_vars/compute.yml index d9453c063..3b83a8775 100644 --- a/ansible/check/group_vars/compute.yml +++ b/ansible/check/group_vars/compute.yml @@ -7,16 +7,26 @@ nova_vif_timeout: 300 checks: bz1245714: url: "https://bugzilla.redhat.com/show_bug.cgi?id=1245714" - name: "No Swap Space allocated" + description: "No Swap Space allocated" + severity: "Critical" + impact: "The Compute node can hit OOM since we deploy with no swap" bz1282644: url : "https://bugzilla.redhat.com/show_bug.cgi?id=1282644" - name : "increase reserved_host_memory_mb" + description: "increase reserved_host_memory_mb" + severity: "Critical" + impact: "The Compute node can hit OOM if we do not reserve enough memory" tuned_profile_result : url: "none" - name: "Ensure Tuned Profile is set to virtual-host" + description: "Ensure Tuned Profile is set to virtual-host" + severity: "Suggestion" + impact: "An incorrect Tuned Profile could result in degraded performance" nova_vif_timeout_result: url: "none" - name: "Nova VIF timeout should be >= 300" + description: "Nova VIF timeout should be >= 300" + severity: "Critical" + impact: "This could cause guests to fail to boot if not properly set." bz1264740: url: "https://bugzilla.redhat.com/show_bug.cgi?id=1264740" - name: "RHEL OSP Director must be configure with nova-event-callback by default" + description: "RHEL OSP Director must be configured with nova-event-callback by default" + severity: "Critical" + impact: "This could cause guests to fail to boot if not properly set." 
diff --git a/ansible/check/group_vars/controller.yml b/ansible/check/group_vars/controller.yml index ab134836d..ab317dd2e 100644 --- a/ansible/check/group_vars/controller.yml +++ b/ansible/check/group_vars/controller.yml @@ -13,51 +13,86 @@ keystone_processes: 2 checks : bz1095811 : url: "https://bugzilla.redhat.com/show_bug.cgi?id=1095811" - name: "Network connectivity issues after 1000 netns" + description: "Network connectivity issues after 1000 netns" + severity: "Suggestion" + impact: "Networking can become flaky after 1000 net namespaces" bz1282491 : url: "https://bugzilla.redhat.com/show_bug.cgi?id=1282491" - name: "update default file descriptor setting" + description: "update default file descriptor setting" + severity: "Critical" + impact: "RabbitMQ can become unreliable after it runs out of FDs" bz1281584 : url: "https://bugzilla.redhat.com/show_bug.cgi?id=1281584" - name: "Director does not create an haproxy configuration that conforms to our best-practice recommendations" + description: "Director does not create an haproxy configuration that conforms to our best-practice recommendations" + severity: "Suggestion" + impact: "HAProxy config should match our best practices" bz1266253 : url: "https://bugzilla.redhat.com/show_bug.cgi?id=1266253" - name: "increase mariadb max_connection default value" + description: "increase mariadb max_connection default value" + severity: "Critical" + impact: "With the max_connection set too low, MariaDB will fail at scale" buffer_pool_size: url: "none" - name: "mariadb buffer pool size tuning" + description: "mariadb buffer pool size tuning" + severity: "Suggestion" + impact: "Tune the buffer pool size to increase performance" mysqld_safe_soft_fd: url: "none" - name: "mariadb file descriptor setting not high enough" + description: "mariadb file descriptor setting not high enough" + severity: "Suggestion" + impact: "If the FD setting is not set very high, the scale of the cloud might be impacted." 
bz1293712 : url: "https://bugzilla.redhat.com/show_bug.cgi?id=1293712" - name: "/etc/udev/rules.d/99-dhcp-all-interfaces.rules causes a slow and miserable degradation until things fail" + description: "/etc/udev/rules.d/99-dhcp-all-interfaces.rules causes a slow and miserable degradation until things fail" + severity: "Critical" + impact: "This bug will cause the controllers to slowly degrade with increasing neutron networks" nova_vif_timeout_result: url: "none" - name: "Nova VIF timeout should be >= 300" + description: "Nova VIF timeout should be >= 300" + severity: "Suggestion" + impact: "Best practices from the Neutron team" bz1264740: url: "https://bugzilla.redhat.com/show_bug.cgi?id=1264740" - name: "RHEL OSP Director must be configure with nova-event-callback by default" + description: "RHEL OSP Director must be configured with nova-event-callback by default" + severity: "Critical" + impact: "nova-event-callback should be enabled, nova will launch guests with possibly no networking" rabbit_partitioned: url: "none" - name: "Rabbit is currently partitioned - YMMV... Good luck." + description: "RabbitMQ is currently partitioned - YMMV... Good luck." 
+ severity: "Critical" + impact: "RabbitMQ has become partitioned, the cloud is useless" tuned_profile_result: url: "none" - name: "Ensure Tuned Profile is set to throughput-performance" + description: "Ensure TuneD Profile is set to throughput-performance" + severity: "Suggestion" + impact: "Incorrect TuneD Profile could result in degraded performance" neutron_rootwrap_daemon: url: "none" - name: "Ensure rootwrap has daemon mode enabled" + description: "Ensure rootwrap has daemon mode enabled" + severity: "Suggestion" + impact: "Neutron team recommends to have rootwrap daemon enabled" neutron_dnsmasq_mtu: url: "https://bugs.launchpad.net/tripleo/+bug/1590100" - name: "Ensure there is no dnsmasq setting to force MTU" + description: "Ensure there is no dnsmasq setting to force MTU" + severity: "Critical" + impact: "MTU mismatches can cause SSH and other services to fail" neutron_conf_mtu: url: "https://bugs.launchpad.net/tripleo/+bug/1590101" - name: "The global_physnet_mtu should not be set unless by Administrator" + description: "The global_physnet_mtu should not be set unless by Administrator" + severity: "Critical" + impact: "MTU mismatches can cause SSH and other services to fail" glance_api_workers: url: "https://bugzilla.redhat.com/show_bug.cgi?id=1361285" - name: "Glance API wokres should be set to None , thereby defaulting to number of cores" + description: "Glance API workers should be set to None, thereby defaulting to number of cores" + severity: "Suggestion" + impact: "Not tuning the workers properly can cause degraded performance" glance_registry_workers: url: "https://bugzilla.redhat.com/show_bug.cgi?id=1361285" + description: "Glance Registry workers should be set to None, thereby defaulting to number of cores" + severity: "Suggestion" + impact: "Not tuning the workers properly can cause degraded performance" bz1347305: url: "https://bugzilla.redhat.com/show_bug.cgi?id=1347305" - name: "Overcloud deployed with keystone as single process leads to 
abysmal performance" + description: "Overcloud deployed with keystone as single process leads to abysmal performance" + severity: "Critical" + impact: "Not tuning the workers properly can cause degraded performance" diff --git a/ansible/check/group_vars/undercloud.yml b/ansible/check/group_vars/undercloud.yml index 3df2fa4e0..25f03b27d 100644 --- a/ansible/check/group_vars/undercloud.yml +++ b/ansible/check/group_vars/undercloud.yml @@ -13,37 +13,61 @@ keystone_processes: 2 checks : bz1282491 : url: "https://bugzilla.redhat.com/show_bug.cgi?id=1282491" - name: "update default file descriptor setting" + description: "update RabbitMQ default file descriptor setting" + severity: "Suggestion" + impact: "RabbitMQ can become unreliable after it runs out of FDs" bz1281584 : url: "https://bugzilla.redhat.com/show_bug.cgi?id=1281584" - name: "Director does not create an haproxy configuration that conforms to our best-practice recommendations" + description: "Director does not create an haproxy configuration that conforms to our best-practice recommendations" + severity: "Suggestion" + impact: "HAProxy config should match our best practices" bz1266253 : url: "https://bugzilla.redhat.com/show_bug.cgi?id=1266253" - name: "increase mariadb max_connection default value" + description: "increase mariadb max_connection default value" + severity: "Critical" + impact: "With the max_connection set too low, MariaDB will fail at scale" buffer_pool_size: url: "none" - name: "mariadb buffer pool size tuning" + description: "mariadb buffer pool size tuning" + severity: "Suggestion" + impact: "Tune the buffer pool size to increase performance" mysqld_safe_soft_fd: url: "none" - name: "mariadb file descriptor setting not high enough" + description: "mariadb file descriptor setting not high enough" + severity: "Suggestion" + impact: "If the FD setting is not set very high, the scale of the cloud might be impacted." 
bz1293712 : url: "https://bugzilla.redhat.com/show_bug.cgi?id=1293712" - name: "/etc/udev/rules.d/99-dhcp-all-interfaces.rules causes a slow and miserable degradation until things fail" + description: "/etc/udev/rules.d/99-dhcp-all-interfaces.rules causes a slow and miserable degradation until things fail" + severity: "Suggestion" + impact: "This will slow down the machine due to all the dhcp-namespaces" nova_vif_timeout_result: url: "none" - name: "Nova VIF timeout should be >= 300" + description: "Nova VIF timeout should be >= 300" + severity: "Suggestion" + impact: "Best practices from the Neutron team" bz1264740: url: "https://bugzilla.redhat.com/show_bug.cgi?id=1264740" - name: "RHEL OSP Director must be configure with nova-event-callback by default" + description: "RHEL OSP Director must be configured with nova-event-callback by default" + severity: "Critical" + impact: "nova-event-callback should be enabled, nova will launch guests with possibly no networking" rabbit_partitioned: url: "none" - name: "Rabbit is currently partitioned - YMMV... Good luck." + description: "RabbitMQ is currently partitioned - YMMV... Good luck." 
+ severity: "Critical" + impact: "RabbitMQ is partitioned - The deployment is broken" tuned_profile_result: url: "none" - name: "Ensure Tuned Profile is set to throughput-performance" + description: "Ensure TuneD Profile is set to throughput-performance" + severity: "Suggestion" + impact: "Set the right TuneD profile for the host" neutron_rootwrap_daemon: url: "none" - name: "Ensure rootwrap has daemon mode enabled" + description: "Ensure rootwrap has daemon mode enabled" + severity: "Suggestion" + impact: "The Red Hat Neutron team recommends having rootwrap in Daemon mode" bz1330980: url: "https://bugzilla.redhat.com/show_bug.cgi?id=1330980" - name: "Undercloud deployed with 1 keystone worker and cpu_count for threads" + description: "Undercloud deployed with 1 keystone worker and cpu_count for threads" + severity: "Critical" + impact: "Not tuning the workers properly can cause degraded performance" diff --git a/ansible/check/templates/bug_report.j2 b/ansible/check/templates/bug_report.j2 index b16a3e8b3..64953626b 100644 --- a/ansible/check/templates/bug_report.j2 +++ b/ansible/check/templates/bug_report.j2 @@ -7,8 +7,10 @@ {% for check in hostvars[host]['checks'] %} {% if hostvars[host][check]['failed'] == true %} Bug: {{ check }} -Name: {{ hostvars[host]['checks'][check]['name'] }} +Description: {{ hostvars[host]['checks'][check]['description'] }} URL: {{ hostvars[host]['checks'][check]['url'] }} +Severity: {{ hostvars[host]['checks'][check]['severity'] }} +Impact: {{ hostvars[host]['checks'][check]['impact'] }} {% endif %} {% endfor %} @@ -21,8 +23,10 @@ URL: {{ hostvars[host]['checks'][check]['url'] }} {% for check in hostvars[host]['checks'] %} {% if hostvars[host][check]['failed'] == true %} Bug: {{ check }} -Name: {{ hostvars[host]['checks'][check]['name'] }} +Description: {{ hostvars[host]['checks'][check]['description'] }} URL: {{ hostvars[host]['checks'][check]['url'] }} +Severity: {{ hostvars[host]['checks'][check]['severity'] }} +Impact: {{ 
hostvars[host]['checks'][check]['impact'] }} {% endif %} {% endfor %} @@ -35,8 +39,10 @@ URL: {{ hostvars[host]['checks'][check]['url'] }} {% for check in hostvars[host]['checks'] %} {% if hostvars[host][check]['failed'] == true %} Bug: {{ check }} -Name: {{ hostvars[host]['checks'][check]['name'] }} +Description: {{ hostvars[host]['checks'][check]['description'] }} URL: {{ hostvars[host]['checks'][check]['url'] }} +Severity: {{ hostvars[host]['checks'][check]['severity'] }} +Impact: {{ hostvars[host]['checks'][check]['impact'] }} {% endif %} {% endfor %}