Further tune the playbooks, configs, and thread pool
* Implements G1 GC optionally. The variable `elastic_g1gc_enabled` has been
  added with a default of false. If this option is set to true and the node's
  heap size is greater than 4GiB, G1GC will be enabled.
* Adds new thread pool options.
* Better constrains coordination nodes.
* Interface recovery speed has been limited.
* Buffer size is now set correctly.
* Serializes the elk deployment so that upgrades are non-impacting.

Change-Id: I89224eeaf4ed29c3bb1d7f8010b69503dbc74e11
Signed-off-by: Kevin Carter <kevin.carter@rackspace.com>
parent 39e9905d00
commit f69d391325
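A minimal usage sketch, assuming a deployer overrides the new variable in a user variables file (the exact file and placement are deployment specific); the jvm.options template only applies G1 when the rendered heap size exceeds 4096 MiB:

# Hypothetical deployer override; enables the experimental G1 collector.
# It only takes effect on nodes whose computed heap size is greater than 4GiB.
elastic_g1gc_enabled: true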
@@ -437,6 +437,7 @@ configuration file using the key/value pairs as options.
client_id: "elk_metrics_6x"
compression_type: "gzip"
security_protocol: "SSL"
id: "UniqueOutputID"

For a complete list of all options available within the Logstash Kafka output
@@ -41,18 +41,16 @@
set_fact:
data_nodes: "{{ (groups['elastic-logstash'][:master_node_count | int] + groups['elastic-logstash'][master_node_count | int::2]) }}"
master_nodes: "{{ groups['elastic-logstash'][:master_node_count | int] }}"
coordination_nodes: |-
{% set nodes=[] %}
{% for host in groups['kibana'] %}
{% set _ = nodes.insert(loop.index, ((hostvars[host]['ansible_host'] | string) + ":" + (elastic_port | string))) %}
{% endfor %}
{{ nodes }}
zen_nodes: |-
{% set nodes=[] %}
{% for host in (groups['elastic-logstash'] | union(groups['kibana'])) %}
{% set _ = nodes.insert(loop.index, (hostvars[host]['ansible_host'] | string)) %}
{% endfor %}
{{ nodes }}
coordination_nodes: >-
{{
(groups['kibana'] | map('extract', hostvars, 'ansible_host') | list)
| map('regex_replace', '(.*)' ,'\1:' ~ elastic_port)
| list
}}
zen_nodes: >-
{{
(groups['elastic-logstash'] | union(groups['kibana'])) | map('extract', hostvars, 'ansible_host') | list
}}
elasticserch_interface_speed: |-
{% set default_interface_fact = hostvars[inventory_hostname]['ansible_' + (elastic_data_interface | replace('-', '_'))] %}
{% set speeds = [] %}
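For illustration (hypothetical inventory, not part of the change): with six elastic-logstash hosts and master_node_count set to 3, the slicing above yields masters from the first three hosts, and data nodes from the masters plus every second remaining host:

# groups['elastic-logstash'] = [es0, es1, es2, es3, es4, es5], master_node_count = 3
# master_nodes = [es0, es1, es2]                # groups[...][:3]
# data_nodes   = [es0, es1, es2] + [es3, es5]   # groups[...][:3] + groups[...][3::2]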
@@ -85,16 +83,28 @@
{% set _ = speeds.append(1000) %}
{% endif %}
{% endif %}
{{ ((speeds | min) * 0.75) | int }}
{% set interface_speed = ((speeds | min) * 0.20) | int %}
{{ ((interface_speed | int) > 750) | ternary(750, interface_speed) }}
tags:
- always

- name: Set data node details
set_fact:
elasticsearch_data_node_details: >-
{{
(data_nodes | map('extract', hostvars, 'ansible_host') | list) | map('regex_replace', '(.*)' ,'\1:' ~ elastic_port) | list
}}
logstash_data_node_details: >-
{{
(data_nodes | map('extract', hostvars, 'ansible_host') | list) | map('regex_replace', '(.*)' ,'\1:' ~ logstash_beat_input_port) | list
}}

# based on the assignment of roles to hosts, set per host booleans
- name: Node enablement
set_fact:
master_node: "{{ (inventory_hostname in master_nodes) | ternary(true, false) }}"
data_node: "{{ (inventory_hostname in data_nodes) | ternary(true, false) }}"
elastic_thread_pool_size: "{{ ((ansible_processor_cores | int) > 24) | ternary(24, ansible_processor_cores) }}"
elastic_thread_pool_size: "{{ ((ansible_processor_cores | int) >= 24) | ternary(24, ansible_processor_cores) }}"
tags:
- always
@@ -103,26 +113,18 @@
- name: Set data nodes
set_fact:
elasticsearch_data_hosts: |-
{% set nodes = elasticsearch_data_node_details %}
{% if inventory_hostname in data_nodes %}
{% set data_hosts = ['127.0.0.1:' + (elastic_port | string)] %}
{% else %}
{% set nodes=[] %}
{% for host in data_nodes %}
{% set _ = nodes.insert(loop.index, ((hostvars[host]['ansible_host'] | string) + ":" + (elastic_port | string))) %}
{% endfor %}
{% set data_hosts = nodes | shuffle(seed=inventory_hostname) %}
{% set _ = nodes.insert(0, '127.0.0.1:' ~ elastic_port) %}
{% endif %}
{% set data_hosts = nodes | shuffle(seed=inventory_hostname) %}
{{ data_hosts }}
logstash_data_hosts: |-
{% set nodes = logstash_data_node_details %}
{% if inventory_hostname in data_nodes %}
{% set data_hosts = ['127.0.0.1:' + (logstash_beat_input_port | string)] %}
{% else %}
{% set nodes=[] %}
{% for host in data_nodes %}
{% set _ = nodes.insert(loop.index, ((hostvars[host]['ansible_host'] | string) + ":" + (logstash_beat_input_port | string))) %}
{% endfor %}
{% set data_hosts = nodes | shuffle(seed=inventory_hostname) %}
{% set _ = nodes.insert(0, '127.0.0.1:' ~ logstash_beat_input_port) %}
{% endif %}
{% set data_hosts = nodes | shuffle(seed=inventory_hostname) %}
{{ data_hosts }}
tags:
- always
@@ -33,7 +33,8 @@
number_of_replicas: "1"
- name: "_all/_settings?preserve_existing=true"
index_options:
index.refresh_interval: "1m"
index.refresh_interval: "10s"
- name: "_all/_settings?preserve_existing=true"
index_options:
index.queries.cache.enabled: "false"
index.queries.cache.enabled: "true"
indices.queries.cache.size: "5%"
@@ -1,6 +1,41 @@
---
- name: Install Elastic Search

- name: Run serialization detection
hosts: "elastic-logstash:kibana"
gather_facts: true

vars_files:
- vars/variables.yml

tasks:
- include_tasks: common_task_data_node_hosts.yml

- name: Group by stand alone masters
group_by:
key: elastic_masters
parents: elastic-logstash
when:
- inventory_hostname in master_nodes

- name: Group by non stand alone masters
group_by:
key: elastic_non_masters
parents: elastic-logstash
when:
- inventory_hostname in (data_nodes | difference(master_nodes))

- name: Group by coordinators
group_by:
key: elastic_coordinators
parents: elastic-logstash
when:
- inventory_hostname in groups['kibana']
tags:
- always

- name: Install Elastic Search
hosts: "elastic_coordinators:elastic_masters:elastic_non_masters"
serial: "33%"
become: true

vars_files:
@@ -12,11 +47,6 @@

environment: "{{ deployment_environment_variables | default({}) }}"

pre_tasks:
- include_tasks: common_task_data_node_hosts.yml
tags:
- always

tasks:
- name: Set memory fact to half
set_fact:
@@ -40,10 +70,12 @@
elasticsearch_node_master: false
elasticsearch_node_data: false
elasticsearch_node_ingest: false
elastic_coordination_node: true
elastic_heap_size: "{{ (elastic_heap_size | int) // 3 }}"
elastic_thread_pool_size: "{{ ((ansible_processor_cores | int) > 4) | ternary(4, 1) }}"
elastic_thread_pool_size: "{{ ((ansible_processor_cores | int) > 4) | ternary(4, (ansible_processor_cores // 2)) }}"
when:
- inventory_hostname in (groups['kibana'] | difference(groups['elastic-logstash']))
- inventory_hostname in (groups['kibana'] | default([])) and
not inventory_hostname in (groups['elastic-logstash'] | default([]))
tags:
- always
@@ -1,6 +1,7 @@
---
- name: Install Logstash
hosts: elastic-logstash
serial: "50%"
become: true
vars_files:
- vars/variables.yml
@@ -18,7 +19,7 @@
tasks:
- name: Set quarter memory fact
set_fact:
q_mem: "{{ (ansible_memtotal_mb | int) // 4 }}"
q_mem: "{{ (ansible_memtotal_mb | int) // 3 }}"
when:
- q_mem is not defined
tags:
@@ -26,7 +27,7 @@

- name: Set processor cores fact
set_fact:
q_storage: "{{ ansible_processor_cores }}"
q_storage: "{{ (ansible_processor_cores | int) * 2 }}"
when:
- q_storage is not defined
tags:
@@ -124,7 +125,7 @@
notify:
- Enable and restart logstash

- name: Drop elasticsearch conf file
- name: Drop logstash conf file(s)
template:
src: "{{ item.src }}"
dest: "{{ item.dest }}"
@@ -2,7 +2,7 @@

- name: Load apm-server Dashboards
hosts: apm-server[0]
gather_facts: false
gather_facts: true
vars_files:
- vars/variables.yml
@@ -2,7 +2,7 @@

- name: Load Auditbeat Dashboards
hosts: hosts[0]
gather_facts: false
gather_facts: true
vars_files:
- vars/variables.yml
@@ -2,7 +2,7 @@

- name: Load Filebeat Dashboards
hosts: hosts[0]
gather_facts: false
gather_facts: true
vars_files:
- vars/variables.yml
@@ -2,7 +2,7 @@

- name: Load Heartbeat Dashboards
hosts: kibana[0]
gather_facts: false
gather_facts: true
vars_files:
- vars/variables.yml
@@ -15,7 +15,7 @@

- name: Load Journalbeat Dashboards
hosts: hosts[0]
gather_facts: false
gather_facts: true
vars_files:
- vars/variables.yml
@@ -2,7 +2,7 @@

- name: Load Metricsbeat Dashboards
hosts: all[0]
gather_facts: false
gather_facts: true
vars_files:
- vars/variables.yml
@@ -2,7 +2,7 @@

- name: Load Packetbeat Dashboards
hosts: hosts[0]
gather_facts: false
gather_facts: true
vars_files:
- vars/variables.yml
@@ -1,6 +1,16 @@
filter {
fingerprint {
source => "message"
target => "[@metadata][fingerprint]"
method => "SHA1"
key => "{{ cluster_name | replace(' ', '_') }}"
base64encode => true
}
}
output {
if [@metadata][version] {
elasticsearch {
document_id => "%{[@metadata][fingerprint]}"
hosts => {{ elasticsearch_data_hosts | shuffle(seed=inventory_hostname) | to_json }}
sniffing => {{ (not data_node | bool) | lower }}
manage_template => {{ (data_node | bool) | lower }}
@@ -8,6 +18,7 @@ output {
}
} else {
elasticsearch {
document_id => "%{[@metadata][fingerprint]}"
hosts => {{ elasticsearch_data_hosts | shuffle(seed=inventory_hostname) | to_json }}
sniffing => {{ (not data_node | bool) | lower }}
manage_template => {{ (data_node | bool) | lower }}
@@ -101,26 +101,35 @@ gateway.recover_after_nodes: {{ ((master_node_count | int) // 2) + 1 }}
#
# action.destructive_requires_name: true

{% set processors = ((elastic_thread_pool_size | int) > 0) | ternary(elastic_thread_pool_size, 1) %}
{% if not (elastic_coordination_node | default(false)) | bool %}
# Thread pool settings. For more on this see the documentation at:
# <https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-threadpool.html>
thread_pool:
search:
size: {{ (elastic_thread_pool_size | int) }}
queue_size: {{ (elastic_thread_pool_size | int) * 256 }}
index:
size: {{ (elastic_thread_pool_size | int) }}
queue_size: {{ (elastic_thread_pool_size | int) * 256 }}
bulk:
size: {{ (elastic_thread_pool_size | int) }}
queue_size: {{ (elastic_thread_pool_size | int) * 512 }}
queue_size: {{ (processors | int) * 256 }}
get:
queue_size: {{ (processors | int) * 256 }}
write:
queue_size: {{ (processors | int) * 512 }}
{% else %}
# The number of processors is automatically detected, and the thread pool
# settings are automatically set based on it. In some cases it can be useful to
# override the number of detected processors. This can be done by explicitly
# setting the processors setting. On Kibana hosts where elasticsearch is running
# as a coordination node, the processor count is limited.
processors: {{ processors }}
{% endif %}

# Accepts either a percentage or a byte size value. Set to 30%, meaning that 30%
# Accepts either a percentage or a byte size value. Set to 20%, meaning that 20%
# of the total heap allocated to a node will be used as the indexing buffer size
# shared across all shards.
indices.memory.index_buffer_size: 30%
indices.memory.index_buffer_size: 20%

# Connection throttling on recovery is limited to 75% of the detected interface
# speed. This will improve search speeds and reduce general cluster pressure.
# Connection throttling on recovery is limited to 20% of the detected interface
# speed with a cap of 750mb. This will improve search speeds and reduce general
# cluster pressure.
indices.recovery.max_bytes_per_sec: {{ elasticserch_interface_speed }}mb

# ---------------------------------- X-Pack ------------------------------------
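As a worked example (hypothetical host sizes, derived from the expressions above, not part of the change):

# 32-core elastic-logstash data node:
#   elastic_thread_pool_size: 24            # cores capped at 24
#   thread_pool.search.queue_size: 6144     # 24 * 256
#   thread_pool.write.queue_size: 12288     # 24 * 512
# 8-core Kibana-only coordination node:
#   elastic_thread_pool_size: 4             # ternary(4, cores // 2) -> 4
#   processors: 4                           # written instead of a thread_pool block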
@@ -1,14 +1,13 @@
## JVM configuration

# Xms represents the initial size of total heap space
# Xmx represents the maximum size of total heap space
{% if (not (elasticsearch_node_master | default(master_node)) | bool) and (not (elasticsearch_node_data | default(data_node)) | bool) %}
-Xms{{ (elastic_heap_size | int) // 2 }}m
-Xmx{{ (elastic_heap_size | int) // 2 }}m
{% set heap_size = (elastic_heap_size | int) // 2 %}
{% else %}
-Xms{{ elastic_heap_size }}m
-Xmx{{ elastic_heap_size }}m
{% set heap_size = (elastic_heap_size | int) %}
{% endif %}
# Xms represents the initial size of total heap space
-Xms{{ heap_size }}m
# Xmx represents the maximum size of total heap space
-Xmx{{ heap_size }}m

################################################################
@@ -21,11 +20,17 @@
##
################################################################

## GC configuration
## GC Configuration
{% if ((heap_size | int) > 4096) and (elastic_g1gc_enabled | bool) %}
-XX:+UseG1GC
-XX:MaxGCPauseMillis=400
-XX:InitiatingHeapOccupancyPercent=75
{% else %}
-XX:+UseParNewGC
-XX:+UseConcMarkSweepGC
-XX:CMSInitiatingOccupancyFraction=75
-XX:+UseCMSInitiatingOccupancyOnly
{% endif %}

## optimizations
@@ -38,16 +38,17 @@ path.data: /var/lib/logstash
#
# This defaults to the number of the host's CPU cores.
#
# pipeline.workers: 2
{% set processors = ((elastic_thread_pool_size | int) > 0) | ternary(elastic_thread_pool_size, 1) %}
pipeline.workers: {{ processors | int }}
#
# How many events to retrieve from inputs before sending to filters+workers
#
# pipeline.batch.size: 125
pipeline.batch.size: 256
#
# How long to wait in milliseconds while polling for the next event
# before dispatching an undersized batch to filters+outputs
#
# pipeline.batch.delay: 50
pipeline.batch.delay: 20
#
# Force Logstash to exit during shutdown even if there are still inflight
# events in memory. By default, logstash will refuse to quit until all
@@ -68,6 +68,11 @@ elastic_vip_url: >-
# path: "/elastic-backup"
# state: mounted

# EXPERIMENTAL - When the heap size for a given elastic node is greater than
# 4GiB the G1 garbage collector can be enabled. This is an
# experimental feature and may be removed later.
elastic_g1gc_enabled: false

# kibana vars
kibana_interface: 0.0.0.0
kibana_port: 5601