Converge the logstash pipelines and enhance memory backed queues

The multi-logstash pipeline setup, while amazingly fast, was crashing
and causing index errors when under high load for a long period of time.
Because of the crashing behavior and the fact that the folks from
Elastic describe multi-pipeline queues to be "beta" at this time the
logstash pipelines have been converted back into a single pipeline.

The memory backed queue options are now limited by a ram disk (tmpfs)
which will ensure that a burst within the queue does not cause OOM
issues, ensuring a highly performant deployment while limiting memory
usage at the same time. Memory backed queues will be enabled when the
underlying system is using "rotational" media as detected by ansible
facts. This will ensure a fast and consistent experience across all
deployment types.

Pipeline/ml/template/dashboard setup has been added to the beat
configurations which will ensure beats are properly configured even
when running in an isolated deployment and outside of normal operations
where beats are generally configured on the first data node.

Change-Id: Ie3c775f98b14f71bcbed05db9cb1c5aa46d9c436
Signed-off-by: Kevin Carter <kevin.carter@rackspace.com>
This commit is contained in:
Kevin Carter 2018-09-13 16:30:11 -05:00
parent be70a2078c
commit 0d4a4a92c7
No known key found for this signature in database
GPG Key ID: 9443251A787B9FB3
26 changed files with 595 additions and 697 deletions

View File

@ -3,6 +3,10 @@
scm: git
src: https://git.openstack.org/openstack/ansible-role-systemd_service
version: master
- name: systemd_mount
scm: git
src: https://git.openstack.org/openstack/ansible-role-systemd_mount
version: master
- name: config_template
scm: git
src: https://git.openstack.org/openstack/ansible-config_template

View File

@ -68,6 +68,14 @@ if [[ ! -d "${ANSIBLE_EMBED_HOME}/repositories/roles/systemd_service" ]]; then
popd
fi
if [[ ! -d "${ANSIBLE_EMBED_HOME}/repositories/roles/systemd_mount" ]]; then
mkdir -p "${ANSIBLE_EMBED_HOME}/repositories"
git clone https://git.openstack.org/openstack/ansible-role-systemd_mount "${ANSIBLE_EMBED_HOME}/repositories/roles/systemd_mount"
pushd "${ANSIBLE_EMBED_HOME}/repositories/roles/systemd_mount"
git checkout 0cca0b06e20a4e3d2b6b4ca19172717b6b37b68a # HEAD of master from 20-06-18
popd
fi
if [[ -f "/etc/openstack_deploy/openstack_inventory.json" ]]; then
if [[ ! -f "${ANSIBLE_EMBED_HOME}/inventory/openstack_inventory.sh" ]]; then
mkdir -p "${ANSIBLE_EMBED_HOME}/inventory"

View File

@ -19,9 +19,13 @@
{{ item }}
-E 'apm-server.host=localhost:8200'
-E 'output.elasticsearch.hosts={{ coordination_nodes | to_json }}'
-E 'setup.template.enabled=true'
-E 'setup.template.overwrite=true'
-e -v
with_items:
- "--template"
- "--pipelines"
- "--machine-learning"
- "--dashboards"
register: templates
environment:

View File

@ -19,9 +19,13 @@
{{ item }}
-E 'output.logstash.enabled=false'
-E 'output.elasticsearch.hosts={{ coordination_nodes | to_json }}'
-E 'setup.template.enabled=true'
-E 'setup.template.overwrite=true'
-e -v
with_items:
- "--template"
- "--pipelines"
- "--machine-learning"
- "--dashboards"
register: templates
environment:

View File

@ -54,10 +54,10 @@
elastic_heap_size_default: "{{ _elastic_heap_size_default }}"
elastic_log_rotate_path: "/var/log/{{ service_name }}"
- name: Configure systcl vm.max_map_count=262144 on elastic hosts
- name: Configure sysctl vm.max_map_count=524288 on elastic hosts
sysctl:
name: "vm.max_map_count"
value: "262144"
value: "524288"
state: "present"
reload: "yes"
delegate_to: "{{ physical_host }}"

View File

@ -11,5 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Set memory fact to half
_elastic_heap_size_default: "{{ ((h_mem | int) > 30720) | ternary(30720, h_mem) }}"
# The heap size is set using half of the total memory available with
# a cap of 32GiB. If the total available memory is less than 32GiB a buffer of
# 10% will be used to ensure the underlying system is not starved of memory.
_elastic_heap_size_default: "{{ ((h_mem | int) > 30720) | ternary(30720, ((h_mem | int) - ((h_mem | int) * 0.1))) }}"

View File

@ -11,5 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Set quarter memory fact
_elastic_heap_size_default: "{{ ((q_mem | int) > 30720) | ternary(30720, q_mem) }}"
# The heap size is set using a quarter of the total memory available with
# a cap of 32GiB. If the total available memory is less than 32GiB a buffer of
# 10% will be used to ensure the underlying system is not starved of memory.
_elastic_heap_size_default: "{{ ((q_mem | int) > 30720) | ternary(30720, ((q_mem | int) - ((q_mem | int) * 0.1))) }}"

View File

@ -19,9 +19,13 @@
{{ item }}
-E 'output.logstash.enabled=false'
-E 'output.elasticsearch.hosts={{ coordination_nodes | to_json }}'
-E 'setup.template.enabled=true'
-E 'setup.template.overwrite=true'
-e -v
with_items:
- "--template"
- "--pipelines"
- "--machine-learning"
- "--dashboards"
register: templates
environment:

View File

@ -19,9 +19,13 @@
{{ item }}
-E 'output.logstash.enabled=false'
-E 'output.elasticsearch.hosts={{ coordination_nodes | to_json }}'
-E 'setup.template.enabled=true'
-E 'setup.template.overwrite=true'
-e -v
with_items:
- "--template"
- "--pipelines"
- "--machine-learning"
- "--dashboards"
register: templates
until: templates is success

View File

@ -27,9 +27,13 @@
# {{ item }}
# -E 'output.logstash.enabled=false'
# -E 'output.elasticsearch.hosts={{ coordination_nodes | to_json }}'
# -E 'setup.template.enabled=true'
# -E 'setup.template.overwrite=true'
# -e -v
# with_items:
# - "--template"
# - "--pipelines"
# - "--machine-learning"
# - "--dashboards"
# register: templates
# until: templates is success

View File

@ -26,11 +26,15 @@ elastic_log_rotate_path: "/var/log/logstash"
# Enable direct syslog input into logstash. When this is enabled syslog messages
# can be sent directly to logstash via TCP or UDP.
logstash_syslog_input_enabled: false
# The typical syslog port is 514 however that is not available to logstash
# because it's a "privledged" port. For this reason 1514 is used as the default.
# because it's a "privileged" port. For this reason 5140 is used as the default.
# Changing this port to 514 will require overrides to the service files making
# logstash run as root (not recommended).
logstash_syslog_input_port: 1514
logstash_syslog_input_port: 5140
# Protocol used when the syslog input is enabled. Modes are "tcp" or "udp".
logstash_syslog_input_mode: udp
logstash_beat_input_port: 5044
logstash_deploy_filters: true
@ -75,3 +79,10 @@ logstash_arcsight_smart_connectors: []
# - host: 127.0.0.1
# port: 5000
logstash_arcsight_event_brokers: []
## The logstash queue type can be set to "memory" or "persisted". If the queue
## type is set to memory a ramdisk will be created limiting the in memory queue
## to 50% of the JVM heap size. When this option is undefined the playbook will
## detect the media type where the queue will exist. If the media type is
## "rotational" in memory queues will be used.
# logstash_queue_type:

View File

@ -62,6 +62,69 @@
notify:
- Enable and restart logstash
# Auto-detect the logstash queue type when the deployer has not set
# `logstash_queue_type` explicitly. Rotational media gets an in-memory
# queue (persisted queues on spinning disks are too slow); non-rotational
# media keeps the persisted queue.
- name: Check queue type
block:
# Resolve the device backing /var/lib/logstash (e.g. /dev/sda1).
- name: Get block device for logstash
command: findmnt -no SOURCE --target=/var/lib/logstash
changed_when: false
register: _logstash_block_device
# Strip the path and a single trailing partition digit to get the parent
# disk name, then read its "rotational" flag from ansible_devices.
# NOTE(review): regex_replace('[0-9]$','') removes only ONE trailing
# digit; multi-digit partitions (sda12), nvme names (nvme0n1p1) and
# LVM/mapper sources will not resolve and fall into the rescue below —
# TODO confirm this fallback is acceptable for those layouts.
- name: Set persisted queue fact
set_fact:
logstash_queue_type: "{{ ((ansible_devices[_logstash_block_device.stdout.split('/')[-1] | regex_replace('[0-9]$','')]['rotational'] | int) != 1) | ternary('persisted', 'memory') }}"
rescue:
# Detection failed (unknown device name); default to the memory queue.
- name: Set persisted queue fact (fallback)
set_fact:
logstash_queue_type: memory
when:
# Only auto-detect when the deployer has not overridden the queue type.
- logstash_queue_type is undefined
# When the memory queue type is in use, back /var/lib/logstash/queue with a
# tmpfs ramdisk sized to half of q_mem (quarter of total system memory, in
# MiB) so queue bursts are bounded and cannot OOM the host.
- name: Systemd memory backed queue block
block:
# The tmpfs must be owned by the logstash user/group; look up the
# numeric IDs for the mount options below (systemd hosts only).
- name: Get logstash UID
command: id -u logstash
register: logstash_uid
changed_when: false
when:
- ansible_service_mgr == 'systemd'
- name: Get logstash GID
command: id -g logstash
register: logstash_gid
changed_when: false
when:
- ansible_service_mgr == 'systemd'
# On systemd hosts, manage the tmpfs via the systemd_mount role so unit
# ordering (Before: logstash.service) guarantees the mount exists before
# logstash starts.
- name: Run the systemd mount role
include_role:
name: systemd_mount
private: true
vars:
systemd_mounts:
- what: "tmpfs"
where: "/var/lib/logstash/queue"
type: "tmpfs"
options: "size={{ (q_mem | int) // 2 }}m,uid={{ logstash_uid.stdout }},gid={{ logstash_gid.stdout }},nodev,nodiratime,noatime"
unit:
Before:
- logstash.service
state: 'started'
enabled: true
when:
- ansible_service_mgr == 'systemd'
# Non-systemd hosts fall back to a plain fstab-managed tmpfs mount.
# NOTE(review): this branch sets no uid/gid options — confirm the
# logstash user can write to the mount on such hosts.
- name: Apply fstab options for memory queues
mount:
path: /var/lib/logstash/queue
src: tmpfs
fstype: tmpfs
opts: size={{ (q_mem | int) // 2 }}m
state: mounted
when:
- ansible_service_mgr != 'systemd'
when:
# The whole block only applies to in-memory queues.
- logstash_queue_type == 'memory'
- name: Create patterns directory
file:
name: "/opt/logstash/patterns"

View File

@ -1 +0,0 @@
../../../templates/systemd.general-overrides.conf.j2

View File

@ -1 +0,0 @@
../../../templates/logstash-pipelines.yml.j2

View File

@ -39,9 +39,10 @@ path.data: /var/lib/logstash
# This defaults to the number of the host's CPU cores.
#
{% set _h_processors = ((ansible_processor_count | int) // 2) %}
{% set _processors = ((_h_processors | int) > 0) | ternary(_h_processors, 1) %}
{% set processors = ((_processors | int) > 8) | ternary(8, _processors) %}
{% set _d_processors = ((ansible_processor_count | int) * 3) %}
{% set _processors = ((_d_processors | int) > 0) | ternary(_d_processors, 2) %}
{% set _t_processors = (_processors | int) + (ansible_processor_count | int) %}
{% set processors = ((_t_processors | int) > 64) | ternary(64, _t_processors) %}
pipeline.workers: {{ processors | int }}
#
# How many events to retrieve from inputs before sending to filters+workers
@ -51,7 +52,7 @@ pipeline.batch.size: 256
# How long to wait in milliseconds while polling for the next event
# before dispatching an undersized batch to filters+outputs
#
pipeline.batch.delay: 20
pipeline.batch.delay: 64
#
# Force Logstash to exit during shutdown even if there are still inflight
# events in memory. By default, logstash will refuse to quit until all
@ -155,7 +156,15 @@ queue.type: persisted
# whichever criteria is reached first
# Default is 1024mb or 1gb
#
{% if logstash_queue_type == 'memory' %}
# An in memory queue is being used. The actual size of the queue is 90% of the
# total memory limit, which is set using 50% of the heap size.
{% set _memory_queue_size = ((q_mem | int) // 2) %}
{% set _memory_queue_size_buffer = (((_memory_queue_size | int) * 0.1) | int) %}
queue.max_bytes: {{ (_memory_queue_size | int) - (_memory_queue_size_buffer | int) }}mb
{% else %}
queue.max_bytes: {{ logstash_queue_size }}mb
{% endif %}
#
# If using queue.type: persisted, the maximum number of acked events before forcing a checkpoint
# Default is 1024, 0 for unlimited
@ -231,7 +240,7 @@ xpack.monitoring.enabled: true
#xpack.monitoring.elasticsearch.ssl.verification_mode: certificate
#xpack.monitoring.elasticsearch.sniffing: false
xpack.monitoring.collection.interval: 30s
#xpack.monitoring.collection.pipeline.details.enabled: true
xpack.monitoring.collection.pipeline.details.enabled: true
#
# ------------ X-Pack Settings (not applicable for OSS build)--------------
# X-Pack Management

View File

@ -0,0 +1,2 @@
[Unit]
Requires = logstash-mem-queue.service

View File

@ -19,9 +19,13 @@
{{ item }}
-E 'output.logstash.enabled=false'
-E 'output.elasticsearch.hosts={{ coordination_nodes | to_json }}'
-E 'setup.template.enabled=true'
-E 'setup.template.overwrite=true'
-e -v
with_items:
- "--template"
- "--pipelines"
- "--machine-learning"
- "--dashboards"
register: templates
environment:

View File

@ -19,9 +19,13 @@
{{ item }}
-E 'output.logstash.enabled=false'
-E 'output.elasticsearch.hosts={{ coordination_nodes | to_json }}'
-E 'setup.template.enabled=true'
-E 'setup.template.overwrite=true'
-e -v
with_items:
- "--template"
- "--pipelines"
- "--machine-learning"
- "--dashboards"
register: templates
environment:

View File

@ -18,3 +18,13 @@ elastic_log_rotate_path: "/var/log/elasticsearch"
temp_dir: /var/lib/elasticsearch/tmp
nfs_query: "[?fstype=='nfs' || fstype=='nfs4']"
# Enable or Disable memory locking.
elastic_memory_lock: true
# Elasticsearch plugin list. These plugins will be re-installed whenever the
# playbooks are executed, which ensures the plugins are always upgraded.
elastic_plugins:
- ingest-attachment
- ingest-geoip
- ingest-user-agent

View File

@ -0,0 +1,26 @@
---
# Copyright 2018, Rackspace US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Remove each configured plugin before re-installing it. This
# remove-then-install cycle runs on every playbook execution so plugins are
# always upgraded alongside elasticsearch (see the elastic_plugins comment
# in defaults). failed_when: false makes removal best-effort — the plugin
# may not be installed yet — while changed_when reports a change only when
# the remove command actually succeeded (rc == 0).
- name: Remove plugins
command: "/usr/share/elasticsearch/bin/elasticsearch-plugin remove --verbose {{ item }}"
failed_when: false
changed_when:
- remove_plugin.rc == 0
register: remove_plugin
with_items: "{{ elastic_plugins }}"
# Re-install each plugin; --batch suppresses interactive permission
# prompts. NOTE(review): this task always reports "changed" — acceptable
# for an intentional reinstall, but worth confirming.
- name: Install plugins
command: "/usr/share/elasticsearch/bin/elasticsearch-plugin install --batch --verbose {{ item }}"
with_items: "{{ elastic_plugins }}"

View File

@ -107,3 +107,5 @@
when:
- elastic_shared_fs_repos is defined
- (elastic_shared_fs_repos | json_query(nfs_query)) | length > 0
- include_tasks: "elasticsearch_plugins.yml"

View File

@ -3,6 +3,8 @@ cluster.name: {{ cluster_name }}
# ------------------------------------ Node ------------------------------------
node.name: {{ ansible_nodename }}
# node.rack: r1
# Set to true to enable machine learning on the node.
node.ml: false
# ----------------------------------- Paths ------------------------------------
# Path to directory where to store the data (separate multiple locations by comma):
#
@ -36,7 +38,7 @@ index.store.type: niofs
#
# Lock the memory on startup:
#
bootstrap.memory_lock: false
bootstrap.memory_lock: {{ elastic_memory_lock }}
#
# Make sure that the `ES_HEAP_SIZE` environment variable is set to about half the memory
# available on the system and that the owner of the process is allowed to use this limit.
@ -102,7 +104,7 @@ gateway.recover_after_nodes: {{ ((master_node_count | int) // 2) + 1 }}
#
# Require explicit names when deleting indices:
#
# action.destructive_requires_name: true
action.destructive_requires_name: true
{% set processors = ((elastic_thread_pool_size | int) > 0) | ternary(elastic_thread_pool_size, 1) %}
{% if not (elastic_coordination_node | default(false)) | bool %}
@ -140,3 +142,5 @@ indices.recovery.max_bytes_per_sec: {{ elasticserch_interface_speed }}mb
# https://www.elastic.co/guide/en/elasticsearch/reference/6.3/monitoring-settings.html
xpack.monitoring.collection.enabled: true
xpack.monitoring.collection.interval: 30s
# Set to true to enable machine learning on the node.
xpack.ml.enabled: false

View File

@ -4,3 +4,11 @@
ExecStart=
# This runs our ExecStart as an override.
ExecStart=/usr/share/elasticsearch/bin/elasticsearch -p ${PID_DIR}/elasticsearch.pid
{% if elastic_memory_lock | bool %}
# Limit memory usage
LimitMEMLOCK=infinity
{% endif %}
# Number of File Descriptors
LimitNOFILE=131070

View File

@ -128,7 +128,7 @@ output.logstash:
# The maximum number of events to bulk in a single Logstash request. The
# default is the number of cores multiplied by the number of threads,
# the resultant is then multiplied again by 256 which results in a the defined
# the resultant is then multiplied again by 128 which results in the defined
# bulk max size. If the Beat sends single events, the events are collected
# into batches. If the Beat publishes a large batch of events (larger than
# the value specified by bulk_max_size), the batch is split. Specifying a
@ -139,7 +139,7 @@ output.logstash:
# less than or equal to 0 disables the splitting of batches. When splitting
# is disabled, the queue decides on the number of events to be contained in a
# batch.
bulk_max_size: {{ (processors | int) * 256 }}
bulk_max_size: {{ (processors | int) * 128 }}
{% if named_index is defined %}
# Optional index name. The default index name is set to {{ named_index }}

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
---
# Option to define third memory
q_mem: "{{ (ansible_memtotal_mb | int) // 3 }}"
# Option to define quarter memory
q_mem: "{{ (ansible_memtotal_mb | int) // 4 }}"
# Option to define half memory
h_mem: "{{ (ansible_memtotal_mb | int) // 2 }}"
@ -12,7 +12,8 @@ apm_port: 8200
elastic_port: 9200
elastic_hap_port: 9201
logstash_beat_input_port: 5044
logstash_syslog_input_port: 1514
logstash_syslog_input_port: 5140
logstash_syslog_input_mode: udp
kibana_port: 5601
kibana_nginx_port: 81