From 1f07114346d4bb9bb1a016c8eede38480ab1a60e Mon Sep 17 00:00:00 2001
From: Clark Boylan
Date: Wed, 16 Oct 2013 11:34:47 -0700
Subject: [PATCH] Upgrade Logstash to 1.2.1.

* modules/logstash/manifests/init.pp: Download and install Logstash
  1.2.1.
* modules/openstack_project/files/logstash/log-gearman-client.py:
  Logstash 1.2.1 comes with a new schema. Update the job data sent to
  log push workers to better accommodate the new schema.
* modules/openstack_project/files/logstash/log-gearman-worker.py: Push
  Logstash 1.2.1 schema-compliant JSON to the Logstash TCP input.
* modules/openstack_project/templates/logstash/indexer.conf.erb:
  Logstash 1.2.1 comes with a new schema and many input and filter
  changes. Use newly supported features such as conditionals to keep
  the config up to date.
* modules/kibana/templates/config.rb.erb: Change the default field for
  Kibana to 'message'. It was '@message', which is deprecated in the
  new Logstash schema.

Change-Id: Id19fc05bcce8d42c5c0cf33df3da7e95f5794107
---
 modules/kibana/templates/config.rb.erb       |   8 +-
 modules/logstash/manifests/init.pp           |  10 +-
 .../files/logstash/log-gearman-client.py     |   4 +-
 .../files/logstash/log-gearman-worker.py     |  13 +-
 .../templates/logstash/indexer.conf.erb      | 204 ++++++++----------
 5 files changed, 108 insertions(+), 131 deletions(-)

diff --git a/modules/kibana/templates/config.rb.erb b/modules/kibana/templates/config.rb.erb
index b59843e28a..2917ca98aa 100755
--- a/modules/kibana/templates/config.rb.erb
+++ b/modules/kibana/templates/config.rb.erb
@@ -40,7 +40,7 @@ module KibanaConfig
 
   # Change which fields are shown by default. Must be set as an array
   # Default_fields = ['@fields.vhost','@fields.response','@fields.request']
-  Default_fields = ['@message']
+  Default_fields = ['message']
 
   # If set to true, Kibana will use the Highlight feature of Elasticsearch to
   # display highlighted search results
@@ -49,7 +49,7 @@ module KibanaConfig
   # A field needs to be specified for the highlight feature. By default,
   # Elasticsearch doesn't allow highlighting on _all because the field has to
   # be either stored or part of the _source field.
-  Highlighted_field = "@message"
+  Highlighted_field = "message"
 
   # Make URLs clickable in detailed view
   Clickable_URLs = true
@@ -122,10 +122,10 @@ module KibanaConfig
   # field called _all that is searched when no field is specified.
   # Dropping _all can reduce index size significantly. If you do that
   # you'll need to change primary_field to be '@message'
-  Primary_field = '@message'
+  Primary_field = 'message'
 
   # Default Elastic Search index to query
-  Default_index = '@message'
+  Default_index = 'message'
 
   # TODO: This isn't functional yet
   # Prevent wildcard search terms which result in extremely slow queries
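Note: the Kibana change above follows the Logstash 1.2 schema overhaul,
which flattens events: the sub-fields of '@fields' move to the top level,
and '@message'/'@tags' become 'message'/'tags' (only '@timestamp' and
'@version' keep the '@' prefix). A rough sketch of one event in both
shapes (field names other than the reserved ones, and all values, are
illustrative):

    # Logstash 1.1.x event (old nested schema)
    old_event = {
        "@message": "2013-10-16 11:34:47.123 | example log line",
        "@tags": ["console.html"],
        "@fields": {"build_status": "SUCCESS"},
    }

    # Logstash 1.2.x event (new flat schema)
    new_event = {
        "message": "2013-10-16 11:34:47.123 | example log line",
        "tags": ["console.html"],
        "build_status": "SUCCESS",
    }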
diff --git a/modules/logstash/manifests/init.pp b/modules/logstash/manifests/init.pp
index 9496b92e7c..08513299ad 100644
--- a/modules/logstash/manifests/init.pp
+++ b/modules/logstash/manifests/init.pp
@@ -38,13 +38,13 @@ class logstash {
   }
 
   exec { 'get_logstash_jar':
-    command => 'wget http://logstash.objects.dreamhost.com/release/logstash-1.1.12-monolithic.jar -O /opt/logstash/logstash-1.1.12-monolithic.jar',
+    command => 'wget https://download.elasticsearch.org/logstash/logstash/logstash-1.2.1-flatjar.jar -O /opt/logstash/logstash-1.2.1-flatjar.jar',
     path    => '/bin:/usr/bin',
-    creates => '/opt/logstash/logstash-1.1.12-monolithic.jar',
+    creates => '/opt/logstash/logstash-1.2.1-flatjar.jar',
     require => File['/opt/logstash'],
   }
 
-  file { '/opt/logstash/logstash-1.1.12-monolithic.jar':
+  file { '/opt/logstash/logstash-1.2.1-flatjar.jar':
     ensure => present,
     owner  => 'logstash',
     group  => 'logstash',
@@ -57,8 +57,8 @@ class logstash {
 
   file { '/opt/logstash/logstash.jar':
     ensure  => link,
-    target  => '/opt/logstash/logstash-1.1.12-monolithic.jar',
-    require => File['/opt/logstash/logstash-1.1.12-monolithic.jar'],
+    target  => '/opt/logstash/logstash-1.2.1-flatjar.jar',
+    require => File['/opt/logstash/logstash-1.2.1-flatjar.jar'],
   }
 
   file { '/var/log/logstash':
diff --git a/modules/openstack_project/files/logstash/log-gearman-client.py b/modules/openstack_project/files/logstash/log-gearman-client.py
index 48ca04ce88..33bf208ab0 100644
--- a/modules/openstack_project/files/logstash/log-gearman-client.py
+++ b/modules/openstack_project/files/logstash/log-gearman-client.py
@@ -106,8 +106,8 @@ class EventProcessor(threading.Thread):
                 os.path.join(log_dir, fileopts['name'])
             fields["log_url"] = source_url
         out_event = {}
-        out_event["@fields"] = fields
-        out_event["@tags"] = [fileopts['name']] + fileopts.get('tags', [])
+        out_event["fields"] = fields
+        out_event["tags"] = [fileopts['name']] + fileopts.get('tags', [])
         return source_url, out_event
 
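With the client change above, the job data handed to gearman is keyed
'fields'/'tags' instead of '@fields'/'@tags'. A minimal sketch of the
resulting event for one file (names and values are illustrative):

    fileopts = {"name": "console.html", "tags": ["console"]}
    fields = {"build_status": "SUCCESS"}

    out_event = {
        "fields": fields,
        "tags": [fileopts["name"]] + fileopts.get("tags", []),
    }
    # out_event == {"fields": {"build_status": "SUCCESS"},
    #               "tags": ["console.html", "console"]}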
diff --git a/modules/openstack_project/files/logstash/log-gearman-worker.py b/modules/openstack_project/files/logstash/log-gearman-worker.py
index a0f43d0f56..297c29ba68 100644
--- a/modules/openstack_project/files/logstash/log-gearman-worker.py
+++ b/modules/openstack_project/files/logstash/log-gearman-worker.py
@@ -69,19 +69,20 @@ class LogRetriever(threading.Thread):
             retry = arguments['retry']
             event = arguments['event']
             logging.debug("Handling event: " + json.dumps(event))
-            fields = event['@fields']
-            tags = event['@tags']
+            fields = event.get('fields') or event.get('@fields')
+            tags = event.get('tags') or event.get('@tags')
             if fields['build_status'] != 'ABORTED':
                 # Handle events ignoring aborted builds. These builds are
                 # discarded by zuul.
                 log_lines = self._retrieve_log(source_url, retry)
                 logging.debug("Pushing " + str(len(log_lines)) + " log lines.")
+                base_event = {}
+                base_event.update(fields)
+                base_event["tags"] = tags
                 for line in log_lines:
-                    out_event = {}
-                    out_event["@fields"] = fields
-                    out_event["@tags"] = tags
-                    out_event["event_message"] = line
+                    out_event = base_event.copy()
+                    out_event["message"] = line
                     self.logq.put(out_event)
             job.sendWorkComplete()
         except Exception as e:
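The worker now emits flat, schema-compliant events with a top-level
'message'. The consumer that drains logq is outside this hunk; a
minimal sketch of the push side, assuming one JSON document per line on
the TCP input declared in the indexer config below (the function name
is hypothetical, host and port are taken from that config):

    import json
    import socket

    def push_events(events, host="localhost", port=9999):
        # The indexer's tcp input uses a line codec plus a json filter,
        # so each event must arrive as a single JSON document per line.
        sock = socket.create_connection((host, port))
        try:
            for event in events:
                sock.sendall(json.dumps(event).encode("utf-8") + b"\n")
        finally:
            sock.close()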
- pattern => [ "(?m)^\(\b%{NOTSPACE:module}\b\):%{SPACE}%{DATESTAMP:logdate}%{SPACE}(?AUDIT|CRITICAL|DEBUG|INFO|TRACE|WARNING|ERROR)%{SPACE}%{GREEDYDATA:logmessage}" ] - add_field => [ "received_at", "%{@timestamp}" ] - } - grok { - type => "jenkins" - tags => ["apachecombined"] - pattern => [ "%{COMBINEDAPACHELOG}" ] - add_field => [ "received_at", "%{@timestamp}", "logdate", "%{timestamp}", "logmessage", "%{verb} %{request} %{response}" ] - } - grok { - type => "jenkins" - tags => ["syslog"] - # Syslog grok filter adapted from - # http://cookbook.logstash.net/recipes/syslog-pri/syslog.conf - pattern => [ "%{SYSLOGTIMESTAMP:logdate}%{SPACE}%{SYSLOGHOST:syslog_host}?%{SPACE}%{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?:? %{GREEDYDATA:logmessage}" ] - add_field => [ "received_at", "%{@timestamp}" ] - } - # Remove DEBUG logs to reduce the amount of data that needs to be processed. - grep { - type => "jenkins" - negate => true - match => [ "loglevel", "DEBUG" ] + if "console.html" in [tags] { + if [message] == "
" or [message] == "
" { + drop {} + } + multiline { + negate => true + pattern => "^%{TIMESTAMP_ISO8601} \|" + what => "previous" + stream_identity => "%{host}.%{filename}" + } + grok { + # Do multiline matching as the above mutliline filter may add newlines + # to the log messages. + match => { "message" => "(?m)^%{TIMESTAMP_ISO8601:logdate} \| %{GREEDYDATA:logmessage}" } + add_field => { "received_at" => "%{@timestamp}" } + } + } else if "oslofmt" in [tags] { + multiline { + negate => true + pattern => "^%{TIMESTAMP_ISO8601} " + what => "previous" + stream_identity => "%{host}.%{filename}" + } + multiline { + negate => false + pattern => "^%{TIMESTAMP_ISO8601}%{SPACE}%{NUMBER}?%{SPACE}?TRACE" + what => "previous" + stream_identity => "%{host}.%{filename}" + } + grok { + # Do multiline matching as the above mutliline filter may add newlines + # to the log messages. + # TODO move the LOGLEVELs into a proper grok pattern. + match => { "message" => "(?m)^%{TIMESTAMP_ISO8601:logdate}%{SPACE}%{NUMBER:pid}?%{SPACE}?(?AUDIT|CRITICAL|DEBUG|INFO|TRACE|WARNING|ERROR) \[?\b%{NOTSPACE:module}\b\]?%{SPACE}?%{GREEDYDATA:logmessage}?" } + add_field => { "received_at" => "%{@timestamp}" } + } + } else if "keystonefmt" in [tags] { + if [message] == "" { + drop {} + } + multiline { + negate => true + pattern => "^\(\b%{NOTSPACE}\b\):" + what => "previous" + stream_identity => "%{host}.%{filename}" + } + grok { + # Do multiline matching as the above mutliline filter may add newlines + # to the log messages. + # TODO move the LOGLEVELs into a proper grok pattern. + match => { "message" => "(?m)^\(\b%{NOTSPACE:module}\b\):%{SPACE}%{TIMESTAMP_ISO8601:logdate}%{SPACE}(?AUDIT|CRITICAL|DEBUG|INFO|TRACE|WARNING|ERROR)%{SPACE}%{GREEDYDATA:logmessage}" } + add_field => { "received_at" => "%{@timestamp}" } + } + } else if "apachecombined" in [tags] { + grok { + match => { "message" => "%{COMBINEDAPACHELOG}" } + add_field => { "received_at" => "%{@timestamp}" } + add_field => { "logdate" => "%{timestamp}" } + add_field => { "logmessage" => "%{verb} %{request} %{response}" } + } + } else if "syslog" in [tags] { + grok { + # Syslog grok filter adapted from + # http://cookbook.logstash.net/recipes/syslog-pri/syslog.conf + match => { "message" => "%{SYSLOGTIMESTAMP:logdate}%{SPACE}%{SYSLOGHOST:syslog_host}?%{SPACE}%{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?:? %{GREEDYDATA:logmessage}" } + add_field => { "received_at" => "%{@timestamp}" } + } } # Filters below here should be consistent for all Jenkins log formats. - date { - type => "jenkins" - exclude_tags => "_grokparsefailure" - match => [ "logdate", "yyyy-MM-dd HH:mm:ss.SSS", "yyyy-MM-dd HH:mm:ss,SSS", "yyyy-MM-dd HH:mm:ss", "MMM d HH:mm:ss", "MMM dd HH:mm:ss", "dd/MMM/yyyy:HH:mm:ss Z" ] + # Remove DEBUG logs to reduce the amount of data that needs to be processed. + if [loglevel] == "DEBUG" { + drop {} } - mutate { - type => "jenkins" - exclude_tags => "_grokparsefailure" - replace => [ "@message", "%{logmessage}" ] - } - mutate { - type => "jenkins" - exclude_tags => "_grokparsefailure" - remove => [ "logdate", "logmessage", "event_message" ] + + if ! ("_grokparsefailure" in [tags]) { + date { + match => [ "logdate", "yyyy-MM-dd HH:mm:ss.SSS", "yyyy-MM-dd HH:mm:ss,SSS", "yyyy-MM-dd HH:mm:ss", "MMM d HH:mm:ss", "MMM dd HH:mm:ss", "dd/MMM/yyyy:HH:mm:ss Z" ] + timezone => "UTC" + } + mutate { + replace => { "message" => "%{logmessage}" } + } + mutate { + remove_field => [ "logdate", "logmessage" ] + } } }