From fcc4dd4995dd990d7f9d0e2b52604346fd15bb33 Mon Sep 17 00:00:00 2001
From: Sean Dague
Date: Mon, 15 Jul 2013 17:25:32 -0400
Subject: [PATCH] add utility to htmlify screen logs for devstack runs

this adds a wsgi application which processes all the txt.gz log
files and htmlifies them on the fly before sending them to the
user. It uses content negotiation to only do this if the client
says it Accepts text/html, otherwise it just dumps them back out
as text/plain.

In either case the content is compressed with mod_deflate, which
provides a 15x-20x bandwidth savings vs. if we left it uncompressed
on the wire.

Should get more testing before general deployment, but this is
getting close to the approach I think we want to use.

This enables the functionality on a new logs-dev.openstack.org
vhost so that we can do live testing that this works before
switching over the main site to it.

Change-Id: I12a06c4b55be05fb8060b76f5aea9ebc801c0537
---
 .../files/logs/htmlify-screen-log.py          | 125 ++++++++++++++++++
 modules/openstack_project/manifests/static.pp |  17 +++
 .../templates/logs-dev.vhost.erb              |  47 +++++++
 3 files changed, 189 insertions(+)
 create mode 100755 modules/openstack_project/files/logs/htmlify-screen-log.py
 create mode 100644 modules/openstack_project/templates/logs-dev.vhost.erb

diff --git a/modules/openstack_project/files/logs/htmlify-screen-log.py b/modules/openstack_project/files/logs/htmlify-screen-log.py
new file mode 100755
index 0000000000..c1925bab26
--- /dev/null
+++ b/modules/openstack_project/files/logs/htmlify-screen-log.py
@@ -0,0 +1,125 @@
+#!/usr/bin/python
+#
+# Copyright (c) 2013 IBM Corp.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+
+import cgi
+import fileinput
+import re
+import sys
+import wsgiref.util
+
+
+# Timestamp at the start of a log line, with optional milliseconds.
+DATEFMT = r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(\.\d{3})?'
+# Severities we know how to colorize; each one is also used as a css class.
+STATUSFMT = '(DEBUG|INFO|WARN|ERROR|TRACE|AUDIT)'
+# NOTE(review): the named groups were lost when this patch was extracted.
+# 'status' is required by color_by_sev(); 'date' and 'pid' are presumed
+# names for the timestamp and the optional numeric field - confirm
+# against upstream.
+LOGMATCH = (r'(?P<date>%s)(?P<pid> \d+)? (?P<status>%s)'
+            % (DATEFMT, STATUSFMT))
+
+
+def _html_close():
+    """Return the markup closing what _css_preamble() opened."""
+    return ("</pre></body></html>\n")
+
+
+# NOTE(review): the html/css literal below was stripped when this patch
+# was extracted; it is a reconstruction (one css rule per severity class
+# emitted by color_by_sev, plus the 'a' links emitted by link_timestamp).
+# Confirm the exact rules against upstream.
+def _css_preamble():
+    """Write a valid html start with css that we need.
+
+    The returned text ends inside an open ``<pre>`` element, so log lines
+    can be appended verbatim until _html_close() is emitted.
+    """
+    return ("""<html>
+<head>
+<style>
+a {color: #000; text-decoration: none}
+a:hover {text-decoration: underline}
+.DEBUG, .DEBUG a {color: #888}
+.ERROR, .ERROR a {color: #c00; font-weight: bold}
+.TRACE, .TRACE a {color: #c60}
+.WARN, .WARN a {color: #D89100; font-weight: bold}
+.INFO, .INFO a {color: #006; font-weight: bold}
+</style>
+</head>
+<body><pre>\n""")
+
+
+def color_by_sev(line):
+    """Wrap a line in a span whose class matches its severity.
+
+    :param line: a single log line (html_filter passes it in already
+                 html-escaped).
+    :returns: the line wrapped in ``<span class='STATUS'>...</span>`` when
+              it matches ``LOGMATCH`` (timestamp followed by a known
+              severity); otherwise the line unchanged.
+    """
+    m = re.match(LOGMATCH, line)
+    if m is None:
+        return line
+    # Two placeholders are needed: the severity becomes the css class and
+    # the whole line becomes the span body. A single "%s" with a
+    # two-element tuple raises TypeError.
+    return "<span class='%s'>%s</span>" % (m.group('status'), line)
+
+
+def escape_html(line):
+    """Return ``line`` with its html-special characters escaped.
+
+    The logs contain raw xml dumps. Left unescaped, that xml would be
+    treated as markup once the log is turned into html, and those parts
+    of the log would become invisible.
+    """
+    escaped = cgi.escape(line)
+    return escaped
+
+
+def link_timestamp(line):
+    """Turn the timestamp at the start of a line into an html anchor.
+
+    The timestamp is used both as the anchor name and as its href target,
+    so every log line can be linked to directly. A severity span opened
+    by color_by_sev() in front of the timestamp is preserved.
+
+    NOTE(review): the pattern and replacement lost their ``<...>`` parts
+    when this patch was extracted; the group names and anchor markup are
+    a reconstruction - confirm against upstream.
+
+    :param line: a single log line, optionally already wrapped in a span.
+    :returns: the line with its leading timestamp wrapped in an ``<a>``
+              tag, or the line unchanged when it has no leading timestamp.
+    """
+    def _anchor(m):
+        # The span group is optional and is None for lines that were not
+        # wrapped; a plain '\g<span>' template would raise "unmatched
+        # group" on older Pythons in that case.
+        span = m.group('span') or ''
+        date = m.group('date')
+        return ('%s<a name="%s" class="date" href="#%s">%s</a>'
+                % (span, date, date, date))
+
+    return re.sub(r'^(?P<span><span[^>]*>)?(?P<date>%s)' % DATEFMT,
+                  _anchor,
+                  line)
+
+
+def passthrough_filter(fname):
+    """Generator yielding the lines of a log file unmodified.
+
+    :param fname: path of the log file; compressed files are opened
+                  through ``fileinput.hook_compressed``.
+    """
+    # Use a private FileInput rather than fileinput.input(): the latter
+    # keeps module-global state and raises "input() already active" when
+    # an earlier stream was abandoned before being exhausted (e.g. the
+    # client disconnected mid-response).
+    stream = fileinput.FileInput(fname, openhook=fileinput.hook_compressed)
+    try:
+        for line in stream:
+            yield line
+    finally:
+        # Runs on exhaustion and when the generator is closed early.
+        stream.close()
+
+
+def html_filter(fname):
+    """Generator to read logs and output html in a stream.
+
+    Yields the html/css preamble, then each line of ``fname`` escaped,
+    wrapped by severity and with its timestamp linked, then the closing
+    markup. Producing a stream lets us return data quickly to the user
+    and use minimal memory in the process.
+
+    :param fname: path of the log file; compressed files are opened
+                  through ``fileinput.hook_compressed``.
+    """
+    yield _css_preamble()
+    # Use a private FileInput rather than fileinput.input(): the latter
+    # keeps module-global state and raises "input() already active" when
+    # an earlier stream was abandoned before being exhausted.
+    stream = fileinput.FileInput(fname, openhook=fileinput.hook_compressed)
+    try:
+        for line in stream:
+            # Escape first, so the markup added afterwards stays intact.
+            newline = escape_html(line)
+            newline = color_by_sev(newline)
+            newline = link_timestamp(newline)
+            yield newline
+    finally:
+        # Runs on exhaustion and when the generator is closed early.
+        stream.close()
+    yield _html_close()
+
+
+def htmlify_stdin():
+    """Htmlify the default ``fileinput`` input and write it to stdout.
+
+    Writes the css preamble, then every input line escaped, colored by
+    severity and with its timestamp linked, then the closing markup.
+    """
+    emit = sys.stdout.write
+    emit(_css_preamble())
+    for raw in fileinput.input():
+        emit(link_timestamp(color_by_sev(escape_html(raw))))
+    emit(_html_close())
+
+
+def application(environ, start_response):
+    """WSGI entry point: stream a log file as html or as plain text.
+
+    The part of the request uri after ``htmlify/`` names a file below
+    /srv/static/logs. When the client's Accept header contains text/html
+    the file is streamed through html_filter(), otherwise it is passed
+    through unmodified as text/plain.
+
+    :param environ: the WSGI environment of the request.
+    :param start_response: the WSGI start_response callable.
+    :returns: an iterable producing the response body. A 404 response is
+              returned when the uri has no ``htmlify/`` part, when the
+              path would escape the logs directory, or when the file does
+              not exist.
+    """
+    import os.path
+
+    root = "/srv/static/logs"
+    # Leave the query string out: it is not part of the file name.
+    path = wsgiref.util.request_uri(environ, include_query=False)
+    match = re.search('htmlify/(.*)', path)
+    fname = None
+    if match:
+        # Collapse any '..' components, then make sure the result is
+        # still below the logs directory, so a request cannot read
+        # arbitrary files.
+        candidate = os.path.normpath("%s/%s" % (root, match.group(1)))
+        if candidate.startswith(root + "/") and os.path.isfile(candidate):
+            fname = candidate
+    if fname is None:
+        start_response('404 Not Found', [('Content-type', 'text/plain')])
+        return [b'Not Found\n']
+
+    if 'text/html' in environ.get('HTTP_ACCEPT', ''):
+        content_type, body = 'text/html', html_filter(fname)
+    else:
+        content_type, body = 'text/plain', passthrough_filter(fname)
+    start_response('200 OK', [('Content-type', content_type)])
+    return body
+
+
+# For development purposes: running this file directly (instead of
+# through WSGI) htmlifies the fileinput default input to stdout, which
+# makes it easy to test the filter output.
+if __name__ == "__main__":
+    htmlify_stdin()
diff --git a/modules/openstack_project/manifests/static.pp b/modules/openstack_project/manifests/static.pp
index 2aeebd3e1f..ea54c4173c 100644
--- a/modules/openstack_project/manifests/static.pp
+++ b/modules/openstack_project/manifests/static.pp
@@ -21,6 +21,7 @@ class openstack_project::static (
   }
 
   include apache
+  # mod_wsgi runs the htmlify-screen-log.py application that the logs-dev
+  # vhost template maps with WSGIScriptAlias.
+  include apache::mod::wsgi
 
   a2mod { 'rewrite':
     ensure => present,
@@ -81,6 +82,14 @@ class openstack_project::static (
     template => 'openstack_project/logs.vhost.erb',
   }
 
+  # Development vhost used to live-test on-the-fly htmlification of the
+  # logs under /srv/static/logs before switching the main logs site over.
+  apache::vhost { 'logs-dev.openstack.org':
+    port     => 80,
+    priority => '51',
+    docroot  => '/srv/static/logs',
+    require  => File['/srv/static/logs'],
+    template => 'openstack_project/logs-dev.vhost.erb',
+  }
+
   file { '/srv/static/logs':
     ensure  => directory,
     owner   => 'jenkins',
@@ -97,6 +106,14 @@ class openstack_project::static (
     require => File['/srv/static/logs'],
   }
 
+  # WSGI script that htmlifies log files; the logs-dev vhost template
+  # points its WSGIScriptAlias at this path.
+  file { '/usr/local/bin/htmlify-screen-log.py':
+    ensure  => present,
+    owner   => 'root',
+    group   => 'root',
+    mode    => '0755',
+    source  => 'puppet:///modules/openstack_project/logs/htmlify-screen-log.py',
+  }
+
   file { '/srv/static/logs/help':
     ensure  => directory,
     recurse => true,
diff --git a/modules/openstack_project/templates/logs-dev.vhost.erb b/modules/openstack_project/templates/logs-dev.vhost.erb
new file mode 100644
index 0000000000..606c5ca5e8
--- /dev/null
+++ b/modules/openstack_project/templates/logs-dev.vhost.erb
@@ -0,0 +1,47 @@
+# -*- apache -*-
+# ************************************
+# Managed by Puppet
+# ************************************
+
+NameVirtualHost <%= vhost_name %>:<%= port %>
+<VirtualHost <%= vhost_name %>:<%= port %>>
+  ServerName <%= srvname %>
+<% if serveraliases.is_a? Array -%>
+<% serveraliases.each do |name| -%><%= " ServerAlias #{name}\n" %><% end -%>
+<% elsif serveraliases != '' -%>
+<%= " ServerAlias #{serveraliases}" %>
+<% end -%>
+  DocumentRoot <%= docroot %>
+
+  RewriteEngine On
+  # rewrite all txt.gz files to map to our internal htmlify wsgi app
+  RewriteRule ^/(.*\.txt\.gz)$ /htmlify/$1 [QSA,L,PT]
+  WSGIScriptAlias /htmlify /usr/local/bin/htmlify-screen-log.py
+  # use Apache to compress the results afterwards, to save on the wire
+  # it's approx 18x savings of wire traffic to compress. We need to
+  # compress by content types that htmlify can produce
+  AddOutputFilterByType DEFLATE text/plain text/html
+
+  # NOTE(review): the section tags in this vhost were stripped when the
+  # patch was extracted and have been restored; the FilesMatch pattern
+  # below is a reconstruction - confirm against upstream.
+  <FilesMatch \.html\.gz$>
+    ForceType text/html
+    AddDefaultCharset UTF-8
+    AddEncoding x-gzip gz
+  </FilesMatch>
+  <Directory <%= docroot %>>
+    Options <%= options %>
+    AllowOverride None
+    Order allow,deny
+    allow from all
+  </Directory>
+  <Directory <%= docroot %>/*/*/*/gate-tempest-devstack*/*>
+     ReadmeName /help/tempest-overview.html
+  </Directory>
+  <Directory <%= docroot %>/*/*/*/gate-tempest-devstack*/*/logs/>
+     ReadmeName /help/tempest-logs.html
+  </Directory>
+
+  ErrorLog /var/log/apache2/<%= name %>_error.log
+  LogLevel warn
+  CustomLog /var/log/apache2/<%= name %>_access.log combined
+  ServerSignature Off
+</VirtualHost>