updated htmlifier for safety and features

Ensure that the path of the file we're going to open is actually inside our log root; if not, return a 400. Escape (URL-quote) the dates matched by the regex so they are easier to pass around, since unencoded spaces kind of suck for that. Catch the possible IOError from opening a file that doesn't exist, and be nice and give people a 404 in that case. Move the content negotiation into a function, for readability. Change-Id: I334e1ac0419cd140c6af35c78634a2d7c05dcf01
2013-07-24 20:04:26 -04:00 · 2013-07-24 20:04:26 -04:00 · c7a2bf51b7
commit c7a2bf51b7
parent fcc4dd4995
2 changed files with 59 additions and 17 deletions
--- a/modules/openstack_project/files/logs/htmlify-screen-log.py
+++ b/modules/openstack_project/files/logs/htmlify-screen-log.py
@ -17,8 +17,10 @@
 import cgi
 import fileinput
 import os.path
 import re
 import sys
 import urllib
 import wsgiref.util
@ -67,10 +69,15 @@ def escape_html(line):
 def link_timestamp(line):
-    return re.sub('^(?P<span><span[^>]*>)?(?P<date>%s)' % DATEFMT,
+    m = re.match(
-                  ('\g<span><a name="\g<date>" class="date" '
+        '(?P<span><span[^>]*>)?(?P<date>%s)(?P<rest>.*)' % DATEFMT,
-                   'href="#\g<date>">\g<date></a>'),
+        line)
-                  line)
+    if m:
        date = urllib.quote(m.group('date'))
        return "%s<a name='%s' class='date' href='#%s'>%s</a>%s\n" % (
            m.group('span'), date, date, m.group('date'), m.group('rest'))
    else:
        return line
 def passthrough_filter(fname):
@ -104,20 +111,54 @@ def htmlify_stdin():
    out.write(_html_close())
-def application(environ, start_response):
+def safe_path(root, environ):
-    status = '200 OK'
+    """Pull out a safe path from a url.
    Basically we need to ensure that the final computed path
    remains under the root path. If not, we return None to indicate
    that we are very sad.
    """
    path = wsgiref.util.request_uri(environ)
    match = re.search('htmlify/(.*)', path)
-    # TODO(sdague): scrub all .. chars out of the path, for security reasons
+    raw = match.groups(1)[0]
-    fname = "/srv/static/logs/%s" % match.groups(1)[0]
+    newpath = os.path.abspath("%s/%s" % (root, raw))
-    if 'HTTP_ACCEPT' in environ and 'text/html' in environ['HTTP_ACCEPT']:
+    if newpath.find(root) == 0:
-        response_headers = [('Content-type', 'text/html')]
+        return newpath
        start_response(status, response_headers)
        return html_filter(fname)
    else:
        return None
 def should_be_html(environ):
    """Simple content negotiation."""
    return 'HTTP_ACCEPT' in environ and 'text/html' in environ['HTTP_ACCEPT']
 def application(environ, start_response):
    status = '200 OK'
    logpath = safe_path('/srv/static/logs/', environ)
    if not logpath:
        status = '400 Bad Request'
        response_headers = [('Content-type', 'text/plain')]
        start_response(status, response_headers)
-        return passthrough_filter(fname)
+        return ['Invalid file url']
    try:
        if should_be_html(environ):
            response_headers = [('Content-type', 'text/html')]
            generator = html_filter(logpath)
            start_response(status, response_headers)
            return generator
        else:
            response_headers = [('Content-type', 'text/plain')]
            generator = passthrough_filter(logpath)
            start_response(status, response_headers)
            return generator
    except IOError:
        status = "404 Not Found"
        response_headers = [('Content-type', 'text/plain')]
        start_response(status, response_headers)
        return ['File Not Found']
 # for development purposes, makes it easy to test the filter output
--- a/modules/openstack_project/templates/logs-dev.vhost.erb
+++ b/modules/openstack_project/templates/logs-dev.vhost.erb
@ -13,10 +13,6 @@ NameVirtualHost <%= vhost_name %>:<%= port %>
 <% end -%>
  DocumentRoot <%= docroot %>
  RewriteEngine On
  # rewrite all txt.gz files to map to our internal htmlify wsgi app
  RewriteRule ^/(.*\.txt\.gz)$ /htmlify/$1 [QSA,L,PT]
  WSGIScriptAlias /htmlify /usr/local/bin/htmlify-screen-log.py
  # use Apache to compress the results afterwards, to save on the wire
  # it's approx 18x savings of wire traffic to compress. We need to
  # compress by content types that htmlify can produce
@ -40,6 +36,11 @@ NameVirtualHost <%= vhost_name %>:<%= port %>
     ReadmeName /help/tempest-logs.html
  </Directory>
  RewriteEngine On
  # rewrite all txt.gz files to map to our internal htmlify wsgi app
  RewriteRule ^/(.*\.txt\.gz)$ /htmlify/$1 [QSA,L,PT]
  WSGIScriptAlias /htmlify /usr/local/bin/htmlify-screen-log.py
  ErrorLog /var/log/apache2/<%= name %>_error.log
  LogLevel warn
  CustomLog /var/log/apache2/<%= name %>_access.log combined