updated htmlifier for safety and features

Ensure that the path of the file we're going to open is actually inside our log root. If not, return a 400. Escape dates in the regex so they are easier to pass around; unencoded spaces kind of suck for that. Catch the possible IOError of opening a file that doesn't exist; be nice and give people a 404 in that case. Move the content negotiation into a function, for readability. Change-Id: I334e1ac0419cd140c6af35c78634a2d7c05dcf01
2013-07-24 20:04:26 -04:00 · 2013-07-24 20:04:26 -04:00 · c7a2bf51b7
commit c7a2bf51b7
parent fcc4dd4995
2 changed files with 59 additions and 17 deletions
--- a/modules/openstack_project/files/logs/htmlify-screen-log.py
+++ b/modules/openstack_project/files/logs/htmlify-screen-log.py
@ -17,8 +17,10 @@

 import cgi
 import fileinput
+import os.path
 import re
 import sys
+import urllib
 import wsgiref.util


@ -67,10 +69,15 @@ def escape_html(line):


 def link_timestamp(line):
-    return re.sub('^(?P<span><span[^>]*>)?(?P<date>%s)' % DATEFMT,
-                  ('\g<span><a name="\g<date>" class="date" '
-                   'href="#\g<date>">\g<date></a>'),
-                  line)
+    m = re.match(
+        '(?P<span><span[^>]*>)?(?P<date>%s)(?P<rest>.*)' % DATEFMT,
+        line)
+    if m:
+        date = urllib.quote(m.group('date'))
+        return "%s<a name='%s' class='date' href='#%s'>%s</a>%s\n" % (
+            m.group('span'), date, date, m.group('date'), m.group('rest'))
+    else:
+        return line


 def passthrough_filter(fname):
@ -104,20 +111,54 @@ def htmlify_stdin():
    out.write(_html_close())


-def application(environ, start_response):
-    status = '200 OK'
+def safe_path(root, environ):
+    """Pull out a save path from a url.
+
+    Basically we need to ensure that the final computed path
+    remains under the root path. If not, we return None to indicate
+    that we are very sad.
+    """
    path = wsgiref.util.request_uri(environ)
    match = re.search('htmlify/(.*)', path)
-    # TODO(sdague): scrub all .. chars out of the path, for security reasons
-    fname = "/srv/static/logs/%s" % match.groups(1)[0]
-    if 'HTTP_ACCEPT' in environ and 'text/html' in environ['HTTP_ACCEPT']:
-        response_headers = [('Content-type', 'text/html')]
-        start_response(status, response_headers)
-        return html_filter(fname)
+    raw = match.groups(1)[0]
+    newpath = os.path.abspath("%s/%s" % (root, raw))
+    if newpath.find(root) == 0:
+        return newpath
    else:
+        return None
+
+
+def should_be_html(environ):
+    """Simple content negotiation."""
+    return 'HTTP_ACCEPT' in environ and 'text/html' in environ['HTTP_ACCEPT']
+
+
+def application(environ, start_response):
+    status = '200 OK'
+
+    logpath = safe_path('/srv/static/logs/', environ)
+    if not logpath:
+        status = '400 Bad Request'
        response_headers = [('Content-type', 'text/plain')]
        start_response(status, response_headers)
-        return passthrough_filter(fname)
+        return ['Invalid file url']
+
+    try:
+        if should_be_html(environ):
+            response_headers = [('Content-type', 'text/html')]
+            generator = html_filter(logpath)
+            start_response(status, response_headers)
+            return generator
+        else:
+            response_headers = [('Content-type', 'text/plain')]
+            generator = passthrough_filter(logpath)
+            start_response(status, response_headers)
+            return generator
+    except IOError:
+        status = "404 Not Found"
+        response_headers = [('Content-type', 'text/plain')]
+        start_response(status, response_headers)
+        return ['File Not Found']


 # for development purposes, makes it easy to test the filter output
--- a/modules/openstack_project/templates/logs-dev.vhost.erb
+++ b/modules/openstack_project/templates/logs-dev.vhost.erb
@ -13,10 +13,6 @@ NameVirtualHost <%= vhost_name %>:<%= port %>
 <% end -%>
  DocumentRoot <%= docroot %>

-  RewriteEngine On
-  # rewrite all txt.gz files to map to our internal htmlify wsgi app
-  RewriteRule ^/(.*\.txt\.gz)$ /htmlify/$1 [QSA,L,PT]
-  WSGIScriptAlias /htmlify /usr/local/bin/htmlify-screen-log.py
  # use Apache to compress the results afterwards, to save on the wire
  # it's approx 18x savings of wire traffic to compress. We need to
  # compress by content types that htmlify can produce
@ -40,6 +36,11 @@ NameVirtualHost <%= vhost_name %>:<%= port %>
     ReadmeName /help/tempest-logs.html
  </Directory>

+  RewriteEngine On
+  # rewrite all txt.gz files to map to our internal htmlify wsgi app
+  RewriteRule ^/(.*\.txt\.gz)$ /htmlify/$1 [QSA,L,PT]
+  WSGIScriptAlias /htmlify /usr/local/bin/htmlify-screen-log.py
+
  ErrorLog /var/log/apache2/<%= name %>_error.log
  LogLevel warn
  CustomLog /var/log/apache2/<%= name %>_access.log combined