updated htmlifier for safety and features
Ensure that the path of the file we're going to open is actually inside our log root. If not, return a 400. Escape dates in the regex so they are easier to pass around; unencoded spaces kind of suck for that. Catch the possible IOError from opening a file that doesn't exist, and be nice and give people a 404 in that case. Move the content negotiation into a function, for readability. Change-Id: I334e1ac0419cd140c6af35c78634a2d7c05dcf01
This commit is contained in:
parent
fcc4dd4995
commit
c7a2bf51b7
@ -17,8 +17,10 @@
|
||||
|
||||
import cgi
|
||||
import fileinput
|
||||
import os.path
|
||||
import re
|
||||
import sys
|
||||
import urllib
|
||||
import wsgiref.util
|
||||
|
||||
|
||||
@ -67,10 +69,15 @@ def escape_html(line):
|
||||
|
||||
|
||||
def link_timestamp(line):
|
||||
return re.sub('^(?P<span><span[^>]*>)?(?P<date>%s)' % DATEFMT,
|
||||
('\g<span><a name="\g<date>" class="date" '
|
||||
'href="#\g<date>">\g<date></a>'),
|
||||
line)
|
||||
m = re.match(
|
||||
'(?P<span><span[^>]*>)?(?P<date>%s)(?P<rest>.*)' % DATEFMT,
|
||||
line)
|
||||
if m:
|
||||
date = urllib.quote(m.group('date'))
|
||||
return "%s<a name='%s' class='date' href='#%s'>%s</a>%s\n" % (
|
||||
m.group('span'), date, date, m.group('date'), m.group('rest'))
|
||||
else:
|
||||
return line
|
||||
|
||||
|
||||
def passthrough_filter(fname):
|
||||
@ -104,20 +111,54 @@ def htmlify_stdin():
|
||||
out.write(_html_close())
|
||||
|
||||
|
||||
def application(environ, start_response):
|
||||
status = '200 OK'
|
||||
def safe_path(root, environ):
|
||||
"""Pull out a safe path from a url.
|
||||
|
||||
Basically we need to ensure that the final computed path
|
||||
remains under the root path. If not, we return None to indicate
|
||||
that we are very sad.
|
||||
"""
|
||||
path = wsgiref.util.request_uri(environ)
|
||||
match = re.search('htmlify/(.*)', path)
|
||||
# TODO(sdague): scrub all .. chars out of the path, for security reasons
|
||||
fname = "/srv/static/logs/%s" % match.groups(1)[0]
|
||||
if 'HTTP_ACCEPT' in environ and 'text/html' in environ['HTTP_ACCEPT']:
|
||||
response_headers = [('Content-type', 'text/html')]
|
||||
start_response(status, response_headers)
|
||||
return html_filter(fname)
|
||||
raw = match.groups(1)[0]
|
||||
newpath = os.path.abspath("%s/%s" % (root, raw))
|
||||
if newpath.find(root) == 0:
|
||||
return newpath
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def should_be_html(environ):
|
||||
"""Simple content negotiation."""
|
||||
return 'HTTP_ACCEPT' in environ and 'text/html' in environ['HTTP_ACCEPT']
|
||||
|
||||
|
||||
def application(environ, start_response):
|
||||
status = '200 OK'
|
||||
|
||||
logpath = safe_path('/srv/static/logs/', environ)
|
||||
if not logpath:
|
||||
status = '400 Bad Request'
|
||||
response_headers = [('Content-type', 'text/plain')]
|
||||
start_response(status, response_headers)
|
||||
return passthrough_filter(fname)
|
||||
return ['Invalid file url']
|
||||
|
||||
try:
|
||||
if should_be_html(environ):
|
||||
response_headers = [('Content-type', 'text/html')]
|
||||
generator = html_filter(logpath)
|
||||
start_response(status, response_headers)
|
||||
return generator
|
||||
else:
|
||||
response_headers = [('Content-type', 'text/plain')]
|
||||
generator = passthrough_filter(logpath)
|
||||
start_response(status, response_headers)
|
||||
return generator
|
||||
except IOError:
|
||||
status = "404 Not Found"
|
||||
response_headers = [('Content-type', 'text/plain')]
|
||||
start_response(status, response_headers)
|
||||
return ['File Not Found']
|
||||
|
||||
|
||||
# for development purposes, makes it easy to test the filter output
|
||||
|
@ -13,10 +13,6 @@ NameVirtualHost <%= vhost_name %>:<%= port %>
|
||||
<% end -%>
|
||||
DocumentRoot <%= docroot %>
|
||||
|
||||
RewriteEngine On
|
||||
# rewrite all txt.gz files to map to our internal htmlify wsgi app
|
||||
RewriteRule ^/(.*\.txt\.gz)$ /htmlify/$1 [QSA,L,PT]
|
||||
WSGIScriptAlias /htmlify /usr/local/bin/htmlify-screen-log.py
|
||||
# use Apache to compress the results afterwards, to save on the wire
|
||||
# it's approx 18x savings of wire traffic to compress. We need to
|
||||
# compress by content types that htmlify can produce
|
||||
@ -40,6 +36,11 @@ NameVirtualHost <%= vhost_name %>:<%= port %>
|
||||
ReadmeName /help/tempest-logs.html
|
||||
</Directory>
|
||||
|
||||
RewriteEngine On
|
||||
# rewrite all txt.gz files to map to our internal htmlify wsgi app
|
||||
RewriteRule ^/(.*\.txt\.gz)$ /htmlify/$1 [QSA,L,PT]
|
||||
WSGIScriptAlias /htmlify /usr/local/bin/htmlify-screen-log.py
|
||||
|
||||
ErrorLog /var/log/apache2/<%= name %>_error.log
|
||||
LogLevel warn
|
||||
CustomLog /var/log/apache2/<%= name %>_access.log combined
|
||||
|
Loading…
x
Reference in New Issue
Block a user