diff --git a/doc/source/misc.rst b/doc/source/misc.rst index 5fae5f9a4c..bb856d2fc4 100644 --- a/doc/source/misc.rst +++ b/doc/source/misc.rst @@ -128,3 +128,9 @@ Swift3 :members: :show-inheritance: +StaticWeb +========= + +.. automodule:: swift.common.middleware.staticweb + :members: + :show-inheritance: diff --git a/etc/proxy-server.conf-sample b/etc/proxy-server.conf-sample index 457aa42cf3..07f9d496f3 100644 --- a/etc/proxy-server.conf-sample +++ b/etc/proxy-server.conf-sample @@ -150,3 +150,10 @@ use = egg:swift#cname_lookup # set log_headers = False # storage_domain = example.com # lookup_depth = 1 + +# Note: Put staticweb just after your auth filter(s) in the pipeline +[filter:staticweb] +use = egg:swift#staticweb +# Seconds to cache container x-container-meta-index, x-container-meta-error, +# and x-container-meta-listing-css header values. +# cache_timeout = 300 diff --git a/swift/common/middleware/staticweb.py b/swift/common/middleware/staticweb.py index 319d6a28f4..10311b7d6a 100644 --- a/swift/common/middleware/staticweb.py +++ b/swift/common/middleware/staticweb.py @@ -13,95 +13,209 @@ # See the License for the specific language governing permissions and # limitations under the License. +""" +This StaticWeb WSGI middleware will serve container data as a static web site +with index file and error file resolution and optional file listings. This mode +is normally only active for anonymous requests. If you want to use it with +authenticated requests, set the ``X-Web-Mode: true`` header on the request. + +The ``staticweb`` filter should be added to the pipeline in your +``/etc/swift/proxy-server.conf`` file just after any auth middleware. Also, the +configuration section for the ``staticweb`` middleware itself needs to be +added. For example:: + + [DEFAULT] + ... + + [pipeline:main] + pipeline = healthcheck cache swauth staticweb proxy-server + + ... 
+ + [filter:staticweb] + use = egg:swift#staticweb + # Seconds to cache container x-container-meta-index, + # x-container-meta-error, and x-container-meta-listing-css header values. + # cache_timeout = 300 + +Any publicly readable containers (for example, ``X-Container-Read: .r:*``, see +`acls`_ for more information on this) will be checked for +X-Container-Meta-Index and X-Container-Meta-Error header values:: + + X-Container-Meta-Index <index.name> + X-Container-Meta-Error <error.name.suffix> + +If X-Container-Meta-Index is set, any <index.name> files will be served without +having to specify the <index.name> part. For instance, setting +``X-Container-Meta-Index: index.html`` will be able to serve the object +.../pseudo/path/index.html with just .../pseudo/path or .../pseudo/path/ + +If X-Container-Meta-Error is set, any errors (currently just 401 Unauthorized +and 404 Not Found) will instead serve the .../<status.code><error.name.suffix> +object. For instance, setting ``X-Container-Meta-Error: error.html`` will serve +.../404error.html for requests for paths not found. + +For pseudo paths that have no <index.name>, this middleware will serve HTML +file listings by default. If you don't want to serve such listings, you can +turn this off via the `acls`_ X-Container-Read setting of ``.rnolisting``. For +example, instead of ``X-Container-Read: .r:*`` you would use +``X-Container-Read: .r:*,.rnolisting`` + +If listings are enabled, the listings can have a custom style sheet by setting +the X-Container-Meta-Listing-CSS header. For instance, setting +``X-Container-Meta-Listing-CSS: listing.css`` will make listings link to the +.../listing.css style sheet. If you "view source" in your browser on a listing +page, you will see the well defined document structure that can be styled. + +Example usage of this middleware via ``st``: + + Make the container publicly readable:: + + st post -r '.r:*' container + + You should be able to get objects and do direct container listings now, + though they'll be in the REST API format. 
+ + Set an index file directive:: + + st post -m 'index:index.html' container + + You should be able to hit paths that have an index.html without needing to + type the index.html part and listings will now be HTML. + + Turn off listings:: + + st post -r '.r:*,.rnolisting' container + + Set an error file:: + + st post -m 'error:error.html' container + + Now 401's should load 401error.html, 404's should load 404error.html, etc. + + Turn listings back on:: + + st post -r '.r:*' container + + Enable a custom listing style sheet:: + + st post -m 'listing-css:listing.css' container +""" + + try: import simplejson as json except ImportError: import json import cgi +import os import urllib from webob import Response, Request from webob.exc import HTTPMovedPermanently, HTTPNotFound, HTTPUnauthorized -from swift.common.utils import split_path, TRUE_VALUES +from swift.common.utils import cache_from_env, get_logger, split_path, \ + TRUE_VALUES -# To use: -# Put the staticweb filter just after the auth filter. -# Make the container publicly readable: -# st post -r '.r:*' container -# You should be able to get objects and do direct container listings -# now, though they'll be in the REST API format. -# Set an index file directive: -# st post -m 'index:index.html' container -# You should be able to hit paths that have an index.html without -# needing to type the index.html part and listings will now be HTML. -# Turn off listings: -# st post -r '.r:*,.rnolisting' container -# Set an error file: -# st post -m 'error:error.html' container -# Now 401's should load s 401error.html, 404's should load -# 404error.html, etc. -# -# This mode is normally only active for anonymous requests. If you -# want to use it with authenticated requests, set the X-Web-Mode: -# true header. -# -# TODO: Make new headers instead of using user metadata. -# TODO: Tests. -# TODO: Docs. -# TODO: get_container_info can be memcached. -# TODO: Blueprint. 
- class StaticWeb(object): + """ + The Static Web WSGI middleware filter; serves container data as a static + web site. See `staticweb`_ for an overview. + + :param app: The next WSGI application/filter in the paste.deploy pipeline. + :param conf: The filter configuration dict. + """ def __init__(self, app, conf): + #: The next WSGI application/filter in the paste.deploy pipeline. self.app = app + #: The filter configuration dict. self.conf = conf + #: The logger to use with this filter. + self.logger = get_logger(conf, log_route='staticweb') + #: The seconds to cache the x-container-meta-index, + #: x-container-meta-error, and x-container-listing-css headers for a + #: container. + self.cache_timeout = int(conf.get('cache_timeout', 300)) + # Results from the last call to self._start_response. + self._response_status = None + self._response_headers = None + self._response_exc_info = None + # Results from the last call to self._get_container_info. + self._index = self._error = self._listing_css = None - def start_response(self, status, headers, exc_info=None): - self.response_status = status - self.response_headers = headers - self.response_exc_info = exc_info + def _start_response(self, status, headers, exc_info=None): + """ + Saves response info without sending it to the remote client. + Uses the same semantics as the usual WSGI start_response. + """ + self._response_status = status + self._response_headers = headers + self._response_exc_info = exc_info - def error_response(self, response, env, start_response): - if not self.error: - start_response(self.response_status, self.response_headers, - self.response_exc_info) + def _error_response(self, response, env, start_response): + """ + Sends the error response to the remote client, possibly resolving a + custom error response body based on x-container-meta-error. + + :param response: The error response we should default to sending. + :param env: The original request WSGI environment. 
+ :param start_response: The WSGI start_response hook. + """ + if not self._error: + start_response(self._response_status, self._response_headers, + self._response_exc_info) return response - save_response_status = self.response_status - save_response_headers = self.response_headers - save_response_exc_info = self.response_exc_info + save_response_status = self._response_status + save_response_headers = self._response_headers + save_response_exc_info = self._response_exc_info tmp_env = dict(env) - self.strip_ifs(tmp_env) + self._strip_ifs(tmp_env) tmp_env['PATH_INFO'] = '/%s/%s/%s/%s%s' % (self.version, self.account, - self.container, self.get_status_int(), self.error) + self.container, self._get_status_int(), self._error) tmp_env['REQUEST_METHOD'] = 'GET' - resp = self.app(tmp_env, self.start_response) - if self.get_status_int() // 100 == 2: - start_response(self.response_status, self.response_headers, - self.response_exc_info) + resp = self.app(tmp_env, self._start_response) + if self._get_status_int() // 100 == 2: + start_response(save_response_status, self._response_headers, + self._response_exc_info) return resp start_response(save_response_status, save_response_headers, save_response_exc_info) return response - def get_status_int(self): - return int(self.response_status.split(' ', 1)[0]) + def _get_status_int(self): + """ + Returns the HTTP status int from the last called self._start_response + result. + """ + return int(self._response_status.split(' ', 1)[0]) - def get_header(self, headers, name, default_value=None): - for header, value in headers: - if header.lower() == name: - return value - return default_value - - def strip_ifs(self, env): + def _strip_ifs(self, env): + """ Strips any HTTP_IF_* keys from the env dict. 
""" for key in [k for k in env.keys() if k.startswith('HTTP_IF_')]: del env[key] - def get_container_info(self, env, start_response): - self.index = self.error = None + def _get_container_info(self, env, start_response): + """ + Retrieves x-container-meta-index, x-container-meta-error, and + x-container-meta-listing-css from memcache or from the cluster and + stores the result in memcache and in self._index, self._error, and + self._listing_css. + + :param env: The WSGI environment dict. + :param start_response: The WSGI start_response hook. + """ + self._index = self._error = self._listing_css = None + memcache_client = cache_from_env(env) + if memcache_client: + memcache_key = '/staticweb/%s/%s/%s' % (self.version, self.account, + self.container) + cached_data = memcache_client.get(memcache_key) + if cached_data: + self._index, self._error, self._listing_css = cached_data + return tmp_env = {'REQUEST_METHOD': 'HEAD', 'HTTP_USER_AGENT': 'StaticWeb'} for name in ('swift.cache', 'HTTP_X_CF_TRANS_ID'): if name in env: @@ -110,127 +224,213 @@ class StaticWeb(object): self.container), environ=tmp_env) resp = req.get_response(self.app) if resp.status_int // 100 == 2: - self.index = resp.headers.get('x-container-meta-index', '').strip() - self.error = resp.headers.get('x-container-meta-error', '').strip() + self._index = \ + resp.headers.get('x-container-meta-index', '').strip() + self._listing_css = \ + resp.headers.get('x-container-meta-listing-css', '').strip() + self._error = \ + resp.headers.get('x-container-meta-error', '').strip() + if memcache_client: + memcache_client.set(memcache_key, + (self._index, self._error, self._listing_css), + timeout=self.cache_timeout) - def listing(self, env, start_response, prefix=None): + def _listing(self, env, start_response, prefix=None): + """ + Sends an HTML object listing to the remote client. + + :param env: The original WSGI environment dict. + :param start_response: The original WSGI start_response hook. 
+ :param prefix: Any prefix desired for the container listing. + """ tmp_env = dict(env) - self.strip_ifs(tmp_env) + self._strip_ifs(tmp_env) tmp_env['REQUEST_METHOD'] = 'GET' tmp_env['PATH_INFO'] = \ '/%s/%s/%s' % (self.version, self.account, self.container) tmp_env['QUERY_STRING'] = 'delimiter=/&format=json' if prefix: tmp_env['QUERY_STRING'] += '&prefix=%s' % urllib.quote(prefix) - resp = self.app(tmp_env, self.start_response) - if self.get_status_int() // 100 != 2: - return self.error_response(resp, env, start_response) + resp = self.app(tmp_env, self._start_response) + if self._get_status_int() // 100 != 2: + return self._error_response(resp, env, start_response) listing = json.loads(''.join(resp)) if not listing: - resp = HTTPNotFound()(env, self.start_response) - return self.error_response(resp, env, start_response) + resp = HTTPNotFound()(env, self._start_response) + return self._error_response(resp, env, start_response) headers = {'Content-Type': 'text/html'} - body = 'Listing of%s' \ - '

Listing of %s

\n' % \ - (cgi.escape(env['PATH_INFO']), cgi.escape(env['PATH_INFO'])) + body = '\n' \ + '\n' \ + ' \n' \ + ' Listing of %s\n' % \ + cgi.escape(env['PATH_INFO']) + if self._listing_css: + body += ' \n' % \ + (self.version, self.account, self.container, + urllib.quote(self._listing_css)) + else: + body += ' \n' + body += ' \n' \ + ' \n' \ + '

Listing of %s

\n' \ + ' \n' \ + ' \n' \ + ' \n' \ + ' \n' \ + ' \n' \ + ' \n' % \ + cgi.escape(env['PATH_INFO']) if prefix: - body += '../
' + body += ' \n' \ + ' \n' \ + ' \n' \ + ' \n' \ + ' \n' for item in listing: if 'subdir' in item: subdir = item['subdir'] if prefix: subdir = subdir[len(prefix):] - body += '%s
' % \ + body += ' \n' \ + ' \n' \ + ' \n' \ + ' \n' \ + ' \n' % \ (urllib.quote(subdir), cgi.escape(subdir)) for item in listing: if 'name' in item: name = item['name'] if prefix: name = name[len(prefix):] - body += '%s
' % \ - (urllib.quote(name), cgi.escape(name)) - body += '

\n' + body += ' \n' \ + ' \n' \ + ' \n' \ + ' \n' \ + ' \n' % \ + (' '.join('type-' + cgi.escape(t.lower(), quote=True) + for t in item['content_type'].split('/')), + urllib.quote(name), cgi.escape(name), + self.human_readable(item['bytes']), + cgi.escape(item['last_modified']).split('.')[0]. + replace('T', ' ')) + body += '
NameSizeDate
../  
%s  
%s%s%s
\n' \ + ' \n' \ + '\n' return Response(headers=headers, body=body)(env, start_response) - def handle_container(self, env, start_response): - self.get_container_info(env, start_response) - if not self.index: + def _handle_container(self, env, start_response): + """ + Handles a possible static web request for a container. + + :param env: The original WSGI environment dict. + :param start_response: The original WSGI start_response hook. + """ + self._get_container_info(env, start_response) + if not self._index: return self.app(env, start_response) if env['PATH_INFO'][-1] != '/': return HTTPMovedPermanently( - location=env['PATH_INFO'] + '/')(env, start_response) + location=(env['PATH_INFO'] + '/'))(env, start_response) tmp_env = dict(env) - tmp_env['PATH_INFO'] += self.index - resp = self.app(tmp_env, self.start_response) - status_int = self.get_status_int() + tmp_env['PATH_INFO'] += self._index + resp = self.app(tmp_env, self._start_response) + status_int = self._get_status_int() if status_int == 404: - return self.listing(env, start_response) - elif self.get_status_int() // 100 not in (2, 3): - return self.error_response(resp, env, start_response) - start_response(self.response_status, self.response_headers, - self.response_exc_info) + return self._listing(env, start_response) + elif self._get_status_int() // 100 not in (2, 3): + return self._error_response(resp, env, start_response) + start_response(self._response_status, self._response_headers, + self._response_exc_info) return resp - def handle_object(self, env, start_response): + def _handle_object(self, env, start_response): + """ + Handles a possible static web request for an object. This object could + resolve into an index or listing request. + + :param env: The original WSGI environment dict. + :param start_response: The original WSGI start_response hook. 
+ """ tmp_env = dict(env) - resp = self.app(tmp_env, self.start_response) - status_int = self.get_status_int() + resp = self.app(tmp_env, self._start_response) + status_int = self._get_status_int() if status_int // 100 in (2, 3): - start_response(self.response_status, self.response_headers, - self.response_exc_info) + start_response(self._response_status, self._response_headers, + self._response_exc_info) return resp if status_int != 404: - return self.error_response(resp, env, start_response) - self.get_container_info(env, start_response) - if not self.index: + return self._error_response(resp, env, start_response) + self._get_container_info(env, start_response) + if not self._index: return self.app(env, start_response) tmp_env = dict(env) if tmp_env['PATH_INFO'][-1] != '/': tmp_env['PATH_INFO'] += '/' - tmp_env['PATH_INFO'] += self.index - resp = self.app(tmp_env, self.start_response) - status_int = self.get_status_int() + tmp_env['PATH_INFO'] += self._index + resp = self.app(tmp_env, self._start_response) + status_int = self._get_status_int() if status_int // 100 in (2, 3): if env['PATH_INFO'][-1] != '/': return HTTPMovedPermanently( location=env['PATH_INFO'] + '/')(env, start_response) - start_response(self.response_status, self.response_headers, - self.response_exc_info) + start_response(self._response_status, self._response_headers, + self._response_exc_info) return resp elif status_int == 404: if env['PATH_INFO'][-1] != '/': tmp_env = dict(env) - self.strip_ifs(tmp_env) + self._strip_ifs(tmp_env) tmp_env['REQUEST_METHOD'] = 'GET' tmp_env['PATH_INFO'] = '/%s/%s/%s' % (self.version, self.account, self.container) tmp_env['QUERY_STRING'] = 'limit=1&format=json&delimiter' \ '=/&limit=1&prefix=%s' % urllib.quote(self.obj + '/') - resp = self.app(tmp_env, self.start_response) - if self.get_status_int() // 100 != 2 or \ + resp = self.app(tmp_env, self._start_response) + if self._get_status_int() // 100 != 2 or \ not json.loads(''.join(resp)): - resp = 
HTTPNotFound()(env, self.start_response) - return self.error_response(resp, env, start_response) + resp = HTTPNotFound()(env, self._start_response) + return self._error_response(resp, env, start_response) return HTTPMovedPermanently(location=env['PATH_INFO'] + '/')(env, start_response) - return self.listing(env, start_response, self.obj) + return self._listing(env, start_response, self.obj) def __call__(self, env, start_response): + """ + Main hook into the WSGI paste.deploy filter/app pipeline. + + :param env: The WSGI environment dict. + :param start_response: The WSGI start_response hook. + """ + (self.version, self.account, self.container, self.obj) = \ + split_path(env['PATH_INFO'], 2, 4, True) + memcache_client = cache_from_env(env) + if memcache_client: + if env['REQUEST_METHOD'] in ('PUT', 'POST'): + if not self.obj and self.container: + memcache_key = '/staticweb/%s/%s/%s' % \ + (self.version, self.account, self.container) + memcache_client.delete(memcache_key) + return self.app(env, start_response) if env['REQUEST_METHOD'] not in ('HEAD', 'GET') or \ (env.get('REMOTE_USER') and not env.get('HTTP_X_WEB_MODE', '') in TRUE_VALUES): return self.app(env, start_response) - (self.version, self.account, self.container, self.obj) = \ - split_path(env['PATH_INFO'], 2, 4, True) if self.obj: - return self.handle_object(env, start_response) + return self._handle_object(env, start_response) elif self.container: - return self.handle_container(env, start_response) + return self._handle_container(env, start_response) return self.app(env, start_response) def filter_factory(global_conf, **local_conf): - """Returns a WSGI filter app for use with paste.deploy.""" + """ Returns a Static Web WSGI filter for use with paste.deploy. 
""" conf = global_conf.copy() conf.update(local_conf) diff --git a/swift/common/utils.py b/swift/common/utils.py index c867a55821..416a482817 100644 --- a/swift/common/utils.py +++ b/swift/common/utils.py @@ -17,6 +17,7 @@ import errno import fcntl +import math import os import pwd import signal @@ -969,3 +970,14 @@ def urlparse(url): :param url: URL to parse. """ return ModifiedParseResult(*stdlib_urlparse(url)) + + +def human_readable(self, n): + """ + Returns the number in a human readable format; for example 1000000 = "1m". + Idea from: http://stackoverflow.com/questions/3154460/ + """ + millnames = ['', 'k', 'm', 'g', 't', 'p', 'e'] + millidx = max(0, min(len(millnames) - 1, + int(math.floor(math.log10(abs(n)) / 3.0)))) + return '%.0f%s' % (n / 10 ** (3 * millidx), millnames[millidx])