From a4cc353375264e4bcd8f9aef2f0701ae800e0e73 Mon Sep 17 00:00:00 2001 From: Gilles Biannic Date: Thu, 1 Mar 2018 11:31:12 +0100 Subject: [PATCH] Make log format for requests configurable Add the log_msg_template option in proxy-server.conf and log_format in a/c/o-server.conf. It is a string parsable by Python's format() function. Some fields containing user data might be anonymized by using log_anonymization_method and log_anonymization_salt. Change-Id: I29e30ef45fe3f8a026e7897127ffae08a6a80cd9 --- doc/manpages/proxy-server.conf.5 | 46 +++++ doc/source/logs.rst | 58 +++++-- etc/account-server.conf-sample | 11 ++ etc/container-server.conf-sample | 11 ++ etc/object-server.conf-sample | 11 ++ etc/proxy-server.conf-sample | 11 ++ swift/account/server.py | 4 +- swift/common/base_storage_server.py | 6 +- swift/common/middleware/proxy_logging.py | 149 ++++++++++++---- swift/common/swob.py | 19 +-- swift/common/utils.py | 160 +++++++++++++++++- swift/container/server.py | 5 +- swift/obj/server.py | 4 +- test/unit/account/test_server.py | 14 +- .../common/middleware/test_proxy_logging.py | 42 +++++ test/unit/common/test_swob.py | 5 - test/unit/common/test_utils.py | 103 ++++++++++- test/unit/container/test_server.py | 11 +- test/unit/obj/test_server.py | 37 ++-- 19 files changed, 582 insertions(+), 125 deletions(-) diff --git a/doc/manpages/proxy-server.conf.5 b/doc/manpages/proxy-server.conf.5 index e8007919a7..e9e9a9d4dc 100644 --- a/doc/manpages/proxy-server.conf.5 +++ b/doc/manpages/proxy-server.conf.5 @@ -730,6 +730,52 @@ Note: reveal_sensitive_prefix will not affect the value logged with access_log_h What HTTP methods are allowed for StatsD logging (comma-sep); request methods not in this list will have "BAD_METHOD" for the portion of the metric. Default is "GET,HEAD,POST,PUT,DELETE,COPY,OPTIONS". +.IP \fBlog_anonymization_method\fR +Hashing algorithm for anonymization. Must be one of algorithms supported by Python's hashlib. Default is MD5. +.IP \fBlog_anonymization_salt\fR +Salt added as prefix before hashing the value to anonymize. Default is empty (no salt). +.IP "\fBlog_msg_template\fR" +Template used to format access logs. All words surrounded by curly brackets will be substituted with the appropriate values. + +.RE +.PD 0 +.RS 10 +.IP "Some keywords map to timestamps and can be converted to standard dates formats using the matching transformers: 'datetime', 'asctime' or 'iso8601'." +.IP "Other transformers for timestamps are 's', 'ms', 'us' and 'ns' for seconds, milliseconds, microseconds and nanoseconds." +.IP "Python's strftime directives can also be used as tranformers (a, A, b, B, c, d, H, I, j, m, M, p, S, U, w, W, x, X, y, Y, Z)." +.IP "Some keywords map to user data that could be anonymized by using the transformer 'anonymized'." +.IP "Keywords availables are:" +.PD 0 +.RS 7 +.IP "client_ip (anonymizable)" +.IP "remote_addr (anonymizable)" +.IP "method (request method)" +.IP "path (anonymizable)" +.IP "protocol" +.IP "status_int" +.IP "referer (anonymizable)" +.IP "user_agent (anonymizable)" +.IP "auth_token" +.IP "bytes_recvd (number of bytes received)" +.IP "bytes_sent (number of bytes sent)" +.IP "client_etag (anonymizable)" +.IP "transaction_id" +.IP "headers (anonymizable)" +.IP "request_time (difference between start and end timestamps) +.IP "source" +.IP "log_info" +.IP "start_time (timestamp at the receiving, timestamp)" +.IP "end_time (timestamp at the end of the treatment, timestamp)" +.IP "policy_index" +.IP "account (account name, anonymizable)" +.IP "container (container name, anonymizable)" +.IP "object (object name, anonymizable)" +.PD +.RE + +.IP "Example: '{client_ip.anonymized} {remote_addr.anonymized} {start_time.iso8601} {end_time.H}:{end_time.M} {method} acc:{account} cnt:{container} obj:{object.anonymized}'" +.IP "Default: '{client_ip} {remote_addr} {end_time.datetime} {method} {path} {protocol} {status_int} {referer} {user_agent} {auth_token} {bytes_recvd} {bytes_sent} {client_etag} {transaction_id} {headers} {request_time} {source} {log_info} {start_time} {end_time} {policy_index}'" +.IP "Warning: A bad log message template will raise an error in initialization." .RE .PD diff --git a/doc/source/logs.rst b/doc/source/logs.rst index 9cf9c569f6..54fc308525 100644 --- a/doc/source/logs.rst +++ b/doc/source/logs.rst @@ -26,47 +26,71 @@ Proxy Logs The proxy logs contain the record of all external API requests made to the proxy server. Swift's proxy servers log requests using a custom format -designed to provide robust information and simple processing. The log format -is:: +designed to provide robust information and simple processing. It is possible +to change this format with the ``log_msg_template`` config parameter. +The default log format is:: - client_ip remote_addr datetime request_method request_path protocol - status_int referer user_agent auth_token bytes_recvd bytes_sent - client_etag transaction_id headers request_time source log_info - request_start_time request_end_time policy_index + {client_ip} {remote_addr} {end_time.datetime} {method} {path} {protocol} + {status_int} {referer} {user_agent} {auth_token} {bytes_recvd} + {bytes_sent} {client_etag} {transaction_id} {headers} {request_time} + {source} {log_info} {start_time} {end_time} {policy_index} + +Some keywords, signaled by the (anonymizable) flag, can be anonymized by +using the transformer 'anonymized'. The data are applied the hashing method of +`log_anonymization_method` and an optional salt `log_anonymization_salt`. + +Some keywords, signaled by the (timestamp) flag, can be converted to standard +dates formats using the matching transformers: 'datetime', 'asctime' or +'iso8601'. Other transformers for timestamps are 's', 'ms', 'us' and 'ns' for +seconds, milliseconds, microseconds and nanoseconds. Python's strftime +directives can also be used as tranformers (a, A, b, B, c, d, H, I, j, m, M, p, +S, U, w, W, x, X, y, Y, Z). + +Example {client_ip.anonymized} {remote_addr.anonymized} {start_time.iso8601} + {end_time.H}:{end_time.M} {method} acc:{account} cnt:{container} + obj:{object.anonymized} =================== ========================================================== **Log Field** **Value** ------------------- ---------------------------------------------------------- client_ip Swift's guess at the end-client IP, taken from various - headers in the request. + headers in the request. (anonymizable) remote_addr The IP address of the other end of the TCP connection. -datetime Timestamp of the request, in - day/month/year/hour/minute/second format. -request_method The HTTP verb in the request. -request_path The path portion of the request. + (anonymizable) +end_time Timestamp of the request. (timestamp) +method The HTTP verb in the request. +path The path portion of the request. (anonymizable) protocol The transport protocol used (currently one of http or https). status_int The response code for the request. -referer The value of the HTTP Referer header. -user_agent The value of the HTTP User-Agent header. +referer The value of the HTTP Referer header. (anonymizable) +user_agent The value of the HTTP User-Agent header. (anonymizable) auth_token The value of the auth token. This may be truncated or otherwise obscured. bytes_recvd The number of bytes read from the client for this request. bytes_sent The number of bytes sent to the client in the body of the response. This is how many bytes were yielded to the WSGI server. -client_etag The etag header value given by the client. +client_etag The etag header value given by the client. (anonymizable) transaction_id The transaction id of the request. -headers The headers given in the request. +headers The headers given in the request. (anonymizable) request_time The duration of the request. source The "source" of the request. This may be set for requests that are generated in order to fulfill client requests, e.g. bulk uploads. log_info Various info that may be useful for diagnostics, e.g. the value of any x-delete-at header. -request_start_time High-resolution timestamp from the start of the request. -request_end_time High-resolution timestamp from the end of the request. +start_time High-resolution timestamp from the start of the request. + (timestamp) +end_time High-resolution timestamp from the end of the request. + (timestamp) policy_index The value of the storage policy index. +account The account part extracted from the path of the request. + (anonymizable) +container The container part extracted from the path of the request. + (anonymizable) +object The object part extracted from the path of the request. + (anonymizable) =================== ========================================================== In one log line, all of the above fields are space-separated and url-encoded. diff --git a/etc/account-server.conf-sample b/etc/account-server.conf-sample index 0a3856256b..589f00aaa5 100644 --- a/etc/account-server.conf-sample +++ b/etc/account-server.conf-sample @@ -26,6 +26,17 @@ bind_port = 6202 # set to 0, the default. # log_max_line_length = 0 # +# Hashing algorithm for log anonymization. Must be one of algorithms supported +# by Python's hashlib. +# log_anonymization_method = MD5 +# +# Salt added during log anonymization +# log_anonymization_salt = +# +# Template used to format logs. All words surrounded by curly brackets +# will be substituted with the appropriate values +# log_format = {remote_addr} - - [{time.d}/{time.b}/{time.Y}:{time.H}:{time.M}:{time.S} +0000] "{method} {path}" {status} {content_length} "{referer}" "{txn_id}" "{user_agent}" {trans_time:.4f} "{additional_info}" {pid} {policy_index} +# # comma separated list of functions to call to setup custom log handlers. # functions get passed: conf, name, log_to_console, log_route, fmt, logger, # adapted_logger diff --git a/etc/container-server.conf-sample b/etc/container-server.conf-sample index 5458793a5a..42d9421976 100644 --- a/etc/container-server.conf-sample +++ b/etc/container-server.conf-sample @@ -32,6 +32,17 @@ bind_port = 6201 # set to 0, the default. # log_max_line_length = 0 # +# Hashing algorithm for log anonymization. Must be one of algorithms supported +# by Python's hashlib. +# log_anonymization_method = MD5 +# +# Salt added during log anonymization +# log_anonymization_salt = +# +# Template used to format logs. All words surrounded by curly brackets +# will be substituted with the appropriate values +# log_format = {remote_addr} - - [{time.d}/{time.b}/{time.Y}:{time.H}:{time.M}:{time.S} +0000] "{method} {path}" {status} {content_length} "{referer}" "{txn_id}" "{user_agent}" {trans_time:.4f} "{additional_info}" {pid} {policy_index} +# # comma separated list of functions to call to setup custom log handlers. # functions get passed: conf, name, log_to_console, log_route, fmt, logger, # adapted_logger diff --git a/etc/object-server.conf-sample b/etc/object-server.conf-sample index 2d060fbd9c..c20a33d42c 100644 --- a/etc/object-server.conf-sample +++ b/etc/object-server.conf-sample @@ -34,6 +34,17 @@ bind_port = 6200 # set to 0, the default. # log_max_line_length = 0 # +# Hashing algorithm for log anonymization. Must be one of algorithms supported +# by Python's hashlib. +# log_anonymization_method = MD5 +# +# Salt added during log anonymization +# log_anonymization_salt = +# +# Template used to format logs. All words surrounded by curly brackets +# will be substituted with the appropriate values +# log_format = {remote_addr} - - [{time.d}/{time.b}/{time.Y}:{time.H}:{time.M}:{time.S} +0000] "{method} {path}" {status} {content_length} "{referer}" "{txn_id}" "{user_agent}" {trans_time:.4f} "{additional_info}" {pid} {policy_index} +# # comma separated list of functions to call to setup custom log handlers. # functions get passed: conf, name, log_to_console, log_route, fmt, logger, # adapted_logger diff --git a/etc/proxy-server.conf-sample b/etc/proxy-server.conf-sample index 4a1b643b85..5f9712e00b 100644 --- a/etc/proxy-server.conf-sample +++ b/etc/proxy-server.conf-sample @@ -882,6 +882,17 @@ use = egg:swift#proxy_logging # middleware and never made it through to the right-most middleware (and # proxy server). Double logging is prevented for normal requests. See # proxy-logging docs. +# +# Hashing algorithm for log anonymization. Must be one of algorithms supported +# by Python's hashlib. +# log_anonymization_method = MD5 +# +# Salt added during log anonymization +# log_anonymization_salt = +# +# Template used to format access logs. All words surrounded by curly brackets +# will be substituted with the appropriate values +# log_msg_template = {client_ip} {remote_addr} {end_time.datetime} {method} {path} {protocol} {status_int} {referer} {user_agent} {auth_token} {bytes_recvd} {bytes_sent} {client_etag} {transaction_id} {headers} {request_time} {source} {log_info} {start_time} {end_time} {policy_index} # Note: Put before both ratelimit and auth in the pipeline. [filter:bulk] diff --git a/swift/account/server.py b/swift/account/server.py index 3770a5d494..a9c20c5792 100644 --- a/swift/account/server.py +++ b/swift/account/server.py @@ -321,7 +321,9 @@ class AccountController(BaseStorageServer): if res.headers.get('x-container-timestamp') is not None: additional_info += 'x-container-timestamp: %s' % \ res.headers['x-container-timestamp'] - log_msg = get_log_line(req, res, trans_time, additional_info) + log_msg = get_log_line(req, res, trans_time, additional_info, + self.log_format, self.anonymization_method, + self.anonymization_salt) if req.method.upper() == 'REPLICATE': self.logger.debug(log_msg) else: diff --git a/swift/common/base_storage_server.py b/swift/common/base_storage_server.py index f7b4be8ea0..e3d1d4a889 100644 --- a/swift/common/base_storage_server.py +++ b/swift/common/base_storage_server.py @@ -15,7 +15,8 @@ import inspect from swift import __version__ as swift_version -from swift.common.utils import public, timing_stats, config_true_value +from swift.common.utils import public, timing_stats, config_true_value, \ + LOG_LINE_DEFAULT_FORMAT from swift.common.swob import Response @@ -30,6 +31,9 @@ class BaseStorageServer(object): if replication_server is not None: replication_server = config_true_value(replication_server) self.replication_server = replication_server + self.log_format = conf.get('log_format', LOG_LINE_DEFAULT_FORMAT) + self.anonymization_method = conf.get('log_anonymization_method', 'md5') + self.anonymization_salt = conf.get('log_anonymization_salt', '') @property def server_type(self): diff --git a/swift/common/middleware/proxy_logging.py b/swift/common/middleware/proxy_logging.py index 61c3b956d0..7ade69a299 100644 --- a/swift/common/middleware/proxy_logging.py +++ b/swift/common/middleware/proxy_logging.py @@ -21,10 +21,10 @@ to plug in your own logging format/method. The logging format implemented below is as follows: -client_ip remote_addr datetime request_method request_path protocol +client_ip remote_addr end_time.datetime method path protocol status_int referer user_agent auth_token bytes_recvd bytes_sent client_etag transaction_id headers request_time source log_info - request_start_time request_end_time + start_time end_time policy_index These values are space-separated, and each is url-encoded, so that they can be separated with a simple .split() @@ -73,16 +73,15 @@ bandwidth usage will want to only sum up logs with no swift.source. import time -from six.moves.urllib.parse import quote from swift.common.swob import Request from swift.common.utils import (get_logger, get_remote_client, config_true_value, - InputProxy, list_from_csv, get_policy_index) + InputProxy, list_from_csv, get_policy_index, + split_path, StrAnonymizer, StrFormatTime, + LogStringFormatter) from swift.common.storage_policy import POLICIES -QUOTE_SAFE = '/:' - class ProxyLoggingMiddleware(object): """ @@ -91,6 +90,18 @@ class ProxyLoggingMiddleware(object): def __init__(self, app, conf, logger=None): self.app = app + self.log_formatter = LogStringFormatter(default='-', quote=True) + self.log_msg_template = conf.get( + 'log_msg_template', ( + '{client_ip} {remote_addr} {end_time.datetime} {method} ' + '{path} {protocol} {status_int} {referer} {user_agent} ' + '{auth_token} {bytes_recvd} {bytes_sent} {client_etag} ' + '{transaction_id} {headers} {request_time} {source} ' + '{log_info} {start_time} {end_time} {policy_index}')) + # The salt is only used in StrAnonymizer. This class requires bytes, + # convert it now to prevent useless convertion later. + self.anonymization_method = conf.get('log_anonymization_method', 'md5') + self.anonymization_salt = conf.get('log_anonymization_salt', '') self.log_hdrs = config_true_value(conf.get( 'access_log_headers', conf.get('log_headers', 'no'))) @@ -121,6 +132,51 @@ class ProxyLoggingMiddleware(object): self.access_logger.set_statsd_prefix('proxy-server') self.reveal_sensitive_prefix = int( conf.get('reveal_sensitive_prefix', 16)) + self.check_log_msg_template_validity() + + def check_log_msg_template_validity(self): + replacements = { + # Time information + 'end_time': StrFormatTime(1000001), + 'start_time': StrFormatTime(1000000), + # Information worth to anonymize + 'client_ip': StrAnonymizer('1.2.3.4', self.anonymization_method, + self.anonymization_salt), + 'remote_addr': StrAnonymizer('4.3.2.1', self.anonymization_method, + self.anonymization_salt), + 'path': StrAnonymizer('/', self.anonymization_method, + self.anonymization_salt), + 'referer': StrAnonymizer('ref', self.anonymization_method, + self.anonymization_salt), + 'user_agent': StrAnonymizer('swift', self.anonymization_method, + self.anonymization_salt), + 'headers': StrAnonymizer('header', self.anonymization_method, + self.anonymization_salt), + 'client_etag': StrAnonymizer('etag', self.anonymization_method, + self.anonymization_salt), + 'account': StrAnonymizer('a', self.anonymization_method, + self.anonymization_salt), + 'container': StrAnonymizer('c', self.anonymization_method, + self.anonymization_salt), + 'object': StrAnonymizer('', self.anonymization_method, + self.anonymization_salt), + # Others information + 'method': 'GET', + 'protocol': '', + 'status_int': '0', + 'auth_token': '1234...', + 'bytes_recvd': '1', + 'bytes_sent': '0', + 'transaction_id': 'tx1234', + 'request_time': '0.05', + 'source': '', + 'log_info': '', + 'policy_index': '' + } + try: + self.log_formatter.format(self.log_msg_template, **replacements) + except Exception as e: + raise ValueError('Cannot interpolate log_msg_template: %s' % e) def method_from_req(self, req): return req.environ.get('swift.orig_req_method', req.method) @@ -161,37 +217,62 @@ class ProxyLoggingMiddleware(object): for k, v in req.headers.items()) method = self.method_from_req(req) - end_gmtime_str = time.strftime('%d/%b/%Y/%H/%M/%S', - time.gmtime(end_time)) duration_time_str = "%.4f" % (end_time - start_time) - start_time_str = "%.9f" % start_time - end_time_str = "%.9f" % end_time policy_index = get_policy_index(req.headers, resp_headers) - self.access_logger.info(' '.join( - quote(str(x) if x else '-', QUOTE_SAFE) - for x in ( - get_remote_client(req), - req.remote_addr, - end_gmtime_str, - method, - req.path_qs, + + acc, cont, obj = None, None, None + if req.path.startswith('/v1/'): + _, acc, cont, obj = split_path(req.path, 1, 4, True) + + replacements = { + # Time information + 'end_time': StrFormatTime(end_time), + 'start_time': StrFormatTime(start_time), + # Information worth to anonymize + 'client_ip': StrAnonymizer(get_remote_client(req), + self.anonymization_method, + self.anonymization_salt), + 'remote_addr': StrAnonymizer(req.remote_addr, + self.anonymization_method, + self.anonymization_salt), + 'path': StrAnonymizer(req.path_qs, self.anonymization_method, + self.anonymization_salt), + 'referer': StrAnonymizer(req.referer, self.anonymization_method, + self.anonymization_salt), + 'user_agent': StrAnonymizer(req.user_agent, + self.anonymization_method, + self.anonymization_salt), + 'headers': StrAnonymizer(logged_headers, self.anonymization_method, + self.anonymization_salt), + 'client_etag': StrAnonymizer(req.headers.get('etag'), + self.anonymization_method, + self.anonymization_salt), + 'account': StrAnonymizer(acc, self.anonymization_method, + self.anonymization_salt), + 'container': StrAnonymizer(cont, self.anonymization_method, + self.anonymization_salt), + 'object': StrAnonymizer(obj, self.anonymization_method, + self.anonymization_salt), + # Others information + 'method': method, + 'protocol': req.environ.get('SERVER_PROTOCOL'), - status_int, - req.referer, - req.user_agent, - self.obscure_sensitive(req.headers.get('x-auth-token')), - bytes_received, - bytes_sent, - req.headers.get('etag', None), - req.environ.get('swift.trans_id'), - logged_headers, - duration_time_str, - req.environ.get('swift.source'), - ','.join(req.environ.get('swift.log_info') or ''), - start_time_str, - end_time_str, - policy_index - ))) + 'status_int': status_int, + 'auth_token': + self.obscure_sensitive( + req.headers.get('x-auth-token')), + 'bytes_recvd': bytes_received, + 'bytes_sent': bytes_sent, + 'transaction_id': req.environ.get('swift.trans_id'), + 'request_time': duration_time_str, + 'source': req.environ.get('swift.source'), + 'log_info': + ','.join(req.environ.get('swift.log_info', '')), + 'policy_index': policy_index, + } + self.access_logger.info( + self.log_formatter.format(self.log_msg_template, + **replacements)) # Log timing and bytes-transferred data to StatsD metric_name = self.statsd_metric_name(req, status_int, method) diff --git a/swift/common/swob.py b/swift/common/swob.py index 71b97ff019..9a157241df 100644 --- a/swift/common/swob.py +++ b/swift/common/swob.py @@ -38,7 +38,7 @@ needs to change. from collections import defaultdict, MutableMapping import time from functools import partial -from datetime import datetime, timedelta, tzinfo +from datetime import datetime from email.utils import parsedate import re import random @@ -51,7 +51,7 @@ from six import StringIO from six.moves import urllib from swift.common.header_key_dict import HeaderKeyDict -from swift.common.utils import reiterate, split_path, Timestamp, pairs, \ +from swift.common.utils import UTC, reiterate, split_path, Timestamp, pairs, \ close_if_possible, closing_if_possible from swift.common.exceptions import InvalidTimestamp @@ -118,21 +118,6 @@ MAX_NONASCENDING_RANGES = 8 MAX_RANGES = 50 -class _UTC(tzinfo): - """ - A tzinfo class for datetime objects that returns a 0 timedelta (UTC time) - """ - def dst(self, dt): - return timedelta(0) - utcoffset = dst - - def tzname(self, dt): - return 'UTC' - - -UTC = _UTC() - - class WsgiBytesIO(BytesIO): """ This class adds support for the additional wsgi.input methods defined on diff --git a/swift/common/utils.py b/swift/common/utils.py index 9ce71fa0b7..e502356e79 100644 --- a/swift/common/utils.py +++ b/swift/common/utils.py @@ -187,6 +187,13 @@ IPV6_RE = re.compile("^\[(?P
.*)\](:(?P[0-9]+))?$") MD5_OF_EMPTY_STRING = 'd41d8cd98f00b204e9800998ecf8427e' +LOG_LINE_DEFAULT_FORMAT = '{remote_addr} - - [{time.d}/{time.b}/{time.Y}' \ + ':{time.H}:{time.M}:{time.S} +0000] ' \ + '"{method} {path}" {status} {content_length} ' \ + '"{referer}" "{txn_id}" "{user_agent}" ' \ + '{trans_time:.4f} "{additional_info}" {pid} ' \ + '{policy_index}' + class InvalidHashPathConfigError(ValueError): @@ -603,7 +610,117 @@ def get_policy_index(req_headers, res_headers): return str(policy_index) if policy_index is not None else None -def get_log_line(req, res, trans_time, additional_info): +class _UTC(datetime.tzinfo): + """ + A tzinfo class for datetime objects that returns a 0 timedelta (UTC time) + """ + def dst(self, dt): + return datetime.timedelta(0) + utcoffset = dst + + def tzname(self, dt): + return 'UTC' + + +UTC = _UTC() + + +class LogStringFormatter(string.Formatter): + def __init__(self, default='', quote=False): + super(LogStringFormatter, self).__init__() + self.default = default + self.quote = quote + + def format_field(self, value, spec): + if not value: + return self.default + else: + log = super(LogStringFormatter, self).format_field(value, spec) + if self.quote: + return quote(log, ':/{}') + else: + return log + + +class StrAnonymizer(str): + """ + Class that permits to get a string anonymized or simply quoted. + """ + + def __new__(cls, data, method, salt): + method = method.lower() + if method not in hashlib.algorithms_guaranteed: + raise ValueError('Unsupported hashing method: %r' % method) + s = str.__new__(cls, data or '') + s.method = method + s.salt = salt + return s + + @property + def anonymized(self): + if not self: + return self + else: + h = getattr(hashlib, self.method)() + if self.salt: + h.update(six.b(self.salt)) + h.update(six.b(self)) + return '{%s%s}%s' % ('S' if self.salt else '', self.method.upper(), + h.hexdigest()) + + +class StrFormatTime(object): + """ + Class that permits to get formats or parts of a time. + """ + + def __init__(self, ts): + self.time = ts + self.time_struct = time.gmtime(ts) + + def __str__(self): + return "%.9f" % self.time + + def __getattr__(self, attr): + if attr not in ['a', 'A', 'b', 'B', 'c', 'd', 'H', + 'I', 'j', 'm', 'M', 'p', 'S', 'U', + 'w', 'W', 'x', 'X', 'y', 'Y', 'Z']: + raise ValueError(("The attribute %s is not a correct directive " + "for time.strftime formater.") % attr) + return datetime.datetime(*self.time_struct[:-2], + tzinfo=UTC).strftime('%' + attr) + + @property + def asctime(self): + return time.asctime(self.time_struct) + + @property + def datetime(self): + return time.strftime('%d/%b/%Y/%H/%M/%S', self.time_struct) + + @property + def iso8601(self): + return time.strftime('%Y-%m-%dT%H:%M:%S', self.time_struct) + + @property + def ms(self): + return self.__str__().split('.')[1][:3] + + @property + def us(self): + return self.__str__().split('.')[1][:6] + + @property + def ns(self): + return self.__str__().split('.')[1] + + @property + def s(self): + return self.__str__().split('.')[0] + + +def get_log_line(req, res, trans_time, additional_info, fmt, + anonymization_method, anonymization_salt): """ Make a line for logging that matches the documented log line format for backend servers. @@ -617,14 +734,39 @@ def get_log_line(req, res, trans_time, additional_info): """ policy_index = get_policy_index(req.headers, res.headers) - return '%s - - [%s] "%s %s" %s %s "%s" "%s" "%s" %.4f "%s" %d %s' % ( - req.remote_addr, - time.strftime('%d/%b/%Y:%H:%M:%S +0000', time.gmtime()), - req.method, req.path, res.status.split()[0], - res.content_length or '-', req.referer or '-', - req.headers.get('x-trans-id', '-'), - req.user_agent or '-', trans_time, additional_info or '-', - os.getpid(), policy_index or '-') + if req.path.startswith('/'): + disk, partition, account, container, obj = split_path(req.path, 0, 5, + True) + else: + disk, partition, account, container, obj = (None, ) * 5 + replacements = { + 'remote_addr': StrAnonymizer(req.remote_addr, anonymization_method, + anonymization_salt), + 'time': StrFormatTime(time.time()), + 'method': req.method, + 'path': StrAnonymizer(req.path, anonymization_method, + anonymization_salt), + 'disk': disk, + 'partition': partition, + 'account': StrAnonymizer(account, anonymization_method, + anonymization_salt), + 'container': StrAnonymizer(container, anonymization_method, + anonymization_salt), + 'object': StrAnonymizer(obj, anonymization_method, + anonymization_salt), + 'status': res.status.split()[0], + 'content_length': res.content_length, + 'referer': StrAnonymizer(req.referer, anonymization_method, + anonymization_salt), + 'txn_id': req.headers.get('x-trans-id'), + 'user_agent': StrAnonymizer(req.user_agent, anonymization_method, + anonymization_salt), + 'trans_time': trans_time, + 'additional_info': additional_info, + 'pid': os.getpid(), + 'policy_index': policy_index, + } + return LogStringFormatter(default='-').format(fmt, **replacements) def get_trans_id_time(trans_id): diff --git a/swift/container/server.py b/swift/container/server.py index 8f3df213c7..8be07a0ccc 100644 --- a/swift/container/server.py +++ b/swift/container/server.py @@ -798,7 +798,10 @@ class ContainerController(BaseStorageServer): res = HTTPInternalServerError(body=traceback.format_exc()) if self.log_requests: trans_time = time.time() - start_time - log_message = get_log_line(req, res, trans_time, '') + log_message = get_log_line(req, res, trans_time, '', + self.log_format, + self.anonymization_method, + self.anonymization_salt) if req.method.upper() == 'REPLICATE': self.logger.debug(log_message) else: diff --git a/swift/obj/server.py b/swift/obj/server.py index 0bd80c6fd5..491b9f808a 100644 --- a/swift/obj/server.py +++ b/swift/obj/server.py @@ -1292,7 +1292,9 @@ class ObjectController(BaseStorageServer): trans_time = time.time() - start_time res.fix_conditional_response() if self.log_requests: - log_line = get_log_line(req, res, trans_time, '') + log_line = get_log_line(req, res, trans_time, '', self.log_format, + self.anonymization_method, + self.anonymization_salt) if req.method in ('REPLICATE', 'SSYNC') or \ 'X-Backend-Replication' in req.headers: self.logger.debug(log_line) diff --git a/test/unit/account/test_server.py b/test/unit/account/test_server.py index c34fec74bc..a5e4d2306d 100644 --- a/test/unit/account/test_server.py +++ b/test/unit/account/test_server.py @@ -20,7 +20,6 @@ import posix import unittest from tempfile import mkdtemp from shutil import rmtree -from time import gmtime from test.unit import FakeLogger import itertools import random @@ -2003,16 +2002,15 @@ class TestAccountController(unittest.TestCase): environ={'REQUEST_METHOD': 'HEAD', 'REMOTE_ADDR': '1.2.3.4'}) self.controller.logger = FakeLogger() with mock.patch( - 'time.gmtime', mock.MagicMock(side_effect=[gmtime(10001.0)])): + 'time.time', + mock.MagicMock(side_effect=[10000.0, 10001.0, 10002.0, + 10002.0])): with mock.patch( - 'time.time', - mock.MagicMock(side_effect=[10000.0, 10001.0, 10002.0])): - with mock.patch( - 'os.getpid', mock.MagicMock(return_value=1234)): - req.get_response(self.controller) + 'os.getpid', mock.MagicMock(return_value=1234)): + req.get_response(self.controller) self.assertEqual( self.controller.logger.log_dict['info'], - [(('1.2.3.4 - - [01/Jan/1970:02:46:41 +0000] "HEAD /sda1/p/a" 404 ' + [(('1.2.3.4 - - [01/Jan/1970:02:46:42 +0000] "HEAD /sda1/p/a" 404 ' '- "-" "-" "-" 2.0000 "-" 1234 -',), {})]) def test_policy_stats_with_legacy(self): diff --git a/test/unit/common/middleware/test_proxy_logging.py b/test/unit/common/middleware/test_proxy_logging.py index af105b8b1c..fa6364150e 100644 --- a/test/unit/common/middleware/test_proxy_logging.py +++ b/test/unit/common/middleware/test_proxy_logging.py @@ -407,6 +407,48 @@ class TestProxyLogging(unittest.TestCase): self._log_parts(app, should_be_empty=True) self.assertEqual(resp_body, b'FAKE APP') + def test_log_msg_template(self): + # Access logs configuration should override the default one + app = proxy_logging.ProxyLoggingMiddleware(FakeApp(), { + 'log_anonymization_salt': 'secret_salt', + 'log_msg_template': ( + 'template which can be edited in config: ' + '{protocol} {path} {method} ' + '{path.anonymized} {container.anonymized} ' + '{request_time} {start_time.datetime} {end_time} ')}) + app.access_logger = FakeLogger() + req = Request.blank('/', environ={'REQUEST_METHOD': 'GET'}) + with mock.patch('time.time', + mock.MagicMock( + side_effect=[10000000.0, 10000001.0])): + resp = app(req.environ, start_response) + resp_body = b''.join(resp) + # exhaust generator + [x for x in resp] + log_parts = self._log_parts(app) + self.assertEqual(log_parts[0], 'template') + self.assertEqual(log_parts[7], 'HTTP/1.0') + self.assertEqual(log_parts[8], '/') + self.assertEqual(log_parts[9], 'GET') + self.assertEqual(log_parts[10], + '{SMD5}c65475e457fea0951fbb9ec9596b2177') + self.assertEqual(log_parts[11], '-') + self.assertEqual(log_parts[13], '26/Apr/1970/17/46/40') + self.assertEqual(log_parts[14], '10000001.000000000') + self.assertEqual(resp_body, b'FAKE APP') + + def test_invalid_log_config(self): + with self.assertRaises(ValueError): + proxy_logging.ProxyLoggingMiddleware(FakeApp(), { + 'log_anonymization_salt': 'secret_salt', + 'log_msg_template': '{invalid_field}'}) + + with self.assertRaises(ValueError): + proxy_logging.ProxyLoggingMiddleware(FakeApp(), { + 'log_anonymization_method': 'invalid_hash_method', + 'log_anonymization_salt': 'secret_salt', + 'log_msg_template': '{protocol}'}) + def test_multi_segment_resp(self): app = proxy_logging.ProxyLoggingMiddleware(FakeApp( [b'some', b'chunks', b'of data']), {}) diff --git a/test/unit/common/test_swob.py b/test/unit/common/test_swob.py index f97941fa8d..a30994ab3a 100644 --- a/test/unit/common/test_swob.py +++ b/test/unit/common/test_swob.py @@ -1626,11 +1626,6 @@ class TestResponse(unittest.TestCase): self.assertEqual(resp.body, b'') -class TestUTC(unittest.TestCase): - def test_tzname(self): - self.assertEqual(swift.common.swob.UTC.tzname(None), 'UTC') - - class TestConditionalIfNoneMatch(unittest.TestCase): def fake_app(self, environ, start_response): start_response('200 OK', [('Etag', 'the-etag')]) diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py index d26decfc5f..bc9b3cd664 100644 --- a/test/unit/common/test_utils.py +++ b/test/unit/common/test_utils.py @@ -184,6 +184,11 @@ def reset_logger_state(f): return wrapper +class TestUTC(unittest.TestCase): + def test_tzname(self): + self.assertEqual(utils.UTC.tzname(None), 'UTC') + + class TestTimestamp(unittest.TestCase): """Tests for swift.common.utils.Timestamp""" @@ -3503,6 +3508,96 @@ cluster_dfw1 = http://dfw1.host/v1/ self.assertEqual('2', utils.get_policy_index(req.headers, res.headers)) + def test_log_string_formatter(self): + # Plain ASCII + lf = utils.LogStringFormatter() + self.assertEqual(lf.format('{a} {b}', a='Swift is', b='great'), + 'Swift is great') + + lf = utils.LogStringFormatter() + self.assertEqual(lf.format('{a} {b}', a='', b='great'), + ' great') + + lf = utils.LogStringFormatter(default='-') + self.assertEqual(lf.format('{a} {b}', a='', b='great'), + '- great') + + lf = utils.LogStringFormatter(default='-', quote=True) + self.assertEqual(lf.format('{a} {b}', a='', b='great'), + '- great') + + lf = utils.LogStringFormatter(quote=True) + self.assertEqual(lf.format('{a} {b}', a='Swift is', b='great'), + 'Swift%20is great') + + # Unicode & co + lf = utils.LogStringFormatter() + self.assertEqual(lf.format('{a} {b}', a='Swift est', + b=u'g\u00e9nial ^^'), + u'Swift est g\u00e9nial ^^') + + lf = utils.LogStringFormatter(quote=True) + self.assertEqual(lf.format('{a} {b}', a='Swift est', + b=u'g\u00e9nial ^^'), + 'Swift%20est g%C3%A9nial%20%5E%5E') + + def test_str_anonymizer(self): + anon = utils.StrAnonymizer('Swift is great!', 'md5', '') + self.assertEqual(anon, 'Swift is great!') + self.assertEqual(anon.anonymized, + '{MD5}45e6f00d48fdcf86213602a87df18772') + + anon = utils.StrAnonymizer('Swift is great!', 'sha1', '') + self.assertEqual(anon, 'Swift is great!') + self.assertEqual(anon.anonymized, + '{SHA1}0010a3df215495d8bfa0ae4b66acc2afcc8f4c5c') + + anon = utils.StrAnonymizer('Swift is great!', 'md5', 'salty_secret') + self.assertEqual(anon, 'Swift is great!') + self.assertEqual(anon.anonymized, + '{SMD5}ef4ce28fe3bdd10b6659458ceb1f3f0c') + + anon = utils.StrAnonymizer('Swift is great!', 'sha1', 'salty_secret') + self.assertEqual(anon, 'Swift is great!') + self.assertEqual(anon.anonymized, + '{SSHA1}a4968f76acaddff0eb4069ebe8805d9cab44c9fe') + + self.assertRaises(ValueError, utils.StrAnonymizer, + 'Swift is great!', 'sha257', '') + + def test_str_format_time(self): + dt = utils.StrFormatTime(10000.123456789) + self.assertEqual(str(dt), '10000.123456789') + self.assertEqual(dt.datetime, '01/Jan/1970/02/46/40') + self.assertEqual(dt.iso8601, '1970-01-01T02:46:40') + self.assertEqual(dt.asctime, 'Thu Jan 1 02:46:40 1970') + self.assertEqual(dt.s, '10000') + self.assertEqual(dt.ms, '123') + self.assertEqual(dt.us, '123456') + self.assertEqual(dt.ns, '123456789') + self.assertEqual(dt.a, 'Thu') + self.assertEqual(dt.A, 'Thursday') + self.assertEqual(dt.b, 'Jan') + self.assertEqual(dt.B, 'January') + self.assertEqual(dt.c, 'Thu Jan 1 02:46:40 1970') + self.assertEqual(dt.d, '01') + self.assertEqual(dt.H, '02') + self.assertEqual(dt.I, '02') + self.assertEqual(dt.j, '001') + self.assertEqual(dt.m, '01') + self.assertEqual(dt.M, '46') + self.assertEqual(dt.p, 'AM') + self.assertEqual(dt.S, '40') + self.assertEqual(dt.U, '00') + self.assertEqual(dt.w, '4') + self.assertEqual(dt.W, '00') + self.assertEqual(dt.x, '01/01/70') + self.assertEqual(dt.X, '02:46:40') + self.assertEqual(dt.y, '70') + self.assertEqual(dt.Y, '1970') + self.assertIn(dt.Z, ('GMT', 'UTC')) # It depends of Python 2/3 + self.assertRaises(ValueError, getattr, dt, 'z') + def test_get_log_line(self): req = Request.blank( '/sda1/p/a/c/o', @@ -3513,14 +3608,14 @@ cluster_dfw1 = http://dfw1.host/v1/ server_pid = 1234 exp_line = '1.2.3.4 - - [01/Jan/1970:02:46:41 +0000] "HEAD ' \ '/sda1/p/a/c/o" 200 - "-" "-" "-" 1.2000 "some information" 1234 -' - with mock.patch( - 'time.gmtime', - mock.MagicMock(side_effect=[time.gmtime(10001.0)])): + with mock.patch('time.time', mock.MagicMock(side_effect=[10001.0])): with mock.patch( 'os.getpid', mock.MagicMock(return_value=server_pid)): self.assertEqual( exp_line, - utils.get_log_line(req, res, trans_time, additional_info)) + utils.get_log_line(req, res, trans_time, additional_info, + utils.LOG_LINE_DEFAULT_FORMAT, + 'md5', '54LT')) def test_cache_from_env(self): # should never get logging when swift.cache is found diff --git a/test/unit/container/test_server.py b/test/unit/container/test_server.py index f759f5287d..aac76521ec 100644 --- a/test/unit/container/test_server.py +++ b/test/unit/container/test_server.py @@ -24,7 +24,6 @@ from contextlib import contextmanager from shutil import rmtree from tempfile import mkdtemp from test.unit import make_timestamp_iter, mock_timestamp_now -from time import gmtime from xml.dom import minidom import time import random @@ -4564,16 +4563,14 @@ class TestContainerController(unittest.TestCase): req = Request.blank( '/sda1/p/a/c', environ={'REQUEST_METHOD': 'HEAD', 'REMOTE_ADDR': '1.2.3.4'}) - with mock.patch('time.gmtime', - mock.MagicMock(side_effect=[gmtime(10001.0)])), \ - mock.patch('time.time', - mock.MagicMock(side_effect=[ - 10000.0, 10001.0, 10002.0])), \ + with mock.patch('time.time', + mock.MagicMock(side_effect=[10000.0, 10001.0, 10002.0, + 10002.0])), \ mock.patch('os.getpid', mock.MagicMock(return_value=1234)): req.get_response(self.controller) info_lines = self.controller.logger.get_lines_for_level('info') self.assertEqual(info_lines, [ - '1.2.3.4 - - [01/Jan/1970:02:46:41 +0000] "HEAD /sda1/p/a/c" ' + '1.2.3.4 - - [01/Jan/1970:02:46:42 +0000] "HEAD /sda1/p/a/c" ' '404 - "-" "-" "-" 2.0000 "-" 1234 0', ]) diff --git a/test/unit/obj/test_server.py b/test/unit/obj/test_server.py index 98fb607df7..d150b186c3 100644 --- a/test/unit/obj/test_server.py +++ b/test/unit/obj/test_server.py @@ -7073,21 +7073,19 @@ class TestObjectController(unittest.TestCase): with mock.patch.object(self.object_controller, method, new=mock_method): mock_method.replication = True - with mock.patch('time.gmtime', - mock.MagicMock(side_effect=[gmtime(10001.0)])): - with mock.patch('time.time', - mock.MagicMock(side_effect=[10000.0, - 10001.0])): - with mock.patch('os.getpid', - mock.MagicMock(return_value=1234)): - response = self.object_controller.__call__( - env, start_response) - self.assertEqual(response, answer) - self.assertEqual( - self.logger.get_lines_for_level('info'), - ['None - - [01/Jan/1970:02:46:41 +0000] "PUT' - ' /sda1/p/a/c/o" 405 91 "-" "-" "-" 1.0000 "-"' - ' 1234 -']) + with mock.patch('time.time', + mock.MagicMock(side_effect=[10000.0, + 10001.0, 10001.0])): + with mock.patch('os.getpid', + mock.MagicMock(return_value=1234)): + response = self.object_controller.__call__( + env, start_response) + self.assertEqual(response, answer) + self.assertEqual( + self.logger.get_lines_for_level('info'), + ['- - - [01/Jan/1970:02:46:41 +0000] "PUT' + ' /sda1/p/a/c/o" 405 91 "-" "-" "-" 1.0000 "-"' + ' 1234 -']) def test_call_incorrect_replication_method(self): inbuf = StringIO() @@ -7254,15 +7252,14 @@ class TestObjectController(unittest.TestCase): '/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'HEAD', 'REMOTE_ADDR': '1.2.3.4'}) self.object_controller.logger = self.logger - with mock.patch('time.gmtime', side_effect=[gmtime(10001.0)]), \ - mock.patch( - 'time.time', - side_effect=[10000.0, 10000.0, 10001.0, 10002.0]), \ + with mock.patch('time.time', + side_effect=[10000.0, 10000.0, 10001.0, 10002.0, + 10002.0]), \ mock.patch('os.getpid', return_value=1234): req.get_response(self.object_controller) self.assertEqual( self.logger.get_lines_for_level('info'), - ['1.2.3.4 - - [01/Jan/1970:02:46:41 +0000] "HEAD /sda1/p/a/c/o" ' + ['1.2.3.4 - - [01/Jan/1970:02:46:42 +0000] "HEAD /sda1/p/a/c/o" ' '404 - "-" "-" "-" 2.0000 "-" 1234 -']) @patch_policies([StoragePolicy(0, 'zero', True),