Make log format for requests configurable

Add the log_msg_template option in proxy-server.conf and log_format in
a/c/o-server.conf. It is a string parsable by Python's format()
function. Some fields containing user data might be anonymized by using
log_anonymization_method and log_anonymization_salt.

Change-Id: I29e30ef45fe3f8a026e7897127ffae08a6a80cd9
This commit is contained in:
Gilles Biannic 2018-03-01 11:31:12 +01:00 committed by Romain LE DISEZ
parent 89eced960c
commit a4cc353375
19 changed files with 582 additions and 125 deletions

View File

@ -730,6 +730,52 @@ Note: reveal_sensitive_prefix will not affect the value logged with access_log_h
What HTTP methods are allowed for StatsD logging (comma-sep); request methods
not in this list will have "BAD_METHOD" for the <verb> portion of the metric.
Default is "GET,HEAD,POST,PUT,DELETE,COPY,OPTIONS".
.IP \fBlog_anonymization_method\fR
Hashing algorithm for anonymization. Must be one of algorithms supported by Python's hashlib. Default is MD5.
.IP \fBlog_anonymization_salt\fR
Salt added as prefix before hashing the value to anonymize. Default is empty (no salt).
.IP "\fBlog_msg_template\fR"
Template used to format access logs. All words surrounded by curly brackets will be substituted with the appropriate values.
.RE
.PD 0
.RS 10
.IP "Some keywords map to timestamps and can be converted to standard dates formats using the matching transformers: 'datetime', 'asctime' or 'iso8601'."
.IP "Other transformers for timestamps are 's', 'ms', 'us' and 'ns' for seconds, milliseconds, microseconds and nanoseconds."
.IP "Python's strftime directives can also be used as tranformers (a, A, b, B, c, d, H, I, j, m, M, p, S, U, w, W, x, X, y, Y, Z)."
.IP "Some keywords map to user data that could be anonymized by using the transformer 'anonymized'."
.IP "Keywords availables are:"
.PD 0
.RS 7
.IP "client_ip (anonymizable)"
.IP "remote_addr (anonymizable)"
.IP "method (request method)"
.IP "path (anonymizable)"
.IP "protocol"
.IP "status_int"
.IP "referer (anonymizable)"
.IP "user_agent (anonymizable)"
.IP "auth_token"
.IP "bytes_recvd (number of bytes received)"
.IP "bytes_sent (number of bytes sent)"
.IP "client_etag (anonymizable)"
.IP "transaction_id"
.IP "headers (anonymizable)"
.IP "request_time (difference between start and end timestamps)
.IP "source"
.IP "log_info"
.IP "start_time (timestamp at the receiving, timestamp)"
.IP "end_time (timestamp at the end of the treatment, timestamp)"
.IP "policy_index"
.IP "account (account name, anonymizable)"
.IP "container (container name, anonymizable)"
.IP "object (object name, anonymizable)"
.PD
.RE
.IP "Example: '{client_ip.anonymized} {remote_addr.anonymized} {start_time.iso8601} {end_time.H}:{end_time.M} {method} acc:{account} cnt:{container} obj:{object.anonymized}'"
.IP "Default: '{client_ip} {remote_addr} {end_time.datetime} {method} {path} {protocol} {status_int} {referer} {user_agent} {auth_token} {bytes_recvd} {bytes_sent} {client_etag} {transaction_id} {headers} {request_time} {source} {log_info} {start_time} {end_time} {policy_index}'"
.IP "Warning: A bad log message template will raise an error in initialization."
.RE
.PD

View File

@ -26,47 +26,71 @@ Proxy Logs
The proxy logs contain the record of all external API requests made to the
proxy server. Swift's proxy servers log requests using a custom format
designed to provide robust information and simple processing. The log format
is::
designed to provide robust information and simple processing. It is possible
to change this format with the ``log_msg_template`` config parameter.
The default log format is::
client_ip remote_addr datetime request_method request_path protocol
status_int referer user_agent auth_token bytes_recvd bytes_sent
client_etag transaction_id headers request_time source log_info
request_start_time request_end_time policy_index
{client_ip} {remote_addr} {end_time.datetime} {method} {path} {protocol}
{status_int} {referer} {user_agent} {auth_token} {bytes_recvd}
{bytes_sent} {client_etag} {transaction_id} {headers} {request_time}
{source} {log_info} {start_time} {end_time} {policy_index}
Some keywords, signaled by the (anonymizable) flag, can be anonymized by
using the transformer 'anonymized'. The data are applied the hashing method of
`log_anonymization_method` and an optional salt `log_anonymization_salt`.
Some keywords, signaled by the (timestamp) flag, can be converted to standard
dates formats using the matching transformers: 'datetime', 'asctime' or
'iso8601'. Other transformers for timestamps are 's', 'ms', 'us' and 'ns' for
seconds, milliseconds, microseconds and nanoseconds. Python's strftime
directives can also be used as tranformers (a, A, b, B, c, d, H, I, j, m, M, p,
S, U, w, W, x, X, y, Y, Z).
Example {client_ip.anonymized} {remote_addr.anonymized} {start_time.iso8601}
{end_time.H}:{end_time.M} {method} acc:{account} cnt:{container}
obj:{object.anonymized}
=================== ==========================================================
**Log Field** **Value**
------------------- ----------------------------------------------------------
client_ip Swift's guess at the end-client IP, taken from various
headers in the request.
headers in the request. (anonymizable)
remote_addr The IP address of the other end of the TCP connection.
datetime Timestamp of the request, in
day/month/year/hour/minute/second format.
request_method The HTTP verb in the request.
request_path The path portion of the request.
(anonymizable)
end_time Timestamp of the request. (timestamp)
method The HTTP verb in the request.
path The path portion of the request. (anonymizable)
protocol The transport protocol used (currently one of http or
https).
status_int The response code for the request.
referer The value of the HTTP Referer header.
user_agent The value of the HTTP User-Agent header.
referer The value of the HTTP Referer header. (anonymizable)
user_agent The value of the HTTP User-Agent header. (anonymizable)
auth_token The value of the auth token. This may be truncated or
otherwise obscured.
bytes_recvd The number of bytes read from the client for this request.
bytes_sent The number of bytes sent to the client in the body of the
response. This is how many bytes were yielded to the WSGI
server.
client_etag The etag header value given by the client.
client_etag The etag header value given by the client. (anonymizable)
transaction_id The transaction id of the request.
headers The headers given in the request.
headers The headers given in the request. (anonymizable)
request_time The duration of the request.
source The "source" of the request. This may be set for requests
that are generated in order to fulfill client requests,
e.g. bulk uploads.
log_info Various info that may be useful for diagnostics, e.g. the
value of any x-delete-at header.
request_start_time High-resolution timestamp from the start of the request.
request_end_time High-resolution timestamp from the end of the request.
start_time High-resolution timestamp from the start of the request.
(timestamp)
end_time High-resolution timestamp from the end of the request.
(timestamp)
policy_index The value of the storage policy index.
account The account part extracted from the path of the request.
(anonymizable)
container The container part extracted from the path of the request.
(anonymizable)
object The object part extracted from the path of the request.
(anonymizable)
=================== ==========================================================
In one log line, all of the above fields are space-separated and url-encoded.

View File

@ -26,6 +26,17 @@ bind_port = 6202
# set to 0, the default.
# log_max_line_length = 0
#
# Hashing algorithm for log anonymization. Must be one of algorithms supported
# by Python's hashlib.
# log_anonymization_method = MD5
#
# Salt added during log anonymization
# log_anonymization_salt =
#
# Template used to format logs. All words surrounded by curly brackets
# will be substituted with the appropriate values
# log_format = {remote_addr} - - [{time.d}/{time.b}/{time.Y}:{time.H}:{time.M}:{time.S} +0000] "{method} {path}" {status} {content_length} "{referer}" "{txn_id}" "{user_agent}" {trans_time:.4f} "{additional_info}" {pid} {policy_index}
#
# comma separated list of functions to call to setup custom log handlers.
# functions get passed: conf, name, log_to_console, log_route, fmt, logger,
# adapted_logger

View File

@ -32,6 +32,17 @@ bind_port = 6201
# set to 0, the default.
# log_max_line_length = 0
#
# Hashing algorithm for log anonymization. Must be one of algorithms supported
# by Python's hashlib.
# log_anonymization_method = MD5
#
# Salt added during log anonymization
# log_anonymization_salt =
#
# Template used to format logs. All words surrounded by curly brackets
# will be substituted with the appropriate values
# log_format = {remote_addr} - - [{time.d}/{time.b}/{time.Y}:{time.H}:{time.M}:{time.S} +0000] "{method} {path}" {status} {content_length} "{referer}" "{txn_id}" "{user_agent}" {trans_time:.4f} "{additional_info}" {pid} {policy_index}
#
# comma separated list of functions to call to setup custom log handlers.
# functions get passed: conf, name, log_to_console, log_route, fmt, logger,
# adapted_logger

View File

@ -34,6 +34,17 @@ bind_port = 6200
# set to 0, the default.
# log_max_line_length = 0
#
# Hashing algorithm for log anonymization. Must be one of algorithms supported
# by Python's hashlib.
# log_anonymization_method = MD5
#
# Salt added during log anonymization
# log_anonymization_salt =
#
# Template used to format logs. All words surrounded by curly brackets
# will be substituted with the appropriate values
# log_format = {remote_addr} - - [{time.d}/{time.b}/{time.Y}:{time.H}:{time.M}:{time.S} +0000] "{method} {path}" {status} {content_length} "{referer}" "{txn_id}" "{user_agent}" {trans_time:.4f} "{additional_info}" {pid} {policy_index}
#
# comma separated list of functions to call to setup custom log handlers.
# functions get passed: conf, name, log_to_console, log_route, fmt, logger,
# adapted_logger

View File

@ -882,6 +882,17 @@ use = egg:swift#proxy_logging
# middleware and never made it through to the right-most middleware (and
# proxy server). Double logging is prevented for normal requests. See
# proxy-logging docs.
#
# Hashing algorithm for log anonymization. Must be one of algorithms supported
# by Python's hashlib.
# log_anonymization_method = MD5
#
# Salt added during log anonymization
# log_anonymization_salt =
#
# Template used to format access logs. All words surrounded by curly brackets
# will be substituted with the appropriate values
# log_msg_template = {client_ip} {remote_addr} {end_time.datetime} {method} {path} {protocol} {status_int} {referer} {user_agent} {auth_token} {bytes_recvd} {bytes_sent} {client_etag} {transaction_id} {headers} {request_time} {source} {log_info} {start_time} {end_time} {policy_index}
# Note: Put before both ratelimit and auth in the pipeline.
[filter:bulk]

View File

@ -321,7 +321,9 @@ class AccountController(BaseStorageServer):
if res.headers.get('x-container-timestamp') is not None:
additional_info += 'x-container-timestamp: %s' % \
res.headers['x-container-timestamp']
log_msg = get_log_line(req, res, trans_time, additional_info)
log_msg = get_log_line(req, res, trans_time, additional_info,
self.log_format, self.anonymization_method,
self.anonymization_salt)
if req.method.upper() == 'REPLICATE':
self.logger.debug(log_msg)
else:

View File

@ -15,7 +15,8 @@
import inspect
from swift import __version__ as swift_version
from swift.common.utils import public, timing_stats, config_true_value
from swift.common.utils import public, timing_stats, config_true_value, \
LOG_LINE_DEFAULT_FORMAT
from swift.common.swob import Response
@ -30,6 +31,9 @@ class BaseStorageServer(object):
if replication_server is not None:
replication_server = config_true_value(replication_server)
self.replication_server = replication_server
self.log_format = conf.get('log_format', LOG_LINE_DEFAULT_FORMAT)
self.anonymization_method = conf.get('log_anonymization_method', 'md5')
self.anonymization_salt = conf.get('log_anonymization_salt', '')
@property
def server_type(self):

View File

@ -21,10 +21,10 @@ to plug in your own logging format/method.
The logging format implemented below is as follows:
client_ip remote_addr datetime request_method request_path protocol
client_ip remote_addr end_time.datetime method path protocol
status_int referer user_agent auth_token bytes_recvd bytes_sent
client_etag transaction_id headers request_time source log_info
request_start_time request_end_time
start_time end_time policy_index
These values are space-separated, and each is url-encoded, so that they can
be separated with a simple .split()
@ -73,16 +73,15 @@ bandwidth usage will want to only sum up logs with no swift.source.
import time
from six.moves.urllib.parse import quote
from swift.common.swob import Request
from swift.common.utils import (get_logger, get_remote_client,
config_true_value,
InputProxy, list_from_csv, get_policy_index)
InputProxy, list_from_csv, get_policy_index,
split_path, StrAnonymizer, StrFormatTime,
LogStringFormatter)
from swift.common.storage_policy import POLICIES
QUOTE_SAFE = '/:'
class ProxyLoggingMiddleware(object):
"""
@ -91,6 +90,18 @@ class ProxyLoggingMiddleware(object):
def __init__(self, app, conf, logger=None):
self.app = app
self.log_formatter = LogStringFormatter(default='-', quote=True)
self.log_msg_template = conf.get(
'log_msg_template', (
'{client_ip} {remote_addr} {end_time.datetime} {method} '
'{path} {protocol} {status_int} {referer} {user_agent} '
'{auth_token} {bytes_recvd} {bytes_sent} {client_etag} '
'{transaction_id} {headers} {request_time} {source} '
'{log_info} {start_time} {end_time} {policy_index}'))
# The salt is only used in StrAnonymizer. This class requires bytes,
# convert it now to prevent useless convertion later.
self.anonymization_method = conf.get('log_anonymization_method', 'md5')
self.anonymization_salt = conf.get('log_anonymization_salt', '')
self.log_hdrs = config_true_value(conf.get(
'access_log_headers',
conf.get('log_headers', 'no')))
@ -121,6 +132,51 @@ class ProxyLoggingMiddleware(object):
self.access_logger.set_statsd_prefix('proxy-server')
self.reveal_sensitive_prefix = int(
conf.get('reveal_sensitive_prefix', 16))
self.check_log_msg_template_validity()
def check_log_msg_template_validity(self):
replacements = {
# Time information
'end_time': StrFormatTime(1000001),
'start_time': StrFormatTime(1000000),
# Information worth to anonymize
'client_ip': StrAnonymizer('1.2.3.4', self.anonymization_method,
self.anonymization_salt),
'remote_addr': StrAnonymizer('4.3.2.1', self.anonymization_method,
self.anonymization_salt),
'path': StrAnonymizer('/', self.anonymization_method,
self.anonymization_salt),
'referer': StrAnonymizer('ref', self.anonymization_method,
self.anonymization_salt),
'user_agent': StrAnonymizer('swift', self.anonymization_method,
self.anonymization_salt),
'headers': StrAnonymizer('header', self.anonymization_method,
self.anonymization_salt),
'client_etag': StrAnonymizer('etag', self.anonymization_method,
self.anonymization_salt),
'account': StrAnonymizer('a', self.anonymization_method,
self.anonymization_salt),
'container': StrAnonymizer('c', self.anonymization_method,
self.anonymization_salt),
'object': StrAnonymizer('', self.anonymization_method,
self.anonymization_salt),
# Others information
'method': 'GET',
'protocol': '',
'status_int': '0',
'auth_token': '1234...',
'bytes_recvd': '1',
'bytes_sent': '0',
'transaction_id': 'tx1234',
'request_time': '0.05',
'source': '',
'log_info': '',
'policy_index': ''
}
try:
self.log_formatter.format(self.log_msg_template, **replacements)
except Exception as e:
raise ValueError('Cannot interpolate log_msg_template: %s' % e)
def method_from_req(self, req):
return req.environ.get('swift.orig_req_method', req.method)
@ -161,37 +217,62 @@ class ProxyLoggingMiddleware(object):
for k, v in req.headers.items())
method = self.method_from_req(req)
end_gmtime_str = time.strftime('%d/%b/%Y/%H/%M/%S',
time.gmtime(end_time))
duration_time_str = "%.4f" % (end_time - start_time)
start_time_str = "%.9f" % start_time
end_time_str = "%.9f" % end_time
policy_index = get_policy_index(req.headers, resp_headers)
self.access_logger.info(' '.join(
quote(str(x) if x else '-', QUOTE_SAFE)
for x in (
get_remote_client(req),
req.remote_addr,
end_gmtime_str,
method,
req.path_qs,
acc, cont, obj = None, None, None
if req.path.startswith('/v1/'):
_, acc, cont, obj = split_path(req.path, 1, 4, True)
replacements = {
# Time information
'end_time': StrFormatTime(end_time),
'start_time': StrFormatTime(start_time),
# Information worth to anonymize
'client_ip': StrAnonymizer(get_remote_client(req),
self.anonymization_method,
self.anonymization_salt),
'remote_addr': StrAnonymizer(req.remote_addr,
self.anonymization_method,
self.anonymization_salt),
'path': StrAnonymizer(req.path_qs, self.anonymization_method,
self.anonymization_salt),
'referer': StrAnonymizer(req.referer, self.anonymization_method,
self.anonymization_salt),
'user_agent': StrAnonymizer(req.user_agent,
self.anonymization_method,
self.anonymization_salt),
'headers': StrAnonymizer(logged_headers, self.anonymization_method,
self.anonymization_salt),
'client_etag': StrAnonymizer(req.headers.get('etag'),
self.anonymization_method,
self.anonymization_salt),
'account': StrAnonymizer(acc, self.anonymization_method,
self.anonymization_salt),
'container': StrAnonymizer(cont, self.anonymization_method,
self.anonymization_salt),
'object': StrAnonymizer(obj, self.anonymization_method,
self.anonymization_salt),
# Others information
'method': method,
'protocol':
req.environ.get('SERVER_PROTOCOL'),
status_int,
req.referer,
req.user_agent,
self.obscure_sensitive(req.headers.get('x-auth-token')),
bytes_received,
bytes_sent,
req.headers.get('etag', None),
req.environ.get('swift.trans_id'),
logged_headers,
duration_time_str,
req.environ.get('swift.source'),
','.join(req.environ.get('swift.log_info') or ''),
start_time_str,
end_time_str,
policy_index
)))
'status_int': status_int,
'auth_token':
self.obscure_sensitive(
req.headers.get('x-auth-token')),
'bytes_recvd': bytes_received,
'bytes_sent': bytes_sent,
'transaction_id': req.environ.get('swift.trans_id'),
'request_time': duration_time_str,
'source': req.environ.get('swift.source'),
'log_info':
','.join(req.environ.get('swift.log_info', '')),
'policy_index': policy_index,
}
self.access_logger.info(
self.log_formatter.format(self.log_msg_template,
**replacements))
# Log timing and bytes-transferred data to StatsD
metric_name = self.statsd_metric_name(req, status_int, method)

View File

@ -38,7 +38,7 @@ needs to change.
from collections import defaultdict, MutableMapping
import time
from functools import partial
from datetime import datetime, timedelta, tzinfo
from datetime import datetime
from email.utils import parsedate
import re
import random
@ -51,7 +51,7 @@ from six import StringIO
from six.moves import urllib
from swift.common.header_key_dict import HeaderKeyDict
from swift.common.utils import reiterate, split_path, Timestamp, pairs, \
from swift.common.utils import UTC, reiterate, split_path, Timestamp, pairs, \
close_if_possible, closing_if_possible
from swift.common.exceptions import InvalidTimestamp
@ -118,21 +118,6 @@ MAX_NONASCENDING_RANGES = 8
MAX_RANGES = 50
class _UTC(tzinfo):
"""
A tzinfo class for datetime objects that returns a 0 timedelta (UTC time)
"""
def dst(self, dt):
return timedelta(0)
utcoffset = dst
def tzname(self, dt):
return 'UTC'
UTC = _UTC()
class WsgiBytesIO(BytesIO):
"""
This class adds support for the additional wsgi.input methods defined on

View File

@ -187,6 +187,13 @@ IPV6_RE = re.compile("^\[(?P<address>.*)\](:(?P<port>[0-9]+))?$")
MD5_OF_EMPTY_STRING = 'd41d8cd98f00b204e9800998ecf8427e'
LOG_LINE_DEFAULT_FORMAT = '{remote_addr} - - [{time.d}/{time.b}/{time.Y}' \
':{time.H}:{time.M}:{time.S} +0000] ' \
'"{method} {path}" {status} {content_length} ' \
'"{referer}" "{txn_id}" "{user_agent}" ' \
'{trans_time:.4f} "{additional_info}" {pid} ' \
'{policy_index}'
class InvalidHashPathConfigError(ValueError):
@ -603,7 +610,117 @@ def get_policy_index(req_headers, res_headers):
return str(policy_index) if policy_index is not None else None
def get_log_line(req, res, trans_time, additional_info):
class _UTC(datetime.tzinfo):
"""
A tzinfo class for datetime objects that returns a 0 timedelta (UTC time)
"""
def dst(self, dt):
return datetime.timedelta(0)
utcoffset = dst
def tzname(self, dt):
return 'UTC'
UTC = _UTC()
class LogStringFormatter(string.Formatter):
def __init__(self, default='', quote=False):
super(LogStringFormatter, self).__init__()
self.default = default
self.quote = quote
def format_field(self, value, spec):
if not value:
return self.default
else:
log = super(LogStringFormatter, self).format_field(value, spec)
if self.quote:
return quote(log, ':/{}')
else:
return log
class StrAnonymizer(str):
"""
Class that permits to get a string anonymized or simply quoted.
"""
def __new__(cls, data, method, salt):
method = method.lower()
if method not in hashlib.algorithms_guaranteed:
raise ValueError('Unsupported hashing method: %r' % method)
s = str.__new__(cls, data or '')
s.method = method
s.salt = salt
return s
@property
def anonymized(self):
if not self:
return self
else:
h = getattr(hashlib, self.method)()
if self.salt:
h.update(six.b(self.salt))
h.update(six.b(self))
return '{%s%s}%s' % ('S' if self.salt else '', self.method.upper(),
h.hexdigest())
class StrFormatTime(object):
"""
Class that permits to get formats or parts of a time.
"""
def __init__(self, ts):
self.time = ts
self.time_struct = time.gmtime(ts)
def __str__(self):
return "%.9f" % self.time
def __getattr__(self, attr):
if attr not in ['a', 'A', 'b', 'B', 'c', 'd', 'H',
'I', 'j', 'm', 'M', 'p', 'S', 'U',
'w', 'W', 'x', 'X', 'y', 'Y', 'Z']:
raise ValueError(("The attribute %s is not a correct directive "
"for time.strftime formater.") % attr)
return datetime.datetime(*self.time_struct[:-2],
tzinfo=UTC).strftime('%' + attr)
@property
def asctime(self):
return time.asctime(self.time_struct)
@property
def datetime(self):
return time.strftime('%d/%b/%Y/%H/%M/%S', self.time_struct)
@property
def iso8601(self):
return time.strftime('%Y-%m-%dT%H:%M:%S', self.time_struct)
@property
def ms(self):
return self.__str__().split('.')[1][:3]
@property
def us(self):
return self.__str__().split('.')[1][:6]
@property
def ns(self):
return self.__str__().split('.')[1]
@property
def s(self):
return self.__str__().split('.')[0]
def get_log_line(req, res, trans_time, additional_info, fmt,
anonymization_method, anonymization_salt):
"""
Make a line for logging that matches the documented log line format
for backend servers.
@ -617,14 +734,39 @@ def get_log_line(req, res, trans_time, additional_info):
"""
policy_index = get_policy_index(req.headers, res.headers)
return '%s - - [%s] "%s %s" %s %s "%s" "%s" "%s" %.4f "%s" %d %s' % (
req.remote_addr,
time.strftime('%d/%b/%Y:%H:%M:%S +0000', time.gmtime()),
req.method, req.path, res.status.split()[0],
res.content_length or '-', req.referer or '-',
req.headers.get('x-trans-id', '-'),
req.user_agent or '-', trans_time, additional_info or '-',
os.getpid(), policy_index or '-')
if req.path.startswith('/'):
disk, partition, account, container, obj = split_path(req.path, 0, 5,
True)
else:
disk, partition, account, container, obj = (None, ) * 5
replacements = {
'remote_addr': StrAnonymizer(req.remote_addr, anonymization_method,
anonymization_salt),
'time': StrFormatTime(time.time()),
'method': req.method,
'path': StrAnonymizer(req.path, anonymization_method,
anonymization_salt),
'disk': disk,
'partition': partition,
'account': StrAnonymizer(account, anonymization_method,
anonymization_salt),
'container': StrAnonymizer(container, anonymization_method,
anonymization_salt),
'object': StrAnonymizer(obj, anonymization_method,
anonymization_salt),
'status': res.status.split()[0],
'content_length': res.content_length,
'referer': StrAnonymizer(req.referer, anonymization_method,
anonymization_salt),
'txn_id': req.headers.get('x-trans-id'),
'user_agent': StrAnonymizer(req.user_agent, anonymization_method,
anonymization_salt),
'trans_time': trans_time,
'additional_info': additional_info,
'pid': os.getpid(),
'policy_index': policy_index,
}
return LogStringFormatter(default='-').format(fmt, **replacements)
def get_trans_id_time(trans_id):

View File

@ -798,7 +798,10 @@ class ContainerController(BaseStorageServer):
res = HTTPInternalServerError(body=traceback.format_exc())
if self.log_requests:
trans_time = time.time() - start_time
log_message = get_log_line(req, res, trans_time, '')
log_message = get_log_line(req, res, trans_time, '',
self.log_format,
self.anonymization_method,
self.anonymization_salt)
if req.method.upper() == 'REPLICATE':
self.logger.debug(log_message)
else:

View File

@ -1292,7 +1292,9 @@ class ObjectController(BaseStorageServer):
trans_time = time.time() - start_time
res.fix_conditional_response()
if self.log_requests:
log_line = get_log_line(req, res, trans_time, '')
log_line = get_log_line(req, res, trans_time, '', self.log_format,
self.anonymization_method,
self.anonymization_salt)
if req.method in ('REPLICATE', 'SSYNC') or \
'X-Backend-Replication' in req.headers:
self.logger.debug(log_line)

View File

@ -20,7 +20,6 @@ import posix
import unittest
from tempfile import mkdtemp
from shutil import rmtree
from time import gmtime
from test.unit import FakeLogger
import itertools
import random
@ -2003,16 +2002,15 @@ class TestAccountController(unittest.TestCase):
environ={'REQUEST_METHOD': 'HEAD', 'REMOTE_ADDR': '1.2.3.4'})
self.controller.logger = FakeLogger()
with mock.patch(
'time.gmtime', mock.MagicMock(side_effect=[gmtime(10001.0)])):
'time.time',
mock.MagicMock(side_effect=[10000.0, 10001.0, 10002.0,
10002.0])):
with mock.patch(
'time.time',
mock.MagicMock(side_effect=[10000.0, 10001.0, 10002.0])):
with mock.patch(
'os.getpid', mock.MagicMock(return_value=1234)):
req.get_response(self.controller)
'os.getpid', mock.MagicMock(return_value=1234)):
req.get_response(self.controller)
self.assertEqual(
self.controller.logger.log_dict['info'],
[(('1.2.3.4 - - [01/Jan/1970:02:46:41 +0000] "HEAD /sda1/p/a" 404 '
[(('1.2.3.4 - - [01/Jan/1970:02:46:42 +0000] "HEAD /sda1/p/a" 404 '
'- "-" "-" "-" 2.0000 "-" 1234 -',), {})])
def test_policy_stats_with_legacy(self):

View File

@ -407,6 +407,48 @@ class TestProxyLogging(unittest.TestCase):
self._log_parts(app, should_be_empty=True)
self.assertEqual(resp_body, b'FAKE APP')
def test_log_msg_template(self):
# Access logs configuration should override the default one
app = proxy_logging.ProxyLoggingMiddleware(FakeApp(), {
'log_anonymization_salt': 'secret_salt',
'log_msg_template': (
'template which can be edited in config: '
'{protocol} {path} {method} '
'{path.anonymized} {container.anonymized} '
'{request_time} {start_time.datetime} {end_time} ')})
app.access_logger = FakeLogger()
req = Request.blank('/', environ={'REQUEST_METHOD': 'GET'})
with mock.patch('time.time',
mock.MagicMock(
side_effect=[10000000.0, 10000001.0])):
resp = app(req.environ, start_response)
resp_body = b''.join(resp)
# exhaust generator
[x for x in resp]
log_parts = self._log_parts(app)
self.assertEqual(log_parts[0], 'template')
self.assertEqual(log_parts[7], 'HTTP/1.0')
self.assertEqual(log_parts[8], '/')
self.assertEqual(log_parts[9], 'GET')
self.assertEqual(log_parts[10],
'{SMD5}c65475e457fea0951fbb9ec9596b2177')
self.assertEqual(log_parts[11], '-')
self.assertEqual(log_parts[13], '26/Apr/1970/17/46/40')
self.assertEqual(log_parts[14], '10000001.000000000')
self.assertEqual(resp_body, b'FAKE APP')
def test_invalid_log_config(self):
with self.assertRaises(ValueError):
proxy_logging.ProxyLoggingMiddleware(FakeApp(), {
'log_anonymization_salt': 'secret_salt',
'log_msg_template': '{invalid_field}'})
with self.assertRaises(ValueError):
proxy_logging.ProxyLoggingMiddleware(FakeApp(), {
'log_anonymization_method': 'invalid_hash_method',
'log_anonymization_salt': 'secret_salt',
'log_msg_template': '{protocol}'})
def test_multi_segment_resp(self):
app = proxy_logging.ProxyLoggingMiddleware(FakeApp(
[b'some', b'chunks', b'of data']), {})

View File

@ -1626,11 +1626,6 @@ class TestResponse(unittest.TestCase):
self.assertEqual(resp.body, b'')
class TestUTC(unittest.TestCase):
def test_tzname(self):
self.assertEqual(swift.common.swob.UTC.tzname(None), 'UTC')
class TestConditionalIfNoneMatch(unittest.TestCase):
def fake_app(self, environ, start_response):
start_response('200 OK', [('Etag', 'the-etag')])

View File

@ -184,6 +184,11 @@ def reset_logger_state(f):
return wrapper
class TestUTC(unittest.TestCase):
def test_tzname(self):
self.assertEqual(utils.UTC.tzname(None), 'UTC')
class TestTimestamp(unittest.TestCase):
"""Tests for swift.common.utils.Timestamp"""
@ -3503,6 +3508,96 @@ cluster_dfw1 = http://dfw1.host/v1/
self.assertEqual('2', utils.get_policy_index(req.headers,
res.headers))
def test_log_string_formatter(self):
# Plain ASCII
lf = utils.LogStringFormatter()
self.assertEqual(lf.format('{a} {b}', a='Swift is', b='great'),
'Swift is great')
lf = utils.LogStringFormatter()
self.assertEqual(lf.format('{a} {b}', a='', b='great'),
' great')
lf = utils.LogStringFormatter(default='-')
self.assertEqual(lf.format('{a} {b}', a='', b='great'),
'- great')
lf = utils.LogStringFormatter(default='-', quote=True)
self.assertEqual(lf.format('{a} {b}', a='', b='great'),
'- great')
lf = utils.LogStringFormatter(quote=True)
self.assertEqual(lf.format('{a} {b}', a='Swift is', b='great'),
'Swift%20is great')
# Unicode & co
lf = utils.LogStringFormatter()
self.assertEqual(lf.format('{a} {b}', a='Swift est',
b=u'g\u00e9nial ^^'),
u'Swift est g\u00e9nial ^^')
lf = utils.LogStringFormatter(quote=True)
self.assertEqual(lf.format('{a} {b}', a='Swift est',
b=u'g\u00e9nial ^^'),
'Swift%20est g%C3%A9nial%20%5E%5E')
def test_str_anonymizer(self):
anon = utils.StrAnonymizer('Swift is great!', 'md5', '')
self.assertEqual(anon, 'Swift is great!')
self.assertEqual(anon.anonymized,
'{MD5}45e6f00d48fdcf86213602a87df18772')
anon = utils.StrAnonymizer('Swift is great!', 'sha1', '')
self.assertEqual(anon, 'Swift is great!')
self.assertEqual(anon.anonymized,
'{SHA1}0010a3df215495d8bfa0ae4b66acc2afcc8f4c5c')
anon = utils.StrAnonymizer('Swift is great!', 'md5', 'salty_secret')
self.assertEqual(anon, 'Swift is great!')
self.assertEqual(anon.anonymized,
'{SMD5}ef4ce28fe3bdd10b6659458ceb1f3f0c')
anon = utils.StrAnonymizer('Swift is great!', 'sha1', 'salty_secret')
self.assertEqual(anon, 'Swift is great!')
self.assertEqual(anon.anonymized,
'{SSHA1}a4968f76acaddff0eb4069ebe8805d9cab44c9fe')
self.assertRaises(ValueError, utils.StrAnonymizer,
'Swift is great!', 'sha257', '')
def test_str_format_time(self):
dt = utils.StrFormatTime(10000.123456789)
self.assertEqual(str(dt), '10000.123456789')
self.assertEqual(dt.datetime, '01/Jan/1970/02/46/40')
self.assertEqual(dt.iso8601, '1970-01-01T02:46:40')
self.assertEqual(dt.asctime, 'Thu Jan 1 02:46:40 1970')
self.assertEqual(dt.s, '10000')
self.assertEqual(dt.ms, '123')
self.assertEqual(dt.us, '123456')
self.assertEqual(dt.ns, '123456789')
self.assertEqual(dt.a, 'Thu')
self.assertEqual(dt.A, 'Thursday')
self.assertEqual(dt.b, 'Jan')
self.assertEqual(dt.B, 'January')
self.assertEqual(dt.c, 'Thu Jan 1 02:46:40 1970')
self.assertEqual(dt.d, '01')
self.assertEqual(dt.H, '02')
self.assertEqual(dt.I, '02')
self.assertEqual(dt.j, '001')
self.assertEqual(dt.m, '01')
self.assertEqual(dt.M, '46')
self.assertEqual(dt.p, 'AM')
self.assertEqual(dt.S, '40')
self.assertEqual(dt.U, '00')
self.assertEqual(dt.w, '4')
self.assertEqual(dt.W, '00')
self.assertEqual(dt.x, '01/01/70')
self.assertEqual(dt.X, '02:46:40')
self.assertEqual(dt.y, '70')
self.assertEqual(dt.Y, '1970')
self.assertIn(dt.Z, ('GMT', 'UTC')) # It depends of Python 2/3
self.assertRaises(ValueError, getattr, dt, 'z')
def test_get_log_line(self):
req = Request.blank(
'/sda1/p/a/c/o',
@ -3513,14 +3608,14 @@ cluster_dfw1 = http://dfw1.host/v1/
server_pid = 1234
exp_line = '1.2.3.4 - - [01/Jan/1970:02:46:41 +0000] "HEAD ' \
'/sda1/p/a/c/o" 200 - "-" "-" "-" 1.2000 "some information" 1234 -'
with mock.patch(
'time.gmtime',
mock.MagicMock(side_effect=[time.gmtime(10001.0)])):
with mock.patch('time.time', mock.MagicMock(side_effect=[10001.0])):
with mock.patch(
'os.getpid', mock.MagicMock(return_value=server_pid)):
self.assertEqual(
exp_line,
utils.get_log_line(req, res, trans_time, additional_info))
utils.get_log_line(req, res, trans_time, additional_info,
utils.LOG_LINE_DEFAULT_FORMAT,
'md5', '54LT'))
def test_cache_from_env(self):
# should never get logging when swift.cache is found

View File

@ -24,7 +24,6 @@ from contextlib import contextmanager
from shutil import rmtree
from tempfile import mkdtemp
from test.unit import make_timestamp_iter, mock_timestamp_now
from time import gmtime
from xml.dom import minidom
import time
import random
@ -4564,16 +4563,14 @@ class TestContainerController(unittest.TestCase):
req = Request.blank(
'/sda1/p/a/c',
environ={'REQUEST_METHOD': 'HEAD', 'REMOTE_ADDR': '1.2.3.4'})
with mock.patch('time.gmtime',
mock.MagicMock(side_effect=[gmtime(10001.0)])), \
mock.patch('time.time',
mock.MagicMock(side_effect=[
10000.0, 10001.0, 10002.0])), \
with mock.patch('time.time',
mock.MagicMock(side_effect=[10000.0, 10001.0, 10002.0,
10002.0])), \
mock.patch('os.getpid', mock.MagicMock(return_value=1234)):
req.get_response(self.controller)
info_lines = self.controller.logger.get_lines_for_level('info')
self.assertEqual(info_lines, [
'1.2.3.4 - - [01/Jan/1970:02:46:41 +0000] "HEAD /sda1/p/a/c" '
'1.2.3.4 - - [01/Jan/1970:02:46:42 +0000] "HEAD /sda1/p/a/c" '
'404 - "-" "-" "-" 2.0000 "-" 1234 0',
])

View File

@ -7073,21 +7073,19 @@ class TestObjectController(unittest.TestCase):
with mock.patch.object(self.object_controller, method,
new=mock_method):
mock_method.replication = True
with mock.patch('time.gmtime',
mock.MagicMock(side_effect=[gmtime(10001.0)])):
with mock.patch('time.time',
mock.MagicMock(side_effect=[10000.0,
10001.0])):
with mock.patch('os.getpid',
mock.MagicMock(return_value=1234)):
response = self.object_controller.__call__(
env, start_response)
self.assertEqual(response, answer)
self.assertEqual(
self.logger.get_lines_for_level('info'),
['None - - [01/Jan/1970:02:46:41 +0000] "PUT'
' /sda1/p/a/c/o" 405 91 "-" "-" "-" 1.0000 "-"'
' 1234 -'])
with mock.patch('time.time',
mock.MagicMock(side_effect=[10000.0,
10001.0, 10001.0])):
with mock.patch('os.getpid',
mock.MagicMock(return_value=1234)):
response = self.object_controller.__call__(
env, start_response)
self.assertEqual(response, answer)
self.assertEqual(
self.logger.get_lines_for_level('info'),
['- - - [01/Jan/1970:02:46:41 +0000] "PUT'
' /sda1/p/a/c/o" 405 91 "-" "-" "-" 1.0000 "-"'
' 1234 -'])
def test_call_incorrect_replication_method(self):
inbuf = StringIO()
@ -7254,15 +7252,14 @@ class TestObjectController(unittest.TestCase):
'/sda1/p/a/c/o',
environ={'REQUEST_METHOD': 'HEAD', 'REMOTE_ADDR': '1.2.3.4'})
self.object_controller.logger = self.logger
with mock.patch('time.gmtime', side_effect=[gmtime(10001.0)]), \
mock.patch(
'time.time',
side_effect=[10000.0, 10000.0, 10001.0, 10002.0]), \
with mock.patch('time.time',
side_effect=[10000.0, 10000.0, 10001.0, 10002.0,
10002.0]), \
mock.patch('os.getpid', return_value=1234):
req.get_response(self.object_controller)
self.assertEqual(
self.logger.get_lines_for_level('info'),
['1.2.3.4 - - [01/Jan/1970:02:46:41 +0000] "HEAD /sda1/p/a/c/o" '
['1.2.3.4 - - [01/Jan/1970:02:46:42 +0000] "HEAD /sda1/p/a/c/o" '
'404 - "-" "-" "-" 2.0000 "-" 1234 -'])
@patch_policies([StoragePolicy(0, 'zero', True),