added a bad lines check to the access log parser

This commit is contained in:
John Dickinson 2010-10-04 16:12:43 -05:00
parent a6becf6a3f
commit 9a915a00f7
3 changed files with 12 additions and 2 deletions

View File

@@ -25,6 +25,7 @@ class_path = swift.stats.access_processor.AccessLogProcessor
# lb_private_ips = # lb_private_ips =
# server_name = proxy # server_name = proxy
# user = swift # user = swift
# warn_percent = 0.8
[log-processor-stats] [log-processor-stats]
# log_dir = /var/log/swift/ # log_dir = /var/log/swift/

View File

@@ -74,14 +74,14 @@ class InternalProxy(object):
:param etag: etag for object to check successful upload :param etag: etag for object to check successful upload
:returns: True if successful, False otherwise :returns: True if successful, False otherwise
""" """
log_create_pattern = '/v1/%s/%s/%s' % (account, container, object_name) target_name = '/v1/%s/%s/%s' % (account, container, object_name)
# create the container # create the container
if not self.create_container(account, container): if not self.create_container(account, container):
return False return False
# upload the file to the account # upload the file to the account
req = webob.Request.blank(log_create_pattern, req = webob.Request.blank(target_name,
environ={'REQUEST_METHOD': 'PUT'}, environ={'REQUEST_METHOD': 'PUT'},
headers={'Transfer-Encoding': 'chunked'}) headers={'Transfer-Encoding': 'chunked'})
if compress: if compress:

View File

@@ -33,6 +33,7 @@ class AccessLogProcessor(object):
self.service_ips = [x.strip() for x in \ self.service_ips = [x.strip() for x in \
conf.get('service_ips', '').split(',')\ conf.get('service_ips', '').split(',')\
if x.strip()] if x.strip()]
self.warn_percent = float(conf.get('warn_percent', '0.8'))
def log_line_parser(self, raw_log): def log_line_parser(self, raw_log):
'''given a raw access log line, return a dict of the good parts''' '''given a raw access log line, return a dict of the good parts'''
@@ -122,9 +123,13 @@ class AccessLogProcessor(object):
def process(self, obj_stream, account, container, object_name): def process(self, obj_stream, account, container, object_name):
'''generate hourly groupings of data from one access log file''' '''generate hourly groupings of data from one access log file'''
hourly_aggr_info = {} hourly_aggr_info = {}
total_lines = 0
bad_lines = 0
for line in obj_stream: for line in obj_stream:
line_data = self.log_line_parser(line) line_data = self.log_line_parser(line)
total_lines += 1
if not line_data: if not line_data:
bad_lines += 1
continue continue
account = line_data['account'] account = line_data['account']
container_name = line_data['container_name'] container_name = line_data['container_name']
@@ -178,6 +183,10 @@ class AccessLogProcessor(object):
d[key] = d.setdefault(key, 0) + 1 d[key] = d.setdefault(key, 0) + 1
hourly_aggr_info[aggr_key] = d hourly_aggr_info[aggr_key] = d
if bad_lines > (total_lines * self.warn_percent):
name = '/'.join([account, container, object_name])
print >>sys.stderr, 'I found a bunch of bad lines in %s '\
'(%d bad, %d total)' % (name, bad_lines, total_lines)
return hourly_aggr_info return hourly_aggr_info
def keylist_mapping(self): def keylist_mapping(self):