From fc9e26a44e4d01cf2c0d44e472f282d6a4ad5cf2 Mon Sep 17 00:00:00 2001 From: John Dickinson Date: Wed, 1 Sep 2010 16:33:44 -0500 Subject: [PATCH] access log parsing tests pass --- etc/log-processing.conf-sample | 3 +++ swift/stats/access_processor.py | 22 +++++++++++----- test/unit/stats/test_log_processor.py | 38 +++++++++++++++++---------- 3 files changed, 43 insertions(+), 20 deletions(-) diff --git a/etc/log-processing.conf-sample b/etc/log-processing.conf-sample index 79d48dc3d6..6ea8d2b0cb 100644 --- a/etc/log-processing.conf-sample +++ b/etc/log-processing.conf-sample @@ -17,6 +17,9 @@ source_filename_format = %Y%m%d%H* class_path = swift.stats.access_processor.AccessLogProcessor # service ips is for client ip addresses that should be counted as servicenet # service_ips = +# load balancer private ips is for load balancer ip addresses that should be +# counted as servicenet +# lb_private_ips = # server_name = proxy [log-processor-stats] diff --git a/swift/stats/access_processor.py b/swift/stats/access_processor.py index 0fe254a4ca..a20d1741ed 100644 --- a/swift/stats/access_processor.py +++ b/swift/stats/access_processor.py @@ -13,12 +13,25 @@ # See the License for the specific language governing permissions and # limitations under the License. +import collections +from urllib import unquote + +from swift.common.utils import split_path + +month_map = '_ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split() + class AccessLogProcessor(object): def __init__(self, conf): self.server_name = conf.get('server_name', 'proxy') + self.lb_private_ips = [x.strip() for x in \ + conf.get('lb_private_ips', '').split(',')\ + if x.strip()] + self.service_ips = [x.strip() for x in \ + conf.get('service_ips', '').split(',')\ + if x.strip()] - def _log_line_parser(self, raw_log): + def log_line_parser(self, raw_log): '''given a raw access log line, return a dict of the good parts''' d = {} try: @@ -106,11 +119,8 @@ class AccessLogProcessor(object): def process(self, obj_stream): '''generate hourly groupings of data from one access log file''' hourly_aggr_info = {} - aggr_account_logs = {} - container_line_counts = collections.defaultdict(int) - log_buffer = collections.defaultdict(list) for line in obj_stream: - line_data = self._log_line_parser(line) + line_data = self.log_line_parser(line) if not line_data: continue account = line_data['account'] @@ -165,4 +175,4 @@ class AccessLogProcessor(object): d[key] = d.setdefault(key, 0) + 1 hourly_aggr_info[aggr_key] = d - return hourly_aggr_info, item, aggr_account_logs + return hourly_aggr_info diff --git a/test/unit/stats/test_log_processor.py b/test/unit/stats/test_log_processor.py index 0f7699abd6..ca99890908 100644 --- a/test/unit/stats/test_log_processor.py +++ b/test/unit/stats/test_log_processor.py @@ -35,7 +35,7 @@ class TestLogProcessor(unittest.TestCase): access_test_line = 'Jul 9 04:14:30 saio proxy 1.2.3.4 4.5.6.7 '\ '09/Jul/2010/04/14/30 GET '\ - '/v1/AUTH_acct/foo/bar?format=json&foo HTTP/1.0 200 - '\ + '/v1/acct/foo/bar?format=json&foo HTTP/1.0 200 - '\ 'curl tk4e350daf-9338-4cc6-aabb-090e49babfbd '\ '6 95 - txfa431231-7f07-42fd-8fc7-7da9d8cc1f90 - 0.0262' stats_test_line = 'account,1,2,3' @@ -44,10 +44,16 @@ class TestLogProcessor(unittest.TestCase): } } - def test_log_line_parser(self): - return - p = log_processor.LogProcessor(self.proxy_config, DumbLogger()) - result = p.log_line_parser(self.access_test_line) + def test_access_log_line_parser(self): + access_proxy_config = self.proxy_config + access_proxy_config.update({ + 'log-processor-access': { + 'source_filename_format':'%Y%m%d%H*', + 'class_path': + 'swift.stats.access_processor.AccessLogProcessor' + }}) + p = log_processor.LogProcessor(access_proxy_config, DumbLogger()) + result = p.plugins['access']['instance'].log_line_parser(self.access_test_line) self.assertEquals(result, {'code': 200, 'processing_time': '0.0262', 'auth_token': 'tk4e350daf-9338-4cc6-aabb-090e49babfbd', @@ -69,22 +75,27 @@ class TestLogProcessor(unittest.TestCase): 'day': '09', 'minute': '14', 'account': 'acct', - 'reseller': 'AUTH', 'hour': '04', 'referrer': '-', - 'request': '/v1/AUTH_acct', + 'request': '/v1/acct/foo/bar', 'user_agent': 'curl', 'bytes_in': 6, 'lb_ip': '4.5.6.7'}) def test_process_one_access_file(self): - return - p = log_processor.LogProcessor(self.proxy_config, DumbLogger()) - def get_object_data(*a,**kw): + access_proxy_config = self.proxy_config + access_proxy_config.update({ + 'log-processor-access': { + 'source_filename_format':'%Y%m%d%H*', + 'class_path': + 'swift.stats.access_processor.AccessLogProcessor' + }}) + p = log_processor.LogProcessor(access_proxy_config, DumbLogger()) + def get_object_data(*a, **kw): return [self.access_test_line] p.get_object_data = get_object_data - result = p.process_one_access_file('yarr', None) - expected = ({('AUTH_acct', '2010', '07', '09', '04'): + result = p.process_one_file('access', 'a', 'c', 'o') + expected = {('acct', '2010', '07', '09', '04'): {('public', 'object', 'GET', '2xx'): 1, ('public', 'bytes_out'): 95, 'marker_query': 0, @@ -92,8 +103,7 @@ class TestLogProcessor(unittest.TestCase): 'delimiter_query': 0, 'path_query': 0, ('public', 'bytes_in'): 6, - 'prefix_query': 0}}, - 'yarr', {}) + 'prefix_query': 0}} self.assertEquals(result, expected) def test_process_one_stats_file(self):