updated with changes and suggestions from code review
This commit is contained in:
commit
ea690795df
@ -24,4 +24,4 @@ if __name__ == '__main__':
|
||||
print "Usage: swift-log-stats-collector CONFIG_FILE"
|
||||
sys.exit()
|
||||
conf = utils.readconf(sys.argv[1], log_name='log-stats-collector')
|
||||
stats = LogProcessorDaemon(conf).run(once=True, capture_stdout=False)
|
||||
stats = LogProcessorDaemon(conf).run(once=True)
|
||||
|
@ -15,6 +15,8 @@ swift_account = AUTH_7abbc116-8a07-4b63-819d-02715d3e0f31
|
||||
swift_account = AUTH_7abbc116-8a07-4b63-819d-02715d3e0f31
|
||||
container_name = log_data
|
||||
source_filename_format = access-%Y%m%d%H
|
||||
# new_log_cutoff = 7200
|
||||
# unlink_log = True
|
||||
class_path = swift.stats.access_processor.AccessLogProcessor
|
||||
# service ips is for client ip addresses that should be counted as servicenet
|
||||
# service_ips =
|
||||
@ -28,7 +30,9 @@ class_path = swift.stats.access_processor.AccessLogProcessor
|
||||
# log_dir = /var/log/swift/
|
||||
swift_account = AUTH_7abbc116-8a07-4b63-819d-02715d3e0f31
|
||||
container_name = account_stats
|
||||
source_filename_format = stats-%Y%m%d%H
|
||||
source_filename_format = stats-%Y%m%d%H_*
|
||||
# new_log_cutoff = 7200
|
||||
# unlink_log = True
|
||||
class_path = swift.stats.stats_processor.StatsLogProcessor
|
||||
# account_server_conf = /etc/swift/account-server.conf
|
||||
# user = swift
|
@ -16,8 +16,7 @@
|
||||
import zlib
|
||||
import struct
|
||||
|
||||
|
||||
class CompressedFileReader(object):
|
||||
class CompressingFileReader(object):
|
||||
'''
|
||||
Wraps a file object and provides a read method that returns gzip'd data.
|
||||
|
@ -17,7 +17,7 @@ import webob
|
||||
from urllib import quote, unquote
|
||||
from json import loads as json_loads
|
||||
|
||||
from swift.common.compressed_file_reader import CompressedFileReader
|
||||
from swift.common.compressing_file_reader import CompressingFileReader
|
||||
from swift.proxy.server import BaseApplication
|
||||
|
||||
class MemcacheStub(object):
|
||||
@ -45,7 +45,8 @@ class InternalProxy(object):
|
||||
self.retries = retries
|
||||
|
||||
def upload_file(self, source_file, account, container, object_name,
|
||||
compress=True, content_type='application/x-gzip'):
|
||||
compress=True, content_type='application/x-gzip',
|
||||
etag=None):
|
||||
"""
|
||||
Upload a file to cloud files.
|
||||
|
||||
@ -69,9 +70,9 @@ class InternalProxy(object):
|
||||
headers={'Transfer-Encoding': 'chunked'})
|
||||
if compress:
|
||||
if hasattr(source_file, 'read'):
|
||||
compressed_file = CompressedFileReader(source_file)
|
||||
compressed_file = CompressingFileReader(source_file)
|
||||
else:
|
||||
compressed_file = CompressedFileReader(open(source_file, 'rb'))
|
||||
compressed_file = CompressingFileReader(open(source_file, 'rb'))
|
||||
req.body_file = compressed_file
|
||||
else:
|
||||
if not hasattr(source_file, 'read'):
|
||||
@ -80,6 +81,8 @@ class InternalProxy(object):
|
||||
req.account = account
|
||||
req.content_type = content_type
|
||||
req.content_length = None # to make sure we send chunked data
|
||||
if etag:
|
||||
req.etag = etag
|
||||
resp = self.upload_app.handle_request(self.upload_app.update_request(req))
|
||||
tries = 1
|
||||
while (resp.status_int < 200 or resp.status_int > 299) \
|
||||
|
@ -21,7 +21,9 @@ from swift.common.utils import split_path
|
||||
|
||||
month_map = '_ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()
|
||||
|
||||
|
||||
class AccessLogProcessor(object):
|
||||
"""Transform proxy server access logs"""
|
||||
|
||||
def __init__(self, conf):
|
||||
self.server_name = conf.get('server_name', 'proxy')
|
||||
@ -112,8 +114,8 @@ class AccessLogProcessor(object):
|
||||
d['account'] = account
|
||||
d['container_name'] = container_name
|
||||
d['object_name'] = object_name
|
||||
d['bytes_out'] = int(d['bytes_out'].replace('-','0'))
|
||||
d['bytes_in'] = int(d['bytes_in'].replace('-','0'))
|
||||
d['bytes_out'] = int(d['bytes_out'].replace('-', '0'))
|
||||
d['bytes_in'] = int(d['bytes_in'].replace('-', '0'))
|
||||
d['code'] = int(d['code'])
|
||||
return d
|
||||
|
||||
@ -155,8 +157,8 @@ class AccessLogProcessor(object):
|
||||
if line_data['client_ip'] in self.service_ips:
|
||||
source = 'service'
|
||||
|
||||
d[(source, 'bytes_out')] = d.setdefault((source, 'bytes_out'), 0) + \
|
||||
bytes_out
|
||||
d[(source, 'bytes_out')] = d.setdefault((
|
||||
source, 'bytes_out'), 0) + bytes_out
|
||||
d[(source, 'bytes_in')] = d.setdefault((source, 'bytes_in'), 0) + \
|
||||
bytes_in
|
||||
|
||||
@ -171,7 +173,7 @@ class AccessLogProcessor(object):
|
||||
path = line_data.get('path', 0)
|
||||
d['path_query'] = d.setdefault('path_query', 0) + path
|
||||
|
||||
code = '%dxx' % (code/100)
|
||||
code = '%dxx' % (code / 100)
|
||||
key = (source, op_level, method, code)
|
||||
d[key] = d.setdefault(key, 0) + 1
|
||||
|
||||
@ -220,5 +222,5 @@ class AccessLogProcessor(object):
|
||||
keylist_mapping[code].add(
|
||||
(source, level, verb, code))
|
||||
keylist_mapping['ops_count'].add(
|
||||
(source,level,verb,code))
|
||||
(source, level, verb, code))
|
||||
return keylist_mapping
|
||||
|
@ -16,18 +16,26 @@
|
||||
import os
|
||||
import time
|
||||
from paste.deploy import appconfig
|
||||
import shutil
|
||||
import hashlib
|
||||
|
||||
from swift.account.server import DATADIR as account_server_data_dir
|
||||
from swift.common.db import AccountBroker
|
||||
from swift.common.internal_proxy import InternalProxy
|
||||
from swift.common.utils import renamer, get_logger, readconf
|
||||
from swift.common.utils import renamer, get_logger, readconf, mkdirs
|
||||
from swift.common.constraints import check_mount
|
||||
from swift.common.daemon import Daemon
|
||||
|
||||
|
||||
class AccountStat(Daemon):
|
||||
"""
|
||||
Extract storage stats from account databases on the account
|
||||
storage nodes
|
||||
"""
|
||||
|
||||
def __init__(self, stats_conf):
|
||||
super(AccountStat, self).__init__(stats_conf)
|
||||
target_dir = stats_conf.get('log_dir', '/var/log/swift')
|
||||
#TODO: figure out the server configs. also figure out internal_proxy
|
||||
account_server_conf_loc = stats_conf.get('account_server_conf',
|
||||
'/etc/swift/account-server.conf')
|
||||
server_conf = appconfig('config:%s' % account_server_conf_loc,
|
||||
@ -35,6 +43,7 @@ class AccountStat(Daemon):
|
||||
filename_format = stats_conf['source_filename_format']
|
||||
self.filename_format = filename_format
|
||||
self.target_dir = target_dir
|
||||
mkdirs(self.target_dir)
|
||||
self.devices = server_conf.get('devices', '/srv/node')
|
||||
self.mount_check = server_conf.get('mount_check', 'true').lower() in \
|
||||
('true', 't', '1', 'on', 'yes', 'y')
|
||||
@ -45,18 +54,19 @@ class AccountStat(Daemon):
|
||||
start = time.time()
|
||||
self.find_and_process()
|
||||
self.logger.info("Gathering account stats complete (%0.2f minutes)" %
|
||||
((time.time()-start)/60))
|
||||
((time.time() - start) / 60))
|
||||
|
||||
def find_and_process(self):
|
||||
#TODO: handle a counter in the filename to prevent overwrites?
|
||||
src_filename = time.strftime(self.filename_format)
|
||||
#TODO: don't use /tmp?
|
||||
tmp_filename = os.path.join('/tmp', src_filename)
|
||||
working_dir = os.path.join(self.target_dir, '.stats_tmp')
|
||||
shutil.rmtree(working_dir, ignore_errors=True)
|
||||
tmp_filename = os.path.join(working_dir, src_filename)
|
||||
hasher = hashlib.md5()
|
||||
with open(tmp_filename, 'wb') as statfile:
|
||||
#statfile.write('Account Name, Container Count, Object Count, Bytes Used\n')
|
||||
# csv has the following columns:
|
||||
# Account Name, Container Count, Object Count, Bytes Used
|
||||
for device in os.listdir(self.devices):
|
||||
if self.mount_check and \
|
||||
not os.path.ismount(os.path.join(self.devices, device)):
|
||||
if self.mount_check and not check_mount(self.devices, device):
|
||||
self.logger.error("Device %s is not mounted, skipping." %
|
||||
device)
|
||||
continue
|
||||
@ -70,7 +80,8 @@ class AccountStat(Daemon):
|
||||
for root, dirs, files in os.walk(accounts, topdown=False):
|
||||
for filename in files:
|
||||
if filename.endswith('.db'):
|
||||
broker = AccountBroker(os.path.join(root, filename))
|
||||
db_path = os.path.join(root, filename)
|
||||
broker = AccountBroker(db_path)
|
||||
if not broker.is_deleted():
|
||||
(account_name,
|
||||
_, _, _,
|
||||
@ -78,9 +89,12 @@ class AccountStat(Daemon):
|
||||
object_count,
|
||||
bytes_used,
|
||||
_, _) = broker.get_info()
|
||||
line_data = '"%s",%d,%d,%d\n' % (account_name,
|
||||
container_count,
|
||||
object_count,
|
||||
bytes_used)
|
||||
line_data = '"%s",%d,%d,%d\n' % (
|
||||
account_name, container_count,
|
||||
object_count, bytes_used)
|
||||
statfile.write(line_data)
|
||||
hasher.update(line_data)
|
||||
file_hash = hasher.hexdigest()
|
||||
src_filename = '_'.join([src_filename, file_hash])
|
||||
renamer(tmp_filename, os.path.join(self.target_dir, src_filename))
|
||||
shutil.rmtree(working_dir, ignore_errors=True)
|
||||
|
@ -23,31 +23,28 @@ from paste.deploy import appconfig
|
||||
import multiprocessing
|
||||
import Queue
|
||||
import cPickle
|
||||
import hashlib
|
||||
|
||||
from swift.common.internal_proxy import InternalProxy
|
||||
from swift.common.exceptions import ChunkReadTimeout
|
||||
from swift.common.utils import get_logger, readconf
|
||||
from swift.common.daemon import Daemon
|
||||
|
||||
|
||||
class BadFileDownload(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class LogProcessor(object):
|
||||
"""Load plugins, process logs"""
|
||||
|
||||
def __init__(self, conf, logger):
|
||||
stats_conf = conf.get('log-processor', {})
|
||||
|
||||
proxy_server_conf_loc = stats_conf.get('proxy_server_conf',
|
||||
'/etc/swift/proxy-server.conf')
|
||||
self.proxy_server_conf = appconfig('config:%s' % proxy_server_conf_loc,
|
||||
name='proxy-server')
|
||||
if isinstance(logger, tuple):
|
||||
self.logger = get_logger(*logger)
|
||||
else:
|
||||
self.logger = logger
|
||||
self.internal_proxy = InternalProxy(self.proxy_server_conf,
|
||||
self.logger,
|
||||
retries=3)
|
||||
|
||||
# load the processing plugins
|
||||
self.plugins = {}
|
||||
@ -56,11 +53,26 @@ class LogProcessor(object):
|
||||
plugin_name = section[len(plugin_prefix):]
|
||||
plugin_conf = conf.get(section, {})
|
||||
self.plugins[plugin_name] = plugin_conf
|
||||
import_target, class_name = plugin_conf['class_path'].rsplit('.', 1)
|
||||
class_path = self.plugins[plugin_name]['class_path']
|
||||
import_target, class_name = class_path.rsplit('.', 1)
|
||||
module = __import__(import_target, fromlist=[import_target])
|
||||
klass = getattr(module, class_name)
|
||||
self.plugins[plugin_name]['instance'] = klass(plugin_conf)
|
||||
|
||||
@property
|
||||
def internal_proxy(self):
|
||||
'''Lazy load internal proxy'''
|
||||
if self._internal_proxy is None:
|
||||
proxy_server_conf_loc = stats_conf.get('proxy_server_conf',
|
||||
'/etc/swift/proxy-server.conf')
|
||||
self.proxy_server_conf = appconfig(
|
||||
'config:%s' % proxy_server_conf_loc,
|
||||
name='proxy-server')
|
||||
self._internal_proxy = InternalProxy(self.proxy_server_conf,
|
||||
self.logger,
|
||||
retries=3)
|
||||
return self._internal_proxy
|
||||
|
||||
def process_one_file(self, plugin_name, account, container, object_name):
|
||||
# get an iter of the object data
|
||||
compressed = object_name.endswith('.gz')
|
||||
@ -72,25 +84,27 @@ class LogProcessor(object):
|
||||
container,
|
||||
object_name)
|
||||
|
||||
def get_data_list(self, start_date=None, end_date=None, listing_filter=None):
|
||||
def get_data_list(self, start_date=None, end_date=None,
|
||||
listing_filter=None):
|
||||
total_list = []
|
||||
for name, data in self.plugins.items():
|
||||
for plugin_name, data in self.plugins.items():
|
||||
account = data['swift_account']
|
||||
container = data['container_name']
|
||||
l = self.get_container_listing(account,
|
||||
container,
|
||||
start_date,
|
||||
end_date)
|
||||
for i in l:
|
||||
listing = self.get_container_listing(account,
|
||||
container,
|
||||
start_date,
|
||||
end_date)
|
||||
for object_name in listing:
|
||||
# The items in this list end up being passed as positional
|
||||
# parameters to process_one_file.
|
||||
x = (name, account, container, i)
|
||||
x = (plugin_name, account, container, object_name)
|
||||
if x not in listing_filter:
|
||||
total_list.append(x)
|
||||
return total_list
|
||||
|
||||
def get_container_listing(self, swift_account, container_name, start_date=None,
|
||||
end_date=None, listing_filter=None):
|
||||
def get_container_listing(self, swift_account, container_name,
|
||||
start_date=None, end_date=None,
|
||||
listing_filter=None):
|
||||
'''
|
||||
Get a container listing, filtered by start_date, end_date, and
|
||||
listing_filter. Dates, if given, should be in YYYYMMDDHH format
|
||||
@ -162,15 +176,16 @@ class LogProcessor(object):
|
||||
last_part = ''
|
||||
last_compressed_part = ''
|
||||
# magic in the following zlib.decompressobj argument is courtesy of
|
||||
# http://stackoverflow.com/questions/2423866/python-decompressing-gzip-chunk-by-chunk
|
||||
d = zlib.decompressobj(16+zlib.MAX_WBITS)
|
||||
# Python decompressing gzip chunk-by-chunk
|
||||
# http://stackoverflow.com/questions/2423866
|
||||
d = zlib.decompressobj(16 + zlib.MAX_WBITS)
|
||||
try:
|
||||
for chunk in o:
|
||||
if compressed:
|
||||
try:
|
||||
chunk = d.decompress(chunk)
|
||||
except zlib.error:
|
||||
raise BadFileDownload() # bad compressed data
|
||||
raise BadFileDownload() # bad compressed data
|
||||
parts = chunk.split('\n')
|
||||
parts[0] = last_part + parts[0]
|
||||
for part in parts[:-1]:
|
||||
@ -208,6 +223,11 @@ class LogProcessor(object):
|
||||
|
||||
|
||||
class LogProcessorDaemon(Daemon):
|
||||
"""
|
||||
Gather raw log data and farm proccessing to generate a csv that is
|
||||
uploaded to swift.
|
||||
"""
|
||||
|
||||
def __init__(self, conf):
|
||||
c = conf.get('log-processor')
|
||||
super(LogProcessorDaemon, self).__init__(c)
|
||||
@ -220,6 +240,7 @@ class LogProcessorDaemon(Daemon):
|
||||
self.log_processor_account = c['swift_account']
|
||||
self.log_processor_container = c.get('container_name',
|
||||
'log_processing_data')
|
||||
self.worker_count = int(c.get('worker_count', '1'))
|
||||
|
||||
def run_once(self):
|
||||
self.logger.info("Beginning log processing")
|
||||
@ -228,19 +249,26 @@ class LogProcessorDaemon(Daemon):
|
||||
lookback_start = None
|
||||
lookback_end = None
|
||||
else:
|
||||
lookback_start = datetime.datetime.now() - \
|
||||
datetime.timedelta(hours=self.lookback_hours)
|
||||
delta_hours = datetime.timedelta(hours=self.lookback_hours)
|
||||
lookback_start = datetime.datetime.now() - delta_hours
|
||||
lookback_start = lookback_start.strftime('%Y%m%d%H')
|
||||
if self.lookback_window == 0:
|
||||
lookback_end = None
|
||||
else:
|
||||
delta_window = datetime.timedelta(hours=self.lookback_window)
|
||||
lookback_end = datetime.datetime.now() - \
|
||||
datetime.timedelta(hours=self.lookback_hours) + \
|
||||
datetime.timedelta(hours=self.lookback_window)
|
||||
delta_hours + \
|
||||
delta_window
|
||||
lookback_end = lookback_end.strftime('%Y%m%d%H')
|
||||
self.logger.debug('lookback_start: %s' % lookback_start)
|
||||
self.logger.debug('lookback_end: %s' % lookback_end)
|
||||
try:
|
||||
# Note: this file (or data set) will grow without bound.
|
||||
# In practice, if it becomes a problem (say, after many months of
|
||||
# running), one could manually prune the file to remove older
|
||||
# entries. Automatically pruning on each run could be dangerous.
|
||||
# There is not a good way to determine when an old entry should be
|
||||
# pruned (lookback_hours could be set to anything and could change)
|
||||
processed_files_stream = self.log_processor.get_object_data(
|
||||
self.log_processor_account,
|
||||
self.log_processor_container,
|
||||
@ -261,12 +289,13 @@ class LogProcessorDaemon(Daemon):
|
||||
self.logger.info('loaded %d files to process' % len(logs_to_process))
|
||||
if not logs_to_process:
|
||||
self.logger.info("Log processing done (%0.2f minutes)" %
|
||||
((time.time()-start)/60))
|
||||
((time.time() - start) / 60))
|
||||
return
|
||||
|
||||
# map
|
||||
processor_args = (self.total_conf, self.logger)
|
||||
results = multiprocess_collate(processor_args, logs_to_process)
|
||||
results = multiprocess_collate(processor_args, logs_to_process,
|
||||
self.worker_count)
|
||||
|
||||
#reduce
|
||||
aggr_data = {}
|
||||
@ -307,16 +336,23 @@ class LogProcessorDaemon(Daemon):
|
||||
|
||||
# output
|
||||
sorted_keylist_mapping = sorted(keylist_mapping)
|
||||
columns = 'bill_ts,data_ts,account,' + ','.join(sorted_keylist_mapping)
|
||||
print columns
|
||||
columns = 'data_ts,account,' + ','.join(sorted_keylist_mapping)
|
||||
out_buf = [columns]
|
||||
for (account, year, month, day, hour), d in final_info.items():
|
||||
bill_ts = ''
|
||||
data_ts = '%s/%s/%s %s:00:00' % (year, month, day, hour)
|
||||
row = [bill_ts, data_ts]
|
||||
row = [data_ts]
|
||||
row.append('%s' % account)
|
||||
for k in sorted_keylist_mapping:
|
||||
row.append('%s'%d[k])
|
||||
print ','.join(row)
|
||||
row.append('%s' % d[k])
|
||||
out_buf.append(','.join(row))
|
||||
out_buf = '\n'.join(out_buf)
|
||||
h = hashlib.md5(out_buf).hexdigest()
|
||||
upload_name = time.strftime('%Y/%m/%d/%H/') + '%s.csv.gz' % h
|
||||
f = cStringIO.StringIO(out_buf)
|
||||
self.log_processor.internal_proxy.upload_file(f,
|
||||
self.log_processor_account,
|
||||
self.log_processor_container,
|
||||
upload_name)
|
||||
|
||||
# cleanup
|
||||
s = cPickle.dumps(processed_files, cPickle.HIGHEST_PROTOCOL)
|
||||
@ -327,11 +363,11 @@ class LogProcessorDaemon(Daemon):
|
||||
'processed_files.pickle.gz')
|
||||
|
||||
self.logger.info("Log processing done (%0.2f minutes)" %
|
||||
((time.time()-start)/60))
|
||||
((time.time() - start) / 60))
|
||||
|
||||
def multiprocess_collate(processor_args, logs_to_process):
|
||||
|
||||
def multiprocess_collate(processor_args, logs_to_process, worker_count):
|
||||
'''yield hourly data from logs_to_process'''
|
||||
worker_count = multiprocessing.cpu_count()
|
||||
results = []
|
||||
in_queue = multiprocessing.Queue()
|
||||
out_queue = multiprocessing.Queue()
|
||||
@ -361,6 +397,7 @@ def multiprocess_collate(processor_args, logs_to_process):
|
||||
for r in results:
|
||||
r.join()
|
||||
|
||||
|
||||
def collate_worker(processor_args, in_queue, out_queue):
|
||||
'''worker process for multiprocess_collate'''
|
||||
p = LogProcessor(*processor_args)
|
||||
|
@ -25,15 +25,16 @@ from swift.common.internal_proxy import InternalProxy
|
||||
from swift.common.daemon import Daemon
|
||||
from swift.common import utils
|
||||
|
||||
|
||||
class LogUploader(Daemon):
|
||||
'''
|
||||
Given a local directory, a swift account, and a container name, LogParser
|
||||
will upload all files in the local directory to the given account/container.
|
||||
All but the newest files will be uploaded, and the files' md5 sum will be
|
||||
computed. The hash is used to prevent duplicate data from being uploaded
|
||||
multiple times in different files (ex: log lines). Since the hash is
|
||||
computed, it is also used as the uploaded object's etag to ensure data
|
||||
integrity.
|
||||
will upload all files in the local directory to the given account/
|
||||
container. All but the newest files will be uploaded, and the files' md5
|
||||
sum will be computed. The hash is used to prevent duplicate data from
|
||||
being uploaded multiple times in different files (ex: log lines). Since
|
||||
the hash is computed, it is also used as the uploaded object's etag to
|
||||
ensure data integrity.
|
||||
|
||||
Note that after the file is successfully uploaded, it will be unlinked.
|
||||
|
||||
@ -51,6 +52,11 @@ class LogUploader(Daemon):
|
||||
'/etc/swift/proxy-server.conf')
|
||||
proxy_server_conf = appconfig('config:%s' % proxy_server_conf_loc,
|
||||
name='proxy-server')
|
||||
new_log_cutoff = int(uploader_conf.get('new_log_cutoff', '7200'))
|
||||
unlink_log = uploader_conf.get('unlink_log', 'True').lower() in \
|
||||
('true', 'on', '1', 'yes')
|
||||
self.unlink_log = unlink_log
|
||||
self.new_log_cutoff = new_log_cutoff
|
||||
if not log_dir.endswith('/'):
|
||||
log_dir = log_dir + '/'
|
||||
self.log_dir = log_dir
|
||||
@ -66,7 +72,7 @@ class LogUploader(Daemon):
|
||||
start = time.time()
|
||||
self.upload_all_logs()
|
||||
self.logger.info("Uploading logs complete (%0.2f minutes)" %
|
||||
((time.time()-start)/60))
|
||||
((time.time() - start) / 60))
|
||||
|
||||
def upload_all_logs(self):
|
||||
i = [(self.filename_format.index(c), c) for c in '%Y %m %d %H'.split()]
|
||||
@ -76,17 +82,17 @@ class LogUploader(Daemon):
|
||||
for start, c in i:
|
||||
offset = base_offset + start
|
||||
if c == '%Y':
|
||||
year_offset = offset, offset+4
|
||||
year_offset = offset, offset + 4
|
||||
# Add in the difference between len(%Y) and the expanded
|
||||
# version of %Y (????). This makes sure the codes after this
|
||||
# one will align properly in the final filename.
|
||||
base_offset += 2
|
||||
elif c == '%m':
|
||||
month_offset = offset, offset+2
|
||||
month_offset = offset, offset + 2
|
||||
elif c == '%d':
|
||||
day_offset = offset, offset+2
|
||||
day_offset = offset, offset + 2
|
||||
elif c == '%H':
|
||||
hour_offset = offset, offset+2
|
||||
hour_offset = offset, offset + 2
|
||||
if not (year_offset and month_offset and day_offset and hour_offset):
|
||||
# don't have all the parts, can't upload anything
|
||||
return
|
||||
@ -122,9 +128,12 @@ class LogUploader(Daemon):
|
||||
# unexpected filename format, move on
|
||||
self.logger.error("Unexpected log: %s" % filename)
|
||||
continue
|
||||
if (time.time() - os.stat(filename).st_mtime) < 7200:
|
||||
if ((time.time() - os.stat(filename).st_mtime) <
|
||||
self.new_log_cutoff):
|
||||
# don't process very new logs
|
||||
self.logger.debug("Skipping log: %s (< 2 hours old)" % filename)
|
||||
self.logger.debug(
|
||||
"Skipping log: %s (< %d seconds old)" % (filename,
|
||||
self.new_log_cutoff))
|
||||
continue
|
||||
self.upload_one_log(filename, year, month, day, hour)
|
||||
|
||||
@ -147,7 +156,7 @@ class LogUploader(Daemon):
|
||||
# By adding a hash to the filename, we ensure that uploaded files
|
||||
# have unique filenames and protect against uploading one file
|
||||
# more than one time. By using md5, we get an etag for free.
|
||||
target_filename = '/'.join([year, month, day, hour, filehash+'.gz'])
|
||||
target_filename = '/'.join([year, month, day, hour, filehash + '.gz'])
|
||||
if self.internal_proxy.upload_file(filename,
|
||||
self.swift_account,
|
||||
self.container_name,
|
||||
@ -155,6 +164,7 @@ class LogUploader(Daemon):
|
||||
compress=(not already_compressed)):
|
||||
self.logger.debug("Uploaded log %s to %s" %
|
||||
(filename, target_filename))
|
||||
os.unlink(filename)
|
||||
if self.unlink_log:
|
||||
os.unlink(filename)
|
||||
else:
|
||||
self.logger.error("ERROR: Upload of log %s failed!" % filename)
|
||||
|
@ -13,7 +13,9 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
class StatsLogProcessor(object):
|
||||
"""Transform account storage stat logs"""
|
||||
|
||||
def __init__(self, conf):
|
||||
pass
|
||||
|
@ -112,7 +112,7 @@ class TestLogProcessor(unittest.TestCase):
|
||||
|
||||
def test_get_container_listing(self):
|
||||
p = log_processor.LogProcessor(self.proxy_config, DumbLogger())
|
||||
p.internal_proxy = DumbInternalProxy()
|
||||
p._internal_proxy = DumbInternalProxy()
|
||||
result = p.get_container_listing('a', 'foo')
|
||||
expected = ['2010/03/14/13/obj1']
|
||||
self.assertEquals(result, expected)
|
||||
@ -133,7 +133,7 @@ class TestLogProcessor(unittest.TestCase):
|
||||
|
||||
def test_get_object_data(self):
|
||||
p = log_processor.LogProcessor(self.proxy_config, DumbLogger())
|
||||
p.internal_proxy = DumbInternalProxy()
|
||||
p._internal_proxy = DumbInternalProxy()
|
||||
result = list(p.get_object_data('a', 'c', 'o', False))
|
||||
expected = ['obj','data']
|
||||
self.assertEquals(result, expected)
|
||||
@ -148,7 +148,7 @@ class TestLogProcessor(unittest.TestCase):
|
||||
'swift.stats.stats_processor.StatsLogProcessor'
|
||||
}})
|
||||
p = log_processor.LogProcessor(stats_proxy_config, DumbLogger())
|
||||
p.internal_proxy = DumbInternalProxy()
|
||||
p._internal_proxy = DumbInternalProxy()
|
||||
def get_object_data(*a,**kw):
|
||||
return [self.stats_test_line]
|
||||
p.get_object_data = get_object_data
|
||||
|
Loading…
x
Reference in New Issue
Block a user