pep8 issues, refactored log_processor a tiny bit for testing (lazy load internal proxy), added a few comments

Clay Gerrard 2010-09-20 17:52:58 -05:00
parent 55c997aa94
commit 0bb5857da3
6 changed files with 89 additions and 59 deletions

swift/stats/access_processor.py

@@ -21,7 +21,9 @@ from swift.common.utils import split_path
 month_map = '_ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()

 class AccessLogProcessor(object):
+    """Transform proxy server access logs"""
+
     def __init__(self, conf):
         self.server_name = conf.get('server_name', 'proxy')
@@ -155,8 +157,8 @@ class AccessLogProcessor(object):
         if line_data['client_ip'] in self.service_ips:
             source = 'service'
-        d[(source, 'bytes_out')] = d.setdefault((source, 'bytes_out'), 0) + \
-            bytes_out
+        d[(source, 'bytes_out')] = d.setdefault((
+            source, 'bytes_out'), 0) + bytes_out
         d[(source, 'bytes_in')] = d.setdefault((source, 'bytes_in'), 0) + \
             bytes_in
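Editor's note: the `d.setdefault(key, 0) + value` idiom above is just a per-key byte counter. A minimal standalone sketch of the same accumulation using `collections.defaultdict` (sample values are illustrative, not from the Swift source):

    from collections import defaultdict

    # Missing keys start at 0, so += replaces the setdefault dance.
    d = defaultdict(int)
    for source, bytes_out, bytes_in in [('service', 10, 2), ('public', 5, 1),
                                        ('service', 7, 3)]:
        d[(source, 'bytes_out')] += bytes_out
        d[(source, 'bytes_in')] += bytes_in

    assert d[('service', 'bytes_out')] == 17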

swift/stats/account_stats.py

@@ -21,9 +21,15 @@ from swift.account.server import DATADIR as account_server_data_dir
 from swift.common.db import AccountBroker
 from swift.common.internal_proxy import InternalProxy
 from swift.common.utils import renamer, get_logger, readconf
+from swift.common.constraints import check_mount
 from swift.common.daemon import Daemon

 class AccountStat(Daemon):
+    """Extract storage stats from account databases on the account
+    storage nodes
+    """
+
     def __init__(self, stats_conf):
         super(AccountStat, self).__init__(stats_conf)
         target_dir = stats_conf.get('log_dir', '/var/log/swift')
@@ -53,10 +59,10 @@ class AccountStat(Daemon):
         #TODO: don't use /tmp?
         tmp_filename = os.path.join('/tmp', src_filename)
         with open(tmp_filename, 'wb') as statfile:
-            #statfile.write('Account Name, Container Count, Object Count, Bytes Used\n')
+            #statfile.write(
+            #    'Account Name, Container Count, Object Count, Bytes Used\n')
             for device in os.listdir(self.devices):
-                if self.mount_check and \
-                        not os.path.ismount(os.path.join(self.devices, device)):
+                if self.mount_check and not check_mount(self.devices, device):
                     self.logger.error("Device %s is not mounted, skipping." %
                         device)
                     continue
@@ -70,7 +76,8 @@ class AccountStat(Daemon):
                 for root, dirs, files in os.walk(accounts, topdown=False):
                     for filename in files:
                         if filename.endswith('.db'):
-                            broker = AccountBroker(os.path.join(root, filename))
+                            db_path = os.path.join(root, filename)
+                            broker = AccountBroker(db_path)
                             if not broker.is_deleted():
                                 (account_name,
                                 _, _, _,
@@ -78,9 +85,8 @@ class AccountStat(Daemon):
                                 object_count,
                                 bytes_used,
                                 _, _) = broker.get_info()
-                                line_data = '"%s",%d,%d,%d\n' % (account_name,
-                                                                 container_count,
-                                                                 object_count,
-                                                                 bytes_used)
+                                line_data = '"%s",%d,%d,%d\n' % (
+                                    account_name, container_count,
+                                    object_count, bytes_used)
                                 statfile.write(line_data)
         renamer(tmp_filename, os.path.join(self.target_dir, src_filename))
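Editor's note: the mount check above swaps the inline `os.path.ismount` test for the newly imported `swift.common.constraints.check_mount`. I don't have that helper's exact body from this revision in front of me; roughly, it is a thin wrapper over the same test, sketched here under that assumption:

    import os

    def check_mount(root, drive):
        # Approximation of swift.common.constraints.check_mount:
        # True only if root/drive is an actual mount point.
        return os.path.ismount(os.path.join(root, drive))

    # Usage mirrors the new condition in the diff:
    #     if self.mount_check and not check_mount(self.devices, device): skip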

swift/stats/log_processor.py

@@ -29,25 +29,21 @@ from swift.common.exceptions import ChunkReadTimeout
 from swift.common.utils import get_logger, readconf
 from swift.common.daemon import Daemon

 class BadFileDownload(Exception):
     pass

 class LogProcessor(object):
+    """Load plugins, process logs"""
+
     def __init__(self, conf, logger):
         stats_conf = conf.get('log-processor', {})
-        proxy_server_conf_loc = stats_conf.get('proxy_server_conf',
-                                               '/etc/swift/proxy-server.conf')
-        self.proxy_server_conf = appconfig('config:%s' % proxy_server_conf_loc,
-                                           name='proxy-server')
         if isinstance(logger, tuple):
             self.logger = get_logger(*logger)
         else:
             self.logger = logger
-        self.internal_proxy = InternalProxy(self.proxy_server_conf,
-                                            self.logger,
-                                            retries=3)

         # load the processing plugins
         self.plugins = {}
@@ -56,11 +52,25 @@ class LogProcessor(object):
             plugin_name = section[len(plugin_prefix):]
             plugin_conf = conf.get(section, {})
             self.plugins[plugin_name] = plugin_conf
-            import_target, class_name = plugin_conf['class_path'].rsplit('.', 1)
+            class_path = self.plugins[plugin_name]['class_path']
+            import_target, class_name = class_path.rsplit('.', 1)
             module = __import__(import_target, fromlist=[import_target])
             klass = getattr(module, class_name)
             self.plugins[plugin_name]['instance'] = klass(plugin_conf)

+    @property
+    def internal_proxy(self):
+        '''Lazy load internal proxy'''
+        if self._internal_proxy is None:
+            proxy_server_conf_loc = stats_conf.get('proxy_server_conf',
+                                        '/etc/swift/proxy-server.conf')
+            self.proxy_server_conf = appconfig('config:%s' % proxy_server_conf_loc,
+                                               name='proxy-server')
+            self._internal_proxy = InternalProxy(self.proxy_server_conf,
+                                                 self.logger,
+                                                 retries=3)
+        return self._internal_proxy
+
     def process_one_file(self, plugin_name, account, container, object_name):
         # get an iter of the object data
         compressed = object_name.endswith('.gz')
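Editor's note: this lazy-load property is the commit's headline refactor: building the InternalProxy (and parsing proxy-server.conf via paste's appconfig) moves out of `__init__`, so tests can construct a LogProcessor without a proxy config on disk. As written, though, the property references `stats_conf`, which is a local variable of `__init__`, and `_internal_proxy` is never initialized, so the fallback path would raise NameError/AttributeError on first real use. A minimal sketch of how the plumbing would need to be wired, assuming the module's existing `appconfig` and `InternalProxy` imports; the `self.stats_conf` attribute and the `None` default are my wiring, not the commit's:

    class LogProcessor(object):
        """Load plugins, process logs"""

        def __init__(self, conf, logger):
            self.stats_conf = conf.get('log-processor', {})  # keep for the property
            self.logger = logger
            self._internal_proxy = None                      # built on first use

        @property
        def internal_proxy(self):
            '''Lazy load internal proxy'''
            if self._internal_proxy is None:
                conf_loc = self.stats_conf.get('proxy_server_conf',
                                               '/etc/swift/proxy-server.conf')
                proxy_conf = appconfig('config:%s' % conf_loc,
                                       name='proxy-server')
                self._internal_proxy = InternalProxy(proxy_conf, self.logger,
                                                     retries=3)
            return self._internal_proxy

The payoff shows up in the test changes below: assigning `p._internal_proxy` directly satisfies the property's None check, so the real proxy is never constructed.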
@@ -72,7 +82,8 @@ class LogProcessor(object):
                                        container,
                                        object_name)

-    def get_data_list(self, start_date=None, end_date=None, listing_filter=None):
+    def get_data_list(self, start_date=None, end_date=None,
+                      listing_filter=None):
         total_list = []
         for name, data in self.plugins.items():
             account = data['swift_account']
@@ -89,8 +100,9 @@ class LogProcessor(object):
                     total_list.append(x)
         return total_list

-    def get_container_listing(self, swift_account, container_name, start_date=None,
-                              end_date=None, listing_filter=None):
+    def get_container_listing(self, swift_account, container_name,
+                              start_date=None, end_date=None,
+                              listing_filter=None):
         '''
         Get a container listing, filtered by start_date, end_date, and
         listing_filter. Dates, if given, should be in YYYYMMDDHH format
@@ -162,7 +174,8 @@ class LogProcessor(object):
         last_part = ''
         last_compressed_part = ''
         # magic in the following zlib.decompressobj argument is courtesy of
-        # http://stackoverflow.com/questions/2423866/python-decompressing-gzip-chunk-by-chunk
+        # Python decompressing gzip chunk-by-chunk
+        # http://stackoverflow.com/questions/2423866
         d = zlib.decompressobj(16 + zlib.MAX_WBITS)
         try:
             for chunk in o:
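Editor's note: the relocated comment is worth unpacking. Adding 16 to `zlib.MAX_WBITS` tells zlib to expect a gzip header and trailer, which is what lets `get_object_data` stream-decompress `.gz` objects chunk by chunk without `gzip.open()`. A self-contained illustration (synthetic data, not Swift code):

    import zlib

    data = b'some log line\n' * 100
    c = zlib.compressobj(9, zlib.DEFLATED, 16 + zlib.MAX_WBITS)  # gzip wrapper
    gz = c.compress(data) + c.flush()

    d = zlib.decompressobj(16 + zlib.MAX_WBITS)
    out = b''
    for i in range(0, len(gz), 64):      # feed arbitrary-sized chunks
        out += d.decompress(gz[i:i + 64])
    out += d.flush()
    assert out == data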
@@ -208,6 +221,8 @@

 class LogProcessorDaemon(Daemon):
+    """Gather raw log data and farm processing, results output via print"""
+
     def __init__(self, conf):
         c = conf.get('log-processor')
         super(LogProcessorDaemon, self).__init__(c)
@@ -228,15 +243,16 @@ class LogProcessorDaemon(Daemon):
             lookback_start = None
             lookback_end = None
         else:
-            lookback_start = datetime.datetime.now() - \
-                             datetime.timedelta(hours=self.lookback_hours)
+            delta_hours = datetime.timedelta(hours=self.lookback_hours)
+            lookback_start = datetime.datetime.now() - delta_hours
             lookback_start = lookback_start.strftime('%Y%m%d%H')
             if self.lookback_window == 0:
                 lookback_end = None
             else:
+                delta_window = datetime.timedelta(hours=self.lookback_window)
                 lookback_end = datetime.datetime.now() - \
-                               datetime.timedelta(hours=self.lookback_hours) + \
-                               datetime.timedelta(hours=self.lookback_window)
+                               delta_hours + \
+                               delta_window
                 lookback_end = lookback_end.strftime('%Y%m%d%H')
         self.logger.debug('lookback_start: %s' % lookback_start)
         self.logger.debug('lookback_end: %s' % lookback_end)
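Editor's note: a quick worked example of the lookback arithmetic with hypothetical settings `lookback_hours=24`, `lookback_window=6` — the daemon processes the 6-hour slice that starts 24 hours ago:

    import datetime

    lookback_hours, lookback_window = 24, 6          # example config values
    now = datetime.datetime(2010, 9, 20, 17)         # fixed "now" for the demo

    delta_hours = datetime.timedelta(hours=lookback_hours)
    delta_window = datetime.timedelta(hours=lookback_window)

    lookback_start = (now - delta_hours).strftime('%Y%m%d%H')
    lookback_end = (now - delta_hours + delta_window).strftime('%Y%m%d%H')

    assert lookback_start == '2010091917'   # 24 hours back
    assert lookback_end == '2010091923'     # 6-hour window after that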
@@ -329,6 +345,7 @@ class LogProcessorDaemon(Daemon):
         self.logger.info("Log processing done (%0.2f minutes)" %
                          ((time.time() - start) / 60))

+
 def multiprocess_collate(processor_args, logs_to_process):
     '''yield hourly data from logs_to_process'''
     worker_count = multiprocessing.cpu_count()
@@ -361,6 +378,7 @@ def multiprocess_collate(processor_args, logs_to_process):
     for r in results:
         r.join()

+
 def collate_worker(processor_args, in_queue, out_queue):
     '''worker process for multiprocess_collate'''
     p = LogProcessor(*processor_args)
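Editor's note: `multiprocess_collate`/`collate_worker` follow the standard fan-out/fan-in queue pattern: one worker per CPU, one sentinel per worker to shut down. A generic sketch of the pattern (squaring stands in for log parsing; this is not the Swift code):

    import multiprocessing

    def worker(in_queue, out_queue):
        # Pull work items until the None sentinel, push results back.
        while True:
            item = in_queue.get()
            if item is None:
                break
            out_queue.put(item * item)

    if __name__ == '__main__':
        in_q, out_q = multiprocessing.Queue(), multiprocessing.Queue()
        procs = [multiprocessing.Process(target=worker, args=(in_q, out_q))
                 for _ in range(multiprocessing.cpu_count())]
        for p in procs:
            p.start()
        jobs = range(10)
        for j in jobs:
            in_q.put(j)
        for _ in procs:
            in_q.put(None)               # one sentinel per worker
        results = [out_q.get() for _ in jobs]
        for p in procs:
            p.join()
        assert sorted(results) == [j * j for j in jobs]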

swift/stats/log_uploader.py

@@ -25,15 +25,16 @@ from swift.common.internal_proxy import InternalProxy
 from swift.common.daemon import Daemon
 from swift.common import utils

 class LogUploader(Daemon):
     '''
     Given a local directory, a swift account, and a container name, LogParser
-    will upload all files in the local directory to the given account/container.
-    All but the newest files will be uploaded, and the files' md5 sum will be
-    computed. The hash is used to prevent duplicate data from being uploaded
-    multiple times in different files (ex: log lines). Since the hash is
-    computed, it is also used as the uploaded object's etag to ensure data
-    integrity.
+    will upload all files in the local directory to the given account/
+    container. All but the newest files will be uploaded, and the files' md5
+    sum will be computed. The hash is used to prevent duplicate data from
+    being uploaded multiple times in different files (ex: log lines). Since
+    the hash is computed, it is also used as the uploaded object's etag to
+    ensure data integrity.

     Note that after the file is successfully uploaded, it will be unlinked.
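Editor's note: since the docstring leans on the md5-as-etag idea, here is a sketch of the streaming digest computation it implies (the helper name is mine, not the uploader's):

    import hashlib

    def file_md5(path, chunk_size=64 * 1024):
        # Stream the file so large logs need not fit in memory.
        md5 = hashlib.md5()
        with open(path, 'rb') as f:
            for chunk in iter(lambda: f.read(chunk_size), b''):
                md5.update(chunk)
        return md5.hexdigest()

    # The hex digest can serve both as a duplicate-upload guard and as the
    # object's etag, which Swift verifies against the received body.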
@@ -124,7 +125,8 @@ class LogUploader(Daemon):
                 continue
             if (time.time() - os.stat(filename).st_mtime) < 7200:
                 # don't process very new logs
-                self.logger.debug("Skipping log: %s (< 2 hours old)" % filename)
+                self.logger.debug(
+                    "Skipping log: %s (< 2 hours old)" % filename)
                 continue
             self.upload_one_log(filename, year, month, day, hour)

swift/stats/stats_processor.py

@@ -13,7 +13,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

 class StatsLogProcessor(object):
+    """Transform account storage stat logs"""
+
     def __init__(self, conf):
         pass

test/unit/stats/test_log_processor.py

@@ -112,7 +112,7 @@ class TestLogProcessor(unittest.TestCase):
     def test_get_container_listing(self):
         p = log_processor.LogProcessor(self.proxy_config, DumbLogger())
-        p.internal_proxy = DumbInternalProxy()
+        p._internal_proxy = DumbInternalProxy()
         result = p.get_container_listing('a', 'foo')
         expected = ['2010/03/14/13/obj1']
         self.assertEquals(result, expected)
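Editor's note: these test tweaks are the other half of the lazy property: assigning the private `_internal_proxy` slot up front means the property's None check never fires, so no proxy-server.conf is needed to run the tests. A hypothetical stub in the same spirit as the suite's DumbInternalProxy (whose definition is outside this diff):

    class StubInternalProxy(object):
        # Hypothetical stand-in; method name/signature are illustrative.
        def get_container_list(self, account, container, marker=None):
            return [{'name': '2010/03/14/13/obj1'}]

    # Inside a test:
    #     p = log_processor.LogProcessor(self.proxy_config, DumbLogger())
    #     p._internal_proxy = StubInternalProxy()  # lazy property never fires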
@@ -133,7 +133,7 @@ class TestLogProcessor(unittest.TestCase):
     def test_get_object_data(self):
         p = log_processor.LogProcessor(self.proxy_config, DumbLogger())
-        p.internal_proxy = DumbInternalProxy()
+        p._internal_proxy = DumbInternalProxy()
         result = list(p.get_object_data('a', 'c', 'o', False))
         expected = ['obj','data']
         self.assertEquals(result, expected)
@@ -148,7 +148,7 @@ class TestLogProcessor(unittest.TestCase):
                 'swift.stats.stats_processor.StatsLogProcessor'
             }})
         p = log_processor.LogProcessor(stats_proxy_config, DumbLogger())
-        p.internal_proxy = DumbInternalProxy()
+        p._internal_proxy = DumbInternalProxy()
         def get_object_data(*a,**kw):
             return [self.stats_test_line]
         p.get_object_data = get_object_data