replace file and unit tests are working

This commit is contained in:
David Goetz 2011-05-09 14:18:20 -07:00
parent 1ff91eabcd
commit e35f8bf5a7
7 changed files with 97 additions and 274 deletions

View File

@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from swift.stats.account_stats import AccountStat
from swift.stats.db_stats import AccountStat
from swift.common.utils import parse_options
from swift.common.daemon import run_daemon

View File

@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from swift.stats.account_stats import ContainerStat
from swift.stats.db_stats import ContainerStat
from swift.common.utils import parse_options
from swift.common.daemon import run_daemon

View File

@ -14,7 +14,12 @@ swift_account = AUTH_7abbc116-8a07-4b63-819d-02715d3e0f31
# log_dir = /var/log/swift/
swift_account = AUTH_7abbc116-8a07-4b63-819d-02715d3e0f31
container_name = log_data
source_filename_pattern = access-%Y%m%d%H
source_filename_pattern = ^
(?P<year>[0-9]{4})
(?P<month>[0-1][0-9])
(?P<day>[0-3][0-9])
(?P<hour>[0-2][0-9])
.*$
# new_log_cutoff = 7200
# unlink_log = True
class_path = swift.stats.access_processor.AccessLogProcessor
@ -31,7 +36,6 @@ class_path = swift.stats.access_processor.AccessLogProcessor
# log_dir = /var/log/swift/
swift_account = AUTH_7abbc116-8a07-4b63-819d-02715d3e0f31
container_name = account_stats
source_filename_pattern = stats-%Y%m%d%H_.*
# new_log_cutoff = 7200
# unlink_log = True
class_path = swift.stats.stats_processor.StatsLogProcessor
@ -42,7 +46,6 @@ class_path = swift.stats.stats_processor.StatsLogProcessor
# log_dir = /var/log/swift/
swift_account = AUTH_7abbc116-8a07-4b63-819d-02715d3e0f31
container_name = container_stats
source_filename_pattern = container-stats-%Y%m%d%H_.*
# new_log_cutoff = 7200
# unlink_log = True
class_path = swift.stats.stats_processor.StatsLogProcessor

View File

@ -27,25 +27,29 @@ from swift.common.utils import renamer, get_logger, readconf, mkdirs, \
from swift.common.constraints import check_mount
from swift.common.daemon import Daemon
class DatabaseStatCollector(Daemon):
class DatabaseStatsCollector(Daemon):
"""
Extract storage stats from account databases on the account
storage nodes
Any subclasses must define self.stats_type, self.logger, self.data_dir
and the function get_data.
Any subclasses must define the function get_data.
"""
def __init__(self, stats_conf, server_conf):
super(DatabaseStatCollector, self).__init__(stats_conf)
self.filename_format = stats_conf['source_filename_format']
if self.filename_format.count('*') > 1:
raise Exception('source filename format should have at most one *')
def __init__(self, stats_conf, stats_type, data_dir, filename_format):
super(DatabaseStatsCollector, self).__init__(stats_conf)
self.target_dir = stats_conf.get('log_dir', '/var/log/swift')
self.stats_type = stats_type
server_conf_loc = stats_conf.get('%s_server_conf' % stats_type,
'/etc/swift/%s-server.conf' % stats_type)
server_conf = appconfig('config:%s' % server_conf_loc,
name='%s-server' % stats_type)
self.filename_format = filename_format
mkdirs(self.target_dir)
self.devices = server_conf.get('devices', '/srv/node')
self.mount_check = server_conf.get('mount_check',
'true').lower() in TRUE_VALUES
self.logger = get_logger(stats_conf,
log_route='%s-stats' % stats_type)
def run_once(self, *args, **kwargs):
self.logger.info(_("Gathering %s stats" % self.stats_type))
@ -54,9 +58,13 @@ class DatabaseStatCollector(Daemon):
self.logger.info(_("Gathering %s stats complete (%0.2f minutes)") %
(self.stats_type, (time.time() - start) / 60))
def get_data(self):
raise Exception('Not Implemented')
def find_and_process(self):
src_filename = time.strftime(self.filename_format)
working_dir = os.path.join(self.target_dir, '.stats_tmp')
working_dir = os.path.join(self.target_dir,
'.%s-stats_tmp' % self.stats_type)
shutil.rmtree(working_dir, ignore_errors=True)
mkdirs(working_dir)
tmp_filename = os.path.join(working_dir, src_filename)
@ -83,35 +91,20 @@ class DatabaseStatCollector(Daemon):
statfile.write(line_data)
hasher.update(line_data)
file_hash = hasher.hexdigest()
hash_index = src_filename.find('*')
if hash_index < 0:
# if there is no * in the target filename, the uploader probably
# won't work because we are crafting a filename that doesn't
# fit the pattern
src_filename = '_'.join([src_filename, file_hash])
else:
parts = src_filename[:hash_index], src_filename[hash_index + 1:]
src_filename = ''.join([parts[0], file_hash, parts[1]])
src_filename += hasher.hexdigest()
renamer(tmp_filename, os.path.join(self.target_dir, src_filename))
shutil.rmtree(working_dir, ignore_errors=True)
class AccountStat(DatabaseStatCollector):
class AccountStat(DatabaseStatsCollector):
"""
Extract storage stats from account databases on the account
storage nodes
"""
def __init__(self, stats_conf):
server_conf_loc = stats_conf.get('account_server_conf',
'/etc/swift/account-server.conf')
server_conf = appconfig('config:%s' % server_conf_loc,
name='account-server')
self.logger = get_logger(stats_conf, log_route='account-stats')
self.data_dir = account_server_data_dir
self.stats_type = 'account'
super(AccountStat, self).__init__(stats_conf, server_conf)
super(AccountStat, self).__init__(stats_conf, 'account',
account_server_data_dir,
'stats-%Y%m%d%H_')
def get_data(self, db_path):
"""
@ -128,21 +121,15 @@ class AccountStat(DatabaseStatCollector):
info['bytes_used'])
return line_data
class ContainerStat(DatabaseStatCollector):
class ContainerStat(DatabaseStatsCollector):
"""
Extract storage stats from container databases on the container
storage nodes
"""
def __init__(self, stats_conf):
server_conf_loc = stats_conf.get('container_server_conf',
'/etc/swift/container-server.conf')
server_conf = appconfig('config:%s' % server_conf_loc,
name='container-server')
self.logger = get_logger(stats_conf, log_route='container-stats')
self.data_dir = container_server_data_dir
self.stats_type = 'container'
super(ContainerStat, self).__init__(stats_conf, server_conf)
super(ContainerStat, self).__init__(stats_conf, 'container',
container_server_data_dir,
'container-stats-%Y%m%d%H_')
def get_data(self, db_path):
"""

View File

@ -19,6 +19,7 @@ import hashlib
import time
import gzip
import re
import sys
from paste.deploy import appconfig
from swift.common.internal_proxy import InternalProxy
@ -40,6 +41,17 @@ class LogUploader(Daemon):
The given proxy server config is used to instantiate a proxy server for
the object uploads.
The default log file format is: plugin_name-%Y%m%d%H* . Any other format
of log file names must supply a regular expression that defines groups
for year, month, day, and hour. The regular expression will be evaluated
with re.VERBOSE. A common example may be:
source_filename_pattern = ^cdn_logger-
(?P<year>[0-9]{4})
(?P<month>[0-1][0-9])
(?P<day>[0-3][0-9])
(?P<hour>[0-2][0-9])
.*$
'''
def __init__(self, uploader_conf, plugin_name):
@ -58,16 +70,14 @@ class LogUploader(Daemon):
self.new_log_cutoff = int(uploader_conf.get('new_log_cutoff', '7200'))
self.unlink_log = uploader_conf.get('unlink_log', 'True').lower() in \
utils.TRUE_VALUES
# source_filename_format is deprecated
source_filename_format = uploader_conf.get('source_filename_format')
source_filename_pattern = uploader_conf.get('source_filename_pattern')
if source_filename_format and not source_filename_pattern:
self.logger.warning(_('source_filename_format is unreliable and '
'deprecated; use source_filename_pattern'))
self.pattern = self.convert_glob_to_regex(source_filename_format)
else:
self.pattern = source_filename_pattern or '%Y%m%d%H'
self.filename_pattern = uploader_conf.get('source_filename_pattern',
'''
^%s-
(?P<year>[0-9]{4})
(?P<month>[0-1][0-9])
(?P<day>[0-3][0-9])
(?P<hour>[0-2][0-9])
.*$''' % plugin_name)
def run_once(self, *args, **kwargs):
self.logger.info(_("Uploading logs"))
@ -76,44 +86,6 @@ class LogUploader(Daemon):
self.logger.info(_("Uploading logs complete (%0.2f minutes)") %
((time.time() - start) / 60))
def convert_glob_to_regex(self, glob):
"""
Make a best effort to support old style config globs
:param : old style config source_filename_format
:returns : new style config source_filename_pattern
"""
pattern = glob
pattern = pattern.replace('.', r'\.')
pattern = pattern.replace('*', r'.*')
pattern = pattern.replace('?', r'.?')
return pattern
def validate_filename_pattern(self):
"""
Validate source_filename_pattern
:returns : valid regex pattern based on source_filename_pattern with
group matches substituted for date fmt markers
"""
pattern = self.pattern
markers = {
'%Y': ('year', '(?P<year>[0-9]{4})'),
'%m': ('month', '(?P<month>[0-1][0-9])'),
'%d': ('day', '(?P<day>[0-3][0-9])'),
'%H': ('hour', '(?P<hour>[0-2][0-9])'),
}
for marker, (mtype, group) in markers.items():
if marker not in self.pattern:
self.logger.error(_('source_filename_pattern must contain a '
'marker %(marker)s to match the '
'%(mtype)s') % {'marker': marker,
'mtype': mtype})
return
pattern = pattern.replace(marker, group)
return pattern
def get_relpath_to_files_under_log_dir(self):
"""
Look under log_dir recursively and return all filenames as relpaths
@ -125,7 +97,7 @@ class LogUploader(Daemon):
all_files.extend(os.path.join(path, f) for f in files)
return [os.path.relpath(f, start=self.log_dir) for f in all_files]
def filter_files(self, all_files, pattern):
def filter_files(self, all_files):
"""
Filter files based on regex pattern
@ -137,15 +109,15 @@ class LogUploader(Daemon):
filename2match = {}
found_match = False
for filename in all_files:
match = re.match(pattern, filename)
match = re.match(self.filename_pattern, filename, re.VERBOSE)
if match:
found_match = True
full_path = os.path.join(self.log_dir, filename)
filename2match[full_path] = match.groupdict()
else:
self.logger.debug(_('%(filename)s does not match '
'%(pattern)s') % {'filename': filename,
'pattern': pattern})
'%(pattern)s') % {'filename': filename,
'pattern': self.filename_pattern})
return filename2match
def upload_all_logs(self):
@ -153,16 +125,12 @@ class LogUploader(Daemon):
Match files under log_dir to source_filename_pattern and upload to
swift
"""
pattern = self.validate_filename_pattern()
if not pattern:
self.logger.error(_('Invalid filename_format'))
return
all_files = self.get_relpath_to_files_under_log_dir()
filename2match = self.filter_files(all_files, pattern)
filename2match = self.filter_files(all_files)
if not filename2match:
self.logger.info(_('No files in %(log_dir)s match %(pattern)s') %
{'log_dir': self.log_dir, 'pattern': pattern})
return
sys.exit(_('No files in %(log_dir)s match %(pattern)s') %
{'log_dir': self.log_dir,
'pattern': self.filename_pattern})
if not self.internal_proxy.create_container(self.swift_account,
self.container_name):
self.logger.error(_('Unable to create container for '

View File

@ -16,7 +16,7 @@
# TODO: Tests
import unittest
from swift.stats import account_stats
from swift.stats import db_stats
class TestAccountStats(unittest.TestCase):

View File

@ -35,6 +35,14 @@ DEFAULT_GLOB = '%Y%m%d%H'
COMPRESSED_DATA = '\x1f\x8b\x08\x08\x87\xa5zM\x02\xffdata\x00KI,I\x04\x00c' \
'\xf3\xf3\xad\x04\x00\x00\x00'
access_regex = '''
^
(?P<year>[0-9]{4})
(?P<month>[0-1][0-9])
(?P<day>[0-3][0-9])
(?P<hour>[0-2][0-9])
.*$
'''
def mock_appconfig(*args, **kwargs):
pass
@ -87,179 +95,27 @@ class TestLogUploader(unittest.TestCase):
log_uploader.appconfig = self._orig_appconfig
log_uploader.InternalProxy = self._orig_InternalProxy
def test_deprecated_glob_style_upload_all_logs(self):
tmpdir = mkdtemp()
try:
today = datetime.now()
year = today.year
month = today.month
day = today.day
today_str = today.strftime('%Y%m%d')
time_strs = []
for i in range(24):
time_strs.append('%s%0.2d' % (today_str, i))
for ts in time_strs:
open(os.path.join(tmpdir, ts), 'w').close()
conf = {'log_dir': tmpdir}
uploader = MockLogUploader(conf)
uploader.upload_all_logs()
self.assertEquals(len(uploader.uploaded_files), 24)
for i, file_date in enumerate(sorted(uploader.uploaded_files)):
d = {'year': year, 'month': month, 'day': day, 'hour': i}
for k, v in d.items():
d[k] = '%0.2d' % v
expected = (os.path.join(tmpdir, '%s%0.2d' %
(today_str, i)), d)
self.assertEquals(file_date, expected)
finally:
rmtree(tmpdir)
tmpdir = mkdtemp()
try:
today = datetime.now()
year = today.year
month = today.month
day = today.day
today_str = today.strftime('%Y%m%d')
time_strs = []
for i in range(24):
time_strs.append('%s-%0.2d00' % (today_str, i))
for ts in time_strs:
open(os.path.join(tmpdir, 'swift-blah_98764.%s-2400.tar.gz' %
ts), 'w').close()
open(os.path.join(tmpdir, 'swift.blah_98764.%s-2400.tar.gz' % ts),
'w').close()
open(os.path.join(tmpdir, 'swift-blah_98764.%s-2400.tar.g' % ts),
'w').close()
open(os.path.join(tmpdir,
'swift-blah_201102160100.%s-2400.tar.gz' %
'201102160100'), 'w').close()
conf = {
'log_dir': '%s/' % tmpdir,
'filename_format': 'swift-blah_98764.%Y%m%d-%H*.tar.gz',
}
uploader = MockLogUploader(conf)
uploader.upload_all_logs()
self.assertEquals(len(uploader.uploaded_files), 24)
for i, file_date in enumerate(sorted(uploader.uploaded_files)):
filename, date_dict = file_date
filename = os.path.basename(filename)
self.assert_(today_str in filename, filename)
self.assert_(filename.startswith('swift'), filename)
self.assert_(filename.endswith('tar.gz'), filename)
d = {'year': year, 'month': month, 'day': day, 'hour': i}
for k, v in d.items():
d[k] = '%0.2d' % v
self.assertEquals(d, date_dict)
finally:
rmtree(tmpdir)
tmpdir = mkdtemp()
try:
today = datetime.now()
year = today.year
month = today.month
day = today.day
today_str = today.strftime('%Y%m%d')
time_strs = []
for i in range(24):
time_strs.append('%s%0.2d' % (today_str, i))
for i, ts in enumerate(time_strs):
open(os.path.join(tmpdir, '%s.%s.log' % (i, ts)), 'w').close()
conf = {
'log_dir': tmpdir,
'filename_format': '*.%Y%m%d%H.log',
}
uploader = MockLogUploader(conf)
uploader.upload_all_logs()
self.assertEquals(len(uploader.uploaded_files), 24)
fname_to_int = lambda x: int(os.path.basename(x[0]).split('.')[0])
numerically = lambda x, y: cmp(fname_to_int(x),
fname_to_int(y))
for i, file_date in enumerate(sorted(uploader.uploaded_files,
cmp=numerically)):
d = {'year': year, 'month': month, 'day': day, 'hour': i}
for k, v in d.items():
d[k] = '%0.2d' % v
expected = (os.path.join(tmpdir, '%s.%s%0.2d.log' %
(i, today_str, i)), d)
self.assertEquals(file_date, expected)
finally:
rmtree(tmpdir)
def test_bad_pattern_in_config(self):
files = [datetime.now().strftime('%Y%m%d%H')]
with temptree(files, contents=[COMPRESSED_DATA] * len(files)) as t:
# invalid pattern
conf = {'log_dir': t, 'source_filename_pattern': '%Y%m%d%h'} # should be %H
conf = {'log_dir': t,
'source_filename_pattern': '%Y%m%d%h'} # should be %H
uploader = MockLogUploader(conf)
self.assertFalse(uploader.validate_filename_pattern())
uploader.upload_all_logs()
self.assertEquals(uploader.uploaded_files, [])
self.assertRaises(SystemExit, uploader.upload_all_logs)
conf = {'log_dir': t, 'source_filename_pattern': '%Y%m%d%H'}
conf = {'log_dir': t, 'source_filename_pattern': access_regex}
uploader = MockLogUploader(conf)
self.assert_(uploader.validate_filename_pattern())
uploader.upload_all_logs()
self.assertEquals(len(uploader.uploaded_files), 1)
# deprecated warning on source_filename_format
class MockLogger():
def __init__(self):
self.msgs = defaultdict(list)
def log(self, level, msg):
self.msgs[level].append(msg)
def __getattr__(self, attr):
return partial(self.log, attr)
logger = MockLogger.logger = MockLogger()
def mock_get_logger(*args, **kwargs):
return MockLogger.logger
_orig_get_logger = log_uploader.utils.get_logger
try:
log_uploader.utils.get_logger = mock_get_logger
conf = {'source_filename_format': '%Y%m%d%H'}
uploader = MockLogUploader(conf, logger=logger)
self.assert_([m for m in logger.msgs['warning']
if 'deprecated' in m])
finally:
log_uploader.utils.get_logger = _orig_get_logger
# convert source_filename_format to regex
conf = {'source_filename_format': 'pattern-*.%Y%m%d%H.*.gz'}
uploader = MockLogUploader(conf)
expected = r'pattern-.*\.%Y%m%d%H\..*\.gz'
self.assertEquals(uploader.pattern, expected)
# use source_filename_pattern if we have the choice!
conf = {
'source_filename_format': 'bad',
'source_filename_pattern': 'good',
}
uploader = MockLogUploader(conf)
self.assertEquals(uploader.pattern, 'good')
def test_pattern_upload_all_logs(self):
# test empty dir
with temptree([]) as t:
conf = {'log_dir': t}
uploader = MockLogUploader(conf)
uploader.run_once()
self.assertEquals(len(uploader.uploaded_files), 0)
self.assertRaises(SystemExit, uploader.run_once)
def get_random_length_str(max_len=10, chars=string.ascii_letters):
return ''.join(random.choice(chars) for x in
@ -267,8 +123,12 @@ class TestLogUploader(unittest.TestCase):
template = 'prefix_%(random)s_%(digits)s.blah.' \
'%(datestr)s%(hour)0.2d00-%(next_hour)0.2d00-%(number)s.gz'
pattern = r'prefix_.*_[0-9]+\.blah\.%Y%m%d%H00-[0-9]{2}00' \
'-[0-9]?[0-9]\.gz'
pattern = '''prefix_.*_[0-9]+\.blah\.
(?P<year>[0-9]{4})
(?P<month>[0-1][0-9])
(?P<day>[0-3][0-9])
(?P<hour>[0-2][0-9])00-[0-9]{2}00
-[0-9]?[0-9]\.gz'''
files_that_should_match = []
# add some files that match
for i in range(24):
@ -313,25 +173,28 @@ class TestLogUploader(unittest.TestCase):
def test_log_cutoff(self):
files = [datetime.now().strftime('%Y%m%d%H')]
with temptree(files) as t:
conf = {'log_dir': t, 'new_log_cutoff': '7200'}
conf = {'log_dir': t, 'new_log_cutoff': '7200',
'source_filename_pattern': access_regex}
uploader = MockLogUploader(conf)
uploader.run_once()
self.assertEquals(len(uploader.uploaded_files), 0)
conf = {'log_dir': t, 'new_log_cutoff': '0'}
conf = {'log_dir': t, 'new_log_cutoff': '0',
'source_filename_pattern': access_regex}
uploader = MockLogUploader(conf)
uploader.run_once()
self.assertEquals(len(uploader.uploaded_files), 1)
def test_create_container_fail(self):
files = [datetime.now().strftime('%Y%m%d%H')]
conf = {'source_filename_pattern': access_regex}
with temptree(files) as t:
conf = {'log_dir': t}
conf['log_dir'] = t
uploader = MockLogUploader(conf)
uploader.run_once()
self.assertEquals(len(uploader.uploaded_files), 1)
with temptree(files) as t:
conf = {'log_dir': t}
conf['log_dir'] = t
uploader = MockLogUploader(conf)
# mock create_container to fail
uploader.internal_proxy.create_container = lambda *args: False
@ -341,14 +204,16 @@ class TestLogUploader(unittest.TestCase):
def test_unlink_log(self):
files = [datetime.now().strftime('%Y%m%d%H')]
with temptree(files, contents=[COMPRESSED_DATA]) as t:
conf = {'log_dir': t, 'unlink_log': 'false'}
conf = {'log_dir': t, 'unlink_log': 'false',
'source_filename_pattern': access_regex}
uploader = MockLogUploader(conf)
uploader.run_once()
self.assertEquals(len(uploader.uploaded_files), 1)
# file still there
self.assertEquals(len(os.listdir(t)), 1)
conf = {'log_dir': t, 'unlink_log': 'true'}
conf = {'log_dir': t, 'unlink_log': 'true',
'source_filename_pattern': access_regex}
uploader = MockLogUploader(conf)
uploader.run_once()
self.assertEquals(len(uploader.uploaded_files), 1)
@ -356,9 +221,10 @@ class TestLogUploader(unittest.TestCase):
self.assertEquals(len(os.listdir(t)), 0)
def test_upload_file_failed(self):
files = [datetime.now().strftime('%Y%m%d%H')]
files = ['plugin-%s' % datetime.now().strftime('%Y%m%d%H')]
with temptree(files, contents=[COMPRESSED_DATA]) as t:
conf = {'log_dir': t, 'unlink_log': 'true'}
conf = {'log_dir': t, 'unlink_log': 'true',
'source_filename_pattern': access_regex}
uploader = MockLogUploader(conf)
# mock upload_file to fail, and clean up mock
@ -366,8 +232,7 @@ class TestLogUploader(unittest.TestCase):
uploader.uploaded_files.pop()
return False
uploader.internal_proxy.upload_file = mock_upload_file
uploader.run_once()
self.assertEquals(len(uploader.uploaded_files), 0)
self.assertRaises(SystemExit, uploader.run_once)
# file still there
self.assertEquals(len(os.listdir(t)), 1)