Move remaining bin scripts to cli modules
- swift-account-audit - swift-config - swift-drive-audit - swift-get-nodes - swift-reconciler-enqueue - swift-oldies - swift-orphans Related-Change: Ifcc8138e7b55d5b82bea0d411ec6bfcca2c77c83 Change-Id: Ie4a252ec51d23caa9e90e6c2f772847d0d71b1c8
This commit is contained in:
parent
2c5dc64d25
commit
c3e1d91d1f
@ -14,373 +14,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import print_function
|
||||
import os
|
||||
import sys
|
||||
from hashlib import md5
|
||||
import getopt
|
||||
from itertools import chain
|
||||
from swift.cli import account_audit
|
||||
|
||||
import json
|
||||
from eventlet.greenpool import GreenPool
|
||||
from eventlet.event import Event
|
||||
from six.moves.urllib.parse import quote
|
||||
|
||||
from swift.common.ring import Ring
|
||||
from swift.common.utils import split_path
|
||||
from swift.common.bufferedhttp import http_connect
|
||||
|
||||
|
||||
usage = """
|
||||
Usage!
|
||||
|
||||
%(cmd)s [options] [url 1] [url 2] ...
|
||||
-c [concurrency] Set the concurrency, default 50
|
||||
-r [ring dir] Ring locations, default /etc/swift
|
||||
-e [filename] File for writing a list of inconsistent urls
|
||||
-d Also download files and verify md5
|
||||
|
||||
You can also feed a list of urls to the script through stdin.
|
||||
|
||||
Examples!
|
||||
|
||||
%(cmd)s AUTH_88ad0b83-b2c5-4fa1-b2d6-60c597202076
|
||||
%(cmd)s AUTH_88ad0b83-b2c5-4fa1-b2d6-60c597202076/container/object
|
||||
%(cmd)s -e errors.txt AUTH_88ad0b83-b2c5-4fa1-b2d6-60c597202076/container
|
||||
%(cmd)s < errors.txt
|
||||
%(cmd)s -c 25 -d < errors.txt
|
||||
""" % {'cmd': sys.argv[0]}
|
||||
|
||||
|
||||
class Auditor(object):
|
||||
def __init__(self, swift_dir='/etc/swift', concurrency=50, deep=False,
|
||||
error_file=None):
|
||||
self.pool = GreenPool(concurrency)
|
||||
self.object_ring = Ring(swift_dir, ring_name='object')
|
||||
self.container_ring = Ring(swift_dir, ring_name='container')
|
||||
self.account_ring = Ring(swift_dir, ring_name='account')
|
||||
self.deep = deep
|
||||
self.error_file = error_file
|
||||
# zero out stats
|
||||
self.accounts_checked = self.account_exceptions = \
|
||||
self.account_not_found = self.account_container_mismatch = \
|
||||
self.account_object_mismatch = self.objects_checked = \
|
||||
self.object_exceptions = self.object_not_found = \
|
||||
self.object_checksum_mismatch = self.containers_checked = \
|
||||
self.container_exceptions = self.container_count_mismatch = \
|
||||
self.container_not_found = self.container_obj_mismatch = 0
|
||||
self.list_cache = {}
|
||||
self.in_progress = {}
|
||||
|
||||
def audit_object(self, account, container, name):
|
||||
path = '/%s/%s/%s' % (account, container, name)
|
||||
part, nodes = self.object_ring.get_nodes(
|
||||
account, container.encode('utf-8'), name.encode('utf-8'))
|
||||
container_listing = self.audit_container(account, container)
|
||||
consistent = True
|
||||
if name not in container_listing:
|
||||
print(" Object %s missing in container listing!" % path)
|
||||
consistent = False
|
||||
hash = None
|
||||
else:
|
||||
hash = container_listing[name]['hash']
|
||||
etags = []
|
||||
for node in nodes:
|
||||
try:
|
||||
if self.deep:
|
||||
conn = http_connect(node['ip'], node['port'],
|
||||
node['device'], part, 'GET', path, {})
|
||||
resp = conn.getresponse()
|
||||
calc_hash = md5()
|
||||
chunk = True
|
||||
while chunk:
|
||||
chunk = resp.read(8192)
|
||||
calc_hash.update(chunk)
|
||||
calc_hash = calc_hash.hexdigest()
|
||||
if resp.status // 100 != 2:
|
||||
self.object_not_found += 1
|
||||
consistent = False
|
||||
print(' Bad status %s GETting object "%s" on %s/%s'
|
||||
% (resp.status, path,
|
||||
node['ip'], node['device']))
|
||||
continue
|
||||
if resp.getheader('ETag').strip('"') != calc_hash:
|
||||
self.object_checksum_mismatch += 1
|
||||
consistent = False
|
||||
print(' MD5 does not match etag for "%s" on %s/%s'
|
||||
% (path, node['ip'], node['device']))
|
||||
else:
|
||||
conn = http_connect(node['ip'], node['port'],
|
||||
node['device'], part, 'HEAD',
|
||||
path.encode('utf-8'), {})
|
||||
resp = conn.getresponse()
|
||||
if resp.status // 100 != 2:
|
||||
self.object_not_found += 1
|
||||
consistent = False
|
||||
print(' Bad status %s HEADing object "%s" on %s/%s'
|
||||
% (resp.status, path,
|
||||
node['ip'], node['device']))
|
||||
continue
|
||||
|
||||
override_etag = resp.getheader(
|
||||
'X-Object-Sysmeta-Container-Update-Override-Etag')
|
||||
if override_etag:
|
||||
etags.append((override_etag, node))
|
||||
else:
|
||||
etags.append((resp.getheader('ETag'), node))
|
||||
except Exception:
|
||||
self.object_exceptions += 1
|
||||
consistent = False
|
||||
print(' Exception fetching object "%s" on %s/%s'
|
||||
% (path, node['ip'], node['device']))
|
||||
continue
|
||||
if not etags:
|
||||
consistent = False
|
||||
print(" Failed fo fetch object %s at all!" % path)
|
||||
elif hash:
|
||||
for etag, node in etags:
|
||||
if etag.strip('"') != hash:
|
||||
consistent = False
|
||||
self.object_checksum_mismatch += 1
|
||||
print(' ETag mismatch for "%s" on %s/%s'
|
||||
% (path, node['ip'], node['device']))
|
||||
if not consistent and self.error_file:
|
||||
with open(self.error_file, 'a') as err_file:
|
||||
print(path, file=err_file)
|
||||
self.objects_checked += 1
|
||||
|
||||
def audit_container(self, account, name, recurse=False):
|
||||
if (account, name) in self.in_progress:
|
||||
self.in_progress[(account, name)].wait()
|
||||
if (account, name) in self.list_cache:
|
||||
return self.list_cache[(account, name)]
|
||||
self.in_progress[(account, name)] = Event()
|
||||
print('Auditing container "%s"' % name)
|
||||
path = '/%s/%s' % (account, name)
|
||||
account_listing = self.audit_account(account)
|
||||
consistent = True
|
||||
if name not in account_listing:
|
||||
consistent = False
|
||||
print(" Container %s not in account listing!" % path)
|
||||
part, nodes = \
|
||||
self.container_ring.get_nodes(account, name.encode('utf-8'))
|
||||
rec_d = {}
|
||||
responses = {}
|
||||
for node in nodes:
|
||||
marker = ''
|
||||
results = True
|
||||
while results:
|
||||
try:
|
||||
conn = http_connect(node['ip'], node['port'],
|
||||
node['device'], part, 'GET',
|
||||
path.encode('utf-8'), {},
|
||||
'format=json&marker=%s' %
|
||||
quote(marker.encode('utf-8')))
|
||||
resp = conn.getresponse()
|
||||
if resp.status // 100 != 2:
|
||||
self.container_not_found += 1
|
||||
consistent = False
|
||||
print(' Bad status GETting container "%s" on %s/%s' %
|
||||
(path, node['ip'], node['device']))
|
||||
break
|
||||
if node['id'] not in responses:
|
||||
responses[node['id']] = {
|
||||
h.lower(): v for h, v in resp.getheaders()}
|
||||
results = json.loads(resp.read())
|
||||
except Exception:
|
||||
self.container_exceptions += 1
|
||||
consistent = False
|
||||
print(' Exception GETting container "%s" on %s/%s' %
|
||||
(path, node['ip'], node['device']))
|
||||
break
|
||||
if results:
|
||||
marker = results[-1]['name']
|
||||
for obj in results:
|
||||
obj_name = obj['name']
|
||||
if obj_name not in rec_d:
|
||||
rec_d[obj_name] = obj
|
||||
if (obj['last_modified'] !=
|
||||
rec_d[obj_name]['last_modified']):
|
||||
self.container_obj_mismatch += 1
|
||||
consistent = False
|
||||
print(" Different versions of %s/%s "
|
||||
"in container dbs." % (name, obj['name']))
|
||||
if (obj['last_modified'] >
|
||||
rec_d[obj_name]['last_modified']):
|
||||
rec_d[obj_name] = obj
|
||||
obj_counts = [int(header['x-container-object-count'])
|
||||
for header in responses.values()]
|
||||
if not obj_counts:
|
||||
consistent = False
|
||||
print(" Failed to fetch container %s at all!" % path)
|
||||
else:
|
||||
if len(set(obj_counts)) != 1:
|
||||
self.container_count_mismatch += 1
|
||||
consistent = False
|
||||
print(
|
||||
" Container databases don't agree on number of objects.")
|
||||
print(
|
||||
" Max: %s, Min: %s" % (max(obj_counts), min(obj_counts)))
|
||||
self.containers_checked += 1
|
||||
self.list_cache[(account, name)] = rec_d
|
||||
self.in_progress[(account, name)].send(True)
|
||||
del self.in_progress[(account, name)]
|
||||
if recurse:
|
||||
for obj in rec_d.keys():
|
||||
self.pool.spawn_n(self.audit_object, account, name, obj)
|
||||
if not consistent and self.error_file:
|
||||
with open(self.error_file, 'a') as error_file:
|
||||
print(path, file=error_file)
|
||||
return rec_d
|
||||
|
||||
def audit_account(self, account, recurse=False):
|
||||
if account in self.in_progress:
|
||||
self.in_progress[account].wait()
|
||||
if account in self.list_cache:
|
||||
return self.list_cache[account]
|
||||
self.in_progress[account] = Event()
|
||||
print('Auditing account "%s"' % account)
|
||||
consistent = True
|
||||
path = '/%s' % account
|
||||
part, nodes = self.account_ring.get_nodes(account)
|
||||
responses = {}
|
||||
for node in nodes:
|
||||
marker = ''
|
||||
results = True
|
||||
while results:
|
||||
node_id = node['id']
|
||||
try:
|
||||
conn = http_connect(node['ip'], node['port'],
|
||||
node['device'], part, 'GET', path, {},
|
||||
'format=json&marker=%s' %
|
||||
quote(marker.encode('utf-8')))
|
||||
resp = conn.getresponse()
|
||||
if resp.status // 100 != 2:
|
||||
self.account_not_found += 1
|
||||
consistent = False
|
||||
print(" Bad status GETting account '%s' "
|
||||
" from %s:%s" %
|
||||
(account, node['ip'], node['device']))
|
||||
break
|
||||
results = json.loads(resp.read())
|
||||
except Exception:
|
||||
self.account_exceptions += 1
|
||||
consistent = False
|
||||
print(" Exception GETting account '%s' on %s:%s" %
|
||||
(account, node['ip'], node['device']))
|
||||
break
|
||||
if node_id not in responses:
|
||||
responses[node_id] = [
|
||||
{h.lower(): v for h, v in resp.getheaders()}, []]
|
||||
responses[node_id][1].extend(results)
|
||||
if results:
|
||||
marker = results[-1]['name']
|
||||
headers = [r[0] for r in responses.values()]
|
||||
cont_counts = [int(header['x-account-container-count'])
|
||||
for header in headers]
|
||||
if len(set(cont_counts)) != 1:
|
||||
self.account_container_mismatch += 1
|
||||
consistent = False
|
||||
print(" Account databases for '%s' don't agree on"
|
||||
" number of containers." % account)
|
||||
if cont_counts:
|
||||
print(" Max: %s, Min: %s" % (max(cont_counts),
|
||||
min(cont_counts)))
|
||||
obj_counts = [int(header['x-account-object-count'])
|
||||
for header in headers]
|
||||
if len(set(obj_counts)) != 1:
|
||||
self.account_object_mismatch += 1
|
||||
consistent = False
|
||||
print(" Account databases for '%s' don't agree on"
|
||||
" number of objects." % account)
|
||||
if obj_counts:
|
||||
print(" Max: %s, Min: %s" % (max(obj_counts),
|
||||
min(obj_counts)))
|
||||
containers = set()
|
||||
for resp in responses.values():
|
||||
containers.update(container['name'] for container in resp[1])
|
||||
self.list_cache[account] = containers
|
||||
self.in_progress[account].send(True)
|
||||
del self.in_progress[account]
|
||||
self.accounts_checked += 1
|
||||
if recurse:
|
||||
for container in containers:
|
||||
self.pool.spawn_n(self.audit_container, account,
|
||||
container, True)
|
||||
if not consistent and self.error_file:
|
||||
with open(self.error_file, 'a') as error_file:
|
||||
print(path, error_file)
|
||||
return containers
|
||||
|
||||
def audit(self, account, container=None, obj=None):
|
||||
if obj and container:
|
||||
self.pool.spawn_n(self.audit_object, account, container, obj)
|
||||
elif container:
|
||||
self.pool.spawn_n(self.audit_container, account, container, True)
|
||||
else:
|
||||
self.pool.spawn_n(self.audit_account, account, True)
|
||||
|
||||
def wait(self):
|
||||
self.pool.waitall()
|
||||
|
||||
def print_stats(self):
|
||||
|
||||
def _print_stat(name, stat):
|
||||
# Right align stat name in a field of 18 characters
|
||||
print("{0:>18}: {1}".format(name, stat))
|
||||
|
||||
print()
|
||||
_print_stat("Accounts checked", self.accounts_checked)
|
||||
if self.account_not_found:
|
||||
_print_stat("Missing Replicas", self.account_not_found)
|
||||
if self.account_exceptions:
|
||||
_print_stat("Exceptions", self.account_exceptions)
|
||||
if self.account_container_mismatch:
|
||||
_print_stat("Container mismatch", self.account_container_mismatch)
|
||||
if self.account_object_mismatch:
|
||||
_print_stat("Object mismatch", self.account_object_mismatch)
|
||||
print()
|
||||
_print_stat("Containers checked", self.containers_checked)
|
||||
if self.container_not_found:
|
||||
_print_stat("Missing Replicas", self.container_not_found)
|
||||
if self.container_exceptions:
|
||||
_print_stat("Exceptions", self.container_exceptions)
|
||||
if self.container_count_mismatch:
|
||||
_print_stat("Count mismatch", self.container_count_mismatch)
|
||||
if self.container_obj_mismatch:
|
||||
_print_stat("Object mismatch", self.container_obj_mismatch)
|
||||
print()
|
||||
_print_stat("Objects checked", self.objects_checked)
|
||||
if self.object_not_found:
|
||||
_print_stat("Missing Replicas", self.object_not_found)
|
||||
if self.object_exceptions:
|
||||
_print_stat("Exceptions", self.object_exceptions)
|
||||
if self.object_checksum_mismatch:
|
||||
_print_stat("MD5 Mismatch", self.object_checksum_mismatch)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
try:
|
||||
optlist, args = getopt.getopt(sys.argv[1:], 'c:r:e:d')
|
||||
except getopt.GetoptError as err:
|
||||
print(str(err))
|
||||
print(usage)
|
||||
sys.exit(2)
|
||||
if not args and os.isatty(sys.stdin.fileno()):
|
||||
print(usage)
|
||||
sys.exit()
|
||||
opts = dict(optlist)
|
||||
options = {
|
||||
'concurrency': int(opts.get('-c', 50)),
|
||||
'error_file': opts.get('-e', None),
|
||||
'swift_dir': opts.get('-r', '/etc/swift'),
|
||||
'deep': '-d' in opts,
|
||||
}
|
||||
auditor = Auditor(**options)
|
||||
if not os.isatty(sys.stdin.fileno()):
|
||||
args = chain(args, sys.stdin)
|
||||
for path in args:
|
||||
path = '/' + path.rstrip('\r\n').lstrip('/')
|
||||
auditor.audit(*split_path(path, 1, 3, True))
|
||||
auditor.wait()
|
||||
auditor.print_stats()
|
||||
if __name__ == "__main__":
|
||||
account_audit.main()
|
||||
|
@ -12,80 +12,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import print_function
|
||||
import optparse
|
||||
import os
|
||||
import sys
|
||||
|
||||
from swift.common.manager import Server
|
||||
from swift.common.utils import readconf
|
||||
from swift.common.wsgi import appconfig
|
||||
|
||||
parser = optparse.OptionParser('%prog [options] SERVER')
|
||||
parser.add_option('-c', '--config-num', metavar="N", type="int",
|
||||
dest="number", default=0,
|
||||
help="parse config for the Nth server only")
|
||||
parser.add_option('-s', '--section', help="only display matching sections")
|
||||
parser.add_option('-w', '--wsgi', action='store_true',
|
||||
help="use wsgi/paste parser instead of readconf")
|
||||
|
||||
|
||||
def _context_name(context):
|
||||
return ':'.join((context.object_type.name, context.name))
|
||||
|
||||
|
||||
def inspect_app_config(app_config):
|
||||
conf = {}
|
||||
context = app_config.context
|
||||
section_name = _context_name(context)
|
||||
conf[section_name] = context.config()
|
||||
if context.object_type.name == 'pipeline':
|
||||
filters = context.filter_contexts
|
||||
pipeline = []
|
||||
for filter_context in filters:
|
||||
conf[_context_name(filter_context)] = filter_context.config()
|
||||
pipeline.append(filter_context.entry_point_name)
|
||||
app_context = context.app_context
|
||||
conf[_context_name(app_context)] = app_context.config()
|
||||
pipeline.append(app_context.entry_point_name)
|
||||
conf[section_name]['pipeline'] = ' '.join(pipeline)
|
||||
return conf
|
||||
|
||||
|
||||
def main():
|
||||
options, args = parser.parse_args()
|
||||
options = dict(vars(options))
|
||||
|
||||
if not args:
|
||||
return 'ERROR: specify type of server or conf_path'
|
||||
conf_files = []
|
||||
for arg in args:
|
||||
if os.path.exists(arg):
|
||||
conf_files.append(arg)
|
||||
else:
|
||||
conf_files += Server(arg).conf_files(**options)
|
||||
for conf_file in conf_files:
|
||||
print('# %s' % conf_file)
|
||||
if options['wsgi']:
|
||||
app_config = appconfig(conf_file)
|
||||
conf = inspect_app_config(app_config)
|
||||
else:
|
||||
conf = readconf(conf_file)
|
||||
flat_vars = {}
|
||||
for k, v in conf.items():
|
||||
if options['section'] and k != options['section']:
|
||||
continue
|
||||
if not isinstance(v, dict):
|
||||
flat_vars[k] = v
|
||||
continue
|
||||
print('[%s]' % k)
|
||||
for opt, value in v.items():
|
||||
print('%s = %s' % (opt, value))
|
||||
print()
|
||||
for k, v in flat_vars.items():
|
||||
print('# %s = %s' % (k, v))
|
||||
print()
|
||||
|
||||
from swift.cli import config
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
config.main()
|
||||
|
@ -14,251 +14,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import datetime
|
||||
import glob
|
||||
import locale
|
||||
import os
|
||||
import os.path
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from swift.cli import drive_audit
|
||||
|
||||
|
||||
import six
|
||||
from six.moves.configparser import ConfigParser
|
||||
|
||||
from swift.common.utils import backward, get_logger, dump_recon_cache, \
|
||||
config_true_value
|
||||
|
||||
|
||||
def get_devices(device_dir, logger):
|
||||
devices = []
|
||||
majmin_devices = {}
|
||||
|
||||
# List /dev/block
|
||||
# Using os.scandir on recent versions of python, else os.listdir
|
||||
if 'scandir' in dir(os):
|
||||
with os.scandir("/dev/block") as it:
|
||||
for ent in it:
|
||||
if ent.is_symlink():
|
||||
dev_name = os.path.basename(os.readlink(ent.path))
|
||||
majmin = os.path.basename(ent.path).split(':')
|
||||
majmin_devices[dev_name] = {'major': majmin[0],
|
||||
'minor': majmin[1]}
|
||||
else:
|
||||
for ent in os.listdir("/dev/block"):
|
||||
ent_path = os.path.join("/dev/block", ent)
|
||||
if os.path.is_symlink(ent_path):
|
||||
dev_name = os.path.basename(os.readlink(ent_path))
|
||||
majmin = os.path.basename(ent_path).split(':')
|
||||
majmin_devices[dev_name] = {'major': majmin[0],
|
||||
'minor': majmin[1]}
|
||||
|
||||
for line in open('/proc/mounts').readlines():
|
||||
data = line.strip().split()
|
||||
block_device = data[0]
|
||||
mount_point = data[1]
|
||||
if mount_point.startswith(device_dir):
|
||||
device = {}
|
||||
device['mount_point'] = mount_point
|
||||
device['block_device'] = block_device
|
||||
dev_name = os.path.basename(block_device)
|
||||
if dev_name in majmin_devices:
|
||||
# If symlink is in /dev/block
|
||||
device['major'] = majmin_devices[dev_name]['major']
|
||||
device['minor'] = majmin_devices[dev_name]['minor']
|
||||
else:
|
||||
# Else we try to stat block_device
|
||||
try:
|
||||
device_num = os.stat(block_device).st_rdev
|
||||
except OSError:
|
||||
# If we can't stat the device,
|
||||
# then something weird is going on
|
||||
logger.error(
|
||||
'Could not determine major:minor numbers for %s '
|
||||
'(mounted at %s)! Skipping...',
|
||||
block_device, mount_point)
|
||||
continue
|
||||
device['major'] = str(os.major(device_num))
|
||||
device['minor'] = str(os.minor(device_num))
|
||||
devices.append(device)
|
||||
for line in open('/proc/partitions').readlines()[2:]:
|
||||
major, minor, blocks, kernel_device = line.strip().split()
|
||||
device = [d for d in devices
|
||||
if d['major'] == major and d['minor'] == minor]
|
||||
if device:
|
||||
device[0]['kernel_device'] = kernel_device
|
||||
return devices
|
||||
|
||||
|
||||
def get_errors(error_re, log_file_pattern, minutes, logger,
|
||||
log_file_encoding):
|
||||
# Assuming log rotation is being used, we need to examine
|
||||
# recently rotated files in case the rotation occurred
|
||||
# just before the script is being run - the data we are
|
||||
# looking for may have rotated.
|
||||
#
|
||||
# The globbing used before would not work with all out-of-box
|
||||
# distro setup for logrotate and syslog therefore moving this
|
||||
# to the config where one can set it with the desired
|
||||
# globbing pattern.
|
||||
log_files = [f for f in glob.glob(log_file_pattern)]
|
||||
try:
|
||||
log_files.sort(key=lambda f: os.stat(f).st_mtime, reverse=True)
|
||||
except (IOError, OSError) as exc:
|
||||
logger.error(exc)
|
||||
print(exc)
|
||||
sys.exit(1)
|
||||
|
||||
now_time = datetime.datetime.now()
|
||||
end_time = now_time - datetime.timedelta(minutes=minutes)
|
||||
# kern.log does not contain the year so we need to keep
|
||||
# track of the year and month in case the year recently
|
||||
# ticked over
|
||||
year = now_time.year
|
||||
prev_ent_month = now_time.strftime('%b')
|
||||
errors = {}
|
||||
|
||||
reached_old_logs = False
|
||||
for path in log_files:
|
||||
try:
|
||||
f = open(path, 'rb')
|
||||
except IOError:
|
||||
logger.error("Error: Unable to open " + path)
|
||||
print("Unable to open " + path)
|
||||
sys.exit(1)
|
||||
for line in backward(f):
|
||||
if not six.PY2:
|
||||
line = line.decode(log_file_encoding, 'surrogateescape')
|
||||
if '[ 0.000000]' in line \
|
||||
or 'KERNEL supported cpus:' in line \
|
||||
or 'BIOS-provided physical RAM map:' in line:
|
||||
# Ignore anything before the last boot
|
||||
reached_old_logs = True
|
||||
break
|
||||
# Solves the problem with year change - kern.log does not
|
||||
# keep track of the year.
|
||||
log_time_ent = line.split()[:3]
|
||||
if log_time_ent[0] == 'Dec' and prev_ent_month == 'Jan':
|
||||
year -= 1
|
||||
prev_ent_month = log_time_ent[0]
|
||||
log_time_string = '%d %s' % (year, ' '.join(log_time_ent))
|
||||
try:
|
||||
log_time = datetime.datetime.strptime(
|
||||
log_time_string, '%Y %b %d %H:%M:%S')
|
||||
except ValueError:
|
||||
# Some versions use ISO timestamps instead
|
||||
try:
|
||||
log_time = datetime.datetime.strptime(
|
||||
line[0:19], '%Y-%m-%dT%H:%M:%S')
|
||||
except ValueError:
|
||||
continue
|
||||
if log_time > end_time:
|
||||
for err in error_re:
|
||||
for device in err.findall(line):
|
||||
errors[device] = errors.get(device, 0) + 1
|
||||
else:
|
||||
reached_old_logs = True
|
||||
break
|
||||
if reached_old_logs:
|
||||
break
|
||||
return errors
|
||||
|
||||
|
||||
def comment_fstab(mount_point):
|
||||
with open('/etc/fstab', 'r') as fstab:
|
||||
with open('/etc/fstab.new', 'w') as new_fstab:
|
||||
for line in fstab:
|
||||
parts = line.split()
|
||||
if len(parts) > 2 \
|
||||
and parts[1] == mount_point \
|
||||
and not line.startswith('#'):
|
||||
new_fstab.write('#' + line)
|
||||
else:
|
||||
new_fstab.write(line)
|
||||
os.rename('/etc/fstab.new', '/etc/fstab')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
c = ConfigParser()
|
||||
try:
|
||||
conf_path = sys.argv[1]
|
||||
except Exception:
|
||||
print("Usage: %s CONF_FILE" % sys.argv[0].split('/')[-1])
|
||||
sys.exit(1)
|
||||
if not c.read(conf_path):
|
||||
print("Unable to read config file %s" % conf_path)
|
||||
sys.exit(1)
|
||||
conf = dict(c.items('drive-audit'))
|
||||
device_dir = conf.get('device_dir', '/srv/node')
|
||||
minutes = int(conf.get('minutes', 60))
|
||||
error_limit = int(conf.get('error_limit', 1))
|
||||
recon_cache_path = conf.get('recon_cache_path', "/var/cache/swift")
|
||||
log_file_pattern = conf.get('log_file_pattern',
|
||||
'/var/log/kern.*[!.][!g][!z]')
|
||||
log_file_encoding = conf.get('log_file_encoding', 'auto')
|
||||
if log_file_encoding == 'auto':
|
||||
log_file_encoding = locale.getpreferredencoding()
|
||||
log_to_console = config_true_value(conf.get('log_to_console', False))
|
||||
error_re = []
|
||||
for conf_key in conf:
|
||||
if conf_key.startswith('regex_pattern_'):
|
||||
error_pattern = conf[conf_key]
|
||||
try:
|
||||
r = re.compile(error_pattern)
|
||||
except re.error:
|
||||
sys.exit('Error: unable to compile regex pattern "%s"' %
|
||||
error_pattern)
|
||||
error_re.append(r)
|
||||
if not error_re:
|
||||
error_re = [
|
||||
re.compile(r'\berror\b.*\b(sd[a-z]{1,2}\d?)\b'),
|
||||
re.compile(r'\b(sd[a-z]{1,2}\d?)\b.*\berror\b'),
|
||||
]
|
||||
conf['log_name'] = conf.get('log_name', 'drive-audit')
|
||||
logger = get_logger(conf, log_to_console=log_to_console,
|
||||
log_route='drive-audit')
|
||||
devices = get_devices(device_dir, logger)
|
||||
logger.debug("Devices found: %s" % str(devices))
|
||||
if not devices:
|
||||
logger.error("Error: No devices found!")
|
||||
recon_errors = {}
|
||||
total_errors = 0
|
||||
for device in devices:
|
||||
recon_errors[device['mount_point']] = 0
|
||||
errors = get_errors(error_re, log_file_pattern, minutes, logger,
|
||||
log_file_encoding)
|
||||
logger.debug("Errors found: %s" % str(errors))
|
||||
unmounts = 0
|
||||
for kernel_device, count in errors.items():
|
||||
if count >= error_limit:
|
||||
device = \
|
||||
[d for d in devices if d['kernel_device'] == kernel_device]
|
||||
if device:
|
||||
mount_point = device[0]['mount_point']
|
||||
if mount_point.startswith(device_dir):
|
||||
if config_true_value(conf.get('unmount_failed_device',
|
||||
True)):
|
||||
logger.info("Unmounting %s with %d errors" %
|
||||
(mount_point, count))
|
||||
subprocess.call(['umount', '-fl', mount_point])
|
||||
logger.info("Commenting out %s from /etc/fstab" %
|
||||
(mount_point))
|
||||
comment_fstab(mount_point)
|
||||
unmounts += 1
|
||||
else:
|
||||
logger.info("Detected %s with %d errors "
|
||||
"(Device not unmounted)" %
|
||||
(mount_point, count))
|
||||
recon_errors[mount_point] = count
|
||||
total_errors += count
|
||||
recon_file = recon_cache_path + "/drive.recon"
|
||||
dump_recon_cache(recon_errors, recon_file, logger)
|
||||
dump_recon_cache({'drive_audit_errors': total_errors}, recon_file, logger,
|
||||
set_owner=conf.get("user", "swift"))
|
||||
|
||||
if unmounts == 0:
|
||||
logger.info("No drives were unmounted")
|
||||
elif os.path.isdir("/run/systemd/system"):
|
||||
logger.debug("fstab updated, calling systemctl daemon-reload")
|
||||
subprocess.call(["/usr/bin/systemctl", "daemon-reload"])
|
||||
if __name__ == "__main__":
|
||||
drive_audit.main()
|
||||
|
@ -14,63 +14,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import sys
|
||||
from optparse import OptionParser
|
||||
from os.path import basename
|
||||
from swift.cli import get_nodes
|
||||
|
||||
from swift.common.ring import Ring
|
||||
from swift.common.storage_policy import reload_storage_policies
|
||||
from swift.common.utils import set_swift_dir
|
||||
from swift.cli.info import (parse_get_node_args, print_item_locations,
|
||||
InfoSystemExit)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
usage = '''
|
||||
Shows the nodes responsible for the item specified.
|
||||
Usage: %prog [-a] <ring.gz> <account> [<container> [<object>]]
|
||||
Or: %prog [-a] <ring.gz> -p partition
|
||||
Or: %prog [-a] -P policy_name <account> [<container> [<object>]]
|
||||
Or: %prog [-a] -P policy_name -p partition
|
||||
Note: account, container, object can also be a single arg separated by /
|
||||
Example:
|
||||
$ %prog -a /etc/swift/account.ring.gz MyAccount
|
||||
Partition 5743883
|
||||
Hash 96ae332a60b58910784e4417a03e1ad0
|
||||
10.1.1.7:8000 sdd1
|
||||
10.1.9.2:8000 sdb1
|
||||
10.1.5.5:8000 sdf1
|
||||
10.1.5.9:8000 sdt1 # [Handoff]
|
||||
'''
|
||||
parser = OptionParser(usage)
|
||||
parser.add_option('-a', '--all', action='store_true',
|
||||
help='Show all handoff nodes')
|
||||
parser.add_option('-p', '--partition', metavar='PARTITION',
|
||||
help='Show nodes for a given partition')
|
||||
parser.add_option('-P', '--policy-name', dest='policy_name',
|
||||
help='Specify which policy to use')
|
||||
parser.add_option('-d', '--swift-dir', default='/etc/swift',
|
||||
dest='swift_dir', help='Path to swift directory')
|
||||
parser.add_option('-Q', '--quoted', action='store_true',
|
||||
help='Assume swift paths are quoted')
|
||||
options, args = parser.parse_args()
|
||||
|
||||
if set_swift_dir(options.swift_dir):
|
||||
reload_storage_policies()
|
||||
|
||||
try:
|
||||
ring_path, args = parse_get_node_args(options, args)
|
||||
except InfoSystemExit as e:
|
||||
parser.print_help()
|
||||
sys.exit('ERROR: %s' % e)
|
||||
|
||||
ring = ring_name = None
|
||||
if ring_path:
|
||||
ring_name = basename(ring_path)[:-len('.ring.gz')]
|
||||
ring = Ring(ring_path)
|
||||
|
||||
try:
|
||||
print_item_locations(ring, ring_name, *args, **vars(options))
|
||||
except InfoSystemExit:
|
||||
sys.exit(1)
|
||||
if __name__ == "__main__":
|
||||
get_nodes.main()
|
||||
|
@ -12,79 +12,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import print_function
|
||||
import optparse
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from swift.cli import oldies
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = optparse.OptionParser(usage='''%prog [options]
|
||||
|
||||
Lists old Swift processes.
|
||||
'''.strip())
|
||||
parser.add_option('-a', '--age', dest='hours', type='int', default=720,
|
||||
help='look for processes at least HOURS old; '
|
||||
'default: 720 (30 days)')
|
||||
parser.add_option('-p', '--pids', action='store_true',
|
||||
help='only print the pids found; for example, to pipe '
|
||||
'to xargs kill')
|
||||
(options, args) = parser.parse_args()
|
||||
|
||||
listing = []
|
||||
for line in subprocess.Popen(
|
||||
['ps', '-eo', 'etime,pid,args', '--no-headers'],
|
||||
stdout=subprocess.PIPE).communicate()[0].split(b'\n'):
|
||||
if not line:
|
||||
continue
|
||||
hours = 0
|
||||
try:
|
||||
etime, pid, args = line.decode('ascii').split(None, 2)
|
||||
except ValueError:
|
||||
# This covers both decoding and not-enough-values-to-unpack errors
|
||||
sys.exit('Could not process ps line %r' % line)
|
||||
if not args.startswith((
|
||||
'/usr/bin/python /usr/bin/swift-',
|
||||
'/usr/bin/python /usr/local/bin/swift-',
|
||||
'/bin/python /usr/bin/swift-',
|
||||
'/usr/bin/python3 /usr/bin/swift-',
|
||||
'/usr/bin/python3 /usr/local/bin/swift-',
|
||||
'/bin/python3 /usr/bin/swift-')):
|
||||
continue
|
||||
args = args.split('-', 1)[1]
|
||||
etime = etime.split('-')
|
||||
if len(etime) == 2:
|
||||
hours = int(etime[0]) * 24
|
||||
etime = etime[1]
|
||||
elif len(etime) == 1:
|
||||
etime = etime[0]
|
||||
else:
|
||||
sys.exit('Could not process etime value from %r' % line)
|
||||
etime = etime.split(':')
|
||||
if len(etime) == 3:
|
||||
hours += int(etime[0])
|
||||
elif len(etime) != 2:
|
||||
sys.exit('Could not process etime value from %r' % line)
|
||||
if hours >= options.hours:
|
||||
listing.append((str(hours), pid, args))
|
||||
|
||||
if not listing:
|
||||
sys.exit()
|
||||
|
||||
if options.pids:
|
||||
for hours, pid, args in listing:
|
||||
print(pid)
|
||||
else:
|
||||
hours_len = len('Hours')
|
||||
pid_len = len('PID')
|
||||
args_len = len('Command')
|
||||
for hours, pid, args in listing:
|
||||
hours_len = max(hours_len, len(hours))
|
||||
pid_len = max(pid_len, len(pid))
|
||||
args_len = max(args_len, len(args))
|
||||
args_len = min(args_len, 78 - hours_len - pid_len)
|
||||
|
||||
print('%*s %*s %s' % (hours_len, 'Hours', pid_len, 'PID', 'Command'))
|
||||
for hours, pid, args in listing:
|
||||
print('%*s %*s %s' % (hours_len, hours, pid_len,
|
||||
pid, args[:args_len]))
|
||||
oldies.main()
|
||||
|
@ -12,121 +12,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import print_function
|
||||
import optparse
|
||||
import os
|
||||
import re
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from swift.common.manager import RUN_DIR
|
||||
from swift.cli import orphans
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = optparse.OptionParser(usage='''%prog [options]
|
||||
|
||||
Lists and optionally kills orphaned Swift processes. This is done by scanning
|
||||
/var/run/swift for .pid files and listing any processes that look like Swift
|
||||
processes but aren't associated with the pids in those .pid files. Any Swift
|
||||
processes running with the 'once' parameter are ignored, as those are usually
|
||||
for full-speed audit scans and such.
|
||||
|
||||
Example (sends SIGTERM to all orphaned Swift processes older than two hours):
|
||||
%prog -a 2 -k TERM
|
||||
'''.strip())
|
||||
parser.add_option('-a', '--age', dest='hours', type='int', default=24,
|
||||
help="look for processes at least HOURS old; "
|
||||
"default: 24")
|
||||
parser.add_option('-k', '--kill', dest='signal',
|
||||
help='send SIGNAL to matched processes; default: just '
|
||||
'list process information')
|
||||
parser.add_option('-w', '--wide', dest='wide', default=False,
|
||||
action='store_true',
|
||||
help="don't clip the listing at 80 characters")
|
||||
parser.add_option('-r', '--run-dir', type="str",
|
||||
dest="run_dir", default=RUN_DIR,
|
||||
help="alternative directory to store running pid files "
|
||||
"default: %s" % RUN_DIR)
|
||||
(options, args) = parser.parse_args()
|
||||
|
||||
pids = []
|
||||
|
||||
for root, directories, files in os.walk(options.run_dir):
|
||||
for name in files:
|
||||
if name.endswith(('.pid', '.pid.d')):
|
||||
pids.append(open(os.path.join(root, name)).read().strip())
|
||||
pids.extend(subprocess.Popen(
|
||||
['ps', '--ppid', pids[-1], '-o', 'pid', '--no-headers'],
|
||||
stdout=subprocess.PIPE).communicate()[0].decode().split())
|
||||
|
||||
listing = []
|
||||
swift_cmd_re = re.compile(
|
||||
'^/usr/bin/python[23]? /usr(?:/local)?/bin/swift-')
|
||||
for line in subprocess.Popen(
|
||||
['ps', '-eo', 'etime,pid,args', '--no-headers'],
|
||||
stdout=subprocess.PIPE).communicate()[0].split(b'\n'):
|
||||
if not line:
|
||||
continue
|
||||
hours = 0
|
||||
try:
|
||||
etime, pid, args = line.decode('ascii').split(None, 2)
|
||||
except ValueError:
|
||||
sys.exit('Could not process ps line %r' % line)
|
||||
if pid in pids:
|
||||
continue
|
||||
if any([
|
||||
not swift_cmd_re.match(args),
|
||||
'swift-orphans' in args,
|
||||
'once' in args.split(),
|
||||
]):
|
||||
continue
|
||||
args = args.split('swift-', 1)[1]
|
||||
etime = etime.split('-')
|
||||
if len(etime) == 2:
|
||||
hours = int(etime[0]) * 24
|
||||
etime = etime[1]
|
||||
elif len(etime) == 1:
|
||||
etime = etime[0]
|
||||
else:
|
||||
sys.exit('Could not process etime value from %r' % line)
|
||||
etime = etime.split(':')
|
||||
if len(etime) == 3:
|
||||
hours += int(etime[0])
|
||||
elif len(etime) != 2:
|
||||
sys.exit('Could not process etime value from %r' % line)
|
||||
if hours >= options.hours:
|
||||
listing.append((str(hours), pid, args))
|
||||
|
||||
if not listing:
|
||||
sys.exit()
|
||||
|
||||
hours_len = len('Hours')
|
||||
pid_len = len('PID')
|
||||
args_len = len('Command')
|
||||
for hours, pid, args in listing:
|
||||
hours_len = max(hours_len, len(hours))
|
||||
pid_len = max(pid_len, len(pid))
|
||||
args_len = max(args_len, len(args))
|
||||
args_len = min(args_len, 78 - hours_len - pid_len)
|
||||
|
||||
print('%*s %*s %s' %
|
||||
(hours_len, 'Hours', pid_len, 'PID', 'Command'))
|
||||
for hours, pid, args in listing:
|
||||
print('%*s %*s %s' %
|
||||
(hours_len, hours, pid_len, pid, args[:args_len]))
|
||||
|
||||
if options.signal:
|
||||
try:
|
||||
signum = int(options.signal)
|
||||
except ValueError:
|
||||
signum = getattr(signal, options.signal.upper(),
|
||||
getattr(signal, 'SIG' + options.signal.upper(),
|
||||
None))
|
||||
if not signum:
|
||||
sys.exit('Could not translate %r to a signal number.' %
|
||||
options.signal)
|
||||
print('Sending processes %s (%d) signal...' % (options.signal, signum),
|
||||
end='')
|
||||
for hours, pid, args in listing:
|
||||
os.kill(int(pid), signum)
|
||||
print('Done.')
|
||||
orphans.main()
|
||||
|
@ -11,65 +11,7 @@
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from __future__ import print_function
|
||||
import sys
|
||||
from optparse import OptionParser
|
||||
|
||||
import eventlet.debug
|
||||
eventlet.debug.hub_exceptions(True)
|
||||
|
||||
from swift.common.ring import Ring
|
||||
from swift.common.utils import split_path
|
||||
from swift.common.storage_policy import POLICIES
|
||||
|
||||
from swift.container.reconciler import add_to_reconciler_queue
|
||||
"""
|
||||
This tool is primarily for debugging and development but can be used an example
|
||||
of how an operator could enqueue objects manually if a problem is discovered -
|
||||
might be particularly useful if you need to hack a fix into the reconciler
|
||||
and re-run it.
|
||||
"""
|
||||
|
||||
USAGE = """
|
||||
%prog <policy_index> </a/c/o> <timestamp> [options]
|
||||
|
||||
This script enqueues an object to be evaluated by the reconciler.
|
||||
|
||||
Arguments:
|
||||
policy_index: the policy the object is currently stored in.
|
||||
/a/c/o: the full path of the object - utf-8
|
||||
timestamp: the timestamp of the datafile/tombstone.
|
||||
|
||||
""".strip()
|
||||
|
||||
parser = OptionParser(USAGE)
|
||||
parser.add_option('-X', '--op', default='PUT', choices=('PUT', 'DELETE'),
|
||||
help='the method of the misplaced operation')
|
||||
parser.add_option('-f', '--force', action='store_true',
|
||||
help='force an object to be re-enqueued')
|
||||
|
||||
|
||||
def main():
|
||||
options, args = parser.parse_args()
|
||||
try:
|
||||
policy_index, path, timestamp = args
|
||||
except ValueError:
|
||||
sys.exit(parser.print_help())
|
||||
container_ring = Ring('/etc/swift/container.ring.gz')
|
||||
policy = POLICIES.get_by_index(policy_index)
|
||||
if not policy:
|
||||
return 'ERROR: invalid storage policy index: %s' % policy
|
||||
try:
|
||||
account, container, obj = split_path(path, 3, 3, True)
|
||||
except ValueError as e:
|
||||
return 'ERROR: %s' % e
|
||||
container_name = add_to_reconciler_queue(
|
||||
container_ring, account, container, obj,
|
||||
policy.idx, timestamp, options.op, force=options.force)
|
||||
if not container_name:
|
||||
return 'ERROR: unable to enqueue!'
|
||||
print(container_name)
|
||||
|
||||
from swift.cli import reconciler_enqueue
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
reconciler_enqueue.main()
|
||||
|
17
setup.cfg
17
setup.cfg
@ -40,16 +40,6 @@ skip_reno = True
|
||||
packages =
|
||||
swift
|
||||
|
||||
[files]
|
||||
scripts =
|
||||
bin/swift-account-audit
|
||||
bin/swift-config
|
||||
bin/swift-reconciler-enqueue
|
||||
bin/swift-drive-audit
|
||||
bin/swift-get-nodes
|
||||
bin/swift-oldies
|
||||
bin/swift-orphans
|
||||
|
||||
[extras]
|
||||
kms_keymaster =
|
||||
oslo.config>=4.0.0,!=4.3.0,!=4.4.0 # Apache-2.0
|
||||
@ -63,11 +53,13 @@ keystone =
|
||||
|
||||
[entry_points]
|
||||
console_scripts =
|
||||
swift-account-audit = swift.cli.account_audit:main
|
||||
swift-account-auditor = swift.account.auditor:main
|
||||
swift-account-info = swift.cli.info:account_main
|
||||
swift-account-reaper = swift.account.reaper:main
|
||||
swift-account-replicator = swift.account.replicator:main
|
||||
swift-account-server = swift.account.server:main
|
||||
swift-config = swift.cli.config:main
|
||||
swift-container-auditor = swift.container.auditor:main
|
||||
swift-container-deleter = swift.cli.container_deleter:main
|
||||
swift-container-info = swift.cli.info:container_main
|
||||
@ -79,7 +71,9 @@ console_scripts =
|
||||
swift-container-updater = swift.container.updater:main
|
||||
swift-dispersion-populate = swift.cli.dispersion_populate:main
|
||||
swift-dispersion-report = swift.cli.dispersion_report:main
|
||||
swift-drive-audit = swift.cli.drive_audit:main
|
||||
swift-form-signature = swift.cli.form_signature:main
|
||||
swift-get-nodes = swift.cli.get_nodes:main
|
||||
swift-init = swift.common.manager:main
|
||||
swift-manage-shard-ranges = swift.cli.manage_shard_ranges:main
|
||||
swift-object-auditor = swift.obj.auditor:main
|
||||
@ -90,9 +84,12 @@ console_scripts =
|
||||
swift-object-replicator = swift.obj.replicator:main
|
||||
swift-object-server = swift.obj.server:main
|
||||
swift-object-updater = swift.obj.updater:main
|
||||
swift-oldies = swift.cli.oldies:main
|
||||
swift-orphans = swift.cli.orphans:main
|
||||
swift-proxy-server = swift.proxy.server:main
|
||||
swift-recon = swift.cli.recon:main
|
||||
swift-recon-cron = swift.cli.recon_cron:main
|
||||
swift-reconciler-enqueue = swift.cli.reconciler_enqueue:main
|
||||
swift-reload = swift.cli.reload:main
|
||||
swift-ring-builder = swift.cli.ringbuilder:error_handling_main
|
||||
swift-ring-builder-analyzer = swift.cli.ring_builder_analyzer:main
|
||||
|
390
swift/cli/account_audit.py
Executable file
390
swift/cli/account_audit.py
Executable file
@ -0,0 +1,390 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright (c) 2010-2012 OpenStack Foundation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import print_function
|
||||
import os
|
||||
import sys
|
||||
from hashlib import md5
|
||||
import getopt
|
||||
from itertools import chain
|
||||
|
||||
import json
|
||||
from eventlet.greenpool import GreenPool
|
||||
from eventlet.event import Event
|
||||
from six.moves.urllib.parse import quote
|
||||
|
||||
from swift.common.ring import Ring
|
||||
from swift.common.utils import split_path
|
||||
from swift.common.bufferedhttp import http_connect
|
||||
|
||||
|
||||
usage = """
|
||||
Usage!
|
||||
|
||||
%(cmd)s [options] [url 1] [url 2] ...
|
||||
-c [concurrency] Set the concurrency, default 50
|
||||
-r [ring dir] Ring locations, default /etc/swift
|
||||
-e [filename] File for writing a list of inconsistent urls
|
||||
-d Also download files and verify md5
|
||||
|
||||
You can also feed a list of urls to the script through stdin.
|
||||
|
||||
Examples!
|
||||
|
||||
%(cmd)s AUTH_88ad0b83-b2c5-4fa1-b2d6-60c597202076
|
||||
%(cmd)s AUTH_88ad0b83-b2c5-4fa1-b2d6-60c597202076/container/object
|
||||
%(cmd)s -e errors.txt AUTH_88ad0b83-b2c5-4fa1-b2d6-60c597202076/container
|
||||
%(cmd)s < errors.txt
|
||||
%(cmd)s -c 25 -d < errors.txt
|
||||
""" % {'cmd': sys.argv[0]}
|
||||
|
||||
|
||||
class Auditor(object):
|
||||
def __init__(self, swift_dir='/etc/swift', concurrency=50, deep=False,
|
||||
error_file=None):
|
||||
self.pool = GreenPool(concurrency)
|
||||
self.object_ring = Ring(swift_dir, ring_name='object')
|
||||
self.container_ring = Ring(swift_dir, ring_name='container')
|
||||
self.account_ring = Ring(swift_dir, ring_name='account')
|
||||
self.deep = deep
|
||||
self.error_file = error_file
|
||||
# zero out stats
|
||||
self.accounts_checked = self.account_exceptions = \
|
||||
self.account_not_found = self.account_container_mismatch = \
|
||||
self.account_object_mismatch = self.objects_checked = \
|
||||
self.object_exceptions = self.object_not_found = \
|
||||
self.object_checksum_mismatch = self.containers_checked = \
|
||||
self.container_exceptions = self.container_count_mismatch = \
|
||||
self.container_not_found = self.container_obj_mismatch = 0
|
||||
self.list_cache = {}
|
||||
self.in_progress = {}
|
||||
|
||||
def audit_object(self, account, container, name):
|
||||
path = '/%s/%s/%s' % (account, container, name)
|
||||
part, nodes = self.object_ring.get_nodes(
|
||||
account, container.encode('utf-8'), name.encode('utf-8'))
|
||||
container_listing = self.audit_container(account, container)
|
||||
consistent = True
|
||||
if name not in container_listing:
|
||||
print(" Object %s missing in container listing!" % path)
|
||||
consistent = False
|
||||
hash = None
|
||||
else:
|
||||
hash = container_listing[name]['hash']
|
||||
etags = []
|
||||
for node in nodes:
|
||||
try:
|
||||
if self.deep:
|
||||
conn = http_connect(node['ip'], node['port'],
|
||||
node['device'], part, 'GET', path, {})
|
||||
resp = conn.getresponse()
|
||||
calc_hash = md5()
|
||||
chunk = True
|
||||
while chunk:
|
||||
chunk = resp.read(8192)
|
||||
calc_hash.update(chunk)
|
||||
calc_hash = calc_hash.hexdigest()
|
||||
if resp.status // 100 != 2:
|
||||
self.object_not_found += 1
|
||||
consistent = False
|
||||
print(' Bad status %s GETting object "%s" on %s/%s'
|
||||
% (resp.status, path,
|
||||
node['ip'], node['device']))
|
||||
continue
|
||||
if resp.getheader('ETag').strip('"') != calc_hash:
|
||||
self.object_checksum_mismatch += 1
|
||||
consistent = False
|
||||
print(' MD5 does not match etag for "%s" on %s/%s'
|
||||
% (path, node['ip'], node['device']))
|
||||
else:
|
||||
conn = http_connect(node['ip'], node['port'],
|
||||
node['device'], part, 'HEAD',
|
||||
path.encode('utf-8'), {})
|
||||
resp = conn.getresponse()
|
||||
if resp.status // 100 != 2:
|
||||
self.object_not_found += 1
|
||||
consistent = False
|
||||
print(' Bad status %s HEADing object "%s" on %s/%s'
|
||||
% (resp.status, path,
|
||||
node['ip'], node['device']))
|
||||
continue
|
||||
|
||||
override_etag = resp.getheader(
|
||||
'X-Object-Sysmeta-Container-Update-Override-Etag')
|
||||
if override_etag:
|
||||
etags.append((override_etag, node))
|
||||
else:
|
||||
etags.append((resp.getheader('ETag'), node))
|
||||
except Exception:
|
||||
self.object_exceptions += 1
|
||||
consistent = False
|
||||
print(' Exception fetching object "%s" on %s/%s'
|
||||
% (path, node['ip'], node['device']))
|
||||
continue
|
||||
if not etags:
|
||||
consistent = False
|
||||
print(" Failed fo fetch object %s at all!" % path)
|
||||
elif hash:
|
||||
for etag, node in etags:
|
||||
if etag.strip('"') != hash:
|
||||
consistent = False
|
||||
self.object_checksum_mismatch += 1
|
||||
print(' ETag mismatch for "%s" on %s/%s'
|
||||
% (path, node['ip'], node['device']))
|
||||
if not consistent and self.error_file:
|
||||
with open(self.error_file, 'a') as err_file:
|
||||
print(path, file=err_file)
|
||||
self.objects_checked += 1
|
||||
|
||||
def audit_container(self, account, name, recurse=False):
|
||||
if (account, name) in self.in_progress:
|
||||
self.in_progress[(account, name)].wait()
|
||||
if (account, name) in self.list_cache:
|
||||
return self.list_cache[(account, name)]
|
||||
self.in_progress[(account, name)] = Event()
|
||||
print('Auditing container "%s"' % name)
|
||||
path = '/%s/%s' % (account, name)
|
||||
account_listing = self.audit_account(account)
|
||||
consistent = True
|
||||
if name not in account_listing:
|
||||
consistent = False
|
||||
print(" Container %s not in account listing!" % path)
|
||||
part, nodes = \
|
||||
self.container_ring.get_nodes(account, name.encode('utf-8'))
|
||||
rec_d = {}
|
||||
responses = {}
|
||||
for node in nodes:
|
||||
marker = ''
|
||||
results = True
|
||||
while results:
|
||||
try:
|
||||
conn = http_connect(node['ip'], node['port'],
|
||||
node['device'], part, 'GET',
|
||||
path.encode('utf-8'), {},
|
||||
'format=json&marker=%s' %
|
||||
quote(marker.encode('utf-8')))
|
||||
resp = conn.getresponse()
|
||||
if resp.status // 100 != 2:
|
||||
self.container_not_found += 1
|
||||
consistent = False
|
||||
print(' Bad status GETting container "%s" on %s/%s' %
|
||||
(path, node['ip'], node['device']))
|
||||
break
|
||||
if node['id'] not in responses:
|
||||
responses[node['id']] = {
|
||||
h.lower(): v for h, v in resp.getheaders()}
|
||||
results = json.loads(resp.read())
|
||||
except Exception:
|
||||
self.container_exceptions += 1
|
||||
consistent = False
|
||||
print(' Exception GETting container "%s" on %s/%s' %
|
||||
(path, node['ip'], node['device']))
|
||||
break
|
||||
if results:
|
||||
marker = results[-1]['name']
|
||||
for obj in results:
|
||||
obj_name = obj['name']
|
||||
if obj_name not in rec_d:
|
||||
rec_d[obj_name] = obj
|
||||
if (obj['last_modified'] !=
|
||||
rec_d[obj_name]['last_modified']):
|
||||
self.container_obj_mismatch += 1
|
||||
consistent = False
|
||||
print(" Different versions of %s/%s "
|
||||
"in container dbs." % (name, obj['name']))
|
||||
if (obj['last_modified'] >
|
||||
rec_d[obj_name]['last_modified']):
|
||||
rec_d[obj_name] = obj
|
||||
obj_counts = [int(header['x-container-object-count'])
|
||||
for header in responses.values()]
|
||||
if not obj_counts:
|
||||
consistent = False
|
||||
print(" Failed to fetch container %s at all!" % path)
|
||||
else:
|
||||
if len(set(obj_counts)) != 1:
|
||||
self.container_count_mismatch += 1
|
||||
consistent = False
|
||||
print(
|
||||
" Container databases don't agree on number of objects.")
|
||||
print(
|
||||
" Max: %s, Min: %s" % (max(obj_counts), min(obj_counts)))
|
||||
self.containers_checked += 1
|
||||
self.list_cache[(account, name)] = rec_d
|
||||
self.in_progress[(account, name)].send(True)
|
||||
del self.in_progress[(account, name)]
|
||||
if recurse:
|
||||
for obj in rec_d.keys():
|
||||
self.pool.spawn_n(self.audit_object, account, name, obj)
|
||||
if not consistent and self.error_file:
|
||||
with open(self.error_file, 'a') as error_file:
|
||||
print(path, file=error_file)
|
||||
return rec_d
|
||||
|
||||
def audit_account(self, account, recurse=False):
|
||||
if account in self.in_progress:
|
||||
self.in_progress[account].wait()
|
||||
if account in self.list_cache:
|
||||
return self.list_cache[account]
|
||||
self.in_progress[account] = Event()
|
||||
print('Auditing account "%s"' % account)
|
||||
consistent = True
|
||||
path = '/%s' % account
|
||||
part, nodes = self.account_ring.get_nodes(account)
|
||||
responses = {}
|
||||
for node in nodes:
|
||||
marker = ''
|
||||
results = True
|
||||
while results:
|
||||
node_id = node['id']
|
||||
try:
|
||||
conn = http_connect(node['ip'], node['port'],
|
||||
node['device'], part, 'GET', path, {},
|
||||
'format=json&marker=%s' %
|
||||
quote(marker.encode('utf-8')))
|
||||
resp = conn.getresponse()
|
||||
if resp.status // 100 != 2:
|
||||
self.account_not_found += 1
|
||||
consistent = False
|
||||
print(" Bad status GETting account '%s' "
|
||||
" from %s:%s" %
|
||||
(account, node['ip'], node['device']))
|
||||
break
|
||||
results = json.loads(resp.read())
|
||||
except Exception:
|
||||
self.account_exceptions += 1
|
||||
consistent = False
|
||||
print(" Exception GETting account '%s' on %s:%s" %
|
||||
(account, node['ip'], node['device']))
|
||||
break
|
||||
if node_id not in responses:
|
||||
responses[node_id] = [
|
||||
{h.lower(): v for h, v in resp.getheaders()}, []]
|
||||
responses[node_id][1].extend(results)
|
||||
if results:
|
||||
marker = results[-1]['name']
|
||||
headers = [r[0] for r in responses.values()]
|
||||
cont_counts = [int(header['x-account-container-count'])
|
||||
for header in headers]
|
||||
if len(set(cont_counts)) != 1:
|
||||
self.account_container_mismatch += 1
|
||||
consistent = False
|
||||
print(" Account databases for '%s' don't agree on"
|
||||
" number of containers." % account)
|
||||
if cont_counts:
|
||||
print(" Max: %s, Min: %s" % (max(cont_counts),
|
||||
min(cont_counts)))
|
||||
obj_counts = [int(header['x-account-object-count'])
|
||||
for header in headers]
|
||||
if len(set(obj_counts)) != 1:
|
||||
self.account_object_mismatch += 1
|
||||
consistent = False
|
||||
print(" Account databases for '%s' don't agree on"
|
||||
" number of objects." % account)
|
||||
if obj_counts:
|
||||
print(" Max: %s, Min: %s" % (max(obj_counts),
|
||||
min(obj_counts)))
|
||||
containers = set()
|
||||
for resp in responses.values():
|
||||
containers.update(container['name'] for container in resp[1])
|
||||
self.list_cache[account] = containers
|
||||
self.in_progress[account].send(True)
|
||||
del self.in_progress[account]
|
||||
self.accounts_checked += 1
|
||||
if recurse:
|
||||
for container in containers:
|
||||
self.pool.spawn_n(self.audit_container, account,
|
||||
container, True)
|
||||
if not consistent and self.error_file:
|
||||
with open(self.error_file, 'a') as error_file:
|
||||
print(path, error_file)
|
||||
return containers
|
||||
|
||||
def audit(self, account, container=None, obj=None):
|
||||
if obj and container:
|
||||
self.pool.spawn_n(self.audit_object, account, container, obj)
|
||||
elif container:
|
||||
self.pool.spawn_n(self.audit_container, account, container, True)
|
||||
else:
|
||||
self.pool.spawn_n(self.audit_account, account, True)
|
||||
|
||||
def wait(self):
|
||||
self.pool.waitall()
|
||||
|
||||
def print_stats(self):
|
||||
|
||||
def _print_stat(name, stat):
|
||||
# Right align stat name in a field of 18 characters
|
||||
print("{0:>18}: {1}".format(name, stat))
|
||||
|
||||
print()
|
||||
_print_stat("Accounts checked", self.accounts_checked)
|
||||
if self.account_not_found:
|
||||
_print_stat("Missing Replicas", self.account_not_found)
|
||||
if self.account_exceptions:
|
||||
_print_stat("Exceptions", self.account_exceptions)
|
||||
if self.account_container_mismatch:
|
||||
_print_stat("Container mismatch", self.account_container_mismatch)
|
||||
if self.account_object_mismatch:
|
||||
_print_stat("Object mismatch", self.account_object_mismatch)
|
||||
print()
|
||||
_print_stat("Containers checked", self.containers_checked)
|
||||
if self.container_not_found:
|
||||
_print_stat("Missing Replicas", self.container_not_found)
|
||||
if self.container_exceptions:
|
||||
_print_stat("Exceptions", self.container_exceptions)
|
||||
if self.container_count_mismatch:
|
||||
_print_stat("Count mismatch", self.container_count_mismatch)
|
||||
if self.container_obj_mismatch:
|
||||
_print_stat("Object mismatch", self.container_obj_mismatch)
|
||||
print()
|
||||
_print_stat("Objects checked", self.objects_checked)
|
||||
if self.object_not_found:
|
||||
_print_stat("Missing Replicas", self.object_not_found)
|
||||
if self.object_exceptions:
|
||||
_print_stat("Exceptions", self.object_exceptions)
|
||||
if self.object_checksum_mismatch:
|
||||
_print_stat("MD5 Mismatch", self.object_checksum_mismatch)
|
||||
|
||||
|
||||
def main():
|
||||
try:
|
||||
optlist, args = getopt.getopt(sys.argv[1:], 'c:r:e:d')
|
||||
except getopt.GetoptError as err:
|
||||
print(str(err))
|
||||
print(usage)
|
||||
sys.exit(2)
|
||||
if not args and os.isatty(sys.stdin.fileno()):
|
||||
print(usage)
|
||||
sys.exit()
|
||||
opts = dict(optlist)
|
||||
options = {
|
||||
'concurrency': int(opts.get('-c', 50)),
|
||||
'error_file': opts.get('-e', None),
|
||||
'swift_dir': opts.get('-r', '/etc/swift'),
|
||||
'deep': '-d' in opts,
|
||||
}
|
||||
auditor = Auditor(**options)
|
||||
if not os.isatty(sys.stdin.fileno()):
|
||||
args = chain(args, sys.stdin)
|
||||
for path in args:
|
||||
path = '/' + path.rstrip('\r\n').lstrip('/')
|
||||
auditor.audit(*split_path(path, 1, 3, True))
|
||||
auditor.wait()
|
||||
auditor.print_stats()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
91
swift/cli/config.py
Executable file
91
swift/cli/config.py
Executable file
@ -0,0 +1,91 @@
|
||||
#!/usr/bin/env python
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import print_function
|
||||
import optparse
|
||||
import os
|
||||
import sys
|
||||
|
||||
from swift.common.manager import Server
|
||||
from swift.common.utils import readconf
|
||||
from swift.common.wsgi import appconfig
|
||||
|
||||
parser = optparse.OptionParser('%prog [options] SERVER')
|
||||
parser.add_option('-c', '--config-num', metavar="N", type="int",
|
||||
dest="number", default=0,
|
||||
help="parse config for the Nth server only")
|
||||
parser.add_option('-s', '--section', help="only display matching sections")
|
||||
parser.add_option('-w', '--wsgi', action='store_true',
|
||||
help="use wsgi/paste parser instead of readconf")
|
||||
|
||||
|
||||
def _context_name(context):
|
||||
return ':'.join((context.object_type.name, context.name))
|
||||
|
||||
|
||||
def inspect_app_config(app_config):
|
||||
conf = {}
|
||||
context = app_config.context
|
||||
section_name = _context_name(context)
|
||||
conf[section_name] = context.config()
|
||||
if context.object_type.name == 'pipeline':
|
||||
filters = context.filter_contexts
|
||||
pipeline = []
|
||||
for filter_context in filters:
|
||||
conf[_context_name(filter_context)] = filter_context.config()
|
||||
pipeline.append(filter_context.entry_point_name)
|
||||
app_context = context.app_context
|
||||
conf[_context_name(app_context)] = app_context.config()
|
||||
pipeline.append(app_context.entry_point_name)
|
||||
conf[section_name]['pipeline'] = ' '.join(pipeline)
|
||||
return conf
|
||||
|
||||
|
||||
def main():
|
||||
options, args = parser.parse_args()
|
||||
options = dict(vars(options))
|
||||
|
||||
if not args:
|
||||
return 'ERROR: specify type of server or conf_path'
|
||||
conf_files = []
|
||||
for arg in args:
|
||||
if os.path.exists(arg):
|
||||
conf_files.append(arg)
|
||||
else:
|
||||
conf_files += Server(arg).conf_files(**options)
|
||||
for conf_file in conf_files:
|
||||
print('# %s' % conf_file)
|
||||
if options['wsgi']:
|
||||
app_config = appconfig(conf_file)
|
||||
conf = inspect_app_config(app_config)
|
||||
else:
|
||||
conf = readconf(conf_file)
|
||||
flat_vars = {}
|
||||
for k, v in conf.items():
|
||||
if options['section'] and k != options['section']:
|
||||
continue
|
||||
if not isinstance(v, dict):
|
||||
flat_vars[k] = v
|
||||
continue
|
||||
print('[%s]' % k)
|
||||
for opt, value in v.items():
|
||||
print('%s = %s' % (opt, value))
|
||||
print()
|
||||
for k, v in flat_vars.items():
|
||||
print('# %s = %s' % (k, v))
|
||||
print()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
268
swift/cli/drive_audit.py
Executable file
268
swift/cli/drive_audit.py
Executable file
@ -0,0 +1,268 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright (c) 2010-2012 OpenStack Foundation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import datetime
|
||||
import glob
|
||||
import locale
|
||||
import os
|
||||
import os.path
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
||||
import six
|
||||
from six.moves.configparser import ConfigParser
|
||||
|
||||
from swift.common.utils import backward, get_logger, dump_recon_cache, \
|
||||
config_true_value
|
||||
|
||||
|
||||
def get_devices(device_dir, logger):
|
||||
devices = []
|
||||
majmin_devices = {}
|
||||
|
||||
# List /dev/block
|
||||
# Using os.scandir on recent versions of python, else os.listdir
|
||||
if 'scandir' in dir(os):
|
||||
with os.scandir("/dev/block") as it:
|
||||
for ent in it:
|
||||
if ent.is_symlink():
|
||||
dev_name = os.path.basename(os.readlink(ent.path))
|
||||
majmin = os.path.basename(ent.path).split(':')
|
||||
majmin_devices[dev_name] = {'major': majmin[0],
|
||||
'minor': majmin[1]}
|
||||
else:
|
||||
for ent in os.listdir("/dev/block"):
|
||||
ent_path = os.path.join("/dev/block", ent)
|
||||
if os.path.is_symlink(ent_path):
|
||||
dev_name = os.path.basename(os.readlink(ent_path))
|
||||
majmin = os.path.basename(ent_path).split(':')
|
||||
majmin_devices[dev_name] = {'major': majmin[0],
|
||||
'minor': majmin[1]}
|
||||
|
||||
for line in open('/proc/mounts').readlines():
|
||||
data = line.strip().split()
|
||||
block_device = data[0]
|
||||
mount_point = data[1]
|
||||
if mount_point.startswith(device_dir):
|
||||
device = {}
|
||||
device['mount_point'] = mount_point
|
||||
device['block_device'] = block_device
|
||||
dev_name = os.path.basename(block_device)
|
||||
if dev_name in majmin_devices:
|
||||
# If symlink is in /dev/block
|
||||
device['major'] = majmin_devices[dev_name]['major']
|
||||
device['minor'] = majmin_devices[dev_name]['minor']
|
||||
else:
|
||||
# Else we try to stat block_device
|
||||
try:
|
||||
device_num = os.stat(block_device).st_rdev
|
||||
except OSError:
|
||||
# If we can't stat the device,
|
||||
# then something weird is going on
|
||||
logger.error(
|
||||
'Could not determine major:minor numbers for %s '
|
||||
'(mounted at %s)! Skipping...',
|
||||
block_device, mount_point)
|
||||
continue
|
||||
device['major'] = str(os.major(device_num))
|
||||
device['minor'] = str(os.minor(device_num))
|
||||
devices.append(device)
|
||||
for line in open('/proc/partitions').readlines()[2:]:
|
||||
major, minor, blocks, kernel_device = line.strip().split()
|
||||
device = [d for d in devices
|
||||
if d['major'] == major and d['minor'] == minor]
|
||||
if device:
|
||||
device[0]['kernel_device'] = kernel_device
|
||||
return devices
|
||||
|
||||
|
||||
def get_errors(error_re, log_file_pattern, minutes, logger,
|
||||
log_file_encoding):
|
||||
# Assuming log rotation is being used, we need to examine
|
||||
# recently rotated files in case the rotation occurred
|
||||
# just before the script is being run - the data we are
|
||||
# looking for may have rotated.
|
||||
#
|
||||
# The globbing used before would not work with all out-of-box
|
||||
# distro setup for logrotate and syslog therefore moving this
|
||||
# to the config where one can set it with the desired
|
||||
# globbing pattern.
|
||||
log_files = [f for f in glob.glob(log_file_pattern)]
|
||||
try:
|
||||
log_files.sort(key=lambda f: os.stat(f).st_mtime, reverse=True)
|
||||
except (IOError, OSError) as exc:
|
||||
logger.error(exc)
|
||||
print(exc)
|
||||
sys.exit(1)
|
||||
|
||||
now_time = datetime.datetime.now()
|
||||
end_time = now_time - datetime.timedelta(minutes=minutes)
|
||||
# kern.log does not contain the year so we need to keep
|
||||
# track of the year and month in case the year recently
|
||||
# ticked over
|
||||
year = now_time.year
|
||||
prev_ent_month = now_time.strftime('%b')
|
||||
errors = {}
|
||||
|
||||
reached_old_logs = False
|
||||
for path in log_files:
|
||||
try:
|
||||
f = open(path, 'rb')
|
||||
except IOError:
|
||||
logger.error("Error: Unable to open " + path)
|
||||
print("Unable to open " + path)
|
||||
sys.exit(1)
|
||||
for line in backward(f):
|
||||
if not six.PY2:
|
||||
line = line.decode(log_file_encoding, 'surrogateescape')
|
||||
if '[ 0.000000]' in line \
|
||||
or 'KERNEL supported cpus:' in line \
|
||||
or 'BIOS-provided physical RAM map:' in line:
|
||||
# Ignore anything before the last boot
|
||||
reached_old_logs = True
|
||||
break
|
||||
# Solves the problem with year change - kern.log does not
|
||||
# keep track of the year.
|
||||
log_time_ent = line.split()[:3]
|
||||
if log_time_ent[0] == 'Dec' and prev_ent_month == 'Jan':
|
||||
year -= 1
|
||||
prev_ent_month = log_time_ent[0]
|
||||
log_time_string = '%d %s' % (year, ' '.join(log_time_ent))
|
||||
try:
|
||||
log_time = datetime.datetime.strptime(
|
||||
log_time_string, '%Y %b %d %H:%M:%S')
|
||||
except ValueError:
|
||||
# Some versions use ISO timestamps instead
|
||||
try:
|
||||
log_time = datetime.datetime.strptime(
|
||||
line[0:19], '%Y-%m-%dT%H:%M:%S')
|
||||
except ValueError:
|
||||
continue
|
||||
if log_time > end_time:
|
||||
for err in error_re:
|
||||
for device in err.findall(line):
|
||||
errors[device] = errors.get(device, 0) + 1
|
||||
else:
|
||||
reached_old_logs = True
|
||||
break
|
||||
if reached_old_logs:
|
||||
break
|
||||
return errors
|
||||
|
||||
|
||||
def comment_fstab(mount_point):
|
||||
with open('/etc/fstab', 'r') as fstab:
|
||||
with open('/etc/fstab.new', 'w') as new_fstab:
|
||||
for line in fstab:
|
||||
parts = line.split()
|
||||
if len(parts) > 2 \
|
||||
and parts[1] == mount_point \
|
||||
and not line.startswith('#'):
|
||||
new_fstab.write('#' + line)
|
||||
else:
|
||||
new_fstab.write(line)
|
||||
os.rename('/etc/fstab.new', '/etc/fstab')
|
||||
|
||||
|
||||
def main():
|
||||
c = ConfigParser()
|
||||
try:
|
||||
conf_path = sys.argv[1]
|
||||
except Exception:
|
||||
print("Usage: %s CONF_FILE" % sys.argv[0].split('/')[-1])
|
||||
sys.exit(1)
|
||||
if not c.read(conf_path):
|
||||
print("Unable to read config file %s" % conf_path)
|
||||
sys.exit(1)
|
||||
conf = dict(c.items('drive-audit'))
|
||||
device_dir = conf.get('device_dir', '/srv/node')
|
||||
minutes = int(conf.get('minutes', 60))
|
||||
error_limit = int(conf.get('error_limit', 1))
|
||||
recon_cache_path = conf.get('recon_cache_path', "/var/cache/swift")
|
||||
log_file_pattern = conf.get('log_file_pattern',
|
||||
'/var/log/kern.*[!.][!g][!z]')
|
||||
log_file_encoding = conf.get('log_file_encoding', 'auto')
|
||||
if log_file_encoding == 'auto':
|
||||
log_file_encoding = locale.getpreferredencoding()
|
||||
log_to_console = config_true_value(conf.get('log_to_console', False))
|
||||
error_re = []
|
||||
for conf_key in conf:
|
||||
if conf_key.startswith('regex_pattern_'):
|
||||
error_pattern = conf[conf_key]
|
||||
try:
|
||||
r = re.compile(error_pattern)
|
||||
except re.error:
|
||||
sys.exit('Error: unable to compile regex pattern "%s"' %
|
||||
error_pattern)
|
||||
error_re.append(r)
|
||||
if not error_re:
|
||||
error_re = [
|
||||
re.compile(r'\berror\b.*\b(sd[a-z]{1,2}\d?)\b'),
|
||||
re.compile(r'\b(sd[a-z]{1,2}\d?)\b.*\berror\b'),
|
||||
]
|
||||
conf['log_name'] = conf.get('log_name', 'drive-audit')
|
||||
logger = get_logger(conf, log_to_console=log_to_console,
|
||||
log_route='drive-audit')
|
||||
devices = get_devices(device_dir, logger)
|
||||
logger.debug("Devices found: %s" % str(devices))
|
||||
if not devices:
|
||||
logger.error("Error: No devices found!")
|
||||
recon_errors = {}
|
||||
total_errors = 0
|
||||
for device in devices:
|
||||
recon_errors[device['mount_point']] = 0
|
||||
errors = get_errors(error_re, log_file_pattern, minutes, logger,
|
||||
log_file_encoding)
|
||||
logger.debug("Errors found: %s" % str(errors))
|
||||
unmounts = 0
|
||||
for kernel_device, count in errors.items():
|
||||
if count >= error_limit:
|
||||
device = \
|
||||
[d for d in devices if d['kernel_device'] == kernel_device]
|
||||
if device:
|
||||
mount_point = device[0]['mount_point']
|
||||
if mount_point.startswith(device_dir):
|
||||
if config_true_value(conf.get('unmount_failed_device',
|
||||
True)):
|
||||
logger.info("Unmounting %s with %d errors" %
|
||||
(mount_point, count))
|
||||
subprocess.call(['umount', '-fl', mount_point])
|
||||
logger.info("Commenting out %s from /etc/fstab" %
|
||||
(mount_point))
|
||||
comment_fstab(mount_point)
|
||||
unmounts += 1
|
||||
else:
|
||||
logger.info("Detected %s with %d errors "
|
||||
"(Device not unmounted)" %
|
||||
(mount_point, count))
|
||||
recon_errors[mount_point] = count
|
||||
total_errors += count
|
||||
recon_file = recon_cache_path + "/drive.recon"
|
||||
dump_recon_cache(recon_errors, recon_file, logger)
|
||||
dump_recon_cache({'drive_audit_errors': total_errors}, recon_file, logger,
|
||||
set_owner=conf.get("user", "swift"))
|
||||
|
||||
if unmounts == 0:
|
||||
logger.info("No drives were unmounted")
|
||||
elif os.path.isdir("/run/systemd/system"):
|
||||
logger.debug("fstab updated, calling systemctl daemon-reload")
|
||||
subprocess.call(["/usr/bin/systemctl", "daemon-reload"])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
80
swift/cli/get_nodes.py
Executable file
80
swift/cli/get_nodes.py
Executable file
@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright (c) 2010-2012 OpenStack Foundation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import sys
|
||||
from optparse import OptionParser
|
||||
from os.path import basename
|
||||
|
||||
from swift.common.ring import Ring
|
||||
from swift.common.storage_policy import reload_storage_policies
|
||||
from swift.common.utils import set_swift_dir
|
||||
from swift.cli.info import (parse_get_node_args, print_item_locations,
|
||||
InfoSystemExit)
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
usage = '''
|
||||
Shows the nodes responsible for the item specified.
|
||||
Usage: %prog [-a] <ring.gz> <account> [<container> [<object>]]
|
||||
Or: %prog [-a] <ring.gz> -p partition
|
||||
Or: %prog [-a] -P policy_name <account> [<container> [<object>]]
|
||||
Or: %prog [-a] -P policy_name -p partition
|
||||
Note: account, container, object can also be a single arg separated by /
|
||||
Example:
|
||||
$ %prog -a /etc/swift/account.ring.gz MyAccount
|
||||
Partition 5743883
|
||||
Hash 96ae332a60b58910784e4417a03e1ad0
|
||||
10.1.1.7:8000 sdd1
|
||||
10.1.9.2:8000 sdb1
|
||||
10.1.5.5:8000 sdf1
|
||||
10.1.5.9:8000 sdt1 # [Handoff]
|
||||
'''
|
||||
parser = OptionParser(usage)
|
||||
parser.add_option('-a', '--all', action='store_true',
|
||||
help='Show all handoff nodes')
|
||||
parser.add_option('-p', '--partition', metavar='PARTITION',
|
||||
help='Show nodes for a given partition')
|
||||
parser.add_option('-P', '--policy-name', dest='policy_name',
|
||||
help='Specify which policy to use')
|
||||
parser.add_option('-d', '--swift-dir', default='/etc/swift',
|
||||
dest='swift_dir', help='Path to swift directory')
|
||||
parser.add_option('-Q', '--quoted', action='store_true',
|
||||
help='Assume swift paths are quoted')
|
||||
options, args = parser.parse_args()
|
||||
|
||||
if set_swift_dir(options.swift_dir):
|
||||
reload_storage_policies()
|
||||
|
||||
try:
|
||||
ring_path, args = parse_get_node_args(options, args)
|
||||
except InfoSystemExit as e:
|
||||
parser.print_help()
|
||||
sys.exit('ERROR: %s' % e)
|
||||
|
||||
ring = ring_name = None
|
||||
if ring_path:
|
||||
ring_name = basename(ring_path)[:-len('.ring.gz')]
|
||||
ring = Ring(ring_path)
|
||||
|
||||
try:
|
||||
print_item_locations(ring, ring_name, *args, **vars(options))
|
||||
except InfoSystemExit:
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
93
swift/cli/oldies.py
Executable file
93
swift/cli/oldies.py
Executable file
@ -0,0 +1,93 @@
|
||||
#!/usr/bin/env python
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import optparse
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
||||
def main():
|
||||
parser = optparse.OptionParser(usage='''%prog [options]
|
||||
|
||||
Lists old Swift processes.
|
||||
'''.strip())
|
||||
parser.add_option('-a', '--age', dest='hours', type='int', default=720,
|
||||
help='look for processes at least HOURS old; '
|
||||
'default: 720 (30 days)')
|
||||
parser.add_option('-p', '--pids', action='store_true',
|
||||
help='only print the pids found; for example, to pipe '
|
||||
'to xargs kill')
|
||||
(options, args) = parser.parse_args()
|
||||
|
||||
listing = []
|
||||
for line in subprocess.Popen(
|
||||
['ps', '-eo', 'etime,pid,args', '--no-headers'],
|
||||
stdout=subprocess.PIPE).communicate()[0].split(b'\n'):
|
||||
if not line:
|
||||
continue
|
||||
hours = 0
|
||||
try:
|
||||
etime, pid, args = line.decode('ascii').split(None, 2)
|
||||
except ValueError:
|
||||
# This covers both decoding and not-enough-values-to-unpack errors
|
||||
sys.exit('Could not process ps line %r' % line)
|
||||
if not args.startswith((
|
||||
'/usr/bin/python /usr/bin/swift-',
|
||||
'/usr/bin/python /usr/local/bin/swift-',
|
||||
'/bin/python /usr/bin/swift-',
|
||||
'/usr/bin/python3 /usr/bin/swift-',
|
||||
'/usr/bin/python3 /usr/local/bin/swift-',
|
||||
'/bin/python3 /usr/bin/swift-')):
|
||||
continue
|
||||
args = args.split('-', 1)[1]
|
||||
etime = etime.split('-')
|
||||
if len(etime) == 2:
|
||||
hours = int(etime[0]) * 24
|
||||
etime = etime[1]
|
||||
elif len(etime) == 1:
|
||||
etime = etime[0]
|
||||
else:
|
||||
sys.exit('Could not process etime value from %r' % line)
|
||||
etime = etime.split(':')
|
||||
if len(etime) == 3:
|
||||
hours += int(etime[0])
|
||||
elif len(etime) != 2:
|
||||
sys.exit('Could not process etime value from %r' % line)
|
||||
if hours >= options.hours:
|
||||
listing.append((str(hours), pid, args))
|
||||
|
||||
if not listing:
|
||||
sys.exit()
|
||||
|
||||
if options.pids:
|
||||
for hours, pid, args in listing:
|
||||
print(pid)
|
||||
else:
|
||||
hours_len = len('Hours')
|
||||
pid_len = len('PID')
|
||||
args_len = len('Command')
|
||||
for hours, pid, args in listing:
|
||||
hours_len = max(hours_len, len(hours))
|
||||
pid_len = max(pid_len, len(pid))
|
||||
args_len = max(args_len, len(args))
|
||||
args_len = min(args_len, 78 - hours_len - pid_len)
|
||||
|
||||
print('%*s %*s %s' % (hours_len, 'Hours', pid_len, 'PID', 'Command'))
|
||||
for hours, pid, args in listing:
|
||||
print('%*s %*s %s' % (hours_len, hours, pid_len,
|
||||
pid, args[:args_len]))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
137
swift/cli/orphans.py
Executable file
137
swift/cli/orphans.py
Executable file
@ -0,0 +1,137 @@
|
||||
#!/usr/bin/env python
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from __future__ import print_function
|
||||
import optparse
|
||||
import os
|
||||
import re
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from swift.common.manager import RUN_DIR
|
||||
|
||||
|
||||
def main():
|
||||
parser = optparse.OptionParser(usage='''%prog [options]
|
||||
|
||||
Lists and optionally kills orphaned Swift processes. This is done by scanning
|
||||
/var/run/swift for .pid files and listing any processes that look like Swift
|
||||
processes but aren't associated with the pids in those .pid files. Any Swift
|
||||
processes running with the 'once' parameter are ignored, as those are usually
|
||||
for full-speed audit scans and such.
|
||||
|
||||
Example (sends SIGTERM to all orphaned Swift processes older than two hours):
|
||||
%prog -a 2 -k TERM
|
||||
'''.strip())
|
||||
parser.add_option('-a', '--age', dest='hours', type='int', default=24,
|
||||
help="look for processes at least HOURS old; "
|
||||
"default: 24")
|
||||
parser.add_option('-k', '--kill', dest='signal',
|
||||
help='send SIGNAL to matched processes; default: just '
|
||||
'list process information')
|
||||
parser.add_option('-w', '--wide', dest='wide', default=False,
|
||||
action='store_true',
|
||||
help="don't clip the listing at 80 characters")
|
||||
parser.add_option('-r', '--run-dir', type="str",
|
||||
dest="run_dir", default=RUN_DIR,
|
||||
help="alternative directory to store running pid files "
|
||||
"default: %s" % RUN_DIR)
|
||||
(options, args) = parser.parse_args()
|
||||
|
||||
pids = []
|
||||
|
||||
for root, directories, files in os.walk(options.run_dir):
|
||||
for name in files:
|
||||
if name.endswith(('.pid', '.pid.d')):
|
||||
pids.append(open(os.path.join(root, name)).read().strip())
|
||||
pids.extend(subprocess.Popen(
|
||||
['ps', '--ppid', pids[-1], '-o', 'pid', '--no-headers'],
|
||||
stdout=subprocess.PIPE).communicate()[0].decode().split())
|
||||
|
||||
listing = []
|
||||
swift_cmd_re = re.compile(
|
||||
'^/usr/bin/python[23]? /usr(?:/local)?/bin/swift-')
|
||||
for line in subprocess.Popen(
|
||||
['ps', '-eo', 'etime,pid,args', '--no-headers'],
|
||||
stdout=subprocess.PIPE).communicate()[0].split(b'\n'):
|
||||
if not line:
|
||||
continue
|
||||
hours = 0
|
||||
try:
|
||||
etime, pid, args = line.decode('ascii').split(None, 2)
|
||||
except ValueError:
|
||||
sys.exit('Could not process ps line %r' % line)
|
||||
if pid in pids:
|
||||
continue
|
||||
if any([
|
||||
not swift_cmd_re.match(args),
|
||||
'swift-orphans' in args,
|
||||
'once' in args.split(),
|
||||
]):
|
||||
continue
|
||||
args = args.split('swift-', 1)[1]
|
||||
etime = etime.split('-')
|
||||
if len(etime) == 2:
|
||||
hours = int(etime[0]) * 24
|
||||
etime = etime[1]
|
||||
elif len(etime) == 1:
|
||||
etime = etime[0]
|
||||
else:
|
||||
sys.exit('Could not process etime value from %r' % line)
|
||||
etime = etime.split(':')
|
||||
if len(etime) == 3:
|
||||
hours += int(etime[0])
|
||||
elif len(etime) != 2:
|
||||
sys.exit('Could not process etime value from %r' % line)
|
||||
if hours >= options.hours:
|
||||
listing.append((str(hours), pid, args))
|
||||
|
||||
if not listing:
|
||||
sys.exit()
|
||||
|
||||
hours_len = len('Hours')
|
||||
pid_len = len('PID')
|
||||
args_len = len('Command')
|
||||
for hours, pid, args in listing:
|
||||
hours_len = max(hours_len, len(hours))
|
||||
pid_len = max(pid_len, len(pid))
|
||||
args_len = max(args_len, len(args))
|
||||
args_len = min(args_len, 78 - hours_len - pid_len)
|
||||
|
||||
print('%*s %*s %s' %
|
||||
(hours_len, 'Hours', pid_len, 'PID', 'Command'))
|
||||
for hours, pid, args in listing:
|
||||
print('%*s %*s %s' %
|
||||
(hours_len, hours, pid_len, pid, args[:args_len]))
|
||||
|
||||
if options.signal:
|
||||
try:
|
||||
signum = int(options.signal)
|
||||
except ValueError:
|
||||
signum = getattr(signal, options.signal.upper(),
|
||||
getattr(signal, 'SIG' + options.signal.upper(),
|
||||
None))
|
||||
if not signum:
|
||||
sys.exit('Could not translate %r to a signal number.' %
|
||||
options.signal)
|
||||
print('Sending processes %s (%d) signal...' % (options.signal, signum),
|
||||
end='')
|
||||
for hours, pid, args in listing:
|
||||
os.kill(int(pid), signum)
|
||||
print('Done.')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
74
swift/cli/reconciler_enqueue.py
Normal file
74
swift/cli/reconciler_enqueue.py
Normal file
@ -0,0 +1,74 @@
|
||||
#!/usr/bin/env python
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import sys
|
||||
from optparse import OptionParser
|
||||
|
||||
import eventlet.debug
|
||||
|
||||
from swift.common.ring import Ring
|
||||
from swift.common.utils import split_path
|
||||
from swift.common.storage_policy import POLICIES
|
||||
|
||||
from swift.container.reconciler import add_to_reconciler_queue
|
||||
"""
|
||||
This tool is primarily for debugging and development but can be used an example
|
||||
of how an operator could enqueue objects manually if a problem is discovered -
|
||||
might be particularly useful if you need to hack a fix into the reconciler
|
||||
and re-run it.
|
||||
"""
|
||||
|
||||
USAGE = """
|
||||
%prog <policy_index> </a/c/o> <timestamp> [options]
|
||||
|
||||
This script enqueues an object to be evaluated by the reconciler.
|
||||
|
||||
Arguments:
|
||||
policy_index: the policy the object is currently stored in.
|
||||
/a/c/o: the full path of the object - utf-8
|
||||
timestamp: the timestamp of the datafile/tombstone.
|
||||
|
||||
""".strip()
|
||||
|
||||
parser = OptionParser(USAGE)
|
||||
parser.add_option('-X', '--op', default='PUT', choices=('PUT', 'DELETE'),
|
||||
help='the method of the misplaced operation')
|
||||
parser.add_option('-f', '--force', action='store_true',
|
||||
help='force an object to be re-enqueued')
|
||||
|
||||
|
||||
def main():
|
||||
eventlet.debug.hub_exceptions(True)
|
||||
options, args = parser.parse_args()
|
||||
try:
|
||||
policy_index, path, timestamp = args
|
||||
except ValueError:
|
||||
sys.exit(parser.print_help())
|
||||
container_ring = Ring('/etc/swift/container.ring.gz')
|
||||
policy = POLICIES.get_by_index(policy_index)
|
||||
if not policy:
|
||||
return 'ERROR: invalid storage policy index: %s' % policy
|
||||
try:
|
||||
account, container, obj = split_path(path, 3, 3, True)
|
||||
except ValueError as e:
|
||||
return 'ERROR: %s' % e
|
||||
container_name = add_to_reconciler_queue(
|
||||
container_ring, account, container, obj,
|
||||
policy.idx, timestamp, options.op, force=options.force)
|
||||
if not container_name:
|
||||
return 'ERROR: unable to enqueue!'
|
||||
print(container_name)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
Loading…
x
Reference in New Issue
Block a user