127 lines
4.6 KiB
Python
Executable File
127 lines
4.6 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# Copyright (c) 2010-2011 OpenStack, LLC.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
# implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import datetime
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
from ConfigParser import ConfigParser
|
|
|
|
from swift.common.utils import get_logger
|
|
|
|
# Kernel-log patterns that indicate a drive error. Every pattern must capture
# the device name (for example "sdb") in its single group, because the
# captured text is what the error counter is keyed on.
# To search for more types of errors, add the regex to the list below.
error_re = [
    r'error.*(sd[a-z])',
    r'(sd[a-z]).*error',
]
|
|
|
|
def get_devices(device_dir, logger, mounts_file='/proc/mounts',
                partitions_file='/proc/partitions'):
    """Return the mounted block devices whose mount point is under device_dir.

    :param device_dir: mount-point prefix to select (string prefix match)
    :param logger: object with an ``error(msg)`` method; used to report
                   block devices that cannot be stat'ed
    :param mounts_file: file in /proc/mounts format listing current mounts
    :param partitions_file: file in /proc/partitions format mapping
                            major/minor numbers to kernel device names
    :returns: list of dicts with the keys ``mount_point``, ``block_device``,
              ``major`` and ``minor`` (both as strings) and, when the device
              number is listed in partitions_file, ``kernel_device``
    """
    devices = []
    with open(mounts_file) as mounts:
        for line in mounts:
            fields = line.split()
            if len(fields) < 2:
                # Blank or truncated line: there is no mount point to check.
                continue
            block_device, mount_point = fields[0], fields[1]
            if not mount_point.startswith(device_dir):
                continue
            try:
                device_num = os.stat(block_device).st_rdev
            except OSError:
                # If we can't stat the device, then something weird is going on
                logger.error("Error: Could not stat %s!" %
                             block_device)
                continue
            devices.append({
                'mount_point': mount_point,
                'block_device': block_device,
                'major': str(os.major(device_num)),
                'minor': str(os.minor(device_num)),
            })

    # Index by device number so that every mount of the same block device
    # (not just the first one found) receives its kernel device name.
    by_number = {}
    for device in devices:
        key = (device['major'], device['minor'])
        by_number.setdefault(key, []).append(device)

    with open(partitions_file) as partitions:
        # The first two lines are the column header and a blank separator.
        partition_lines = partitions.readlines()[2:]
    for line in partition_lines:
        fields = line.split()
        if len(fields) != 4:
            # Not a "major minor #blocks name" row; ignore it.
            continue
        major, minor, _blocks, kernel_device = fields
        for device in by_number.get((major, minor), ()):
            device['kernel_device'] = kernel_device
    return devices
|
|
|
|
# The kernel right-aligns the seconds field of its timestamps, so the first
# message of a boot normally reads "[    0.000000]". Accept any amount of
# padding instead of depending on an exact number of spaces.
_BOOT_MARKER = re.compile(r'\[\s*0\.000000\]')


def _parse_log_time(line, now):
    """Return the datetime of a syslog-style line, or None if unparsable.

    Syslog timestamps ("Mon DD HH:MM:SS") carry no year. The current year is
    tried first; if that is not a valid date (Feb 29) or lands more than a
    day in the future, the line is assumed to be from the previous year.
    """
    stamp = ' '.join(line.split()[:3])
    latest_plausible = now + datetime.timedelta(days=1)
    for year in (now.year, now.year - 1):
        try:
            log_time = datetime.datetime.strptime(
                '%s %s' % (year, stamp), '%Y %b %d %H:%M:%S')
        except ValueError:
            continue
        if log_time <= latest_plausible:
            return log_time
    return None


def get_errors(minutes, log_file='/var/log/kern.log', patterns=None):
    """Count recent drive errors per device in the kernel log.

    Only lines written since the last boot marker and within the last
    ``minutes`` minutes are considered. Lines whose timestamp cannot be
    parsed are skipped.

    :param minutes: size of the look-back window, in minutes
    :param log_file: path of the kernel log to scan
    :param patterns: iterable of regexes, each capturing the device name in
                     one group; defaults to the module-level ``error_re``
    :returns: dict mapping captured device name to number of matches
    """
    if patterns is None:
        patterns = error_re
    compiled = [re.compile(pattern) for pattern in patterns]
    now = datetime.datetime.now()
    start_time = now - datetime.timedelta(minutes=minutes)
    errors = {}
    with open(log_file) as log:
        for line in log:
            if _BOOT_MARKER.search(line):
                # Ignore anything before the last boot
                errors = {}
                continue
            log_time = _parse_log_time(line, now)
            if log_time is None or log_time <= start_time:
                continue
            for pattern in compiled:
                for device in pattern.findall(line):
                    errors[device] = errors.get(device, 0) + 1
    return errors
|
|
|
|
def comment_fstab(mount_point, fstab_path='/etc/fstab'):
    """Comment out the fstab entry for mount_point.

    A modified copy is written to ``fstab_path + '.new'`` and then renamed
    over the original. Entries that are already commented out are left
    untouched so repeated runs do not keep prepending '#'.

    :param mount_point: mount point (second fstab field) to disable
    :param fstab_path: path of the fstab file to rewrite
    """
    new_path = fstab_path + '.new'
    with open(fstab_path, 'r') as fstab:
        with open(new_path, 'w') as new_fstab:
            for line in fstab:
                parts = line.split()
                is_active_entry = (len(parts) > 2 and
                                   parts[1] == mount_point and
                                   not parts[0].startswith('#'))
                if is_active_entry:
                    new_fstab.write('#' + line)
                else:
                    new_fstab.write(line)
    # The new file was created with default permissions; carry over the
    # original permission bits before it replaces the original.
    os.chmod(new_path, os.stat(fstab_path).st_mode & 0o7777)
    os.rename(new_path, fstab_path)
|
|
|
|
if __name__ == '__main__':
    # Usage: <script> CONF_FILE
    # Reads the [drive-audit] section of CONF_FILE, counts recent kernel-log
    # errors per drive, and for every drive at or above the error limit
    # unmounts it and comments it out of /etc/fstab.
    c = ConfigParser()
    try:
        conf_path = sys.argv[1]
    except IndexError:
        print("Usage: %s CONF_FILE" % sys.argv[0].split('/')[-1])
        sys.exit(1)
    if not c.read(conf_path):
        print("Unable to read config file %s" % conf_path)
        sys.exit(1)
    conf = dict(c.items('drive-audit'))
    device_dir = conf.get('device_dir', '/srv/node')
    minutes = int(conf.get('minutes', 60))
    error_limit = int(conf.get('error_limit', 1))
    conf['log_name'] = conf.get('log_name', 'drive-audit')
    logger = get_logger(conf, log_route='drive-audit')
    devices = get_devices(device_dir, logger)
    logger.debug("Devices found: %s" % str(devices))
    if not devices:
        logger.error("Error: No devices found!")
    errors = get_errors(minutes)
    logger.debug("Errors found: %s" % str(errors))
    unmounts = 0
    for kernel_device, count in errors.items():
        if count < error_limit:
            continue
        # Errors are keyed by whole-disk name (e.g. "sdb") while mounts use
        # partition names (e.g. "sdb1"), hence the prefix match. A device
        # that was not listed in /proc/partitions has no 'kernel_device'.
        device = [d for d in devices
                  if d.get('kernel_device', '').startswith(kernel_device)]
        if not device:
            continue
        mount_point = device[0]['mount_point']
        # Safety check: only ever touch mounts under the configured
        # device directory.
        if mount_point.startswith(device_dir):
            logger.info("Unmounting %s with %d errors" %
                        (mount_point, count))
            subprocess.call(['umount', '-fl', mount_point])
            logger.info("Commenting out %s from /etc/fstab" %
                        (mount_point))
            comment_fstab(mount_point)
            unmounts += 1
    if unmounts == 0:
        logger.info("No drives were unmounted")
|