dispersion report option to output missing parts

Adds a -p option to swift-dispersion-report that will output the
partitions that are missing copies to standard error. This is another
thing we've been meaning to add for a long time. It's useful when you
want to do some further research on whether a partition truly has fewer
copies or if they're just somewhere else in the cluster due to a ring
change, for instance.

DocImpact

Change-Id: I5e47aa5818483ecc34b39ef6f8cd83ad312ed9a0
This commit is contained in:
gholt 2012-10-25 18:27:16 +00:00
parent f9b49a8052
commit dcc89e3ad6
2 changed files with 36 additions and 8 deletions

View File

@ -66,7 +66,7 @@ def get_error_log(prefix):
def container_dispersion_report(coropool, connpool, account, container_ring, def container_dispersion_report(coropool, connpool, account, container_ring,
retries): retries, output_missing_partitions):
with connpool.item() as conn: with connpool.item() as conn:
containers = [c['name'] for c in conn.get_account(prefix='dispersion_', containers = [c['name'] for c in conn.get_account(prefix='dispersion_',
full_listing=True)[1]] full_listing=True)[1]]
@ -100,6 +100,13 @@ def container_dispersion_report(coropool, connpool, account, container_ring,
except (Exception, Timeout), err: except (Exception, Timeout), err:
error_log('Giving up on /%s/%s/%s: %s' % (part, account, error_log('Giving up on /%s/%s/%s: %s' % (part, account,
container, err)) container, err))
if output_missing_partitions and \
found_count < container_ring.replica_count:
missing = container_ring.replica_count - found_count
print '\r\x1B[K',
stdout.flush()
print >>stderr, '# Container partition %s missing %s cop%s' % (
part, missing, 'y' if missing == 1 else 'ies')
container_copies_found[found_count] += 1 container_copies_found[found_count] += 1
containers_queried[0] += 1 containers_queried[0] += 1
if time() >= next_report[0]: if time() >= next_report[0]:
@ -155,7 +162,7 @@ def container_dispersion_report(coropool, connpool, account, container_ring,
def object_dispersion_report(coropool, connpool, account, object_ring, def object_dispersion_report(coropool, connpool, account, object_ring,
retries): retries, output_missing_partitions):
container = 'dispersion_objects' container = 'dispersion_objects'
with connpool.item() as conn: with connpool.item() as conn:
try: try:
@ -198,6 +205,13 @@ def object_dispersion_report(coropool, connpool, account, object_ring,
except (Exception, Timeout), err: except (Exception, Timeout), err:
error_log('Giving up on /%s/%s/%s/%s: %s' % (part, account, error_log('Giving up on /%s/%s/%s/%s: %s' % (part, account,
container, obj, err)) container, obj, err))
if output_missing_partitions and \
found_count < object_ring.replica_count:
missing = object_ring.replica_count - found_count
print '\r\x1B[K',
stdout.flush()
print >>stderr, '# Object partition %s missing %s cop%s' % (
part, missing, 'y' if missing == 1 else 'ies')
object_copies_found[found_count] += 1 object_copies_found[found_count] += 1
objects_queried[0] += 1 objects_queried[0] += 1
if time() >= next_report[0]: if time() >= next_report[0]:
@ -289,6 +303,8 @@ Usage: %prog [options] [conf_file]
help='dump dispersion report in json format') help='dump dispersion report in json format')
parser.add_option('-d', '--debug', action='store_true', default=False, parser.add_option('-d', '--debug', action='store_true', default=False,
help='print 404s to standard error') help='print 404s to standard error')
parser.add_option('-p', '--partitions', action='store_true', default=False,
help='print missing partitions to standard error')
options, args = parser.parse_args() options, args = parser.parse_args()
@ -324,11 +340,11 @@ Usage: %prog [options] [conf_file]
container_ring = Ring(swift_dir, ring_name='container') container_ring = Ring(swift_dir, ring_name='container')
object_ring = Ring(swift_dir, ring_name='object') object_ring = Ring(swift_dir, ring_name='object')
container_result = container_dispersion_report(coropool, connpool, container_result = container_dispersion_report(
account, container_ring, coropool, connpool, account, container_ring, retries,
retries) options.partitions)
object_result = object_dispersion_report(coropool, connpool, account, object_result = object_dispersion_report(
object_ring, retries) coropool, connpool, account, object_ring, retries, options.partitions)
if json_output: if json_output:
print json.dumps({"container": container_result, print json.dumps({"container": container_result,
"object": object_result}) "object": object_result})

View File

@ -24,7 +24,7 @@
.SH SYNOPSIS .SH SYNOPSIS
.LP .LP
.B swift-dispersion-report [-j|--dump-json] [conf_file] .B swift-dispersion-report [-d|--debug] [-j|--dump-json] [-p|--partitions] [conf_file]
.SH DESCRIPTION .SH DESCRIPTION
.PP .PP
@ -54,12 +54,24 @@ same configuration file, /etc/swift/dispersion.conf . The account used by these
tools should be a dedicated account for the dispersion stats and also have admin tools should be a dedicated account for the dispersion stats and also have admin
privileges. privileges.
.SH OPTIONS
.RS 0
.PD 1
.IP "\fB-d, --debug\fR"
output any 404 responses to standard error
.SH OPTIONS .SH OPTIONS
.RS 0 .RS 0
.PD 1 .PD 1
.IP "\fB-j, --dump-json\fR" .IP "\fB-j, --dump-json\fR"
output dispersion report in json format output dispersion report in json format
.SH OPTIONS
.RS 0
.PD 1
.IP "\fB-p, --partitions\fR"
output the partition numbers that have any missing replicas
.SH CONFIGURATION .SH CONFIGURATION
.PD 0 .PD 0
Example \fI/etc/swift/dispersion.conf\fR: Example \fI/etc/swift/dispersion.conf\fR: