Merge "Add dispersion command to swift-ring-builder"
This commit is contained in:
commit
ecf4d057b3
@ -37,19 +37,37 @@ cluster, and the locations for a partition are stored in the mapping maintained
|
|||||||
by the ring. The ring is also responsible for determining which devices are
|
by the ring. The ring is also responsible for determining which devices are
|
||||||
used for handoff in failure scenarios.
|
used for handoff in failure scenarios.
|
||||||
|
|
||||||
Data can be isolated with the concept of zones in the ring. Each replica
|
The replicas of each partition will be isolated onto as many distinct regions,
|
||||||
of a partition is guaranteed to reside in a different zone. A zone could
|
zones, servers and devices as the capacity of these failure domains allow. If
|
||||||
represent a drive, a server, a cabinet, a switch, or even a datacenter.
|
there are less failure domains at a given tier than replicas of the partition
|
||||||
|
assigned within a tier (e.g. a 3 replica cluster with 2 servers), or the
|
||||||
|
available capacity across the failure domains within a tier are not well
|
||||||
|
balanced it will not be possible to achieve both even capacity distribution
|
||||||
|
(`balance`) as well as complete isolation of replicas across failure domains
|
||||||
|
(`dispersion`). When this occurs the ring management tools will display a
|
||||||
|
warning so that the operator can evaluate the cluster topology.
|
||||||
|
|
||||||
The partitions of the ring are equally divided among all the devices in the
|
Data is evenly distributed across the capacity available in the cluster as
|
||||||
Swift installation. When partitions need to be moved around (for example if a
|
described by the devices weight. Weights can be used to balance the
|
||||||
device is added to the cluster), the ring ensures that a minimum number of
|
distribution of partitions on drives across the cluster. This can be useful,
|
||||||
partitions are moved at a time, and only one replica of a partition is moved at
|
for example, when different sized drives are used in a cluster. Device
|
||||||
a time.
|
weights can also be used when adding or removing capacity or failure domains
|
||||||
|
to control how many partitions are reassigned during a rebalance to be moved
|
||||||
|
as soon as replication bandwidth allows.
|
||||||
|
|
||||||
Weights can be used to balance the distribution of partitions on drives
|
.. note::
|
||||||
across the cluster. This can be useful, for example, when different sized
|
Prior to Swift 2.1.0 it was not possible to restrict partition movement by
|
||||||
drives are used in a cluster.
|
device weight when adding new failure domains, and would allow extremely
|
||||||
|
unbalanced rings. The greedy dispersion algorithm is now subject to the
|
||||||
|
constraints of the physical capacity in the system, but can be adjusted
|
||||||
|
with-in reason via the overload option. Artificially unbalancing the
|
||||||
|
partition assignment without respect to capacity can introduce unexpected
|
||||||
|
full devices when a given failure domain does not physically support its
|
||||||
|
share of the used capacity in the tier.
|
||||||
|
|
||||||
|
When partitions need to be moved around (for example if a device is added to
|
||||||
|
the cluster), the ring ensures that a minimum number of partitions are moved
|
||||||
|
at a time, and only one replica of a partition is moved at a time.
|
||||||
|
|
||||||
The ring is used by the Proxy server and several background processes
|
The ring is used by the Proxy server and several background processes
|
||||||
(like replication).
|
(like replication).
|
||||||
|
@ -21,13 +21,16 @@ from os.path import basename, abspath, dirname, exists, join as pathjoin
|
|||||||
from sys import argv as sys_argv, exit, stderr
|
from sys import argv as sys_argv, exit, stderr
|
||||||
from textwrap import wrap
|
from textwrap import wrap
|
||||||
from time import time
|
from time import time
|
||||||
|
import optparse
|
||||||
|
import math
|
||||||
|
|
||||||
from swift.common import exceptions
|
from swift.common import exceptions
|
||||||
from swift.common.ring import RingBuilder, Ring
|
from swift.common.ring import RingBuilder, Ring
|
||||||
from swift.common.ring.builder import MAX_BALANCE
|
from swift.common.ring.builder import MAX_BALANCE
|
||||||
from swift.common.utils import lock_parent_directory
|
from swift.common.utils import lock_parent_directory
|
||||||
from swift.common.ring.utils import parse_search_value, parse_args, \
|
from swift.common.ring.utils import parse_search_value, parse_args, \
|
||||||
build_dev_from_opts, parse_builder_ring_filename_args, find_parts
|
build_dev_from_opts, parse_builder_ring_filename_args, find_parts, \
|
||||||
|
dispersion_report
|
||||||
|
|
||||||
MAJOR_VERSION = 1
|
MAJOR_VERSION = 1
|
||||||
MINOR_VERSION = 3
|
MINOR_VERSION = 3
|
||||||
@ -246,9 +249,9 @@ swift-ring-builder <builder_file>
|
|||||||
if dev is not None])
|
if dev is not None])
|
||||||
balance = builder.get_balance()
|
balance = builder.get_balance()
|
||||||
print '%d partitions, %.6f replicas, %d regions, %d zones, ' \
|
print '%d partitions, %.6f replicas, %d regions, %d zones, ' \
|
||||||
'%d devices, %.02f balance' % (builder.parts, builder.replicas,
|
'%d devices, %.02f balance, %.02f dispersion' % (
|
||||||
regions, zones, dev_count,
|
builder.parts, builder.replicas, regions, zones, dev_count,
|
||||||
balance)
|
balance, builder.dispersion)
|
||||||
print 'The minimum number of hours before a partition can be ' \
|
print 'The minimum number of hours before a partition can be ' \
|
||||||
'reassigned is %s' % builder.min_part_hours
|
'reassigned is %s' % builder.min_part_hours
|
||||||
print 'The overload factor is %.6f' % builder.overload
|
print 'The overload factor is %.6f' % builder.overload
|
||||||
@ -601,13 +604,23 @@ swift-ring-builder <builder_file> remove <search-value> [search-value ...]
|
|||||||
|
|
||||||
def rebalance():
|
def rebalance():
|
||||||
"""
|
"""
|
||||||
swift-ring-builder <builder_file> rebalance <seed>
|
swift-ring-builder <builder_file> rebalance [options]
|
||||||
Attempts to rebalance the ring by reassigning partitions that haven't been
|
Attempts to rebalance the ring by reassigning partitions that haven't been
|
||||||
recently reassigned.
|
recently reassigned.
|
||||||
"""
|
"""
|
||||||
|
usage = Commands.rebalance.__doc__.strip()
|
||||||
|
parser = optparse.OptionParser(usage)
|
||||||
|
parser.add_option('-f', '--force', action='store_true',
|
||||||
|
help='Force a rebalanced ring to save even '
|
||||||
|
'if < 1% of parts changed')
|
||||||
|
parser.add_option('-s', '--seed', help="seed to use for rebalance")
|
||||||
|
options, args = parser.parse_args(argv)
|
||||||
|
|
||||||
def get_seed(index):
|
def get_seed(index):
|
||||||
|
if options.seed:
|
||||||
|
return options.seed
|
||||||
try:
|
try:
|
||||||
return argv[index]
|
return args[index]
|
||||||
except IndexError:
|
except IndexError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -633,7 +646,8 @@ swift-ring-builder <builder_file> rebalance <seed>
|
|||||||
# special value(MAX_BALANCE) until zero weighted device return all
|
# special value(MAX_BALANCE) until zero weighted device return all
|
||||||
# its partitions. So we cannot check balance has changed.
|
# its partitions. So we cannot check balance has changed.
|
||||||
# Thus we need to check balance or last_balance is special value.
|
# Thus we need to check balance or last_balance is special value.
|
||||||
if not devs_changed and abs(last_balance - balance) < 1 and \
|
if not options.force and \
|
||||||
|
not devs_changed and abs(last_balance - balance) < 1 and \
|
||||||
not (last_balance == MAX_BALANCE and balance == MAX_BALANCE):
|
not (last_balance == MAX_BALANCE and balance == MAX_BALANCE):
|
||||||
print 'Cowardly refusing to save rebalance as it did not change ' \
|
print 'Cowardly refusing to save rebalance as it did not change ' \
|
||||||
'at least 1%.'
|
'at least 1%.'
|
||||||
@ -649,10 +663,23 @@ swift-ring-builder <builder_file> rebalance <seed>
|
|||||||
)
|
)
|
||||||
print '-' * 79
|
print '-' * 79
|
||||||
exit(EXIT_ERROR)
|
exit(EXIT_ERROR)
|
||||||
print 'Reassigned %d (%.02f%%) partitions. Balance is now %.02f.' % \
|
print ('Reassigned %d (%.02f%%) partitions. '
|
||||||
(parts, 100.0 * parts / builder.parts, balance)
|
'Balance is now %.02f. '
|
||||||
|
'Dispersion is now %.02f' % (
|
||||||
|
parts, 100.0 * parts / builder.parts,
|
||||||
|
balance,
|
||||||
|
builder.dispersion))
|
||||||
status = EXIT_SUCCESS
|
status = EXIT_SUCCESS
|
||||||
if balance > 5 and balance / 100.0 > builder.overload:
|
if builder.dispersion > 0:
|
||||||
|
print '-' * 79
|
||||||
|
print('NOTE: Dispersion of %.06f indicates some parts are not\n'
|
||||||
|
' optimally dispersed.\n\n'
|
||||||
|
' You may want adjust some device weights, increase\n'
|
||||||
|
' the overload or review the dispersion report.' %
|
||||||
|
builder.dispersion)
|
||||||
|
status = EXIT_WARNING
|
||||||
|
print '-' * 79
|
||||||
|
elif balance > 5 and balance / 100.0 > builder.overload:
|
||||||
print '-' * 79
|
print '-' * 79
|
||||||
print 'NOTE: Balance of %.02f indicates you should push this ' % \
|
print 'NOTE: Balance of %.02f indicates you should push this ' % \
|
||||||
balance
|
balance
|
||||||
@ -668,6 +695,83 @@ swift-ring-builder <builder_file> rebalance <seed>
|
|||||||
builder.save(argv[1])
|
builder.save(argv[1])
|
||||||
exit(status)
|
exit(status)
|
||||||
|
|
||||||
|
def dispersion():
|
||||||
|
"""
|
||||||
|
swift-ring-builder <builder_file> dispersion <search_filter> [options]
|
||||||
|
|
||||||
|
Output report on dispersion.
|
||||||
|
|
||||||
|
--verbose option will display dispersion graph broken down by tier
|
||||||
|
|
||||||
|
You can filter which tiers are evaluated to drill down using a regex
|
||||||
|
in the optional search_filter arguemnt.
|
||||||
|
|
||||||
|
The reports columns are:
|
||||||
|
|
||||||
|
Tier : the name of the tier
|
||||||
|
parts : the total number of partitions with assignment in the tier
|
||||||
|
% : the percentage of parts in the tier with replicas over assigned
|
||||||
|
max : maximum replicas a part should have assigned at the tier
|
||||||
|
0 - N : the number of parts with that many replicas assigned
|
||||||
|
|
||||||
|
e.g.
|
||||||
|
Tier: parts % max 0 1 2 3
|
||||||
|
r1z1 1022 79.45 1 2 210 784 28
|
||||||
|
|
||||||
|
r1z1 has 1022 total parts assigned, 79% of them have more than the
|
||||||
|
recommend max replica count of 1 assigned. Only 2 parts in the ring
|
||||||
|
are *not* assigned in this tier (0 replica count), 210 parts have
|
||||||
|
the recommend replica count of 1, 784 have 2 replicas, and 28 sadly
|
||||||
|
have all three replicas in this tier.
|
||||||
|
"""
|
||||||
|
status = EXIT_SUCCESS
|
||||||
|
if not builder._replica2part2dev:
|
||||||
|
print('Specified builder file \"%s\" is not rebalanced yet. '
|
||||||
|
'Please rebalance first.' % argv[1])
|
||||||
|
exit(EXIT_ERROR)
|
||||||
|
usage = Commands.dispersion.__doc__.strip()
|
||||||
|
parser = optparse.OptionParser(usage)
|
||||||
|
parser.add_option('-v', '--verbose', action='store_true',
|
||||||
|
help='Display dispersion report for tiers')
|
||||||
|
options, args = parser.parse_args(argv)
|
||||||
|
if args[3:]:
|
||||||
|
search_filter = args[3]
|
||||||
|
else:
|
||||||
|
search_filter = None
|
||||||
|
report = dispersion_report(builder, search_filter=search_filter,
|
||||||
|
verbose=options.verbose)
|
||||||
|
print 'Dispersion is %.06f' % builder.dispersion
|
||||||
|
if report['worst_tier']:
|
||||||
|
status = EXIT_WARNING
|
||||||
|
print 'Worst tier is %.06f (%s)' % (report['max_dispersion'],
|
||||||
|
report['worst_tier'])
|
||||||
|
if report['graph']:
|
||||||
|
replica_range = range(int(math.ceil(builder.replicas + 1)))
|
||||||
|
part_count_width = '%%%ds' % max(len(str(builder.parts)), 5)
|
||||||
|
replica_counts_tmpl = ' '.join(part_count_width for i in
|
||||||
|
replica_range)
|
||||||
|
tiers = (tier for tier, _junk in report['graph'])
|
||||||
|
tier_width = max(max(map(len, tiers)), 30)
|
||||||
|
header_line = ('%-' + str(tier_width) +
|
||||||
|
's ' + part_count_width +
|
||||||
|
' %6s %6s ' + replica_counts_tmpl) % tuple(
|
||||||
|
['Tier', 'Parts', '%', 'Max'] + replica_range)
|
||||||
|
underline = '-' * len(header_line)
|
||||||
|
print(underline)
|
||||||
|
print(header_line)
|
||||||
|
print(underline)
|
||||||
|
for tier_name, dispersion in report['graph']:
|
||||||
|
replica_counts_repr = replica_counts_tmpl % tuple(
|
||||||
|
dispersion['replicas'])
|
||||||
|
print ('%-' + str(tier_width) + 's ' + part_count_width +
|
||||||
|
' %6.02f %6d %s') % (tier_name,
|
||||||
|
dispersion['placed_parts'],
|
||||||
|
dispersion['dispersion'],
|
||||||
|
dispersion['max_replicas'],
|
||||||
|
replica_counts_repr,
|
||||||
|
)
|
||||||
|
exit(status)
|
||||||
|
|
||||||
def validate():
|
def validate():
|
||||||
"""
|
"""
|
||||||
swift-ring-builder <builder_file> validate
|
swift-ring-builder <builder_file> validate
|
||||||
|
@ -89,6 +89,9 @@ class RingBuilder(object):
|
|||||||
self._last_part_moves = None
|
self._last_part_moves = None
|
||||||
|
|
||||||
self._last_part_gather_start = 0
|
self._last_part_gather_start = 0
|
||||||
|
|
||||||
|
self._dispersion_graph = {}
|
||||||
|
self.dispersion = 0.0
|
||||||
self._remove_devs = []
|
self._remove_devs = []
|
||||||
self._ring = None
|
self._ring = None
|
||||||
|
|
||||||
@ -143,6 +146,8 @@ class RingBuilder(object):
|
|||||||
self._last_part_moves_epoch = builder['_last_part_moves_epoch']
|
self._last_part_moves_epoch = builder['_last_part_moves_epoch']
|
||||||
self._last_part_moves = builder['_last_part_moves']
|
self._last_part_moves = builder['_last_part_moves']
|
||||||
self._last_part_gather_start = builder['_last_part_gather_start']
|
self._last_part_gather_start = builder['_last_part_gather_start']
|
||||||
|
self._dispersion_graph = builder.get('_dispersion_graph', {})
|
||||||
|
self.dispersion = builder.get('dispersion', 0.0)
|
||||||
self._remove_devs = builder['_remove_devs']
|
self._remove_devs = builder['_remove_devs']
|
||||||
self._ring = None
|
self._ring = None
|
||||||
|
|
||||||
@ -170,6 +175,8 @@ class RingBuilder(object):
|
|||||||
'_last_part_moves_epoch': self._last_part_moves_epoch,
|
'_last_part_moves_epoch': self._last_part_moves_epoch,
|
||||||
'_last_part_moves': self._last_part_moves,
|
'_last_part_moves': self._last_part_moves,
|
||||||
'_last_part_gather_start': self._last_part_gather_start,
|
'_last_part_gather_start': self._last_part_gather_start,
|
||||||
|
'_dispersion_graph': self._dispersion_graph,
|
||||||
|
'dispersion': self.dispersion,
|
||||||
'_remove_devs': self._remove_devs}
|
'_remove_devs': self._remove_devs}
|
||||||
|
|
||||||
def change_min_part_hours(self, min_part_hours):
|
def change_min_part_hours(self, min_part_hours):
|
||||||
@ -349,6 +356,7 @@ class RingBuilder(object):
|
|||||||
if self._last_part_moves_epoch is None:
|
if self._last_part_moves_epoch is None:
|
||||||
self._initial_balance()
|
self._initial_balance()
|
||||||
self.devs_changed = False
|
self.devs_changed = False
|
||||||
|
self._build_dispersion_graph()
|
||||||
return self.parts, self.get_balance()
|
return self.parts, self.get_balance()
|
||||||
changed_parts = 0
|
changed_parts = 0
|
||||||
self._update_last_part_moves()
|
self._update_last_part_moves()
|
||||||
@ -372,12 +380,62 @@ class RingBuilder(object):
|
|||||||
self.devs_changed = False
|
self.devs_changed = False
|
||||||
self.version += 1
|
self.version += 1
|
||||||
|
|
||||||
|
changed_parts = self._build_dispersion_graph(old_replica2part2dev)
|
||||||
|
return changed_parts, balance
|
||||||
|
|
||||||
|
def _build_dispersion_graph(self, old_replica2part2dev=None):
|
||||||
|
"""
|
||||||
|
Build a dict of all tiers in the cluster to a list of the number of
|
||||||
|
parts with a replica count at each index. The values of the dict will
|
||||||
|
be lists of length the maximum whole replica + 1 so that the
|
||||||
|
graph[tier][3] is the number of parts with in the tier with 3 replicas
|
||||||
|
and graph [tier][0] is the number of parts not assigned in this tier.
|
||||||
|
|
||||||
|
i.e.
|
||||||
|
{
|
||||||
|
<tier>: [
|
||||||
|
<number_of_parts_with_0_replicas>,
|
||||||
|
<number_of_parts_with_1_replicas>,
|
||||||
|
...
|
||||||
|
<number_of_parts_with_n_replicas>,
|
||||||
|
],
|
||||||
|
...
|
||||||
|
}
|
||||||
|
|
||||||
|
:param old_replica2part2dev: if called from rebalance, the
|
||||||
|
old_replica2part2dev can be used to count moved moved parts.
|
||||||
|
|
||||||
|
:returns: number of parts with different assignments than
|
||||||
|
old_replica2part2dev if provided
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Since we're going to loop over every replica of every part we'll
|
||||||
|
# also count up changed_parts if old_replica2part2dev is passed in
|
||||||
|
old_replica2part2dev = old_replica2part2dev or []
|
||||||
# Compare the partition allocation before and after the rebalance
|
# Compare the partition allocation before and after the rebalance
|
||||||
# Only changed device ids are taken into account; devices might be
|
# Only changed device ids are taken into account; devices might be
|
||||||
# "touched" during the rebalance, but actually not really moved
|
# "touched" during the rebalance, but actually not really moved
|
||||||
changed_parts = 0
|
changed_parts = 0
|
||||||
for rep_id, _rep in enumerate(self._replica2part2dev):
|
|
||||||
for part_id, new_device in enumerate(_rep):
|
int_replicas = int(math.ceil(self.replicas))
|
||||||
|
max_allowed_replicas = self._build_max_replicas_by_tier()
|
||||||
|
parts_at_risk = 0
|
||||||
|
|
||||||
|
tfd = {}
|
||||||
|
|
||||||
|
dispersion_graph = {}
|
||||||
|
# go over all the devices holding each replica part by part
|
||||||
|
for part_id, dev_ids in enumerate(
|
||||||
|
itertools.izip(*self._replica2part2dev)):
|
||||||
|
# count the number of replicas of this part for each tier of each
|
||||||
|
# device, some devices may have overlapping tiers!
|
||||||
|
replicas_at_tier = defaultdict(int)
|
||||||
|
for rep_id, dev in enumerate(iter(
|
||||||
|
self.devs[dev_id] for dev_id in dev_ids)):
|
||||||
|
if dev['id'] not in tfd:
|
||||||
|
tfd[dev['id']] = tiers_for_dev(dev)
|
||||||
|
for tier in tfd[dev['id']]:
|
||||||
|
replicas_at_tier[tier] += 1
|
||||||
# IndexErrors will be raised if the replicas are increased or
|
# IndexErrors will be raised if the replicas are increased or
|
||||||
# decreased, and that actually means the partition has changed
|
# decreased, and that actually means the partition has changed
|
||||||
try:
|
try:
|
||||||
@ -386,9 +444,25 @@ class RingBuilder(object):
|
|||||||
changed_parts += 1
|
changed_parts += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if old_device != new_device:
|
if old_device != dev['id']:
|
||||||
changed_parts += 1
|
changed_parts += 1
|
||||||
return changed_parts, balance
|
part_at_risk = False
|
||||||
|
# update running totals for each tiers' number of parts with a
|
||||||
|
# given replica count
|
||||||
|
for tier, replicas in replicas_at_tier.items():
|
||||||
|
if tier not in dispersion_graph:
|
||||||
|
dispersion_graph[tier] = [self.parts] + [0] * int_replicas
|
||||||
|
dispersion_graph[tier][0] -= 1
|
||||||
|
dispersion_graph[tier][replicas] += 1
|
||||||
|
if replicas > max_allowed_replicas[tier]:
|
||||||
|
part_at_risk = True
|
||||||
|
# this part may be at risk in multiple tiers, but we only count it
|
||||||
|
# as at_risk once
|
||||||
|
if part_at_risk:
|
||||||
|
parts_at_risk += 1
|
||||||
|
self._dispersion_graph = dispersion_graph
|
||||||
|
self.dispersion = 100.0 * parts_at_risk / self.parts
|
||||||
|
return changed_parts
|
||||||
|
|
||||||
def validate(self, stats=False):
|
def validate(self, stats=False):
|
||||||
"""
|
"""
|
||||||
@ -979,11 +1053,11 @@ class RingBuilder(object):
|
|||||||
if candidates_with_room:
|
if candidates_with_room:
|
||||||
if len(candidates_with_room) > \
|
if len(candidates_with_room) > \
|
||||||
len(candidates_with_replicas):
|
len(candidates_with_replicas):
|
||||||
# There exists at least one tier with room for
|
# There exists at least one tier with room for
|
||||||
# another partition and 0 other replicas already in
|
# another partition and 0 other replicas already
|
||||||
# it, so we can use a faster search. The else
|
# in it, so we can use a faster search. The else
|
||||||
# branch's search would work here, but it's
|
# branch's search would work here, but it's
|
||||||
# significantly slower.
|
# significantly slower.
|
||||||
roomiest_tier = max(
|
roomiest_tier = max(
|
||||||
(t for t in candidates_with_room
|
(t for t in candidates_with_room
|
||||||
if other_replicas[t] == 0),
|
if other_replicas[t] == 0),
|
||||||
@ -1184,7 +1258,7 @@ class RingBuilder(object):
|
|||||||
builder = RingBuilder(1, 1, 1)
|
builder = RingBuilder(1, 1, 1)
|
||||||
builder.copy_from(builder_dict)
|
builder.copy_from(builder_dict)
|
||||||
for dev in builder.devs:
|
for dev in builder.devs:
|
||||||
#really old rings didn't have meta keys
|
# really old rings didn't have meta keys
|
||||||
if dev and 'meta' not in dev:
|
if dev and 'meta' not in dev:
|
||||||
dev['meta'] = ''
|
dev['meta'] = ''
|
||||||
# NOTE(akscram): An old ring builder file don't contain
|
# NOTE(akscram): An old ring builder file don't contain
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
import optparse
|
import optparse
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
def tiers_for_dev(dev):
|
def tiers_for_dev(dev):
|
||||||
@ -331,3 +332,53 @@ def find_parts(builder, argv):
|
|||||||
partition_count.iteritems(), key=itemgetter(1), reverse=True)
|
partition_count.iteritems(), key=itemgetter(1), reverse=True)
|
||||||
|
|
||||||
return sorted_partition_count
|
return sorted_partition_count
|
||||||
|
|
||||||
|
|
||||||
|
def dispersion_report(builder, search_filter=None, verbose=False):
|
||||||
|
if not builder._dispersion_graph:
|
||||||
|
builder._build_dispersion_graph()
|
||||||
|
max_allowed_replicas = builder._build_max_replicas_by_tier()
|
||||||
|
worst_tier = None
|
||||||
|
max_dispersion = 0.0
|
||||||
|
sorted_graph = []
|
||||||
|
for tier, replica_counts in sorted(builder._dispersion_graph.items()):
|
||||||
|
tier_name = get_tier_name(tier, builder)
|
||||||
|
if search_filter and not re.match(search_filter, tier_name):
|
||||||
|
continue
|
||||||
|
max_replicas = int(max_allowed_replicas[tier])
|
||||||
|
at_risk_parts = sum(replica_counts[max_replicas + 1:])
|
||||||
|
placed_parts = sum(replica_counts[1:])
|
||||||
|
tier_dispersion = 100.0 * at_risk_parts / placed_parts
|
||||||
|
if tier_dispersion > max_dispersion:
|
||||||
|
max_dispersion = tier_dispersion
|
||||||
|
worst_tier = tier_name
|
||||||
|
max_dispersion = max(max_dispersion, tier_dispersion)
|
||||||
|
if not verbose:
|
||||||
|
continue
|
||||||
|
|
||||||
|
tier_report = {
|
||||||
|
'max_replicas': max_replicas,
|
||||||
|
'placed_parts': placed_parts,
|
||||||
|
'dispersion': tier_dispersion,
|
||||||
|
'replicas': replica_counts,
|
||||||
|
}
|
||||||
|
sorted_graph.append((tier_name, tier_report))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'max_dispersion': max_dispersion,
|
||||||
|
'worst_tier': worst_tier,
|
||||||
|
'graph': sorted_graph,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def get_tier_name(tier, builder):
|
||||||
|
if len(tier) == 1:
|
||||||
|
return "r%s" % (tier[0], )
|
||||||
|
if len(tier) == 2:
|
||||||
|
return "r%sz%s" % (tier[0], tier[1])
|
||||||
|
if len(tier) == 3:
|
||||||
|
return "r%sz%s-%s" % (tier[0], tier[1], tier[2])
|
||||||
|
if len(tier) == 4:
|
||||||
|
device = builder.devs[tier[3]] or {}
|
||||||
|
return "r%sz%s-%s/%s" % (tier[0], tier[1], tier[2],
|
||||||
|
device.get('device', 'IDd%s' % tier[3]))
|
||||||
|
@ -286,15 +286,26 @@ class TestCommands(unittest.TestCase):
|
|||||||
self.assertEquals(e.code, 2)
|
self.assertEquals(e.code, 2)
|
||||||
|
|
||||||
def test_validate_generic_error(self):
|
def test_validate_generic_error(self):
|
||||||
with mock.patch.object(RingBuilder, 'load',
|
with mock.patch.object(
|
||||||
mock.Mock(side_effect=
|
RingBuilder, 'load', mock.Mock(
|
||||||
IOError('Generic error occurred'))):
|
side_effect=IOError('Generic error occurred'))):
|
||||||
argv = ["", self.tmpfile, "validate"]
|
argv = ["", self.tmpfile, "validate"]
|
||||||
try:
|
try:
|
||||||
swift.cli.ringbuilder.main(argv)
|
swift.cli.ringbuilder.main(argv)
|
||||||
except SystemExit as e:
|
except SystemExit as e:
|
||||||
self.assertEquals(e.code, 2)
|
self.assertEquals(e.code, 2)
|
||||||
|
|
||||||
|
def test_warn_at_risk(self):
|
||||||
|
self.create_sample_ring()
|
||||||
|
ring = RingBuilder.load(self.tmpfile)
|
||||||
|
ring.devs[0]['weight'] = 10
|
||||||
|
ring.save(self.tmpfile)
|
||||||
|
argv = ["", self.tmpfile, "rebalance"]
|
||||||
|
try:
|
||||||
|
swift.cli.ringbuilder.main(argv)
|
||||||
|
except SystemExit as e:
|
||||||
|
self.assertEquals(e.code, 1)
|
||||||
|
|
||||||
|
|
||||||
class TestRebalanceCommand(unittest.TestCase):
|
class TestRebalanceCommand(unittest.TestCase):
|
||||||
|
|
||||||
|
@ -19,7 +19,8 @@ from swift.common import ring
|
|||||||
from swift.common.ring.utils import (build_tier_tree, tiers_for_dev,
|
from swift.common.ring.utils import (build_tier_tree, tiers_for_dev,
|
||||||
parse_search_value, parse_args,
|
parse_search_value, parse_args,
|
||||||
build_dev_from_opts, find_parts,
|
build_dev_from_opts, find_parts,
|
||||||
parse_builder_ring_filename_args)
|
parse_builder_ring_filename_args,
|
||||||
|
dispersion_report)
|
||||||
|
|
||||||
|
|
||||||
class TestUtils(unittest.TestCase):
|
class TestUtils(unittest.TestCase):
|
||||||
@ -188,6 +189,67 @@ class TestUtils(unittest.TestCase):
|
|||||||
3, count, "Partition %d has only %d replicas" %
|
3, count, "Partition %d has only %d replicas" %
|
||||||
(partition, count))
|
(partition, count))
|
||||||
|
|
||||||
|
def test_dispersion_report(self):
|
||||||
|
rb = ring.RingBuilder(8, 3, 0)
|
||||||
|
rb.add_dev({'id': 0, 'region': 1, 'zone': 0, 'weight': 100,
|
||||||
|
'ip': '127.0.0.1', 'port': 10000, 'device': 'sda1'})
|
||||||
|
rb.add_dev({'id': 1, 'region': 1, 'zone': 1, 'weight': 200,
|
||||||
|
'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'})
|
||||||
|
rb.add_dev({'id': 2, 'region': 1, 'zone': 1, 'weight': 200,
|
||||||
|
'ip': '127.0.0.1', 'port': 10002, 'device': 'sda1'})
|
||||||
|
rb.rebalance(seed=10)
|
||||||
|
|
||||||
|
self.assertEqual(rb.dispersion, 39.84375)
|
||||||
|
report = dispersion_report(rb)
|
||||||
|
self.assertEqual(report['worst_tier'], 'r1z1')
|
||||||
|
self.assertEqual(report['max_dispersion'], 39.84375)
|
||||||
|
|
||||||
|
# Each node should store 256 partitions to avoid multiple replicas
|
||||||
|
# 2/5 of total weight * 768 ~= 307 -> 51 partitions on each node in
|
||||||
|
# zone 1 are stored at least twice on the nodes
|
||||||
|
expected = [
|
||||||
|
['r1z1', 2, '0', '154', '102'],
|
||||||
|
['r1z1-127.0.0.1:10001', 1, '205', '51', '0'],
|
||||||
|
['r1z1-127.0.0.1:10001/sda1', 1, '205', '51', '0'],
|
||||||
|
['r1z1-127.0.0.1:10002', 1, '205', '51', '0'],
|
||||||
|
['r1z1-127.0.0.1:10002/sda1', 1, '205', '51', '0']]
|
||||||
|
|
||||||
|
def build_tier_report(max_replicas, placed_parts, dispersion,
|
||||||
|
replicas):
|
||||||
|
return {
|
||||||
|
'max_replicas': max_replicas,
|
||||||
|
'placed_parts': placed_parts,
|
||||||
|
'dispersion': dispersion,
|
||||||
|
'replicas': replicas,
|
||||||
|
}
|
||||||
|
expected = [
|
||||||
|
['r1z1', build_tier_report(
|
||||||
|
2, 256, 39.84375, [0, 0, 154, 102])],
|
||||||
|
['r1z1-127.0.0.1:10001', build_tier_report(
|
||||||
|
1, 256, 19.921875, [0, 205, 51, 0])],
|
||||||
|
['r1z1-127.0.0.1:10001/sda1', build_tier_report(
|
||||||
|
1, 256, 19.921875, [0, 205, 51, 0])],
|
||||||
|
['r1z1-127.0.0.1:10002', build_tier_report(
|
||||||
|
1, 256, 19.921875, [0, 205, 51, 0])],
|
||||||
|
['r1z1-127.0.0.1:10002/sda1', build_tier_report(
|
||||||
|
1, 256, 19.921875, [0, 205, 51, 0])],
|
||||||
|
]
|
||||||
|
report = dispersion_report(rb, 'r1z1.*', verbose=True)
|
||||||
|
graph = report['graph']
|
||||||
|
for i in range(len(expected)):
|
||||||
|
self.assertEqual(expected[i][0], graph[i][0])
|
||||||
|
self.assertEqual(expected[i][1], graph[i][1])
|
||||||
|
|
||||||
|
# overcompensate in r1z0
|
||||||
|
rb.add_dev({'id': 3, 'region': 1, 'zone': 0, 'weight': 500,
|
||||||
|
'ip': '127.0.0.1', 'port': 10003, 'device': 'sda1'})
|
||||||
|
rb.rebalance(seed=10)
|
||||||
|
|
||||||
|
report = dispersion_report(rb)
|
||||||
|
self.assertEqual(rb.dispersion, 40.234375)
|
||||||
|
self.assertEqual(report['worst_tier'], 'r1z0-127.0.0.1:10003')
|
||||||
|
self.assertEqual(report['max_dispersion'], 30.078125)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user