diff --git a/swift/common/ring/builder.py b/swift/common/ring/builder.py
index 43b9288a48..325f6f723c 100644
--- a/swift/common/ring/builder.py
+++ b/swift/common/ring/builder.py
@@ -646,23 +646,21 @@ class RingBuilder(object):
 
     def _get_available_parts(self):
         """
-        Returns a tuple (wanted_parts_total, dict of (tier: available parts in
-        other tiers) for all tiers in the ring.
+        Returns a dict of (tier: available parts in other tiers) for all tiers
+        in the ring.
 
         Devices that have too much partitions (negative parts_wanted) are
         ignored, otherwise the sum of all parts_wanted is 0 +/- rounding
         errors.
-
        """
-        wanted_parts_total = 0
         wanted_parts_for_tier = {}
         for dev in self._iter_devs():
-            wanted_parts_total += max(0, dev['parts_wanted'])
+            pw = max(0, dev['parts_wanted'])
             for tier in tiers_for_dev(dev):
                 if tier not in wanted_parts_for_tier:
                     wanted_parts_for_tier[tier] = 0
-                wanted_parts_for_tier[tier] += max(0, dev['parts_wanted'])
-        return (wanted_parts_total, wanted_parts_for_tier)
+                wanted_parts_for_tier[tier] += pw
+        return wanted_parts_for_tier
 
     def _gather_reassign_parts(self):
         """
@@ -674,6 +672,22 @@ class RingBuilder(object):
         # as a hot-spot).
         tfd = {}
 
+        tiers_by_len = defaultdict(set)
+        for dev in self._iter_devs():
+            tiers = tiers_for_dev(dev)
+            tfd[dev['id']] = tiers
+            for tier in tiers:
+                tiers_by_len[len(tier)].add(tier)
+
+        tiers_by_len = dict((length, list(tiers))
+                            for length, tiers in tiers_by_len.items())
+
+        sibling_tiers = {}
+        for length, tiers in tiers_by_len.items():
+            for i, tier in enumerate(tiers):
+                sibling_tiers[tier] = [t for t in (tiers[:i] + tiers[(i + 1):])
+                                       if t[:-1] == tier[:-1]]
+
         # First we gather partitions from removed devices. Since removed
         # devices usually indicate device failures, we have no choice but to
         # reassign these partitions. However, we mark them as moved so later
@@ -692,8 +706,7 @@ class RingBuilder(object):
         # currently sufficient spread out across the cluster.
         spread_out_parts = defaultdict(list)
         max_allowed_replicas = self._build_max_replicas_by_tier()
-        wanted_parts_total, wanted_parts_for_tier = \
-            self._get_available_parts()
+        wanted_parts_for_tier = self._get_available_parts()
         moved_parts = 0
         for part in xrange(self.parts):
             # Only move one replica at a time if possible.
@@ -706,8 +719,6 @@
             # revealed the lambda invocation as a significant cost.
             replicas_at_tier = {}
             for dev in self._devs_for_part(part):
-                if dev['id'] not in tfd:
-                    tfd[dev['id']] = tiers_for_dev(dev)
                 for tier in tfd[dev['id']]:
                     if tier not in replicas_at_tier:
                         replicas_at_tier[tier] = 1
@@ -719,16 +730,42 @@
             for replica in self._replicas_for_part(part):
                 dev = self.devs[self._replica2part2dev[replica][part]]
                 removed_replica = False
-                if dev['id'] not in tfd:
-                    tfd[dev['id']] = tiers_for_dev(dev)
                 for tier in tfd[dev['id']]:
-                    rep_at_tier = 0
-                    if tier in replicas_at_tier:
-                        rep_at_tier = replicas_at_tier[tier]
-                    # Only allowing parts to be gathered if
-                    # there are wanted parts on other tiers
-                    available_parts_for_tier = wanted_parts_total - \
-                        wanted_parts_for_tier[tier] - moved_parts
+                    rep_at_tier = replicas_at_tier.get(tier, 0)
+
+                    # If this tier's not overcrowded, there's nothing to
+                    # gather, so we can avoid some calculation here as an
+                    # optimization.
+                    if rep_at_tier <= max_allowed_replicas[tier]:
+                        continue
+
+                    available_parts_for_tier = sum(
+                        wanted_parts_for_tier[t]
+                        for t in sibling_tiers[tier]
+                        # If a sibling tier is "full" with respect to
+                        # partition dispersion, but not "full" with respect
+                        # to parts_wanted, we don't count it as a possible
+                        # destination.
+                        #
+                        # Otherwise, we gather a partition from tier X
+                        # (because its replicas are not spread out), and
+                        # then we may place it right back in tier X or in
+                        # another tier that already has replicas (because
+                        # that tier has parts_wanted). Then, on the next
+                        # rebalance, it'll happen again, and then again...
+                        #
+                        # Worse yet, this "dancing replica" immobilizes
+                        # other replicas of the partition that want to move
+                        # because they're on devices with negative
+                        # parts_wanted. This can lead to a replica that
+                        # sticks to a zero-weight device no matter how often
+                        # the ring is rebalanced.
+                        if (max_allowed_replicas[t] >
+                            replicas_at_tier.get(t, 0))
+                    ) - moved_parts
+
+                    # Only allow a part to be gathered if there are wanted
+                    # parts on other tiers.
                     if (rep_at_tier > max_allowed_replicas[tier] and
                             self._last_part_moves[part] >=
                             self.min_part_hours and
@@ -741,8 +778,6 @@
                             moved_parts += 1
                             break
                 if removed_replica:
-                    if dev['id'] not in tfd:
-                        tfd[dev['id']] = tiers_for_dev(dev)
                     for tier in tfd[dev['id']]:
                         replicas_at_tier[tier] -= 1
 
diff --git a/test/unit/common/ring/test_builder.py b/test/unit/common/ring/test_builder.py
index ef524f0a93..309abf032c 100644
--- a/test/unit/common/ring/test_builder.py
+++ b/test/unit/common/ring/test_builder.py
@@ -19,6 +19,7 @@ import operator
 import os
 import unittest
 import cPickle as pickle
+from array import array
 from collections import defaultdict
 from math import ceil
 from tempfile import mkdtemp
@@ -656,6 +657,58 @@ class TestRingBuilder(unittest.TestCase):
 
         rb.rebalance()
 
+    def test_remove_last_partition_from_zero_weight(self):
+        rb = ring.RingBuilder(4, 3, 1)
+        rb.add_dev({'id': 0, 'region': 0, 'zone': 1, 'weight': 1.0,
+                    'ip': '127.0.0.1', 'port': 10000, 'device': 'sda'})
+        rb.add_dev({'id': 1, 'region': 0, 'zone': 2, 'weight': 2.0,
+                    'ip': '127.0.0.2', 'port': 10000, 'device': 'sda'})
+        rb.add_dev({'id': 2, 'region': 0, 'zone': 3, 'weight': 3.0,
+                    'ip': '127.0.0.3', 'port': 10000, 'device': 'sda'})
+
+        rb.add_dev({'id': 3, 'region': 0, 'zone': 3, 'weight': 0.5,
+                    'ip': '127.0.0.3', 'port': 10001, 'device': 'zero'})
+
+        zero_weight_dev = 3
+
+        rb.rebalance()
+
+        # We want at least one partition with replicas only in zone 2 and 3
+        # due to device weights. It would *like* to spread out into zone 1,
+        # but can't, due to device weight.
+        #
+        # Also, we want such a partition to have a replica on device 3,
+        # which we will then reduce to zero weight. This should cause the
+        # removal of the replica from device 3.
+        #
+        # Getting this to happen by chance is hard, so let's just set up a
+        # builder so that it's in the state we want. This is a synthetic
+        # example; while the bug has happened on a real cluster, that
+        # builder file had a part_power of 16, so its contents are much too
+        # big to include here.
+        rb._replica2part2dev = [
+            #                               these are the relevant ones
+            #                                   |  |  |  |
+            #                                   v  v  v  v
+            array('H', [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]),
+            array('H', [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2]),
+            array('H', [0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 2, 2, 2, 2])]
+
+        rb.set_dev_weight(zero_weight_dev, 0.0)
+        rb.pretend_min_part_hours_passed()
+        rb.rebalance(seed=1)
+
+        node_counts = defaultdict(int)
+        for part2dev_id in rb._replica2part2dev:
+            for dev_id in part2dev_id:
+                node_counts[dev_id] += 1
+        self.assertEqual(node_counts[zero_weight_dev], 0)
+
+        # it's as balanced as it gets, so nothing moves anymore
+        rb.pretend_min_part_hours_passed()
+        parts_moved, _balance = rb.rebalance(seed=1)
+        self.assertEqual(parts_moved, 0)
+
     def test_region_fullness_with_balanceable_ring(self):
         rb = ring.RingBuilder(8, 3, 1)
         rb.add_dev({'id': 0, 'region': 0, 'zone': 0, 'weight': 1,
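
A note for reviewers: the sibling-tier bookkeeping and the new availability check are easier to follow in isolation than inside _gather_reassign_parts(). The following is a minimal standalone sketch, not Swift's real API: the simplified tiers_for_dev(), the four-device toy ring, and all per-tier numbers are invented for illustration; only the grouping and filtering logic mirrors the patch.

    from collections import defaultdict


    def tiers_for_dev(dev):
        # Simplified stand-in for swift.common.ring.utils.tiers_for_dev():
        # each device sits at the bottom of a chain of nested tiers.
        return ((dev['region'],),
                (dev['region'], dev['zone']),
                (dev['region'], dev['zone'], dev['id']))


    # Toy ring: one region, three zones, two devices in zone 3,
    # mirroring the test above.
    devs = [{'id': 0, 'region': 0, 'zone': 1},
            {'id': 1, 'region': 0, 'zone': 2},
            {'id': 2, 'region': 0, 'zone': 3},
            {'id': 3, 'region': 0, 'zone': 3}]

    # Group tiers by depth, then record each tier's siblings: the tiers
    # at the same depth that share the same parent tier.
    tiers_by_len = defaultdict(set)
    for dev in devs:
        for tier in tiers_for_dev(dev):
            tiers_by_len[len(tier)].add(tier)

    sibling_tiers = {}
    for length, tiers in ((l, sorted(ts)) for l, ts in tiers_by_len.items()):
        for i, tier in enumerate(tiers):
            sibling_tiers[tier] = [t for t in (tiers[:i] + tiers[(i + 1):])
                                   if t[:-1] == tier[:-1]]

    print(sibling_tiers[(0, 3)])    # -> [(0, 1), (0, 2)]

    # Hypothetical state for one partition whose replicas sit on devices
    # 1, 2 and 3 (zones 2, 3, 3), so the zone tier (0, 3) is overcrowded.
    max_allowed_replicas = {(0, 1): 1, (0, 2): 1, (0, 3): 1}
    replicas_at_tier = {(0, 2): 1, (0, 3): 2}
    wanted_parts_for_tier = {(0, 1): 0, (0, 2): 5, (0, 3): 4}
    moved_parts = 0

    tier = (0, 3)
    available_parts_for_tier = sum(
        wanted_parts_for_tier[t]
        for t in sibling_tiers[tier]
        # count a sibling only if it can still take another replica of
        # this partition without itself becoming overcrowded
        if max_allowed_replicas[t] > replicas_at_tier.get(t, 0)
    ) - moved_parts

    print(available_parts_for_tier)  # -> 0, so the replica stays put

Zone 2 has parts_wanted but already holds a replica, and zone 1 may take a replica but wants no partitions, so nothing counts as a destination. The pre-patch formula, wanted_parts_total - wanted_parts_for_tier[tier] - moved_parts, would have yielded 9 - 4 - 0 = 5 here and gathered the replica anyway, only for placement to drop it back into zone 2 or 3: exactly the "dancing replica" loop the inline comment describes.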
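The arrow annotation in the synthetic _replica2part2dev table is also worth spelling out. A throwaway check (the zone map below is copied by hand from the devices the test adds; it is not part of the test) confirms that the marked columns, partitions 8 through 11, are exactly the ones with a replica on device 3 and no replica in zone 1:

    from array import array

    # device id -> zone, per the rb.add_dev() calls in the test
    zone_of_dev = {0: 1, 1: 2, 2: 3, 3: 3}

    replica2part2dev = [
        array('H', [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]),
        array('H', [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2]),
        array('H', [0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 2, 2, 2, 2])]

    for part in range(16):
        devs_for_part = [row[part] for row in replica2part2dev]
        zones = set(zone_of_dev[d] for d in devs_for_part)
        if 3 in devs_for_part and 1 not in zones:
            print("part %d: devices %r, zones %r"
                  % (part, devs_for_part, sorted(zones)))

This prints partitions 8, 9, 10 and 11, each with replicas on devices [2, 1, 3], i.e. in zones 2 and 3 only. Those are the partitions that must shed their device-3 replica once its weight drops to zero, which is what the assertion on node_counts[zero_weight_dev] verifies.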