# Patch summary (preamble text; everything from the first "diff --git" header onward is unchanged).
#
# swift/common/constraints.py, check_utf8(), non-unicode branch:
#   - The decoded value is now kept in a local `decoded` instead of being
#     decoded and re-encoded in a single expression.
#   - The existing round-trip test (decoded.encode('UTF-8') != string -> False)
#     is preserved.
#   - New: return False when any element of `decoded` has an ordinal in the
#     range 0xD800..0xDFFF (the surrogate range).
#   - The trailing "'\x00' not in string" result is untouched.
#
# test/unit/common/test_constraints.py, TestConstraints:
#   - New test_check_utf8_lone_surrogates asserts check_utf8() is False for
#     the three-byte inputs '\xed\xa0\xbc' and '\xed\xb9\x88'.
#
# NOTE(review): the new check iterates `decoded` element by element. On a
# narrow (UCS-2) Python 2 build a valid 4-byte UTF-8 sequence presumably
# decodes to a surrogate *pair*, which this check would then reject --
# confirm which interpreter builds are supported and consider adding a
# positive test for a non-BMP character.
diff --git a/swift/common/constraints.py b/swift/common/constraints.py index a437b32924..a2fed3dd91 100644 --- a/swift/common/constraints.py +++ b/swift/common/constraints.py @@ -306,7 +306,12 @@ def check_utf8(string): if isinstance(string, unicode): string.encode('utf-8') else: - if string.decode('UTF-8').encode('UTF-8') != string: + decoded = string.decode('UTF-8') + if decoded.encode('UTF-8') != string: + return False + # A UTF-8 string with surrogates in it is invalid. + if any(0xD800 <= ord(codepoint) <= 0xDFFF + for codepoint in decoded): return False return '\x00' not in string # If string is unicode, decode() will raise UnicodeEncodeError diff --git a/test/unit/common/test_constraints.py b/test/unit/common/test_constraints.py index d7f332d9b1..b16c5d9df3 100644 --- a/test/unit/common/test_constraints.py +++ b/test/unit/common/test_constraints.py @@ -420,6 +420,10 @@ class TestConstraints(unittest.TestCase): self.assertFalse(constraints.check_utf8('\xed\xa0\xbc\xed\xbc\xb8')) self.assertFalse(constraints.check_utf8('\xed\xa0\xbd\xed\xb9\x88')) + def test_check_utf8_lone_surrogates(self): + self.assertFalse(constraints.check_utf8('\xed\xa0\xbc')) + self.assertFalse(constraints.check_utf8('\xed\xb9\x88')) + def test_validate_bad_meta(self): req = Request.blank( '/v/a/c/o',