From 0080337897c5b8ebaba392da36303e7df69262a8 Mon Sep 17 00:00:00 2001 From: Michael Barton Date: Mon, 27 Oct 2014 16:29:07 +0000 Subject: [PATCH] reject problematic object names We had this problem: >> : x = '\xed\xa0\xbc\xed\xbc\xb8' >> : x == x.decode('utf-8').encode('utf-8') << : False That str contains two three-byte sequences, each the UTF-8-style encoding of one half of a UTF-16 surrogate pair (U+D83C, U+DF38), which I guess python is normalizing into one unicode character (U+1F338), which it then encodes as a single four-byte UTF-8 sequence. Like this: >> : u'\ud83c\udf38' << : u'\U0001f338' I don't entirely understand that, but having a different byte representation after round-tripping through unicode causes problems with replication and listings. This patch just rejects anything that doesn't re-encode to the same thing. If someone smarter wants to do something different, please speak up. Change-Id: I9ac48ac2693e4121be6585c6e4f5d0079e9bb3e4 --- swift/common/constraints.py | 3 ++- test/unit/common/test_constraints.py | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/swift/common/constraints.py b/swift/common/constraints.py index eacc6c8d32..a437b32924 100644 --- a/swift/common/constraints.py +++ b/swift/common/constraints.py @@ -306,7 +306,8 @@ def check_utf8(string): if isinstance(string, unicode): string.encode('utf-8') else: - string.decode('UTF-8') + if string.decode('UTF-8').encode('UTF-8') != string: + return False return '\x00' not in string # If string is unicode, decode() will raise UnicodeEncodeError # So, we should catch both UnicodeDecodeError & UnicodeEncodeError diff --git a/test/unit/common/test_constraints.py b/test/unit/common/test_constraints.py index 8f17ec7e4c..d7f332d9b1 100644 --- a/test/unit/common/test_constraints.py +++ b/test/unit/common/test_constraints.py @@ -416,6 +416,10 @@ class TestConstraints(unittest.TestCase): valid_utf8_str]: self.assertTrue(constraints.check_utf8(true_argument)) + def test_check_utf8_non_canonical(self): + self.assertFalse(constraints.check_utf8('\xed\xa0\xbc\xed\xbc\xb8')) + 
self.assertFalse(constraints.check_utf8('\xed\xa0\xbd\xed\xb9\x88')) + def test_validate_bad_meta(self): req = Request.blank( '/v/a/c/o',