Add encodeutils.to_utf8() function
The function replaces a very common pattern in code bases supporting both Python 2 and Python 3: "if isinstance(text, six.text_type): text = text.encode('utf-8')". to_utf8() accepts subtypes of bytes and six.text_type. For example, oslo.i18n Message objects are accepted and encoded to UTF-8 as expected. Using encodeutils.safe_encode(text) is not reliable because it relies on the current locale encoding, which can be ASCII, whereas UTF-8 is expected. Having to write encodeutils.safe_encode(text, incoming='utf-8') is not obvious and is error-prone (it's easy to forget the incoming parameter). Change-Id: I00463716b6012cbef383855999f63f99f2f52540
This commit is contained in:
parent
68d5569854
commit
de84b5ba03
@ -71,6 +71,9 @@ def safe_encode(text, incoming=None,
|
||||
:returns: text or a bytestring `encoding` encoded
|
||||
representation of it.
|
||||
:raises TypeError: If text is not an instance of str
|
||||
|
||||
See also to_utf8() function which is simpler and doesn't depend on
|
||||
the locale encoding.
|
||||
"""
|
||||
if not isinstance(text, (six.string_types, six.binary_type)):
|
||||
raise TypeError("%s can't be encoded" % type(text))
|
||||
@ -95,6 +98,22 @@ def safe_encode(text, incoming=None,
|
||||
return text
|
||||
|
||||
|
||||
def to_utf8(text):
    """Return *text* as a UTF-8 encoded byte string.

    Byte strings (and instances of ``bytes`` subclasses) are handed back
    untouched; Unicode strings (and instances of ``six.text_type``
    subclasses) are encoded with the UTF-8 codec.

    :param text: the bytes or Unicode string to convert
    :returns: ``text`` itself when it is a bytes string, otherwise its
              UTF-8 encoding
    :raises TypeError: If text is neither a bytes string nor a Unicode
                       string

    .. versionadded:: 3.5
    """
    # Bytes are checked first so that they pass through without any
    # decoding or re-encoding step.
    if isinstance(text, bytes):
        return text

    if isinstance(text, six.text_type):
        return text.encode('utf-8')

    # Anything else is rejected; report the offending type by name.
    type_name = type(text).__name__
    raise TypeError("bytes or Unicode expected, got %s" % type_name)
|
||||
|
||||
|
||||
def exception_to_unicode(exc):
|
||||
"""Get the message of an exception as a Unicode string.
|
||||
|
||||
|
@ -108,6 +108,20 @@ class EncodeUtilsTest(test_base.BaseTestCase):
|
||||
self.assertNotEqual(text, result)
|
||||
self.assertNotEqual(six.b("foo\xf1bar"), result)
|
||||
|
||||
def test_to_utf8(self):
    """Check to_utf8() on bytes, Unicode, invalid and Message inputs."""
    # A bytes string must be returned unchanged.
    raw_bytes = b'a\xe9\xff'
    self.assertEqual(encodeutils.to_utf8(raw_bytes), b'a\xe9\xff')

    # A Unicode string must be encoded to UTF-8.
    encoded = encodeutils.to_utf8(u'a\xe9\xff\u20ac')
    self.assertEqual(encoded, b'a\xc3\xa9\xc3\xbf\xe2\x82\xac')

    # Anything that is neither bytes nor Unicode is rejected.
    self.assertRaises(TypeError, encodeutils.to_utf8, 123)

    # oslo.i18n Message objects should also be accepted for convenience.
    # It works because Message is a subclass of six.text_type. The lazy
    # translation is used to obtain a Message instance of oslo_i18n.
    translation = oslo_i18n.fixture.Translation()
    lazy_message = translation.lazy("test")
    self.assertEqual(encodeutils.to_utf8(lazy_message), b'test')
|
||||
|
||||
|
||||
class ExceptionToUnicodeTest(test_base.BaseTestCase):
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user