Add exception_to_unicode() function
There is no simple way working on Python 2 and Python 3 to get the message of an exception as a Unicode string. This new function uses a heuristic to get the encoding of the exception message. It tries UTF-8 (which is a superset of ASCII), then the locale encoding, or falls back to ISO-8859-1 (which never fails). This function is required to log exceptions using the logging module when the exception message contains non-ASCII characters. - logging.log() only works with non-ASCII characters on Python 2 and Python 3 if the message is formatted as Unicode. For example, logging.log(b'error: %s', b'\xe9') fails in most cases. - unicode % bytes doesn't work with non-ASCII characters - logging.log(u'error: %s', exc) doesn't work if the exception message contains a non-ASCII character Only logging.log(u'error: %s', exception_to_unicode(exc)) works in all cases: ASCII or non-ASCII exception message, Python 2 and Python 3. Co-Authored-By: Joshua Harlow <harlowja@gmail.com> Change-Id: I241b7c81c7ae3d26f81790e9180678dc9af81e22
This commit is contained in:
parent
fdbb15be10
commit
7f2406874e
@ -93,3 +93,59 @@ def safe_encode(text, incoming=None,
|
||||
return text.encode(encoding, errors)
|
||||
else:
|
||||
return text
|
||||
|
||||
|
||||
def exception_to_unicode(exc):
    """Get the message of an exception as a Unicode string.

    On Python 3, the exception message is always a Unicode string. On
    Python 2, the exception message is a bytes string *most* of the time.

    If the exception message is a bytes string, try to decode it from UTF-8
    (superset of ASCII), then from the encoding reported by
    sys.getfilesystemencoding() (used here as the locale encoding), and
    finally fall back to decoding it from ISO-8859-1 (which never fails).

    :param exc: the exception whose message is wanted
    :returns: the exception message as a Unicode string
    """
    msg = None
    if six.PY2:
        # Don't call directly unicode(exc), because it fails with
        # UnicodeDecodeError on Python 2 if exc.__unicode__() returns a bytes
        # string not decodable from the default encoding (ASCII)
        try:
            msg = exc.__unicode__()
        except UnicodeError:
            pass

    if msg is None:
        # Don't call directly str(exc), because it fails with
        # UnicodeEncodeError on Python 2 if exc.__str__() returns a Unicode
        # string not encodable to the default encoding (ASCII)
        msg = exc.__str__()

    if isinstance(msg, six.text_type):
        # This should be the default path on Python 3 and an *optional* path
        # on Python 2 (if for some reason the exception message was already
        # in unicode instead of the more typical bytes string); so avoid
        # further converting to unicode in both of these cases.
        return msg

    try:
        # Try to decode from UTF-8 (superset of ASCII). The decoder fails
        # if the string is not a valid UTF-8 string: the UTF-8 codec includes
        # a validation algorithm to ensure the consistency of the codec.
        return msg.decode('utf-8')
    except UnicodeDecodeError:
        pass

    # Try the locale encoding, most error messages are encoded to this encoding
    # (ex: os.strerror(errno))
    encoding = sys.getfilesystemencoding()
    # sys.getfilesystemencoding() may return None on Python 2, and
    # msg.decode(None) would raise TypeError: skip this step in that case.
    if encoding:
        try:
            return msg.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            # LookupError: the reported encoding name has no registered
            # codec. Keep going so the last-resort fallback still applies.
            pass

    # The encoding is not ASCII, not UTF-8, nor the locale encoding. Fallback
    # to the ISO-8859-1 encoding which never fails. It will produce mojibake
    # if the message is not encoded to ISO-8859-1, but we don't want a super
    # complex heuristic to get the encoding of an exception message.
    return msg.decode('latin1')
|
||||
|
@ -18,6 +18,7 @@
|
||||
import mock
|
||||
from oslotest import base as test_base
|
||||
import six
|
||||
import testtools
|
||||
|
||||
from oslo_utils import encodeutils
|
||||
|
||||
@ -103,3 +104,130 @@ class EncodeUtilsTest(test_base.BaseTestCase):
|
||||
text=text, incoming='utf-8', encoding='iso-8859-1')
|
||||
self.assertNotEqual(text, result)
|
||||
self.assertNotEqual(six.b("foo\xf1bar"), result)
|
||||
|
||||
|
||||
class ExceptionToUnicodeTest(test_base.BaseTestCase):
    """Checks for encodeutils.exception_to_unicode()."""

    def test_str_exception(self):
        # A plain Exception cannot be used for these checks: on Python 2,
        # calling __str__() on an Exception built from a non-ASCII unicode
        # message raises a UnicodeEncodeError.
        class StrException(Exception):
            def __init__(self, value):
                Exception.__init__(self)
                self.value = value

            def __str__(self):
                return self.value

        # On Python 3, an exception whose __str__() method returns bytes
        # (like StrException(bytes)) is probably a bug, but supporting this
        # silly case in exception_to_unicode() was not any harder.

        # Bytes messages: plain ASCII first, then UTF-8.
        bytes_cases = (
            (b'bytes ascii', u'bytes ascii'),
            (b'utf-8 \xc3\xa9\xe2\x82\xac', u'utf-8 \xe9\u20ac'),
        )
        for raw, expected in bytes_cases:
            error = StrException(raw)
            self.assertEqual(encodeutils.exception_to_unicode(error),
                             expected)

        # Pretend the locale encoding is ASCII so that the last-resort
        # ISO-8859-1 decoding is the one producing the result.
        with mock.patch('sys.getfilesystemencoding', return_value='ascii'):
            error = StrException(b'rawbytes \x80\xff')
            self.assertEqual(encodeutils.exception_to_unicode(error),
                             u'rawbytes \x80\xff')

        # Messages that are already unicode must come back unchanged.
        for text in (u'unicode ascii', u'unicode \xe9\u20ac'):
            error = StrException(text)
            self.assertEqual(encodeutils.exception_to_unicode(error),
                             text)

        # Locale encoding: these bytes are neither ASCII nor valid UTF-8,
        # so they have to be decoded with the (patched) locale encoding.
        with mock.patch('sys.getfilesystemencoding', return_value='koi8_r'):
            error = StrException(b'\xf2\xd5\xd3\xd3\xcb\xc9\xca')
            self.assertEqual(encodeutils.exception_to_unicode(error),
                             u'\u0420\u0443\u0441\u0441\u043a\u0438\u0439')

    @testtools.skipIf(six.PY3, 'test specific to Python 2')
    def test_unicode_exception(self):
        # The exception only defines __unicode__(); __str__() is inherited.
        class UnicodeException(Exception):
            def __init__(self, value):
                Exception.__init__(self)
                self.value = value

            def __unicode__(self):
                return self.value

        cases = (
            # __unicode__() returns unicode
            (u'unicode \xe9\u20ac', u'unicode \xe9\u20ac'),
            # __unicode__() returns bytes (does this case really happen in
            # the wild?)
            (b'utf-8 \xc3\xa9\xe2\x82\xac', u'utf-8 \xe9\u20ac'),
        )
        for value, expected in cases:
            error = UnicodeException(value)
            self.assertEqual(encodeutils.exception_to_unicode(error),
                             expected)

    @testtools.skipIf(six.PY3, 'test specific to Python 2')
    def test_unicode_or_str_exception(self):
        # The exception defines both __str__() and __unicode__().
        class UnicodeOrStrException(Exception):
            def __init__(self, unicode_value, str_value):
                Exception.__init__(self)
                self.unicode_value = unicode_value
                self.str_value = str_value

            def __unicode__(self):
                return self.unicode_value

            def __str__(self):
                return self.str_value

        cases = (
            # __unicode__() returns unicode
            (u'unicode \xe9\u20ac', u'unicode \xe9\u20ac'),
            # __unicode__() returns bytes (does this case really happen in
            # the wild?)
            (b'utf-8 \xc3\xa9\xe2\x82\xac', u'utf-8 \xe9\u20ac'),
        )
        for unicode_value, expected in cases:
            error = UnicodeOrStrException(unicode_value, b'str')
            self.assertEqual(encodeutils.exception_to_unicode(error),
                             expected)

    @testtools.skipIf(six.PY3, 'test specific to Python 2')
    def test_unicode_only_exception(self):
        # The exception defines __unicode__() while its __str__() always
        # raises (similar to the Message class of oslo_i18n).
        class UnicodeOnlyException(Exception):
            def __init__(self, value):
                Exception.__init__(self)
                self.value = value

            def __unicode__(self):
                return self.value

            def __str__(self):
                raise UnicodeError("use unicode()")

        cases = (
            # __unicode__() returns unicode
            (u'unicode \xe9\u20ac', u'unicode \xe9\u20ac'),
            # __unicode__() returns bytes
            (b'utf-8 \xc3\xa9\xe2\x82\xac', u'utf-8 \xe9\u20ac'),
        )
        for value, expected in cases:
            error = UnicodeOnlyException(value)
            self.assertEqual(encodeutils.exception_to_unicode(error),
                             expected)
|
||||
|
Loading…
x
Reference in New Issue
Block a user