Split strutils into 2 different modules
This patch pulls encoding-related functions out of strutils into their own encodeutils module. We could probably find a better name for strutils now, although it seems short and contextualized enough. Partially-implements blueprint: graduate-oslo-utils Change-Id: Ib76065823c8a1b56020f14cea80b6d73e150aa49
This commit is contained in:
parent
5621114c62
commit
7687a04ea4
89
oslo/utils/encodeutils.py
Normal file
89
oslo/utils/encodeutils.py
Normal file
@ -0,0 +1,89 @@
|
||||
# Copyright 2014 Red Hat, Inc.
|
||||
# All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import sys
|
||||
|
||||
import six
|
||||
|
||||
|
||||
def safe_decode(text, incoming=None, errors='strict'):
    """Decodes incoming text/bytes string using `incoming` if they're not
       already unicode.

    :param text: Text/bytes string to decode
    :param incoming: Text's current encoding. When not given, the
                     encoding of `sys.stdin` is used if available,
                     otherwise `sys.getdefaultencoding()`.
    :param errors: Errors handling policy. See here for valid
        values http://docs.python.org/2/library/codecs.html
    :returns: text or a unicode `incoming` encoded
                representation of it.
    :raises TypeError: If text is not an instance of str
    """
    if not isinstance(text, (six.string_types, six.binary_type)):
        raise TypeError("%s can't be decoded" % type(text))

    # Already unicode: nothing to decode.
    if isinstance(text, six.text_type):
        return text

    if not incoming:
        # sys.stdin can be None (e.g. no console attached) or may have
        # been replaced by a file-like object without an `encoding`
        # attribute, so it must not be dereferenced blindly.
        incoming = (getattr(sys.stdin, 'encoding', None) or
                    sys.getdefaultencoding())

    try:
        return text.decode(incoming, errors)
    except UnicodeDecodeError:
        # Note(flaper87) If we get here, it means that
        # sys.stdin.encoding / sys.getdefaultencoding
        # didn't return a suitable encoding to decode
        # text. This happens mostly when global LANG
        # var is not set correctly and there's no
        # default encoding. In this case, most likely
        # python will use ASCII or ANSI encoders as
        # default encodings but they won't be capable
        # of decoding non-ASCII characters.
        #
        # Also, UTF-8 is being used since it's an ASCII
        # extension.
        return text.decode('utf-8', errors)
|
||||
|
||||
|
||||
def safe_encode(text, incoming=None,
                encoding='utf-8', errors='strict'):
    """Encodes incoming text/bytes string using `encoding`.

    If incoming is not specified, text is expected to be encoded with
    the encoding of `sys.stdin` or, when that is not available, with
    current python's default encoding. (`sys.getdefaultencoding`)

    :param text: Text/bytes string to encode
    :param incoming: Text's current encoding
    :param encoding: Expected encoding for text (Default UTF-8)
    :param errors: Errors handling policy. See here for valid
        values http://docs.python.org/2/library/codecs.html
    :returns: text or a bytestring `encoding` encoded
                representation of it.
    :raises TypeError: If text is not an instance of str
    """
    if not isinstance(text, (six.string_types, six.binary_type)):
        raise TypeError("%s can't be encoded" % type(text))

    if not incoming:
        # sys.stdin can be None (e.g. no console attached) or may have
        # been replaced by a file-like object without an `encoding`
        # attribute, so it must not be dereferenced blindly.
        incoming = (getattr(sys.stdin, 'encoding', None) or
                    sys.getdefaultencoding())

    # Avoid case issues in the comparison below: 'UTF-8' and 'utf-8'
    # name the same codec and must not trigger a re-encoding.
    if hasattr(incoming, 'lower'):
        incoming = incoming.lower()
    if hasattr(encoding, 'lower'):
        encoding = encoding.lower()

    if isinstance(text, six.text_type):
        return text.encode(encoding, errors)
    elif text and encoding != incoming:
        # Decode text before encoding it with `encoding`
        text = safe_decode(text, incoming, errors)
        return text.encode(encoding, errors)
    else:
        # Empty bytes, or bytes already in the requested encoding.
        return text
|
@ -19,11 +19,11 @@ System-level utilities and helper functions.
|
||||
|
||||
import math
|
||||
import re
|
||||
import sys
|
||||
import unicodedata
|
||||
|
||||
import six
|
||||
|
||||
from oslo.utils import encodeutils
|
||||
from oslo.utils.openstack.common.gettextutils import _
|
||||
|
||||
|
||||
@ -97,77 +97,6 @@ def bool_from_string(subject, strict=False, default=False):
|
||||
return default
|
||||
|
||||
|
||||
def safe_decode(text, incoming=None, errors='strict'):
    """Decodes incoming text/bytes string using `incoming` if they're not
       already unicode.

    :param text: Text/bytes string to decode
    :param incoming: Text's current encoding. When not given, the
                     encoding of `sys.stdin` is used if available,
                     otherwise `sys.getdefaultencoding()`.
    :param errors: Errors handling policy. See here for valid
        values http://docs.python.org/2/library/codecs.html
    :returns: text or a unicode `incoming` encoded
                representation of it.
    :raises TypeError: If text is not an instance of str
    """
    if not isinstance(text, (six.string_types, six.binary_type)):
        raise TypeError("%s can't be decoded" % type(text))

    # Already unicode: nothing to decode.
    if isinstance(text, six.text_type):
        return text

    if not incoming:
        # sys.stdin can be None (e.g. no console attached) or may have
        # been replaced by a file-like object without an `encoding`
        # attribute, so it must not be dereferenced blindly.
        incoming = (getattr(sys.stdin, 'encoding', None) or
                    sys.getdefaultencoding())

    try:
        return text.decode(incoming, errors)
    except UnicodeDecodeError:
        # Note(flaper87) If we get here, it means that
        # sys.stdin.encoding / sys.getdefaultencoding
        # didn't return a suitable encoding to decode
        # text. This happens mostly when global LANG
        # var is not set correctly and there's no
        # default encoding. In this case, most likely
        # python will use ASCII or ANSI encoders as
        # default encodings but they won't be capable
        # of decoding non-ASCII characters.
        #
        # Also, UTF-8 is being used since it's an ASCII
        # extension.
        return text.decode('utf-8', errors)
|
||||
|
||||
|
||||
def safe_encode(text, incoming=None,
                encoding='utf-8', errors='strict'):
    """Encodes incoming text/bytes string using `encoding`.

    If incoming is not specified, text is expected to be encoded with
    the encoding of `sys.stdin` or, when that is not available, with
    current python's default encoding. (`sys.getdefaultencoding`)

    :param text: Text/bytes string to encode
    :param incoming: Text's current encoding
    :param encoding: Expected encoding for text (Default UTF-8)
    :param errors: Errors handling policy. See here for valid
        values http://docs.python.org/2/library/codecs.html
    :returns: text or a bytestring `encoding` encoded
                representation of it.
    :raises TypeError: If text is not an instance of str
    """
    if not isinstance(text, (six.string_types, six.binary_type)):
        raise TypeError("%s can't be encoded" % type(text))

    if not incoming:
        # sys.stdin can be None (e.g. no console attached) or may have
        # been replaced by a file-like object without an `encoding`
        # attribute, so it must not be dereferenced blindly.
        incoming = (getattr(sys.stdin, 'encoding', None) or
                    sys.getdefaultencoding())

    # Avoid case issues in the comparison below: 'UTF-8' and 'utf-8'
    # name the same codec and must not trigger a re-encoding.
    if hasattr(incoming, 'lower'):
        incoming = incoming.lower()
    if hasattr(encoding, 'lower'):
        encoding = encoding.lower()

    if isinstance(text, six.text_type):
        return text.encode(encoding, errors)
    elif text and encoding != incoming:
        # Decode text before encoding it with `encoding`
        text = safe_decode(text, incoming, errors)
        return text.encode(encoding, errors)
    else:
        # Empty bytes, or bytes already in the requested encoding.
        return text
|
||||
|
||||
|
||||
def string_to_bytes(text, unit_system='IEC', return_int=False):
|
||||
"""Converts a string into an float representation of bytes.
|
||||
|
||||
@ -229,7 +158,7 @@ def to_slug(value, incoming=None, errors="strict"):
|
||||
:returns: slugified unicode representation of `value`
|
||||
:raises TypeError: If text is not an instance of str
|
||||
"""
|
||||
value = safe_decode(value, incoming, errors)
|
||||
value = encodeutils.safe_decode(value, incoming, errors)
|
||||
# NOTE(aababilov): no need to use safe_(encode|decode) here:
|
||||
# encodings are always "ascii", error handling is always "ignore"
|
||||
# and types are always known (first: unicode; second: str)
|
||||
|
@ -143,48 +143,6 @@ class StrUtilsTest(test_base.BaseTestCase):
|
||||
self.assertEqual(1, strutils.int_from_bool_as_string(True))
|
||||
self.assertEqual(0, strutils.int_from_bool_as_string(False))
|
||||
|
||||
def test_safe_decode(self):
    # Checks strutils.safe_decode against explicit, defaulted and
    # fallback source encodings.
    decode = strutils.safe_decode

    # Anything that is neither text nor bytes is rejected.
    self.assertRaises(TypeError, decode, True)

    unicode_nino = six.u('ni\xf1o')
    utf8_nino = six.b("ni\xc3\xb1o")
    self.assertEqual(unicode_nino, decode(utf8_nino, incoming="utf-8"))

    if six.PY2:
        # In Python 3, bytes.decode() doesn't support anymore
        # bytes => bytes encodings like base64
        self.assertEqual(six.u("test"),
                         decode("dGVzdA==", incoming='base64'))

    self.assertEqual(six.u("strange"),
                     decode(six.b('\x80strange'), errors='ignore'))

    self.assertEqual(six.u('\xc0'),
                     decode(six.b('\xc0'), incoming='iso-8859-1'))

    # Forcing incoming to ascii so it falls back to utf-8
    self.assertEqual(unicode_nino, decode(utf8_nino, incoming='ascii'))

    self.assertEqual(six.u('foo'), decode(b'foo'))
|
||||
|
||||
def test_safe_encode(self):
    # Checks strutils.safe_encode for unicode input, byte input that
    # needs transcoding, and the utf-8 fallback path.
    encode = strutils.safe_encode

    # Anything that is neither text nor bytes is rejected.
    self.assertRaises(TypeError, encode, True)

    utf8_nino = six.b("ni\xc3\xb1o")
    self.assertEqual(utf8_nino,
                     encode(six.u('ni\xf1o'), encoding="utf-8"))

    if six.PY2:
        # In Python 3, str.encode() doesn't support anymore
        # text => text encodings like base64
        self.assertEqual(six.b("dGVzdA==\n"),
                         encode("test", encoding='base64'))

    latin1_nino = six.b('ni\xf1o')
    self.assertEqual(latin1_nino,
                     encode(utf8_nino,
                            encoding="iso-8859-1",
                            incoming="utf-8"))

    # Forcing incoming to ascii so it falls back to utf-8
    self.assertEqual(utf8_nino, encode(utf8_nino, incoming='ascii'))

    self.assertEqual(six.b('foo'), encode(six.u('foo')))
|
||||
|
||||
def test_slugify(self):
|
||||
to_slug = strutils.to_slug
|
||||
self.assertRaises(TypeError, to_slug, True)
|
||||
|
66
tests/tests_encodeutils.py
Normal file
66
tests/tests_encodeutils.py
Normal file
@ -0,0 +1,66 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2014 Red Hat, Inc.
|
||||
# All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from oslotest import base as test_base
|
||||
import six
|
||||
|
||||
from oslo.utils import encodeutils
|
||||
|
||||
|
||||
class EncodeUtilsTest(test_base.BaseTestCase):
    # Unit tests for encodeutils.safe_decode and encodeutils.safe_encode.

    def test_safe_decode(self):
        decode = encodeutils.safe_decode

        # Anything that is neither text nor bytes is rejected.
        self.assertRaises(TypeError, decode, True)

        unicode_nino = six.u('ni\xf1o')
        utf8_nino = six.b("ni\xc3\xb1o")
        self.assertEqual(unicode_nino,
                         decode(utf8_nino, incoming="utf-8"))

        if six.PY2:
            # In Python 3, bytes.decode() doesn't support anymore
            # bytes => bytes encodings like base64
            self.assertEqual(six.u("test"),
                             decode("dGVzdA==", incoming='base64'))

        self.assertEqual(six.u("strange"),
                         decode(six.b('\x80strange'), errors='ignore'))

        self.assertEqual(six.u('\xc0'),
                         decode(six.b('\xc0'), incoming='iso-8859-1'))

        # Forcing incoming to ascii so it falls back to utf-8
        self.assertEqual(unicode_nino,
                         decode(utf8_nino, incoming='ascii'))

        self.assertEqual(six.u('foo'), decode(b'foo'))

    def test_safe_encode(self):
        encode = encodeutils.safe_encode

        # Anything that is neither text nor bytes is rejected.
        self.assertRaises(TypeError, encode, True)

        utf8_nino = six.b("ni\xc3\xb1o")
        self.assertEqual(utf8_nino,
                         encode(six.u('ni\xf1o'), encoding="utf-8"))

        if six.PY2:
            # In Python 3, str.encode() doesn't support anymore
            # text => text encodings like base64
            self.assertEqual(six.b("dGVzdA==\n"),
                             encode("test", encoding='base64'))

        latin1_nino = six.b('ni\xf1o')
        self.assertEqual(latin1_nino,
                         encode(utf8_nino,
                                encoding="iso-8859-1",
                                incoming="utf-8"))

        # Forcing incoming to ascii so it falls back to utf-8
        self.assertEqual(utf8_nino, encode(utf8_nino, incoming='ascii'))

        self.assertEqual(six.b('foo'), encode(six.u('foo')))
|
Loading…
x
Reference in New Issue
Block a user