Split strutils into 2 different modules

This patch pulls encoding related functions out of strutils into its own
encodeutils module. We could probably find a better name for strutils
now, although it seems short and contextualized enough.

Partially-implements blueprint: graduate-oslo-utils

Change-Id: Ib76065823c8a1b56020f14cea80b6d73e150aa49
This commit is contained in:
Flavio Percoco 2014-07-11 19:38:42 +02:00
parent 5621114c62
commit 7687a04ea4
4 changed files with 157 additions and 115 deletions

89
oslo/utils/encodeutils.py Normal file
View File

@ -0,0 +1,89 @@
# Copyright 2014 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import sys
import six
def safe_decode(text, incoming=None, errors='strict'):
"""Decodes incoming text/bytes string using `incoming` if they're not
already unicode.
:param incoming: Text's current encoding
:param errors: Errors handling policy. See here for valid
values http://docs.python.org/2/library/codecs.html
:returns: text or a unicode `incoming` encoded
representation of it.
:raises TypeError: If text is not an instance of str
"""
if not isinstance(text, (six.string_types, six.binary_type)):
raise TypeError("%s can't be decoded" % type(text))
if isinstance(text, six.text_type):
return text
if not incoming:
incoming = (sys.stdin.encoding or
sys.getdefaultencoding())
try:
return text.decode(incoming, errors)
except UnicodeDecodeError:
# Note(flaper87) If we get here, it means that
# sys.stdin.encoding / sys.getdefaultencoding
# didn't return a suitable encoding to decode
# text. This happens mostly when global LANG
# var is not set correctly and there's no
# default encoding. In this case, most likely
# python will use ASCII or ANSI encoders as
# default encodings but they won't be capable
# of decoding non-ASCII characters.
#
# Also, UTF-8 is being used since it's an ASCII
# extension.
return text.decode('utf-8', errors)
def safe_encode(text, incoming=None,
encoding='utf-8', errors='strict'):
"""Encodes incoming text/bytes string using `encoding`.
If incoming is not specified, text is expected to be encoded with
current python's default encoding. (`sys.getdefaultencoding`)
:param incoming: Text's current encoding
:param encoding: Expected encoding for text (Default UTF-8)
:param errors: Errors handling policy. See here for valid
values http://docs.python.org/2/library/codecs.html
:returns: text or a bytestring `encoding` encoded
representation of it.
:raises TypeError: If text is not an instance of str
"""
if not isinstance(text, (six.string_types, six.binary_type)):
raise TypeError("%s can't be encoded" % type(text))
if not incoming:
incoming = (sys.stdin.encoding or
sys.getdefaultencoding())
if isinstance(text, six.text_type):
return text.encode(encoding, errors)
elif text and encoding != incoming:
# Decode text before encoding it with `encoding`
text = safe_decode(text, incoming, errors)
return text.encode(encoding, errors)
else:
return text

View File

@ -19,11 +19,11 @@ System-level utilities and helper functions.
import math
import re
import sys
import unicodedata
import six
from oslo.utils import encodeutils
from oslo.utils.openstack.common.gettextutils import _
@ -97,77 +97,6 @@ def bool_from_string(subject, strict=False, default=False):
return default
def safe_decode(text, incoming=None, errors='strict'):
"""Decodes incoming text/bytes string using `incoming` if they're not
already unicode.
:param incoming: Text's current encoding
:param errors: Errors handling policy. See here for valid
values http://docs.python.org/2/library/codecs.html
:returns: text or a unicode `incoming` encoded
representation of it.
:raises TypeError: If text is not an instance of str
"""
if not isinstance(text, (six.string_types, six.binary_type)):
raise TypeError("%s can't be decoded" % type(text))
if isinstance(text, six.text_type):
return text
if not incoming:
incoming = (sys.stdin.encoding or
sys.getdefaultencoding())
try:
return text.decode(incoming, errors)
except UnicodeDecodeError:
# Note(flaper87) If we get here, it means that
# sys.stdin.encoding / sys.getdefaultencoding
# didn't return a suitable encoding to decode
# text. This happens mostly when global LANG
# var is not set correctly and there's no
# default encoding. In this case, most likely
# python will use ASCII or ANSI encoders as
# default encodings but they won't be capable
# of decoding non-ASCII characters.
#
# Also, UTF-8 is being used since it's an ASCII
# extension.
return text.decode('utf-8', errors)
def safe_encode(text, incoming=None,
encoding='utf-8', errors='strict'):
"""Encodes incoming text/bytes string using `encoding`.
If incoming is not specified, text is expected to be encoded with
current python's default encoding. (`sys.getdefaultencoding`)
:param incoming: Text's current encoding
:param encoding: Expected encoding for text (Default UTF-8)
:param errors: Errors handling policy. See here for valid
values http://docs.python.org/2/library/codecs.html
:returns: text or a bytestring `encoding` encoded
representation of it.
:raises TypeError: If text is not an instance of str
"""
if not isinstance(text, (six.string_types, six.binary_type)):
raise TypeError("%s can't be encoded" % type(text))
if not incoming:
incoming = (sys.stdin.encoding or
sys.getdefaultencoding())
if isinstance(text, six.text_type):
return text.encode(encoding, errors)
elif text and encoding != incoming:
# Decode text before encoding it with `encoding`
text = safe_decode(text, incoming, errors)
return text.encode(encoding, errors)
else:
return text
def string_to_bytes(text, unit_system='IEC', return_int=False):
"""Converts a string into an float representation of bytes.
@ -229,7 +158,7 @@ def to_slug(value, incoming=None, errors="strict"):
:returns: slugified unicode representation of `value`
:raises TypeError: If text is not an instance of str
"""
value = safe_decode(value, incoming, errors)
value = encodeutils.safe_decode(value, incoming, errors)
# NOTE(aababilov): no need to use safe_(encode|decode) here:
# encodings are always "ascii", error handling is always "ignore"
# and types are always known (first: unicode; second: str)

View File

@ -143,48 +143,6 @@ class StrUtilsTest(test_base.BaseTestCase):
self.assertEqual(1, strutils.int_from_bool_as_string(True))
self.assertEqual(0, strutils.int_from_bool_as_string(False))
def test_safe_decode(self):
safe_decode = strutils.safe_decode
self.assertRaises(TypeError, safe_decode, True)
self.assertEqual(six.u('ni\xf1o'), safe_decode(six.b("ni\xc3\xb1o"),
incoming="utf-8"))
if six.PY2:
# In Python 3, bytes.decode() doesn't support anymore
# bytes => bytes encodings like base64
self.assertEqual(six.u("test"), safe_decode("dGVzdA==",
incoming='base64'))
self.assertEqual(six.u("strange"), safe_decode(six.b('\x80strange'),
errors='ignore'))
self.assertEqual(six.u('\xc0'), safe_decode(six.b('\xc0'),
incoming='iso-8859-1'))
# Forcing incoming to ascii so it falls back to utf-8
self.assertEqual(six.u('ni\xf1o'), safe_decode(six.b('ni\xc3\xb1o'),
incoming='ascii'))
self.assertEqual(six.u('foo'), safe_decode(b'foo'))
def test_safe_encode(self):
safe_encode = strutils.safe_encode
self.assertRaises(TypeError, safe_encode, True)
self.assertEqual(six.b("ni\xc3\xb1o"), safe_encode(six.u('ni\xf1o'),
encoding="utf-8"))
if six.PY2:
# In Python 3, str.encode() doesn't support anymore
# text => text encodings like base64
self.assertEqual(six.b("dGVzdA==\n"),
safe_encode("test", encoding='base64'))
self.assertEqual(six.b('ni\xf1o'), safe_encode(six.b("ni\xc3\xb1o"),
encoding="iso-8859-1",
incoming="utf-8"))
# Forcing incoming to ascii so it falls back to utf-8
self.assertEqual(six.b('ni\xc3\xb1o'),
safe_encode(six.b('ni\xc3\xb1o'), incoming='ascii'))
self.assertEqual(six.b('foo'), safe_encode(six.u('foo')))
def test_slugify(self):
to_slug = strutils.to_slug
self.assertRaises(TypeError, to_slug, True)

View File

@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
# Copyright 2014 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from oslotest import base as test_base
import six
from oslo.utils import encodeutils
class EncodeUtilsTest(test_base.BaseTestCase):
def test_safe_decode(self):
safe_decode = encodeutils.safe_decode
self.assertRaises(TypeError, safe_decode, True)
self.assertEqual(six.u('ni\xf1o'), safe_decode(six.b("ni\xc3\xb1o"),
incoming="utf-8"))
if six.PY2:
# In Python 3, bytes.decode() doesn't support anymore
# bytes => bytes encodings like base64
self.assertEqual(six.u("test"), safe_decode("dGVzdA==",
incoming='base64'))
self.assertEqual(six.u("strange"), safe_decode(six.b('\x80strange'),
errors='ignore'))
self.assertEqual(six.u('\xc0'), safe_decode(six.b('\xc0'),
incoming='iso-8859-1'))
# Forcing incoming to ascii so it falls back to utf-8
self.assertEqual(six.u('ni\xf1o'), safe_decode(six.b('ni\xc3\xb1o'),
incoming='ascii'))
self.assertEqual(six.u('foo'), safe_decode(b'foo'))
def test_safe_encode(self):
safe_encode = encodeutils.safe_encode
self.assertRaises(TypeError, safe_encode, True)
self.assertEqual(six.b("ni\xc3\xb1o"), safe_encode(six.u('ni\xf1o'),
encoding="utf-8"))
if six.PY2:
# In Python 3, str.encode() doesn't support anymore
# text => text encodings like base64
self.assertEqual(six.b("dGVzdA==\n"),
safe_encode("test", encoding='base64'))
self.assertEqual(six.b('ni\xf1o'), safe_encode(six.b("ni\xc3\xb1o"),
encoding="iso-8859-1",
incoming="utf-8"))
# Forcing incoming to ascii so it falls back to utf-8
self.assertEqual(six.b('ni\xc3\xb1o'),
safe_encode(six.b('ni\xc3\xb1o'), incoming='ascii'))
self.assertEqual(six.b('foo'), safe_encode(six.u('foo')))