Sync oslo imageutils, strutils to Ironic

Changes in imageutils:
===============================
9e88af1 fixed typos found by RETF rules
b455fac Enable decimal value input in imageutils.QemuImgInfo
bec3a5e Implements SI/IEC unit system conversion to bytes
8b2b0b7 Use hacking import_exceptions for gettextutils._
aad179d Fixing misspelled encryption key in QemuImgInfo
12bcdb7 Remove vim header
2bd46eb Refactors byte size extraction logic
0356685 Adds common image utils

Changes in strutils:
====================
cb5a804 Move `mask_password` to strutils
8a0f567 Remove str() from LOG.* and exceptions
fd18c28 Fix safe_encode(): return bytes on Python 3
302c7c8 strutils: Allow safe_{encode,decode} to take bytes as input
bec3a5e Implements SI/IEC unit system conversion to bytes
e53fe85 strutils bool_from_string, allow specified default
8b2b0b7 Use hacking import_exceptions for gettextutils._
84d461e Fix a bug in safe_encode where it returns a bytes object in py3
12bcdb7 Remove vim header

Change-Id: If0d16d70c004344511b9851c8a533759f0d2bb52
2014-08-05 11:03:33 +08:00 · 2014-08-05 11:03:33 +08:00 · 5d4d79fbd1
commit 5d4d79fbd1
parent 797fdbead2
4 changed files with 276 additions and 162 deletions
--- a/ironic/common/images.py
+++ b/ironic/common/images.py
@ -20,7 +20,6 @@ Handling of VM disk images.
 """
 import os
 import re
 from oslo.config import cfg
@ -28,8 +27,8 @@ from ironic.common import exception
 from ironic.common import image_service as service
 from ironic.common import utils
 from ironic.openstack.common import fileutils
 from ironic.openstack.common import imageutils
 from ironic.openstack.common import log as logging
 from ironic.openstack.common import strutils
 LOG = logging.getLogger(__name__)
@ -43,129 +42,14 @@ CONF = cfg.CONF
 CONF.register_opts(image_opts)
 class QemuImgInfo(object):
    """Holds the fields parsed from the text output of ``qemu-img info``.

    Fields that do not appear in the output are left as None, except
    ``snapshots`` which falls back to an empty list.
    """

    BACKING_FILE_RE = re.compile((r"^(.*?)\s*\(actual\s+path\s*:"
                                  r"\s+(.*?)\)\s*$"), re.I)
    TOP_LEVEL_RE = re.compile(r"^([\w\d\s\_\-]+):(.*)$")
    SIZE_RE = re.compile(r"\(\s*(\d+)\s+bytes\s*\)", re.I)

    def __init__(self, cmd_output=None):
        """Parse ``cmd_output`` (may be None) and publish its fields."""
        parsed = self._parse(cmd_output or '')
        plain_fields = ('image', 'backing_file', 'file_format',
                        'virtual_size', 'cluster_size', 'disk_size',
                        'encryption')
        for attr in plain_fields:
            setattr(self, attr, parsed.get(attr))
        # The parser stores the snapshot table under 'snapshot_list'.
        self.snapshots = parsed.get('snapshot_list', [])

    def __str__(self):
        """Render the main fields one per line, snapshots only if any."""
        pairs = (
            ('image: %s', self.image),
            ('file_format: %s', self.file_format),
            ('virtual_size: %s', self.virtual_size),
            ('disk_size: %s', self.disk_size),
            ('cluster_size: %s', self.cluster_size),
            ('backing_file: %s', self.backing_file),
        )
        rendered = [template % value for template, value in pairs]
        if self.snapshots:
            rendered.append("snapshots: %s" % self.snapshots)
        return "\n".join(rendered)

    def _canonicalize(self, field):
        """Lower-case and trim ``field``, turning spaces/dashes into '_'.

        qemu mixes naming styles in its output; the normalised form is
        usable as a python identifier (kwargs and such).
        """
        return re.sub('[ -]', '_', field.lower().strip())

    def _extract_bytes(self, details):
        """Convert a size description to bytes when possible.

        A "(N bytes)" annotation is preferred when present; if the
        conversion raises TypeError the unconverted text is returned.
        """
        annotated = self.SIZE_RE.search(details)
        candidate = annotated.group(1) if annotated else details
        try:
            return strutils.to_bytes(candidate)
        except (TypeError):
            return candidate

    def _extract_details(self, root_cmd, root_details, lines_after):
        """Post-process the value of one top-level key.

        ``lines_after`` holds the not-yet-parsed lines; snapshot rows
        belonging to a 'snapshot_list' key are consumed from its front.

        :raises ValueError: if a snapshot list has no 'ID' header line.
        """
        if root_cmd == 'backing_file':
            # Prefer the real path from "(actual path: ...)" if given.
            resolved = self.BACKING_FILE_RE.match(root_details)
            if resolved:
                return resolved.group(2).strip()
            return root_details
        if root_cmd in ['virtual_size', 'cluster_size', 'disk_size']:
            return self._extract_bytes(root_details)
        if root_cmd == 'file_format':
            return root_details.strip().lower()
        if root_cmd != 'snapshot_list':
            return root_details

        # The line following the key must be the table header ('ID ...').
        if not (lines_after and lines_after[0].startswith("ID")):
            msg = _("Snapshot list encountered but no header found!")
            raise ValueError(msg)
        lines_after.pop(0)

        # Rows follow the sprintf pattern "%-10s%-20s%7s%20s%15s"
        # ID TAG VM SIZE DATE VM CLOCK (current header)
        snapshots = []
        while lines_after:
            columns = lines_after[0].split()
            if len(columns) != 6:
                break
            # Final column must look like "%02d:%02d:%02d.%03d".
            if len(columns[5].split(":")) != 3:
                break
            snapshots.append({
                'id': columns[0],
                'tag': columns[1],
                'vm_size': columns[2],
                'date': columns[3],
                'vm_clock': columns[4] + " " + columns[5],
            })
            lines_after.pop(0)
        return snapshots

    def _parse(self, cmd_output):
        """Turn raw command output into a dict of canonical key -> value.

        Based on reading qemu-img.c: every 'top level' item is some text,
        a ':' and the value; each one is refined by _extract_details.

        TODO(harlowja): newer versions might have a json output format
                        we should switch to that whenever possible.
                        see: http://bit.ly/XLJXDX
        """
        parsed = {}
        pending = [ln for ln in cmd_output.splitlines() if ln.strip()]
        while pending:
            matched = self.TOP_LEVEL_RE.match(pending.pop(0))
            if not matched:
                continue
            key = self._canonicalize(matched.group(1))
            if not key:
                continue
            value = matched.group(2).strip()
            parsed[key] = self._extract_details(key, value, pending)
        return parsed
 def qemu_img_info(path):
    """Return an object containing the parsed output from qemu-img info.

    :param path: path of the image file passed to ``qemu-img info``.
    :returns: a QemuImgInfo built from the command output, or an empty
              QemuImgInfo when ``path`` does not exist.
    """
    if not os.path.exists(path):
-        return QemuImgInfo()
+        return imageutils.QemuImgInfo()
    # The command runs with LC_ALL=C/LANG=C -- presumably so the output
    # stays in the untranslated form the parser expects (TODO confirm).
    out, err = utils.execute('env', 'LC_ALL=C', 'LANG=C',
                             'qemu-img', 'info', path)
-    return QemuImgInfo(out)
+    return imageutils.QemuImgInfo(out)
 def convert_image(source, dest, out_format, run_as_root=False):
--- a/ironic/openstack/common/imageutils.py
+++ b/ironic/openstack/common/imageutils.py
@ -0,0 +1,152 @@
 # Copyright 2010 United States Government as represented by the
 # Administrator of the National Aeronautics and Space Administration.
 # All Rights Reserved.
 # Copyright (c) 2010 Citrix Systems, Inc.
 #
 #    Licensed under the Apache License, Version 2.0 (the "License"); you may
 #    not use this file except in compliance with the License. You may obtain
 #    a copy of the License at
 #
 #         http://www.apache.org/licenses/LICENSE-2.0
 #
 #    Unless required by applicable law or agreed to in writing, software
 #    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 #    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 #    License for the specific language governing permissions and limitations
 #    under the License.
 """
 Helper methods to deal with images.
 """
 import re
 from ironic.openstack.common.gettextutils import _
 from ironic.openstack.common import strutils
 class QemuImgInfo(object):
    """Parsed view of the human-readable output of ``qemu-img info``.

    The raw output is split into ``key: value`` lines. Recognised keys
    are normalised (real backing file path, sizes as integer bytes,
    snapshot table as a list of dicts) and exposed as attributes. Keys
    missing from the output are left as None; ``snapshots`` defaults to
    an empty list.
    """

    BACKING_FILE_RE = re.compile((r"^(.*?)\s*\(actual\s+path\s*:"
                                  r"\s+(.*?)\)\s*$"), re.I)
    TOP_LEVEL_RE = re.compile(r"^([\w\d\s\_\-]+):(.*)$")
    # group(1): magnitude, group(2): optional unit, group(4): exact byte
    # count taken from a trailing "(N bytes)". Whitespace is accepted
    # between magnitude and unit so that both "1.1M" and "1.1 MiB" keep
    # their unit instead of being read as a bare number.
    SIZE_RE = re.compile(r"(\d*\.?\d+)\s*(\w+)?(\s*\(\s*(\d+)\s+bytes\s*\))?",
                         re.I)

    def __init__(self, cmd_output=None):
        """Parse ``cmd_output`` (may be None) and publish its fields.

        :param cmd_output: text printed by ``qemu-img info``.
        :raises ValueError: if a size field or the snapshot table cannot
                            be parsed.
        """
        details = self._parse(cmd_output or '')
        self.image = details.get('image')
        self.backing_file = details.get('backing_file')
        self.file_format = details.get('file_format')
        self.virtual_size = details.get('virtual_size')
        self.cluster_size = details.get('cluster_size')
        self.disk_size = details.get('disk_size')
        self.snapshots = details.get('snapshot_list', [])
        self.encrypted = details.get('encrypted')

    def __str__(self):
        """Render the main fields one per line.

        The snapshots and encrypted lines are only emitted when set.
        """
        lines = [
            'image: %s' % self.image,
            'file_format: %s' % self.file_format,
            'virtual_size: %s' % self.virtual_size,
            'disk_size: %s' % self.disk_size,
            'cluster_size: %s' % self.cluster_size,
            'backing_file: %s' % self.backing_file,
        ]
        if self.snapshots:
            lines.append("snapshots: %s" % self.snapshots)
        if self.encrypted:
            lines.append("encrypted: %s" % self.encrypted)
        return "\n".join(lines)

    def _canonicalize(self, field):
        """Return ``field`` lower-cased, trimmed, with ' '/'-' as '_'."""
        # Standardize on underscores/lc/no dash and no spaces
        # since qemu seems to have mixed outputs here... and
        # this format allows for better integration with python
        # - i.e. for usage in kwargs and such...
        field = field.lower().strip()
        for c in (" ", "-"):
            field = field.replace(c, '_')
        return field

    def _extract_bytes(self, details):
        """Convert a size description into an integer number of bytes.

        :param details: size text such as "1.0G (1073741824 bytes)",
                        "136K", "196 KiB" or "65536".
        :returns: the size in bytes as an int.
        :raises ValueError: if no size can be found in ``details`` or the
                            magnitude/unit cannot be converted.
        """
        real_size = self.SIZE_RE.search(details)
        if not real_size:
            raise ValueError(_('Invalid input value "%s".') % details)
        magnitude = real_size.group(1)
        unit_of_measure = real_size.group(2)
        bytes_info = real_size.group(3)
        if bytes_info:
            # An explicit "(N bytes)" annotation is exact; prefer it.
            return int(real_size.group(4))
        elif not unit_of_measure:
            return int(magnitude)
        # A bare prefix such as "K" is shorthand for "KB"; full units
        # ("KiB", "B") are passed through untouched so they are not
        # turned into an invalid "KiBB"/"BB".
        if len(unit_of_measure) == 1 and unit_of_measure != 'B':
            unit_of_measure += 'B'
        return strutils.string_to_bytes('%s%s' % (magnitude, unit_of_measure),
                                        return_int=True)

    def _extract_details(self, root_cmd, root_details, lines_after):
        """Post-process the value of one top-level key.

        :param root_cmd: canonical key name.
        :param root_details: stripped text that followed the key.
        :param lines_after: remaining unparsed lines; snapshot rows that
                            belong to a 'snapshot_list' key are removed
                            from the front of this list.
        :returns: the refined value for the key.
        :raises ValueError: on an unparsable size or a snapshot list
                            without its 'ID' header line.
        """
        real_details = root_details
        if root_cmd == 'backing_file':
            # Replace it with the real backing file
            backing_match = self.BACKING_FILE_RE.match(root_details)
            if backing_match:
                real_details = backing_match.group(2).strip()
        elif root_cmd in ['virtual_size', 'cluster_size', 'disk_size']:
            # Replace it with the byte amount (if we can convert it)
            if root_details == 'None':
                real_details = 0
            else:
                real_details = self._extract_bytes(root_details)
        elif root_cmd == 'file_format':
            real_details = real_details.strip().lower()
        elif root_cmd == 'snapshot_list':
            # Next line should be a header, starting with 'ID'
            if not lines_after or not lines_after[0].startswith("ID"):
                msg = _("Snapshot list encountered but no header found!")
                raise ValueError(msg)
            del lines_after[0]
            real_details = []
            # This is the sprintf pattern we will try to match
            # "%-10s%-20s%7s%20s%15s"
            # ID TAG VM SIZE DATE VM CLOCK (current header)
            while lines_after:
                line = lines_after[0]
                line_pieces = line.split()
                if len(line_pieces) != 6:
                    break
                # Check against this pattern in the final position
                # "%02d:%02d:%02d.%03d"
                date_pieces = line_pieces[5].split(":")
                if len(date_pieces) != 3:
                    break
                real_details.append({
                    'id': line_pieces[0],
                    'tag': line_pieces[1],
                    'vm_size': line_pieces[2],
                    'date': line_pieces[3],
                    'vm_clock': line_pieces[4] + " " + line_pieces[5],
                })
                del lines_after[0]
        return real_details

    def _parse(self, cmd_output):
        """Turn raw command output into a dict of canonical key -> value.

        :param cmd_output: text printed by ``qemu-img info``.
        :returns: dict mapping canonical key names to refined values.
        """
        # Analysis done of qemu-img.c to figure out what is going on here
        # Find all points start with some chars and then a ':' then a newline
        # and then handle the results of those 'top level' items in a separate
        # function.
        #
        # TODO(harlowja): newer versions might have a json output format
        #                 we should switch to that whenever possible.
        #                 see: http://bit.ly/XLJXDX
        contents = {}
        lines = [x for x in cmd_output.splitlines() if x.strip()]
        while lines:
            line = lines.pop(0)
            top_level = self.TOP_LEVEL_RE.match(line)
            if top_level:
                root = self._canonicalize(top_level.group(1))
                if not root:
                    continue
                root_details = top_level.group(2).strip()
                details = self._extract_details(root, root_details, lines)
                contents[root] = details
        return contents
--- a/ironic/openstack/common/strutils.py
+++ b/ironic/openstack/common/strutils.py
@ -1,5 +1,3 @@
 # vim: tabstop=4 shiftwidth=4 softtabstop=4
 # Copyright 2011 OpenStack Foundation.
 # All Rights Reserved.
 #
@ -19,25 +17,31 @@
 System-level utilities and helper functions.
 """
 import math
 import re
 import sys
 import unicodedata
 import six
-from ironic.openstack.common.gettextutils import _  # noqa
+from ironic.openstack.common.gettextutils import _
 # UNIT_PREFIX_EXPONENT maps a unit prefix to the power the unit-system
 # base is raised to (k/K/Ki -> 1, M/Mi -> 2, G/Gi -> 3, T/Ti -> 4).
-# Used for looking up extensions of text
+UNIT_PREFIX_EXPONENT = {
-# to their 'multiplied' byte amount
+    'k': 1,
-BYTE_MULTIPLIERS = {
+    'K': 1,
-    '': 1,
+    'Ki': 1,
-    't': 1024 ** 4,
+    'M': 2,
-    'g': 1024 ** 3,
+    'Mi': 2,
-    'm': 1024 ** 2,
+    'G': 3,
-    'k': 1024,
+    'Gi': 3,
    'T': 4,
    'Ti': 4,
 }
 # Unit system name -> (base, regex). The regex captures the signed
 # magnitude (group 1), an optional prefix (group 2) and the mandatory
 # b/bit/B suffix (group 3).
 UNIT_SYSTEM_INFO = {
    'IEC': (1024, re.compile(r'(^[-+]?\d*\.?\d+)([KMGT]i?)?(b|bit|B)$')),
    'SI': (1000, re.compile(r'(^[-+]?\d*\.?\d+)([kMGT])?(b|bit|B)$')),
 }
 BYTE_REGEX = re.compile(r'(^-?\d+)(\D*)')
 TRUE_STRINGS = ('1', 't', 'true', 'on', 'y', 'yes')
 FALSE_STRINGS = ('0', 'f', 'false', 'off', 'n', 'no')
@ -46,6 +50,28 @@ SLUGIFY_STRIP_RE = re.compile(r"[^\w\s-]")
 SLUGIFY_HYPHENATE_RE = re.compile(r"[-\s]+")
 # NOTE(flaper87): The following 3 globals are used by `mask_password`
 _SANITIZE_KEYS = ['adminPass', 'admin_pass', 'password', 'admin_password']
 # NOTE(ldbragst): Let's build a list of regex objects using the list of
 # _SANITIZE_KEYS we already have. This way, we only have to add the new key
 # to the list of _SANITIZE_KEYS and we can generate regular expressions
 # for XML and JSON automatically.
 _SANITIZE_PATTERNS = []
 # Every template has exactly two capture groups: group 1 is the text that
 # precedes the secret and group 2 the text that follows it; %(key)s is
 # filled in with each sanitize key below.
 _FORMAT_PATTERNS = [r'(%(key)s\s*[=]\s*[\"\']).*?([\"\'])',
                    r'(<%(key)s>).*?(</%(key)s>)',
                    r'([\"\']%(key)s[\"\']\s*:\s*[\"\']).*?([\"\'])',
                    r'([\'"].*?%(key)s[\'"]\s*:\s*u?[\'"]).*?([\'"])',
                    r'([\'"].*?%(key)s[\'"]\s*,\s*\'--?[A-z]+\'\s*,\s*u?[\'"])'
                    '.*?([\'"])',
                    r'(%(key)s\s*--?[A-z]+\s*)\S+(\s*)']
 # Compile one regex per (key, template) pair; re.DOTALL lets '.' match
 # newlines as well.
 for key in _SANITIZE_KEYS:
    for pattern in _FORMAT_PATTERNS:
        reg_ex = re.compile(pattern % {'key': key}, re.DOTALL)
        _SANITIZE_PATTERNS.append(reg_ex)
 def int_from_bool_as_string(subject):
    """Interpret a string as a boolean and return either 1 or 0.
@ -60,12 +86,12 @@ def int_from_bool_as_string(subject):
    return bool_from_string(subject) and 1 or 0
-def bool_from_string(subject, strict=False):
+def bool_from_string(subject, strict=False, default=False):
    """Interpret a string as a boolean.
    A case-insensitive match is performed such that strings matching 't',
    'true', 'on', 'y', 'yes', or '1' are considered True and, when
-    `strict=False`, anything else is considered False.
+    `strict=False`, anything else returns the value specified by 'default'.
    Useful for JSON-decoded stuff and config file parsing.
@ -74,7 +100,7 @@ def bool_from_string(subject, strict=False):
    Strings yielding False are 'f', 'false', 'off', 'n', 'no', or '0'.
    """
    if not isinstance(subject, six.string_types):
-        subject = str(subject)
+        subject = six.text_type(subject)
    lowered = subject.strip().lower()
@ -90,11 +116,12 @@ def bool_from_string(subject, strict=False):
                                      'acceptable': acceptable}
        raise ValueError(msg)
    else:
-        return False
+        return default
 def safe_decode(text, incoming=None, errors='strict'):
-    """Decodes incoming str using `incoming` if they're not already unicode.
+    """Decodes incoming text/bytes string using `incoming` if they're not
       already unicode.
    :param incoming: Text's current encoding
    :param errors: Errors handling policy. See here for valid
@ -103,7 +130,7 @@ def safe_decode(text, incoming=None, errors='strict'):
                representation of it.
    :raises TypeError: If text is not an instance of str
    """
-    if not isinstance(text, six.string_types):
+    if not isinstance(text, (six.string_types, six.binary_type)):
        raise TypeError("%s can't be decoded" % type(text))
    if isinstance(text, six.text_type):
@ -133,7 +160,7 @@ def safe_decode(text, incoming=None, errors='strict'):
 def safe_encode(text, incoming=None,
                encoding='utf-8', errors='strict'):
-    """Encodes incoming str/unicode using `encoding`.
+    """Encodes incoming text/bytes string using `encoding`.
    If incoming is not specified, text is expected to be encoded with
    current python's default encoding. (`sys.getdefaultencoding`)
@ -146,7 +173,7 @@ def safe_encode(text, incoming=None,
                representation of it.
    :raises TypeError: If text is not an instance of str
    """
-    if not isinstance(text, six.string_types):
+    if not isinstance(text, (six.string_types, six.binary_type)):
        raise TypeError("%s can't be encoded" % type(text))
    if not incoming:
@ -159,38 +186,54 @@ def safe_encode(text, incoming=None,
        # Decode text before encoding it with `encoding`
        text = safe_decode(text, incoming, errors)
        return text.encode(encoding, errors)
-
+    else:
-    return text
+        return text
-def to_bytes(text, default=0):
+def string_to_bytes(text, unit_system='IEC', return_int=False):
-    """Converts a string into an integer of bytes.
+    """Converts a string into a float representation of bytes.
-    Looks at the last characters of the text to determine
+    The units supported for IEC ::
-    what conversion is needed to turn the input text into a byte number.
+
-    Supports "B, K(B), M(B), G(B), and T(B)". (case insensitive)
+        Kb(it), Kib(it), Mb(it), Mib(it), Gb(it), Gib(it), Tb(it), Tib(it)
        KB, KiB, MB, MiB, GB, GiB, TB, TiB
    The units supported for SI ::
        kb(it), Mb(it), Gb(it), Tb(it)
        kB, MB, GB, TB
    Note that the SI unit system does not support capital letter 'K'
    :param text: String input for bytes size conversion.
-    :param default: Default return value when text is blank.
+    :param unit_system: Unit system for byte size conversion.
    :param return_int: If True, returns integer representation of text
                       in bytes. (default: decimal)
    :returns: Numerical representation of text in bytes.
    :raises ValueError: If text has an invalid value.
    """
-    match = BYTE_REGEX.search(text)
+    try:
        base, reg_ex = UNIT_SYSTEM_INFO[unit_system]
    except KeyError:
        msg = _('Invalid unit system: "%s"') % unit_system
        raise ValueError(msg)
    match = reg_ex.match(text)
    if match:
-        magnitude = int(match.group(1))
+        magnitude = float(match.group(1))
-        mult_key_org = match.group(2)
+        unit_prefix = match.group(2)
        # A 'b'/'bit' suffix means the magnitude counts bits; divide by 8
        # to express it in bytes.
-        if not mult_key_org:
+        if match.group(3) in ['b', 'bit']:
-            return magnitude
+            magnitude /= 8
    elif text:
        msg = _('Invalid string format: %s') % text
        raise TypeError(msg)
    else:
-        return default
+        msg = _('Invalid string format: %s') % text
-    mult_key = mult_key_org.lower().replace('b', '', 1)
+        raise ValueError(msg)
-    multiplier = BYTE_MULTIPLIERS.get(mult_key)
+    if not unit_prefix:
-    if multiplier is None:
+        res = magnitude
-        msg = _('Unknown byte multiplier: %s') % mult_key_org
+    else:
-        raise TypeError(msg)
+        res = magnitude * pow(base, UNIT_PREFIX_EXPONENT[unit_prefix])
-    return magnitude * multiplier
+    if return_int:
        # Round up so a fractional byte count is never truncated.
        return int(math.ceil(res))
    return res
 def to_slug(value, incoming=None, errors="strict"):
@ -216,3 +259,37 @@ def to_slug(value, incoming=None, errors="strict"):
        "ascii", "ignore").decode("ascii")
    value = SLUGIFY_STRIP_RE.sub("", value).strip().lower()
    return SLUGIFY_HYPHENATE_RE.sub("-", value)
 def mask_password(message, secret="***"):
    """Return ``message`` with every password value replaced by ``secret``.

    :param message: text (or any object convertible to text) that may
                    carry credentials.
    :param secret: replacement written in place of each password value.
    :returns: the unicode form of ``message`` with password fields masked.

    For example:

    >>> mask_password("'adminPass' : 'aaaaa'")
    "'adminPass' : '***'"
    >>> mask_password("'admin_pass' : 'aaaaa'")
    "'admin_pass' : '***'"
    >>> mask_password('"password" : "aaaaa"')
    '"password" : "***"'
    >>> mask_password("'original_password' : 'aaaaa'")
    "'original_password' : '***'"
    >>> mask_password("u'original_password' :   u'aaaaa'")
    "u'original_password' :   u'***'"
    """
    text = six.text_type(message)

    # NOTE(ldbragst): Only run the substitutions when one of the
    # _SANITIZE_KEYS occurs somewhere in the text; otherwise there is
    # nothing to mask and the text is returned as-is.
    if any(key in text for key in _SANITIZE_KEYS):
        # Keep what each pattern captured around the secret (groups 1, 2).
        replacement = r'\g<1>' + secret + r'\g<2>'
        for regex in _SANITIZE_PATTERNS:
            text = regex.sub(replacement, text)
    return text
--- a/openstack-common.conf
+++ b/openstack-common.conf
@ -8,6 +8,7 @@ module=eventlet_backdoor
 module=excutils
 module=fileutils
 module=gettextutils
 module=imageutils
 module=importutils
 module=jsonutils
 module=local