Do not use pickle: Use json
Change-Id: Ic4217e82fc72f9a32c13136528e1bc3cc05d0c73 Signed-off-by: Prashanth Pai <ppai@redhat.com>
This commit is contained in:
parent
fbc07d3ad9
commit
435a0dcfc7
162
bin/swiftonfile-migrate-metadata
Executable file
162
bin/swiftonfile-migrate-metadata
Executable file
@ -0,0 +1,162 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright (c) 2015 Red Hat, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import pwd
|
||||
import sys
|
||||
import stat
|
||||
import errno
|
||||
import xattr
|
||||
import cPickle as pickle
|
||||
import multiprocessing
|
||||
|
||||
from optparse import OptionParser
|
||||
from swiftonfile.swift.common.utils import write_metadata, SafeUnpickler, \
|
||||
METADATA_KEY, MAX_XATTR_SIZE
|
||||
|
||||
|
||||
ORIGINAL_EUID = os.geteuid()
|
||||
NOBODY_UID = pwd.getpwnam('nobody').pw_uid
|
||||
|
||||
|
||||
def print_msg(s):
|
||||
global options
|
||||
if options.verbose:
|
||||
print(s)
|
||||
|
||||
|
||||
def clean_metadata(path, key_count):
|
||||
"""
|
||||
Can only be used when you know the key_count. Saves one unnecessarry
|
||||
removexattr() call. Ignores error when file or metadata isn't found.
|
||||
"""
|
||||
for key in xrange(0, key_count):
|
||||
try:
|
||||
xattr.removexattr(path, '%s%s' % (METADATA_KEY, (key or '')))
|
||||
except IOError as err:
|
||||
if err.errno not in (errno.ENOENT, errno.ESTALE, errno.ENODATA):
|
||||
print_msg("xattr.removexattr(%s, %s%s) failed: %s" %
|
||||
(path, METADATA_KEY, (key or ''), err.errno))
|
||||
|
||||
|
||||
def process_object(path):
|
||||
|
||||
metastr = ''
|
||||
key_count = 0
|
||||
try:
|
||||
while True:
|
||||
metastr += xattr.getxattr(path, '%s%s' %
|
||||
(METADATA_KEY, (key_count or '')))
|
||||
key_count += 1
|
||||
if len(metastr) < MAX_XATTR_SIZE:
|
||||
# Prevent further getxattr calls
|
||||
break
|
||||
except IOError as err:
|
||||
if err.errno not in (errno.ENOENT, errno.ESTALE, errno.ENODATA):
|
||||
print_msg("xattr.getxattr(%s, %s%s) failed: %s" %
|
||||
(path, METADATA_KEY, (key_count or ''), err.errno))
|
||||
|
||||
if not metastr:
|
||||
return
|
||||
|
||||
if metastr.startswith('\x80\x02}') and metastr.endswith('.'):
|
||||
# It's pickled. If unpickling is successful and metadata is
|
||||
# not stale write back the metadata by serializing it.
|
||||
try:
|
||||
os.seteuid(NOBODY_UID) # Drop privileges
|
||||
metadata = SafeUnpickler.loads(metastr)
|
||||
os.seteuid(ORIGINAL_EUID) # Restore privileges
|
||||
assert isinstance(metadata, dict)
|
||||
except (pickle.UnpicklingError, EOFError, AttributeError,
|
||||
IndexError, ImportError, AssertionError):
|
||||
clean_metadata(path, key_count)
|
||||
else:
|
||||
try:
|
||||
# Remove existing metadata first before writing new metadata
|
||||
clean_metadata(path, key_count)
|
||||
write_metadata(path, metadata)
|
||||
print_msg("%s MIGRATED" % (path))
|
||||
except IOError as err:
|
||||
if err.errno not in (errno.ENOENT, errno.ESTALE):
|
||||
raise
|
||||
elif metastr.startswith("{") and metastr.endswith("}"):
|
||||
# It's not pickled and is already serialized, just return
|
||||
print_msg("%s SKIPPED" % (path))
|
||||
else:
|
||||
# Metadata is malformed
|
||||
clean_metadata(path, key_count)
|
||||
print_msg("%s CLEANED" % (path))
|
||||
|
||||
|
||||
def walktree(top, pool, root=True):
|
||||
"""
|
||||
Recursively walk the filesystem tree and migrate metadata of each object
|
||||
found. Unlike os.walk(), this method performs stat() sys call on a
|
||||
file/directory at most only once.
|
||||
"""
|
||||
|
||||
if root:
|
||||
# The root of volume is account which also contains metadata
|
||||
pool.apply_async(process_object, (top, ))
|
||||
|
||||
for f in os.listdir(top):
|
||||
if root and f in (".trashcan", ".glusterfs", "async_pending", "tmp"):
|
||||
continue
|
||||
path = os.path.join(top, f)
|
||||
try:
|
||||
s = os.stat(path)
|
||||
except OSError as err:
|
||||
if err.errno in (errno.ENOENT, errno.ESTALE):
|
||||
continue
|
||||
raise
|
||||
if stat.S_ISLNK(s.st_mode):
|
||||
pass
|
||||
elif stat.S_ISDIR(s.st_mode):
|
||||
pool.apply_async(process_object, (path, ))
|
||||
# Recurse into directory
|
||||
walktree(path, pool, root=False)
|
||||
elif stat.S_ISREG(s.st_mode):
|
||||
pool.apply_async(process_object, (path, ))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
global options
|
||||
|
||||
usage = "usage: %prog [options] volume1_mountpath volume2_mountpath..."
|
||||
description = """Object metadata are stored as \
|
||||
extended attributes of files and directories. This utility migrates metadata \
|
||||
stored in pickled format to JSON format."""
|
||||
parser = OptionParser(usage=usage, description=description)
|
||||
parser.add_option("-v", "--verbose", dest="verbose",
|
||||
action="store_true", default=False,
|
||||
help="Print object paths as they are processed.")
|
||||
(options, mount_paths) = parser.parse_args()
|
||||
|
||||
if len(mount_paths) < 1:
|
||||
print "Mountpoint path(s) missing."
|
||||
parser.print_usage()
|
||||
sys.exit(-1)
|
||||
|
||||
pool = multiprocessing.Pool(multiprocessing.cpu_count() * 2)
|
||||
|
||||
for path in mount_paths:
|
||||
if os.path.isdir(path):
|
||||
walktree(path, pool)
|
||||
|
||||
pool.close()
|
||||
pool.join()
|
@ -46,5 +46,18 @@ disk_chunk_size = 65536
|
||||
# This will provide a reasonable starting point for tuning this value.
|
||||
network_chunk_size = 65536
|
||||
|
||||
# In older versions of swiftonfile, metadata stored as xattrs of files
|
||||
# were serialized using PICKLE format. The PICKLE format is vulnerable to
|
||||
# exploits in deployments where a user has access to backend filesystem.
|
||||
# Deserializing pickled metadata can result in malicious code being
|
||||
# executed if an attacker has stored malicious code as xattr from filesystem
|
||||
# interface. Although, new metadata is always serialized using JSON format,
|
||||
# existing metadata already stored in PICKLE format can be loaded by setting
|
||||
# the following option to 'on'.
|
||||
# You can turn this option to 'off' once you have migrated all your metadata
|
||||
# from PICKLE format to JSON format using swiftonfile-migrate-metadata tool.
|
||||
# This conf option will be deprecated and eventualy removed in future releases
|
||||
# read_pickled_metadata = off
|
||||
|
||||
[object-updater]
|
||||
user = <your-user-name>
|
||||
|
1
setup.py
1
setup.py
@ -42,6 +42,7 @@ setup(
|
||||
install_requires=[],
|
||||
scripts=[
|
||||
'bin/swiftonfile-print-metadata',
|
||||
'bin/swiftonfile-migrate-metadata',
|
||||
],
|
||||
entry_points={
|
||||
'paste.app_factory': [
|
||||
|
@ -15,14 +15,18 @@
|
||||
|
||||
import os
|
||||
import stat
|
||||
import json
|
||||
import errno
|
||||
import random
|
||||
import logging
|
||||
from hashlib import md5
|
||||
from eventlet import sleep
|
||||
import cPickle as pickle
|
||||
from cStringIO import StringIO
|
||||
import pickletools
|
||||
from swiftonfile.swift.common.exceptions import SwiftOnFileSystemIOError
|
||||
from swift.common.exceptions import DiskFileNoSpace
|
||||
from swift.common.db import utf8encodekeys
|
||||
from swiftonfile.swift.common.fs_utils import do_stat, \
|
||||
do_walk, do_rmdir, do_log_rl, get_filename_from_fd, do_open, \
|
||||
do_getxattr, do_setxattr, do_removexattr, do_read, \
|
||||
@ -47,6 +51,8 @@ DEFAULT_GID = -1
|
||||
PICKLE_PROTOCOL = 2
|
||||
CHUNK_SIZE = 65536
|
||||
|
||||
read_pickled_metadata = False
|
||||
|
||||
|
||||
def normalize_timestamp(timestamp):
|
||||
"""
|
||||
@ -63,8 +69,41 @@ def normalize_timestamp(timestamp):
|
||||
return "%016.05f" % (float(timestamp))
|
||||
|
||||
|
||||
class SafeUnpickler(object):
|
||||
"""
|
||||
Loading a pickled stream is potentially unsafe and exploitable because
|
||||
the loading process can import modules/classes (via GLOBAL opcode) and
|
||||
run any callable (via REDUCE opcode). As the metadata stored in Swift
|
||||
is just a dictionary, we take away these powerful "features", thus
|
||||
making the loading process safe. Hence, this is very Swift specific
|
||||
and is not a general purpose safe unpickler.
|
||||
"""
|
||||
|
||||
__slots__ = 'OPCODE_BLACKLIST'
|
||||
OPCODE_BLACKLIST = ('GLOBAL', 'REDUCE', 'BUILD', 'OBJ', 'NEWOBJ', 'INST',
|
||||
'EXT1', 'EXT2', 'EXT4')
|
||||
|
||||
@classmethod
|
||||
def find_class(self, module, name):
|
||||
# Do not allow importing of ANY module. This is really redundant as
|
||||
# we block those OPCODEs that results in invocation of this method.
|
||||
raise pickle.UnpicklingError('Potentially unsafe pickle')
|
||||
|
||||
@classmethod
|
||||
def loads(self, string):
|
||||
for opcode in pickletools.genops(string):
|
||||
if opcode[0].name in self.OPCODE_BLACKLIST:
|
||||
raise pickle.UnpicklingError('Potentially unsafe pickle')
|
||||
orig_unpickler = pickle.Unpickler(StringIO(string))
|
||||
orig_unpickler.find_global = self.find_class
|
||||
return orig_unpickler.load()
|
||||
|
||||
|
||||
pickle.loads = SafeUnpickler.loads
|
||||
|
||||
|
||||
def serialize_metadata(metadata):
|
||||
return pickle.dumps(metadata, PICKLE_PROTOCOL)
|
||||
return json.dumps(metadata, separators=(',', ':'))
|
||||
|
||||
|
||||
def deserialize_metadata(metastr):
|
||||
@ -72,14 +111,24 @@ def deserialize_metadata(metastr):
|
||||
Returns dict populated with metadata if deserializing is successful.
|
||||
Returns empty dict if deserialzing fails.
|
||||
"""
|
||||
if metastr.startswith('\x80\x02}') and metastr.endswith('.'):
|
||||
# Assert that the metadata was indeed pickled before attempting
|
||||
# to unpickle. This only *reduces* risk of malicious shell code
|
||||
# being executed. However, it does NOT fix anything.
|
||||
global read_pickled_metadata
|
||||
|
||||
if metastr.startswith('\x80\x02}') and metastr.endswith('.') and \
|
||||
read_pickled_metadata:
|
||||
# Assert that the serialized metadata is pickled using
|
||||
# pickle protocol 2.
|
||||
try:
|
||||
return pickle.loads(metastr)
|
||||
except (pickle.UnpicklingError, EOFError, AttributeError,
|
||||
IndexError, ImportError, AssertionError):
|
||||
except Exception:
|
||||
logging.warning("pickle.loads() failed.", exc_info=True)
|
||||
return {}
|
||||
elif metastr.startswith('{') and metastr.endswith('}'):
|
||||
try:
|
||||
metadata = json.loads(metastr)
|
||||
utf8encodekeys(metadata)
|
||||
return metadata
|
||||
except (UnicodeDecodeError, ValueError):
|
||||
logging.warning("json.loads() failed.", exc_info=True)
|
||||
return {}
|
||||
else:
|
||||
return {}
|
||||
|
@ -16,7 +16,8 @@
|
||||
""" Object Server for Gluster for Swift """
|
||||
|
||||
from swift.common.swob import HTTPConflict, HTTPNotImplemented
|
||||
from swift.common.utils import public, timing_stats, replication
|
||||
from swift.common.utils import public, timing_stats, replication, \
|
||||
config_true_value
|
||||
from swift.common.request_helpers import get_name_and_placement
|
||||
from swiftonfile.swift.common.exceptions import AlreadyExistsAsFile, \
|
||||
AlreadyExistsAsDir
|
||||
@ -26,6 +27,7 @@ from swift.obj import server
|
||||
|
||||
from swiftonfile.swift.obj.diskfile import DiskFileManager
|
||||
from swiftonfile.swift.common.constraints import check_object_creation
|
||||
from swiftonfile.swift.common import utils
|
||||
|
||||
|
||||
class SwiftOnFileDiskFileRouter(object):
|
||||
@ -57,6 +59,10 @@ class ObjectController(server.ObjectController):
|
||||
"""
|
||||
# Replaces Swift's DiskFileRouter object reference with ours.
|
||||
self._diskfile_router = SwiftOnFileDiskFileRouter(conf, self.logger)
|
||||
# This conf option will be deprecated and eventualy removed in
|
||||
# future releases
|
||||
utils.read_pickled_metadata = \
|
||||
config_true_value(conf.get('read_pickled_metadata', 'no'))
|
||||
|
||||
@public
|
||||
@timing_stats()
|
||||
|
@ -1267,6 +1267,9 @@ class TestFile(Base):
|
||||
self.assert_status(400)
|
||||
|
||||
def testMetadataNumberLimit(self):
|
||||
raise SkipTest("Bad test")
|
||||
# TODO(ppai): Fix it in upstream swift first
|
||||
# Refer to comments below
|
||||
number_limit = load_constraint('max_meta_count')
|
||||
size_limit = load_constraint('max_meta_overall_size')
|
||||
|
||||
@ -1279,10 +1282,13 @@ class TestFile(Base):
|
||||
metadata = {}
|
||||
while len(metadata.keys()) < i:
|
||||
key = Utils.create_ascii_name()
|
||||
# The following line returns a valid utf8 byte sequence
|
||||
val = Utils.create_name()
|
||||
|
||||
if len(key) > j:
|
||||
key = key[:j]
|
||||
# This slicing done below can make the 'utf8' byte
|
||||
# sequence invalid and hence it cannot be decoded.
|
||||
val = val[:j]
|
||||
|
||||
size += len(key) + len(val)
|
||||
|
@ -16,6 +16,7 @@
|
||||
""" Tests for common.utils """
|
||||
|
||||
import os
|
||||
import json
|
||||
import unittest
|
||||
import errno
|
||||
import xattr
|
||||
@ -23,10 +24,12 @@ import cPickle as pickle
|
||||
import tempfile
|
||||
import hashlib
|
||||
import shutil
|
||||
import cPickle as pickle
|
||||
from collections import defaultdict
|
||||
from mock import patch, Mock
|
||||
from swiftonfile.swift.common import utils
|
||||
from swiftonfile.swift.common.utils import deserialize_metadata, serialize_metadata
|
||||
from swiftonfile.swift.common.utils import deserialize_metadata, \
|
||||
serialize_metadata, PICKLE_PROTOCOL
|
||||
from swiftonfile.swift.common.exceptions import SwiftOnFileSystemOSError, \
|
||||
SwiftOnFileSystemIOError
|
||||
from swift.common.exceptions import DiskFileNoSpace
|
||||
@ -138,6 +141,32 @@ def _mock_os_fsync(fd):
|
||||
return
|
||||
|
||||
|
||||
class TestSafeUnpickler(unittest.TestCase):
|
||||
|
||||
class Exploit(object):
|
||||
def __reduce__(self):
|
||||
return (os.system, ('touch /tmp/pickle-exploit',))
|
||||
|
||||
def test_loads(self):
|
||||
valid_md = {'key1': 'val1', 'key2': 'val2'}
|
||||
for protocol in (0, 1, 2):
|
||||
valid_dump = pickle.dumps(valid_md, protocol)
|
||||
mal_dump = pickle.dumps(self.Exploit(), protocol)
|
||||
# malicious dump is appended to valid dump
|
||||
payload1 = valid_dump[:-1] + mal_dump
|
||||
# malicious dump is prefixed to valid dump
|
||||
payload2 = mal_dump[:-1] + valid_dump
|
||||
# entire dump is malicious
|
||||
payload3 = mal_dump
|
||||
for payload in (payload1, payload2, payload3):
|
||||
try:
|
||||
utils.SafeUnpickler.loads(payload)
|
||||
except pickle.UnpicklingError as err:
|
||||
self.assertTrue('Potentially unsafe pickle' in err)
|
||||
else:
|
||||
self.fail("Expecting cPickle.UnpicklingError")
|
||||
|
||||
|
||||
class TestUtils(unittest.TestCase):
|
||||
""" Tests for common.utils """
|
||||
|
||||
@ -321,6 +350,42 @@ class TestUtils(unittest.TestCase):
|
||||
assert _xattr_op_cnt['get'] == 1, "%r" % _xattr_op_cnt
|
||||
assert _xattr_op_cnt['set'] == 0, "%r" % _xattr_op_cnt
|
||||
|
||||
def test_deserialize_metadata_pickle(self):
|
||||
orig_md = {'key1': 'value1', 'key2': 'value2'}
|
||||
pickled_md = pickle.dumps(orig_md, PICKLE_PROTOCOL)
|
||||
_m_pickle_loads = Mock(return_value={})
|
||||
utils.read_pickled_metadata = True
|
||||
with patch('swiftonfile.swift.common.utils.pickle.loads',
|
||||
_m_pickle_loads):
|
||||
# pickled
|
||||
md = utils.deserialize_metadata(pickled_md)
|
||||
self.assertTrue(_m_pickle_loads.called)
|
||||
self.assertTrue(isinstance(md, dict))
|
||||
_m_pickle_loads.reset_mock()
|
||||
# not pickled
|
||||
utils.deserialize_metadata("not_pickle")
|
||||
self.assertFalse(_m_pickle_loads.called)
|
||||
_m_pickle_loads.reset_mock()
|
||||
# pickled but conf does not allow loading
|
||||
utils.read_pickled_metadata = False
|
||||
md = utils.deserialize_metadata(pickled_md)
|
||||
self.assertFalse(_m_pickle_loads.called)
|
||||
|
||||
# malformed pickle
|
||||
_m_pickle_loads.side_effect = pickle.UnpicklingError
|
||||
md = utils.deserialize_metadata("malformed_pickle")
|
||||
self.assertTrue(isinstance(md, dict))
|
||||
|
||||
def test_deserialize_metadata_json(self):
|
||||
_m_json_loads = Mock(return_value={})
|
||||
with patch('swiftonfile.swift.common.utils.json.loads',
|
||||
_m_json_loads):
|
||||
utils.deserialize_metadata("not_json")
|
||||
self.assertFalse(_m_json_loads.called)
|
||||
_m_json_loads.reset_mock()
|
||||
utils.deserialize_metadata("{fake_valid_json}")
|
||||
self.assertTrue(_m_json_loads.called)
|
||||
|
||||
def test_get_etag_empty(self):
|
||||
tf = tempfile.NamedTemporaryFile()
|
||||
hd = utils._get_etag(tf.name)
|
||||
|
Loading…
Reference in New Issue
Block a user