Create manifest and implement sqlite driver
Use class inheritance for the driver in case we need something other than sqlite as time goes on. SQLite is only 10% larger than the custom format used with osdk. When compressed, the SQLite manifest is only fractionally larger than osdk. The time it takes to generate and process is about 2x longer. However, the benefit we get from using common constructs (sqlite) and from tracking and changing the manifest schema is enough for me to think this is the right way to proceed. To generate a manifest with 2^24 objects (64TB disk) run: tools/generate_manifest.py --backupsize 64000 --manifest /pathtosave/manifest This will generate an 809MB file. Co-Authored-By: Sam Yaple <sam@yaple.net> Change-Id: Ic431bfa52b6fcaeb1c6a64cf270cbb36c496335e
This commit is contained in:
parent
7b8e49f3d8
commit
a9d13fc32f
1
.gitignore
vendored
1
.gitignore
vendored
@ -2,3 +2,4 @@
|
||||
.testrepository/
|
||||
ekko.egg-info/
|
||||
*.pyc
|
||||
*.swp
|
||||
|
190
ekko/manifest.py
190
ekko/manifest.py
@ -1,190 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Copyright 2016 Sam Yaple
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Copied and licensed from https://github.com/SamYaple/osdk
|
||||
|
||||
from binascii import crc32
|
||||
from collections import namedtuple
|
||||
from datetime import datetime
|
||||
from struct import pack
|
||||
from struct import unpack
|
||||
from uuid import UUID
|
||||
|
||||
import six
|
||||
|
||||
SIGNATURE = 'd326503ab5ca49adac56c89eb0b8ef08d326503ab5ca49adac56c89eb0b8ef08'
|
||||
|
||||
|
||||
class EkkoShortReadError(Exception):
    """Raised when a read returns fewer bytes than were requested.

    :param size_read: number of bytes actually obtained
    :param size_requested: number of bytes that were asked for
    """

    def __init__(self, size_read, size_requested):
        # Hand both values to the base class so ``args`` keeps exactly what
        # is needed to rebuild the exception (e.g. when pickled).
        super(EkkoShortReadError, self).__init__(size_read, size_requested)
        self.size_read = size_read
        self.size_requested = size_requested

    def __str__(self):
        # Without this the message is just the bare tuple of sizes.
        return 'Short read: got %s of %s requested bytes' % (
            self.size_read, self.size_requested)
|
||||
|
||||
|
||||
class EkkoManifestTooNewError(Exception):
    """Raised when a manifest declares a version newer than is supported."""
|
||||
|
||||
|
||||
class EkkoChecksumError(Exception):
    """Raised when a stored checksum does not match the data read."""
|
||||
|
||||
|
||||
class EkkoInvalidSignatureError(Exception):
    """Raised when a file does not start with the expected signature."""
|
||||
|
||||
|
||||
class Manifest(object):
    """Reader/writer for the binary manifest file.

    File layout, in order (all integers little-endian):

    * signature bytes derived from ``SIGNATURE``
    * ``'<3I'``: version, header checksum, body checksum
    * ``'<i2IQH14s'``: timestamp, incremental, segment_size, sectors,
      number of bases, 14 bytes of padding
    * one 16-byte identifier per base
    * one ``'<IHI2B20s'`` record per segment: segment number, base index,
      incremental, compression, encryption, 20-byte hash

    The header checksum covers the ``'<i2IQH14s'`` block plus the base
    identifiers; the body checksum covers every segment record.

    NOTE(review): the original writer back-filled the body checksum at
    offset 24 while the reader expected a 32-byte signature (which puts
    that field at offset 40).  This version derives the offset from the
    signature length so that reader and writer agree - confirm against
    any manifests already written by other tools.
    """

    # version, header checksum, body checksum
    _CHECKSUMS_FORMAT = '<3I'

    def __init__(self, manifest):
        """Remember the manifest path and start with empty contents.

        :param manifest: path of the manifest file to read or write
        """
        self.manifest = manifest
        self.metadata = {'version': 0}
        self.segments = dict()
        self.hashes = dict()
        self.checksum = 0

    @staticmethod
    def _crc(data, value=0):
        """Return the CRC32 of data as an unsigned 32-bit integer.

        crc32 may return a signed value on Python 2 and an unsigned one
        on Python 3; masking gives the same result on both.
        """
        return crc32(data, value) & 0xffffffff

    @staticmethod
    def _signature_bytes():
        """Return the on-disk signature bytes built from ``SIGNATURE``.

        UUID() only accepts 32 hex digits, so SIGNATURE is converted in
        32-digit chunks and the resulting bytes are concatenated.
        """
        chunks = [SIGNATURE[i:i + 32] for i in range(0, len(SIGNATURE), 32)]
        return b''.join(UUID(chunk).bytes for chunk in chunks)

    def write_manifest(self):
        """Write the header and all segment records to the manifest file."""
        with open(self.manifest, 'wb') as f:
            self.write_header(f)
            self.write_body(f)

    def build_header(self):
        """Pack the header block followed by the base identifiers.

        :returns: tuple of (packed bytes, checksum of those bytes)
        """
        info = self.metadata['info']
        bases = self.metadata['bases']
        data = pack(
            '<i2IQH14s',
            utctimestamp(),
            info.incremental,
            info.segment_size,
            info.sectors,
            len(bases),
            b'\0' * 14
        )
        data += b''.join(bases)
        return data, self._crc(data)

    def write_body(self, f):
        """Write one record per segment, then back-fill the body checksum.

        :param f: binary file object positioned just after the header
        """
        checksum = 0

        for k, v in six.iteritems(self.segments):
            data = pack(
                '<IHI2B20s',
                k,
                v.base,
                v.incremental,
                v.compression,
                v.encryption,
                self.hashes[k]
            )
            f.write(data)
            checksum = self._crc(data, checksum)

        # Backfill the body_checksum: it sits after the signature, the
        # version and the header checksum (two 4-byte fields).
        f.seek(len(self._signature_bytes()) + 8, 0)
        f.write(pack('<I', checksum))

    def write_header(self, f):
        """Write the signature, version, checksums and header block.

        The body checksum is written as 0 here and back-filled by
        write_body() once every segment record has been written.

        :param f: binary file object positioned at the start of the file
        """
        data, checksum = self.build_header()
        f.write(self._signature_bytes())
        f.write(pack(
            self._CHECKSUMS_FORMAT,
            self.metadata['version'],
            checksum,
            0
        ))
        f.write(data)

    def read_data(self, f, size_requested):
        """Read exactly size_requested bytes and fold them into the checksum.

        :param f: binary file object to read from
        :param size_requested: number of bytes that must be returned
        :returns: the bytes read
        :raises EkkoShortReadError: if fewer bytes were available
        """
        data = f.read(size_requested)
        size_read = len(data)
        if size_read != size_requested:
            raise EkkoShortReadError(size_read, size_requested)
        # Chain onto the running value so that several reads checksum the
        # same as one read of the concatenated data.
        self.checksum = self._crc(data, self.checksum)
        return data

    def read_signature(self, f):
        """Consume the file signature and verify it.

        :raises EkkoInvalidSignatureError: if the signature does not match
        """
        expected = self._signature_bytes()
        if expected != self.read_data(f, len(expected)):
            raise EkkoInvalidSignatureError('File signiture is not valid')

    def read_header(self, f):
        """Read and validate the header, filling in ``self.metadata``.

        :param f: binary file object positioned at the start of the file
        :returns: the body checksum stored in the file
        :raises EkkoInvalidSignatureError: on a bad signature
        :raises EkkoManifestTooNewError: if the file version is newer than
            ``self.metadata['version']``
        :raises EkkoChecksumError: if the header checksum does not match
        """
        self.checksum = 0
        Info = namedtuple(
            'Info',
            'timestamp incremental segment_size sectors'
        )

        self.read_signature(f)

        version, header_checksum, body_checksum = unpack(
            self._CHECKSUMS_FORMAT, self.read_data(f, 12)
        )

        if self.metadata['version'] < version:
            raise EkkoManifestTooNewError(
                'The manifest version is newer than I know how to read'
            )

        # The header checksum only covers what build_header() produces, so
        # drop what was accumulated for the signature and checksum fields.
        self.checksum = 0

        self.metadata['info'] = Info._make(
            unpack('<i2IQ', self.read_data(f, 20))
        )

        num_of_bases, _ = unpack('<H14s', self.read_data(f, 16))

        self.metadata['bases'] = [
            self.read_data(f, 16) for _ in range(num_of_bases)
        ]

        if self.checksum != header_checksum:
            raise EkkoChecksumError('Header checksum does not match')

        return body_checksum

    def read_body(self, f, body_checksum):
        """Read every segment record into ``self.segments``/``self.hashes``.

        :param f: binary file object positioned just after the header
        :param body_checksum: expected checksum of all segment records
        :raises EkkoShortReadError: if the file ends part-way through a
            record
        :raises EkkoChecksumError: if the body checksum does not match
        """
        self.checksum = 0
        self.segments = dict()
        self.hashes = dict()
        Segment = namedtuple(
            'Segment',
            'base incremental compression encryption'
        )

        while True:
            try:
                record_start = self.read_data(f, 6)
            except EkkoShortReadError as e:
                if e.size_read != 0:
                    raise
                # Zero bytes at a record boundary is a clean end of file.
                break

            segment, base = unpack('<IH', record_start)
            incremental, compression, encryption = unpack(
                '<I2B', self.read_data(f, 6)
            )

            # NOTE(review): ``base`` is kept as the index into
            # metadata['bases'] so the record can be packed again by
            # write_body(); the original tried to store the base id itself.
            self.segments[segment] = Segment(
                base, incremental, compression, encryption
            )
            self.hashes[segment] = self.read_data(f, 20)

        if self.checksum != body_checksum:
            raise EkkoChecksumError('Body checksum does not match')

    def read_manifest(self):
        """Load the header and all segment records from the manifest file."""
        with open(self.manifest, 'rb') as f:
            self.read_body(f, self.read_header(f))
|
||||
|
||||
|
||||
def utctimestamp():
    """Return the whole number of seconds elapsed since 1970-01-01 (UTC)."""
    epoch = datetime(1970, 1, 1)
    elapsed = datetime.utcnow() - epoch
    # Sub-second precision (elapsed.microseconds) is deliberately dropped.
    return elapsed.days * 24 * 3600 + elapsed.seconds
|
0
ekko/manifest/__init__.py
Normal file
0
ekko/manifest/__init__.py
Normal file
41
ekko/manifest/driver.py
Normal file
41
ekko/manifest/driver.py
Normal file
@ -0,0 +1,41 @@
|
||||
# Copyright 2016 Intel corporation
|
||||
# Copyright 2016 Sam Yaple
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from oslo_utils import importutils
|
||||
|
||||
|
||||
def load_manifest_driver(manifest_location, manifest_driver=None):
    """Instantiate a manifest driver for the given manifest location.

    :param manifest_location: passed to the driver's constructor
    :param manifest_driver: driver name handed to
        importutils.import_object_ns together with the 'ekko.manifest'
        namespace; 'sqlite.SQLiteDriver' is used when this is empty
    :returns: whatever importutils.import_object_ns returns
    """
    driver_name = manifest_driver or 'sqlite.SQLiteDriver'
    return importutils.import_object_ns(
        'ekko.manifest', driver_name, manifest_location)
|
||||
|
||||
|
||||
class ManifestDriver(object):
    """Base class for manifest drivers

    Subclasses override initialize(), put_metadata() and put_segments();
    each of them raises NotImplementedError here.
    """

    def __init__(self, manifest_file):
        """Store the manifest location; no connection is opened yet.

        :param manifest_file: location of the manifest
        """
        self.conn = None
        self.manifest_file = manifest_file

    def initialize(self):
        """Prepare the manifest storage before anything is written."""
        raise NotImplementedError()

    def put_metadata(self, metadata):
        """Store the backup metadata in the manifest."""
        raise NotImplementedError()

    def put_segments(self, segments):
        """Store an iterable of segments in the manifest."""
        raise NotImplementedError()
|
86
ekko/manifest/sqlite.py
Normal file
86
ekko/manifest/sqlite.py
Normal file
@ -0,0 +1,86 @@
|
||||
# Copyright 2016 Intel corporation
|
||||
# Copyright 2016 Sam Yaple
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from contextlib import closing
|
||||
from contextlib import contextmanager
|
||||
import sqlite3
|
||||
|
||||
from ekko.manifest import driver
|
||||
|
||||
|
||||
class SQLiteDriver(driver.ManifestDriver):
    """Manifest driver that keeps metadata and segments in a SQLite file."""

    def initialize(self):
        """Create the ``metadata`` and ``segments`` tables and commit.

        The statements are plain ``CREATE TABLE``, so sqlite3 raises if
        the tables already exist in the manifest file.
        """
        with self.get_conn() as conn:
            with closing(conn.cursor()) as cur:
                cur.executescript("""
                    CREATE TABLE metadata (
                        key TEXT PRIMARY KEY,
                        value TEXT
                    );
                    CREATE TABLE segments (
                        backupset_id BLOB,
                        incremental INTEGER,
                        segment INTEGER PRIMARY KEY,
                        compression TINYINT,
                        encryption TINYINT,
                        segment_hash BLOB
                    );
                """)
            conn.commit()

    @contextmanager
    def get_conn(self):
        """Yield the driver's connection, opening it on first use.

        While the connection is handed out ``self.conn`` is None.  On
        exit, including when the ``with`` body raises, uncommitted work
        is rolled back and the connection is stored again for reuse.
        """
        if self.conn is None:
            self.conn = sqlite3.connect(self.manifest_file)

        conn = self.conn
        self.conn = None

        try:
            yield conn
        finally:
            # Anything the caller committed is unaffected; this only
            # discards work left pending, also on the error path.
            conn.rollback()
            self.conn = conn

    def put_segments(self, segments):
        """Insert every segment into the ``segments`` table and commit.

        :param segments: iterable of objects exposing backupset_id,
            incremental, segment, compression, encryption and
            segment_hash attributes
        """
        # sqlite3.Binary marks the values as BLOBs on Python 2 and 3 alike
        # (the ``buffer`` builtin only exists on Python 2).
        rows = (
            (
                sqlite3.Binary(segment.backupset_id),
                segment.incremental,
                segment.segment,
                segment.compression,
                segment.encryption,
                sqlite3.Binary(segment.segment_hash)
            )
            for segment in segments
        )
        with self.get_conn() as conn:
            with closing(conn.cursor()) as cur:
                cur.executemany(
                    "INSERT INTO segments VALUES (?, ?, ?, ?, ?, ?)",
                    rows
                )
            conn.commit()

    def put_metadata(self, metadata):
        """Insert or replace the metadata key/value rows and commit.

        :param metadata: object exposing incremental, segment_size,
            sectors and timestamp attributes
        """
        with self.get_conn() as conn:
            with closing(conn.cursor()) as cur:
                cur.executemany(
                    "INSERT OR REPLACE INTO metadata VALUES (?, ?)",
                    [
                        ('incremental', metadata.incremental),
                        ('segment_size', metadata.segment_size),
                        ('sectors', metadata.sectors),
                        ('timestamp', metadata.timestamp)
                    ]
                )

            conn.commit()
|
41
ekko/manifest/structure.py
Normal file
41
ekko/manifest/structure.py
Normal file
@ -0,0 +1,41 @@
|
||||
# Copyright 2016 Sam Yaple
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import time
|
||||
from uuid import uuid4 as uuid
|
||||
|
||||
|
||||
class Metadata(object):
    """Describes one backup: its size, position in the set and identity.

    :param incremental: incremental number of this backup
    :param sectors: number of sectors in the backed up device
    :param segment_size: size of one segment in bytes; defaults to 4MiB
    :param timestamp: creation time; defaults to the current time.time()
    :param backupset_id: id of the backup set as bytes; defaults to the
        bytes of a freshly generated uuid4
    """

    DEFAULT_SEGMENT_SIZE = 4 * 1024 ** 2  # 4MiB

    def __init__(self, incremental, sectors, segment_size=None,
                 timestamp=None, backupset_id=None):
        self.timestamp = timestamp if timestamp else time.time()
        self.sectors = sectors
        self.incremental = incremental
        # Honour the caller's value; previously it was silently ignored
        # and 4MiB was always used.
        self.segment_size = (
            segment_size if segment_size else self.DEFAULT_SEGMENT_SIZE
        )
        self.backupset_id = backupset_id if backupset_id else uuid().bytes
|
||||
|
||||
|
||||
class Segment(object):
    """One manifest entry describing a single stored segment.

    ``__slots__`` is declared so instances carry no per-instance dict.
    """

    __slots__ = ['backupset_id', 'incremental', 'segment',
                 'compression', 'encryption', 'segment_hash']

    def __init__(self, backupset_id, incremental, segment,
                 compression, encryption, segment_hash):
        values = (backupset_id, incremental, segment,
                  compression, encryption, segment_hash)
        # The slot names are listed in the same order as the arguments.
        for name, value in zip(Segment.__slots__, values):
            setattr(self, name, value)
|
@ -4,3 +4,4 @@
|
||||
|
||||
pbr>=1.6
|
||||
six>=1.9.0
|
||||
oslo.utils>=3.2.0 # Apache-2.0
|
||||
|
@ -1,112 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Copyright 2016 Sam Yaple
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Copied and licensed from https://github.com/SamYaple/osdk
|
||||
|
||||
|
||||
import argparse
|
||||
from collections import namedtuple
|
||||
# from hashlib import sha1
|
||||
import os
|
||||
import sys
|
||||
from uuid import uuid4 as uuid
|
||||
|
||||
from ekko import manifest
|
||||
from six.moves import range
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command line and return the argparse namespace."""
    options = (
        ('--backupsize', dict(
            required=True, type=int,
            help='Size of backup for manifest gen (size in GB)')),
        ('--manifest', dict(
            required=True,
            help='manifest file')),
        ('--cbt', dict(
            required=False,
            help='change block tracking info')),
    )

    cli = argparse.ArgumentParser(description='Backup Block Device')
    for flag, settings in options:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
|
||||
|
||||
|
||||
def read_segments(segments, size, backup):
    """Fill backup.segments and backup.hashes with generated entries.

    Both attributes are replaced with fresh dicts keyed by segment.  Each
    segment entry points at the last base in backup.metadata['bases'],
    carries the backup's incremental number and uses 0 for compression
    and encryption.  ``size`` is not used.
    """
    record = namedtuple(
        'Segment',
        ['base', 'incremental', 'compression', 'encryption']
    )
    entries = backup.segments = {}
    digests = backup.hashes = {}

    for number in segments:
        # Generate manifest info for each object in backup
        entries[number] = record(
            base=len(backup.metadata['bases']) - 1,
            incremental=backup.metadata['info'].incremental,
            compression=0,
            encryption=0
        )
        # Random string simulating hash sha
        digests[number] = os.urandom(20)
|
||||
|
||||
|
||||
def generate_mem_struct(segments, size, backup):
    """Build an in-memory mapping of one fixed id to per-segment dicts.

    Returns a dict with a single hard-coded key whose value is a list of
    one-item dicts, each mapping a segment to the backup's incremental
    number.  ``size`` is not used.
    """
    key = '96153320-980b-4b5e-958f-ea57812b280d'
    return {
        key: [
            {seg: backup.metadata['info'].incremental}
            for seg in segments
        ]
    }
|
||||
|
||||
|
||||
def check_manifest(manifest_file):
    """Return True when manifest_file is the path of an existing file."""
    is_existing_file = os.path.isfile(manifest_file)
    return is_existing_file
|
||||
|
||||
|
||||
def main():
    """Build in-memory manifest data for a backup of the requested size.

    Exits early, after printing a message, when the manifest file already
    exists.  Returns None either way.
    """
    args = parse_args()
    segment_size = 4 * 1024**2  # 4MiB
    size_of_disk = args.backupsize * 1024**3  # Convert GB to B
    # Floor division keeps these exact integers however large the disk is.
    num_of_sectors = size_of_disk // 512
    num_of_segments = size_of_disk // segment_size
    incremental = 0

    if check_manifest(args.manifest):
        print('manifest exists; exiting')
        return

    Info = namedtuple(
        'Info',
        'timestamp incremental segment_size sectors'
    )

    backup = manifest.Manifest(args.manifest)

    backup.metadata['info'] = Info(
        manifest.utctimestamp(),
        incremental,
        segment_size,
        num_of_sectors,
    )

    backup.metadata['bases'] = [uuid().bytes]

    # range() already excludes its end value, so the former
    # ``num_of_segments - 1`` bound dropped the last segment.
    # read_segments(range(num_of_segments), segment_size, backup)
    generate_mem_struct(range(num_of_segments), segment_size, backup)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
@ -18,14 +18,12 @@
|
||||
|
||||
|
||||
import argparse
|
||||
from collections import namedtuple
|
||||
# from hashlib import sha1
|
||||
import os
|
||||
import sys
|
||||
from uuid import uuid4 as uuid
|
||||
|
||||
sys.path.insert(0, '/root/ekko/')
|
||||
from ekko import manifest
|
||||
from ekko.manifest import driver as manifest_driver
|
||||
from ekko.manifest import structure as manifest_structure
|
||||
from six.moves import range
|
||||
|
||||
|
||||
@ -40,24 +38,16 @@ def parse_args():
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def read_segments(segments, size, backup):
|
||||
backup.segments = dict()
|
||||
backup.hashes = dict()
|
||||
Segment = namedtuple(
|
||||
'Segment',
|
||||
'base incremental compression encryption'
|
||||
)
|
||||
|
||||
def read_segments(segments, metadata):
|
||||
for segment in segments:
|
||||
# Generate manifest info for each object in backup
|
||||
backup.segments[segment] = Segment(
|
||||
len(backup.metadata['bases']) - 1,
|
||||
backup.metadata['info'].incremental,
|
||||
yield manifest_structure.Segment(
|
||||
metadata.backupset_id,
|
||||
metadata.incremental,
|
||||
segment,
|
||||
0,
|
||||
0
|
||||
0,
|
||||
os.urandom(20)
|
||||
)
|
||||
# Random string simulating hash sha
|
||||
backup.hashes[segment] = os.urandom(20)
|
||||
|
||||
|
||||
def check_manifest(manifest_file):
|
||||
@ -66,35 +56,24 @@ def check_manifest(manifest_file):
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
segment_size = 4 * 1024**2 # 4MiB
|
||||
size_of_disk = args.backupsize * 1024**3 # Convert GB to B
|
||||
num_of_sectors = int(size_of_disk / 512)
|
||||
num_of_segments = int(size_of_disk / segment_size)
|
||||
incremental = 0
|
||||
|
||||
Info = namedtuple(
|
||||
'Info',
|
||||
'timestamp incremental segment_size sectors'
|
||||
)
|
||||
|
||||
if check_manifest(args.manifest):
|
||||
print('manifest exists; exiting')
|
||||
return
|
||||
|
||||
backup = manifest.Manifest(args.manifest)
|
||||
manifest = manifest_driver.load_manifest_driver(args.manifest)
|
||||
|
||||
backup.metadata['info'] = Info(
|
||||
manifest.utctimestamp(),
|
||||
incremental,
|
||||
segment_size,
|
||||
num_of_sectors,
|
||||
)
|
||||
size_of_disk = args.backupsize * 1024**3 # Convert GB to B
|
||||
num_of_sectors = int(size_of_disk / 512)
|
||||
incremental = 0
|
||||
metadata = manifest_structure.Metadata(incremental, sectors=num_of_sectors)
|
||||
|
||||
backup.metadata['bases'] = [uuid().bytes]
|
||||
manifest.initialize()
|
||||
manifest.put_metadata(metadata)
|
||||
|
||||
read_segments(range(0, num_of_segments - 1), segment_size, backup)
|
||||
num_of_segments = int(size_of_disk / metadata.segment_size)
|
||||
segments = read_segments(range(0, num_of_segments - 1), metadata)
|
||||
|
||||
backup.write_manifest()
|
||||
manifest.put_segments(segments)
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
Loading…
Reference in New Issue
Block a user