diff --git a/doc/source/operation.rst b/doc/source/operation.rst
index 3d3ac1ca0..ccd7d53fc 100644
--- a/doc/source/operation.rst
+++ b/doc/source/operation.rst
@@ -202,6 +202,31 @@ alien-image-list
 .. program-output:: nodepool alien-image-list --help
    :nostderr:
 
+Image builds and uploads can take a lot of time, so there is a pair of
+commands to export and import the image build and upload metadata from
+Nodepool's internal storage in ZooKeeper. These can be used to back up
+and restore data in case the ZooKeeper cluster is lost. Note that
+these commands do not save or restore the actual image data, only the
+records in ZooKeeper. If the data are important, consider backing
+them up as well. Even without the local image builds, restoring the
+image metadata will allow nodepool-launcher to continue to operate
+while new builds are created.
+
+These commands do not export or import any node information. It is
+expected that any existing nodes will be detected as leaked and
+automatically deleted if the ZooKeeper storage is reset.
+
+export-image-data
+^^^^^^^^^^^^^^^^^
+.. program-output:: nodepool export-image-data --help
+   :nostderr:
+
+import-image-data
+^^^^^^^^^^^^^^^^^
+.. program-output:: nodepool import-image-data --help
+   :nostderr:
+
+
 Removing a Provider
 -------------------
 
diff --git a/nodepool/cmd/nodepoolcmd.py b/nodepool/cmd/nodepoolcmd.py
index 459ae7def..457725363 100644
--- a/nodepool/cmd/nodepoolcmd.py
+++ b/nodepool/cmd/nodepoolcmd.py
@@ -12,7 +12,9 @@
 # License for the specific language governing permissions and limitations
 # under the License.
 
+import json
 import logging.config
+import os
 
 from prettytable import PrettyTable
 
@@ -144,6 +146,24 @@ class NodePoolCmd(NodepoolApp):
         cmd_image_unpause.set_defaults(func=self.image_unpause)
         cmd_image_unpause.add_argument('image', help='image name')
 
+        cmd_export_image_data = subparsers.add_parser(
+            'export-image-data',
+            help='Export image data from ZooKeeper')
+        cmd_export_image_data.add_argument(
+            'path',
+            type=str,
+            help='Export file path')
+        cmd_export_image_data.set_defaults(func=self.export_image_data)
+
+        cmd_import_image_data = subparsers.add_parser(
+            'import-image-data',
+            help='Import image data to ZooKeeper')
+        cmd_import_image_data.add_argument(
+            'path',
+            type=str,
+            help='Import file path')
+        cmd_import_image_data.set_defaults(func=self.import_image_data)
+
         return parser
 
     def setup_logging(self):
@@ -368,6 +388,24 @@ class NodePoolCmd(NodepoolApp):
         image_name = self.args.image
         self.zk.setImagePaused(image_name, False)
 
+    def export_image_data(self):
+        """Dump the image build and upload records in ZooKeeper to a file.
+
+        The records are written as JSON to the ``path`` argument.  The
+        file is created with mode 0600 if it does not exist, and is
+        truncated if it does so that a shorter export never leaves
+        stale bytes from a previous one behind.
+        """
+        data = self.zk.exportImageData()
+        flags = os.O_CREAT | os.O_WRONLY | os.O_TRUNC
+        with open(os.open(self.args.path, flags, 0o600), 'w') as f:
+            json.dump(data, f)
+
+    def import_image_data(self):
+        """Create ZooKeeper records from a file made by export-image-data."""
+        with open(self.args.path, 'r') as f:
+            self.zk.importImageData(json.load(f))
+
     def _wait_for_threads(self, threads):
         for t in threads:
             if t:
@@ -393,7 +431,8 @@ class NodePoolCmd(NodepoolApp):
                                  'image-delete', 'alien-image-list',
                                  'list', 'delete',
                                  'request-list', 'info', 'erase',
-                                 'image-pause', 'image-unpause'):
+                                 'image-pause', 'image-unpause',
+                                 'export-image-data', 'import-image-data'):
             self.zk = zk.ZooKeeper(enable_cache=False)
             self.zk.connect(
                 list(config.zookeeper_servers.values()),
diff --git a/nodepool/tests/__init__.py b/nodepool/tests/__init__.py
index 605ca5135..6a735781a 100644
--- a/nodepool/tests/__init__.py
+++ b/nodepool/tests/__init__.py
@@ -655,6 +655,16 @@ class DBTestCase(BaseTestCase):
         for child in self.zk.client.get_children(node):
             self.printZKTree(join(node, child))
 
+    def getZKTree(self, path, ret=None):
+        """Return a dict of {znode path: data} for every node below path"""
+        if ret is None:
+            ret = {}
+        for key in self.zk.client.get_children(path):
+            subpath = os.path.join(path, key)
+            ret[subpath] = self.zk.client.get(subpath)[0]
+            self.getZKTree(subpath, ret)
+        return ret
+
 
 class IntegrationTestCase(DBTestCase):
     def setUpFakes(self):
diff --git a/nodepool/tests/unit/test_commands.py b/nodepool/tests/unit/test_commands.py
index ad79f4335..cbddfcc67 100644
--- a/nodepool/tests/unit/test_commands.py
+++ b/nodepool/tests/unit/test_commands.py
@@ -16,6 +16,7 @@
 import logging
 import os.path
 import sys  # noqa making sure its available for monkey patching
+import tempfile
 
 import fixtures
 import mock
@@ -399,3 +400,37 @@ class TestNodepoolCMD(tests.DBTestCase):
         nodes = self.waitForNodes('fake-label')
         self.assertEqual(1, len(nodes))
         self.assertEqual(p1_nodes[0], nodes[0])
+
+    def test_export_image_data(self):
+        configfile = self.setup_config('node.yaml')
+        builder = self.useBuilder(configfile)
+        pool = self.useNodepool(configfile, watermark_sleep=1)
+        pool.start()
+        self.waitForImage('fake-provider', 'fake-image')
+        self.waitForNodes('fake-label')
+
+        pool.stop()
+        for worker in builder._upload_workers:
+            worker.shutdown()
+            worker.join()
+        builder.stop()
+        # Save a copy of the data in ZK
+        old_data = self.getZKTree('/nodepool/images')
+        # We aren't backing up the lock data
+        old_data.pop('/nodepool/images/fake-image/builds/0000000001'
+                     '/providers/fake-provider/images/lock')
+        old_data.pop('/nodepool/images/fake-image/builds/lock')
+
+        with tempfile.NamedTemporaryFile() as tf:
+            self.patch_argv(
+                "-c", configfile, 'export-image-data', tf.name)
+            nodepoolcmd.main()
+            # Delete data from ZK
+            self.zk.client.delete('/nodepool', recursive=True)
+
+            self.patch_argv(
+                "-c", configfile, 'import-image-data', tf.name)
+            nodepoolcmd.main()
+
+        new_data = self.getZKTree('/nodepool/images')
+        self.assertEqual(new_data, old_data)
diff --git a/nodepool/zk.py b/nodepool/zk.py
index 965189ce8..a5fe45388 100644
--- a/nodepool/zk.py
+++ b/nodepool/zk.py
@@ -2479,3 +2479,46 @@ class ZooKeeper(object):
     def getStatsElection(self, identifier):
         path = self._electionPath('stats')
         return Election(self.client, path, identifier)
+
+    def exportImageData(self):
+        '''
+        Export the DIB image and upload data from ZK for backup purposes.
+        '''
+        ret = {}
+        for image_name in self.getImageNames():
+            paused = self.getImagePaused(image_name)
+            if paused:
+                paused_path = self._imagePausePath(image_name)
+                ret[paused_path] = ''
+            for build_no in self.getBuildNumbers(image_name):
+                build_path = self._imageBuildsPath(image_name) + "/" + build_no
+                try:
+                    build_data, stat = self.client.get(build_path)
+                except kze.NoNodeError:
+                    continue
+                ret[build_path] = build_data.decode('utf8')
+                for provider_name in self.getBuildProviders(image_name,
+                                                            build_no):
+                    for upload_no in self.getImageUploadNumbers(
+                            image_name, build_no, provider_name):
+                        upload_path = self._imageUploadPath(
+                            image_name, build_no, provider_name) + "/"
+                        upload_path += upload_no
+                        try:
+                            upload_data, stat = self.client.get(upload_path)
+                        except kze.NoNodeError:
+                            continue
+                        ret[upload_path] = upload_data.decode('utf8')
+        return ret
+
+    def importImageData(self, import_data):
+        '''Import the DIB image and upload data to ZK.
+
+        This makes no guarantees about locking; it is expected to be
+        run on a quiescent system with no daemons running.
+
+        '''
+        for path, data in import_data.items():
+            self.client.create(path,
+                               value=data.encode('utf8'),
+                               makepath=True)
diff --git a/releasenotes/notes/export-image-data-3c46d79d5b5fd5bd.yaml b/releasenotes/notes/export-image-data-3c46d79d5b5fd5bd.yaml
new file mode 100644
index 000000000..536b348dd
--- /dev/null
+++ b/releasenotes/notes/export-image-data-3c46d79d5b5fd5bd.yaml
@@ -0,0 +1,6 @@
+---
+features:
+  - |
+    Two new nodepool commands, `nodepool export-image-data` and
+    `nodepool import-image-data`, have been added to back up the image
+    data in ZooKeeper to a file in case the ZooKeeper cluster is lost.