diff --git a/doc/source/log-roles.rst b/doc/source/log-roles.rst index 6653b869f..908da080c 100644 --- a/doc/source/log-roles.rst +++ b/doc/source/log-roles.rst @@ -14,4 +14,5 @@ Log Roles .. zuul:autorole:: set-zuul-log-path-fact .. zuul:autorole:: upload-logs .. zuul:autorole:: upload-logs-gcs +.. zuul:autorole:: upload-logs-s3 .. zuul:autorole:: upload-logs-swift diff --git a/roles/upload-logs-s3/README.rst b/roles/upload-logs-s3/README.rst new file mode 100644 index 000000000..f49f2f50d --- /dev/null +++ b/roles/upload-logs-s3/README.rst @@ -0,0 +1,66 @@ +Upload logs to S3 + +Before using this role, create at least one bucket and set up +appropriate access controls or lifecycle events. This role will not +automatically create buckets. + +This role requires the ``boto3`` Python package to be +installed in the Ansible environment on the Zuul executor. + +**Role Variables** + +.. zuul:rolevar:: zuul_site_upload_logs + :default: true + + Controls when logs are uploaded. true, the default, means always + upload logs. false means never upload logs. 'failure' means to only + upload logs when the job has failed. + + .. note:: Intended to be set by admins via site-variables. + +.. zuul:rolevar:: zuul_log_partition + :default: false + + If set to true, then the first component of the log path will be + removed from the object name and added to the bucket name, so that + logs for different changes are distributed across a large number of + buckets. + +.. zuul:rolevar:: zuul_log_bucket + + This role *will not* create buckets which do not already exist. If + partitioning is not enabled, this is the name of the bucket which + will be used. If partitioning is enabled, then this will be used + as the prefix for the bucket name which will be separated from the + partition name by an underscore. For example, "logs_42" would be + the bucket name for partition 42. + + Note that you will want to set this to a value that uniquely + identifies your Zuul installation. + +.. 
zuul:rolevar:: zuul_log_path + :default: Generated by the role `set-zuul-log-path-fact` + + Prepend this path to the object names when uploading. + +.. zuul:rolevar:: zuul_log_create_indexes + :default: true + + Whether to create `index.html` files with directory indexes. + +.. zuul:rolevar:: zuul_log_path_shard_build + :default: false + + This var is consumed by set-zuul-log-path-fact which + upload-logs-s3 calls into. If you set this you will get log paths + prefixed with the first three characters of the build uuid. This + will improve log file sharding. + + More details can be found at + :zuul:rolevar:`set-zuul-log-path-fact.zuul_log_path_shard_build`. + + +.. zuul:rolevar:: upload_logs_s3_endpoint + + The endpoint to use when uploading logs to an S3-compatible service. + By default this will be automatically constructed by boto, but it should be set when working with a non-AWS hosted S3 service. diff --git a/roles/upload-logs-s3/__init__.py b/roles/upload-logs-s3/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/roles/upload-logs-s3/defaults/main.yaml b/roles/upload-logs-s3/defaults/main.yaml new file mode 100644 index 000000000..65ec9198b --- /dev/null +++ b/roles/upload-logs-s3/defaults/main.yaml @@ -0,0 +1,3 @@ +zuul_log_partition: false +zuul_log_create_indexes: true + diff --git a/roles/upload-logs-s3/library/__init__.py b/roles/upload-logs-s3/library/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/roles/upload-logs-s3/library/test-fixtures/artifacts/foo.tar.gz b/roles/upload-logs-s3/library/test-fixtures/artifacts/foo.tar.gz new file mode 100644 index 000000000..9b1579d90 Binary files /dev/null and b/roles/upload-logs-s3/library/test-fixtures/artifacts/foo.tar.gz differ diff --git a/roles/upload-logs-s3/library/test-fixtures/artifacts/foo.tgz b/roles/upload-logs-s3/library/test-fixtures/artifacts/foo.tgz new file mode 100644 index 000000000..ca9fccb99 Binary files /dev/null and 
b/roles/upload-logs-s3/library/test-fixtures/artifacts/foo.tgz differ diff --git a/roles/upload-logs-s3/library/test-fixtures/auth.json b/roles/upload-logs-s3/library/test-fixtures/auth.json new file mode 100644 index 000000000..8df1606b6 --- /dev/null +++ b/roles/upload-logs-s3/library/test-fixtures/auth.json @@ -0,0 +1 @@ +{"access_token": "something", "expires_in": 3599, "token_type": "Bearer"} diff --git a/roles/upload-logs-s3/library/test-fixtures/links/controller/service_log.txt b/roles/upload-logs-s3/library/test-fixtures/links/controller/service_log.txt new file mode 100644 index 000000000..e69de29bb diff --git a/roles/upload-logs-s3/library/test-fixtures/links/job-output.json b/roles/upload-logs-s3/library/test-fixtures/links/job-output.json new file mode 100644 index 000000000..c8cd7e92d --- /dev/null +++ b/roles/upload-logs-s3/library/test-fixtures/links/job-output.json @@ -0,0 +1 @@ +{"test": "foo"} diff --git a/roles/upload-logs-s3/library/test-fixtures/links/symlink_loop/placeholder b/roles/upload-logs-s3/library/test-fixtures/links/symlink_loop/placeholder new file mode 100644 index 000000000..e69de29bb diff --git a/roles/upload-logs-s3/library/test-fixtures/logs/controller/compressed.gz b/roles/upload-logs-s3/library/test-fixtures/logs/controller/compressed.gz new file mode 100644 index 000000000..4dc3bad66 Binary files /dev/null and b/roles/upload-logs-s3/library/test-fixtures/logs/controller/compressed.gz differ diff --git a/roles/upload-logs-s3/library/test-fixtures/logs/controller/cpu-load.svg b/roles/upload-logs-s3/library/test-fixtures/logs/controller/cpu-load.svg new file mode 100644 index 000000000..01a940a25 --- /dev/null +++ b/roles/upload-logs-s3/library/test-fixtures/logs/controller/cpu-load.svg @@ -0,0 +1,3 @@ + + diff --git a/roles/upload-logs-s3/library/test-fixtures/logs/controller/journal.xz b/roles/upload-logs-s3/library/test-fixtures/logs/controller/journal.xz new file mode 100644 index 000000000..ea28d9e05 Binary files /dev/null 
and b/roles/upload-logs-s3/library/test-fixtures/logs/controller/journal.xz differ diff --git a/roles/upload-logs-s3/library/test-fixtures/logs/controller/service_log.txt b/roles/upload-logs-s3/library/test-fixtures/logs/controller/service_log.txt new file mode 100644 index 000000000..e69de29bb diff --git a/roles/upload-logs-s3/library/test-fixtures/logs/controller/subdir/foo::3.txt b/roles/upload-logs-s3/library/test-fixtures/logs/controller/subdir/foo::3.txt new file mode 100644 index 000000000..384ce7d7f --- /dev/null +++ b/roles/upload-logs-s3/library/test-fixtures/logs/controller/subdir/foo::3.txt @@ -0,0 +1,2 @@ +This is a plan text file with a funny name. +The index links should escape the :'s. diff --git a/roles/upload-logs-s3/library/test-fixtures/logs/controller/subdir/subdir.txt b/roles/upload-logs-s3/library/test-fixtures/logs/controller/subdir/subdir.txt new file mode 100644 index 000000000..e69de29bb diff --git a/roles/upload-logs-s3/library/test-fixtures/logs/controller/syslog b/roles/upload-logs-s3/library/test-fixtures/logs/controller/syslog new file mode 100644 index 000000000..e69de29bb diff --git a/roles/upload-logs-s3/library/test-fixtures/logs/job-output.json b/roles/upload-logs-s3/library/test-fixtures/logs/job-output.json new file mode 100644 index 000000000..c8cd7e92d --- /dev/null +++ b/roles/upload-logs-s3/library/test-fixtures/logs/job-output.json @@ -0,0 +1 @@ +{"test": "foo"} diff --git a/roles/upload-logs-s3/library/test-fixtures/logs/zuul-info/inventory.yaml b/roles/upload-logs-s3/library/test-fixtures/logs/zuul-info/inventory.yaml new file mode 100644 index 000000000..e69de29bb diff --git a/roles/upload-logs-s3/library/test-fixtures/logs/zuul-info/zuul-info.controller.txt b/roles/upload-logs-s3/library/test-fixtures/logs/zuul-info/zuul-info.controller.txt new file mode 100644 index 000000000..e69de29bb diff --git a/roles/upload-logs-s3/library/test_zuul_s3_upload.py b/roles/upload-logs-s3/library/test_zuul_s3_upload.py new file mode 
100644 index 000000000..4a4075bbb --- /dev/null +++ b/roles/upload-logs-s3/library/test_zuul_s3_upload.py @@ -0,0 +1,393 @@ +# See the License for the specific language governing permissions and +# limitations under the License. + +# Make coding more python3-ish +from __future__ import (absolute_import, division, print_function) +__metaclass__ = type + +import os +import testtools +import time +import stat +import fixtures + +from bs4 import BeautifulSoup +from .zuul_s3_upload import FileList, Indexer, FileDetail + + +FIXTURE_DIR = os.path.join(os.path.dirname(__file__), + 'test-fixtures') + + +class SymlinkFixture(fixtures.Fixture): + links = [ + ('bad_symlink', '/etc'), + ('bad_symlink_file', '/etc/issue'), + ('good_symlink', 'controller'), + ('recursive_symlink', '.'), + ('symlink_file', 'job-output.json'), + ('symlink_loop_a', 'symlink_loop'), + ('symlink_loop/symlink_loop_b', '..'), + ] + + def _setUp(self): + for (src, target) in self.links: + path = os.path.join(FIXTURE_DIR, 'links', src) + os.symlink(target, path) + self.addCleanup(os.unlink, path) + + +class TestFileList(testtools.TestCase): + + def assert_files(self, result, files): + self.assertEqual(len(result), len(files)) + for expected, received in zip(files, result): + self.assertEqual(expected[0], received.relative_path) + if expected[0] and expected[0][-1] == '/': + efilename = os.path.split( + os.path.dirname(expected[0]))[1] + '/' + else: + efilename = os.path.split(expected[0])[1] + self.assertEqual(efilename, received.filename) + if received.folder: + if received.full_path is not None and expected[0] != '': + self.assertTrue(os.path.isdir(received.full_path)) + else: + self.assertTrue(os.path.isfile(received.full_path)) + self.assertEqual(expected[1], received.mimetype) + self.assertEqual(expected[2], received.encoding) + + def find_file(self, file_list, path): + for f in file_list: + if f.relative_path == path: + return f + + def test_single_dir_trailing_slash(self): + '''Test a single 
directory with a trailing slash''' + + with FileList() as fl: + fl.add(os.path.join(FIXTURE_DIR, 'logs/')) + self.assert_files(fl, [ + ('', 'application/directory', None), + ('controller', 'application/directory', None), + ('zuul-info', 'application/directory', None), + ('job-output.json', 'application/json', None), + ('controller/subdir', 'application/directory', None), + ('controller/compressed.gz', 'text/plain', 'gzip'), + ('controller/cpu-load.svg', 'image/svg+xml', None), + ('controller/journal.xz', 'text/plain', 'xz'), + ('controller/service_log.txt', 'text/plain', None), + ('controller/syslog', 'text/plain', None), + ('controller/subdir/foo::3.txt', 'text/plain', None), + ('controller/subdir/subdir.txt', 'text/plain', None), + ('zuul-info/inventory.yaml', 'text/plain', None), + ('zuul-info/zuul-info.controller.txt', 'text/plain', None), + ]) + + def test_single_dir(self): + '''Test a single directory without a trailing slash''' + with FileList() as fl: + fl.add(os.path.join(FIXTURE_DIR, 'logs')) + self.assert_files(fl, [ + ('', 'application/directory', None), + ('logs', 'application/directory', None), + ('logs/controller', 'application/directory', None), + ('logs/zuul-info', 'application/directory', None), + ('logs/job-output.json', 'application/json', None), + ('logs/controller/subdir', 'application/directory', None), + ('logs/controller/compressed.gz', 'text/plain', 'gzip'), + ('logs/controller/cpu-load.svg', 'image/svg+xml', None), + ('logs/controller/journal.xz', 'text/plain', 'xz'), + ('logs/controller/service_log.txt', 'text/plain', None), + ('logs/controller/syslog', 'text/plain', None), + ('logs/controller/subdir/foo::3.txt', 'text/plain', None), + ('logs/controller/subdir/subdir.txt', 'text/plain', None), + ('logs/zuul-info/inventory.yaml', 'text/plain', None), + ('logs/zuul-info/zuul-info.controller.txt', + 'text/plain', None), + ]) + + def test_single_file(self): + '''Test a single file''' + with FileList() as fl: + 
fl.add(os.path.join(FIXTURE_DIR, + 'logs/zuul-info/inventory.yaml')) + self.assert_files(fl, [ + ('', 'application/directory', None), + ('inventory.yaml', 'text/plain', None), + ]) + + def test_symlinks(self): + '''Test symlinks''' + with FileList() as fl: + self.useFixture(SymlinkFixture()) + fl.add(os.path.join(FIXTURE_DIR, 'links/')) + self.assert_files(fl, [ + ('', 'application/directory', None), + ('controller', 'application/directory', None), + ('good_symlink', 'application/directory', None), + ('recursive_symlink', 'application/directory', None), + ('symlink_loop', 'application/directory', None), + ('symlink_loop_a', 'application/directory', None), + ('job-output.json', 'application/json', None), + ('symlink_file', 'text/plain', None), + ('controller/service_log.txt', 'text/plain', None), + ('symlink_loop/symlink_loop_b', 'application/directory', None), + ('symlink_loop/placeholder', 'text/plain', None), + ]) + + def test_index_files(self): + '''Test index generation''' + with FileList() as fl: + fl.add(os.path.join(FIXTURE_DIR, 'logs')) + ix = Indexer(fl) + ix.make_indexes() + + self.assert_files(fl, [ + ('', 'application/directory', None), + ('index.html', 'text/html', None), + ('logs', 'application/directory', None), + ('logs/controller', 'application/directory', None), + ('logs/zuul-info', 'application/directory', None), + ('logs/job-output.json', 'application/json', None), + ('logs/index.html', 'text/html', None), + ('logs/controller/subdir', 'application/directory', None), + ('logs/controller/compressed.gz', 'text/plain', 'gzip'), + ('logs/controller/cpu-load.svg', 'image/svg+xml', None), + ('logs/controller/journal.xz', 'text/plain', 'xz'), + ('logs/controller/service_log.txt', 'text/plain', None), + ('logs/controller/syslog', 'text/plain', None), + ('logs/controller/index.html', 'text/html', None), + ('logs/controller/subdir/foo::3.txt', 'text/plain', None), + ('logs/controller/subdir/subdir.txt', 'text/plain', None), + 
('logs/controller/subdir/index.html', 'text/html', None), + ('logs/zuul-info/inventory.yaml', 'text/plain', None), + ('logs/zuul-info/zuul-info.controller.txt', + 'text/plain', None), + ('logs/zuul-info/index.html', 'text/html', None), + ]) + + top_index = self.find_file(fl, 'index.html') + page = open(top_index.full_path).read() + page = BeautifulSoup(page, 'html.parser') + rows = page.find_all('tr')[1:] + + self.assertEqual(len(rows), 1) + + self.assertEqual(rows[0].find('a').get('href'), 'logs/index.html') + self.assertEqual(rows[0].find('a').text, 'logs/') + + subdir_index = self.find_file( + fl, 'logs/controller/subdir/index.html') + page = open(subdir_index.full_path).read() + page = BeautifulSoup(page, 'html.parser') + rows = page.find_all('tr')[1:] + self.assertEqual(rows[0].find('a').get('href'), '../index.html') + self.assertEqual(rows[0].find('a').text, '../') + + # Test proper escaping of files with funny names + self.assertEqual(rows[1].find('a').get('href'), 'foo%3A%3A3.txt') + self.assertEqual(rows[1].find('a').text, 'foo::3.txt') + # Test files without escaping + self.assertEqual(rows[2].find('a').get('href'), 'subdir.txt') + self.assertEqual(rows[2].find('a').text, 'subdir.txt') + + def test_index_files_trailing_slash(self): + '''Test index generation with a trailing slash''' + with FileList() as fl: + fl.add(os.path.join(FIXTURE_DIR, 'logs/')) + ix = Indexer(fl) + ix.make_indexes() + + self.assert_files(fl, [ + ('', 'application/directory', None), + ('controller', 'application/directory', None), + ('zuul-info', 'application/directory', None), + ('job-output.json', 'application/json', None), + ('index.html', 'text/html', None), + ('controller/subdir', 'application/directory', None), + ('controller/compressed.gz', 'text/plain', 'gzip'), + ('controller/cpu-load.svg', 'image/svg+xml', None), + ('controller/journal.xz', 'text/plain', 'xz'), + ('controller/service_log.txt', 'text/plain', None), + ('controller/syslog', 'text/plain', None), + 
('controller/index.html', 'text/html', None), + ('controller/subdir/foo::3.txt', 'text/plain', None), + ('controller/subdir/subdir.txt', 'text/plain', None), + ('controller/subdir/index.html', 'text/html', None), + ('zuul-info/inventory.yaml', 'text/plain', None), + ('zuul-info/zuul-info.controller.txt', 'text/plain', None), + ('zuul-info/index.html', 'text/html', None), + ]) + + top_index = self.find_file(fl, 'index.html') + page = open(top_index.full_path).read() + page = BeautifulSoup(page, 'html.parser') + rows = page.find_all('tr')[1:] + + self.assertEqual(len(rows), 3) + + self.assertEqual(rows[0].find('a').get('href'), + 'controller/index.html') + self.assertEqual(rows[0].find('a').text, 'controller/') + + self.assertEqual(rows[1].find('a').get('href'), + 'zuul-info/index.html') + self.assertEqual(rows[1].find('a').text, 'zuul-info/') + + subdir_index = self.find_file(fl, 'controller/subdir/index.html') + page = open(subdir_index.full_path).read() + page = BeautifulSoup(page, 'html.parser') + rows = page.find_all('tr')[1:] + self.assertEqual(rows[0].find('a').get('href'), '../index.html') + self.assertEqual(rows[0].find('a').text, '../') + + # Test proper escaping of files with funny names + self.assertEqual(rows[1].find('a').get('href'), 'foo%3A%3A3.txt') + self.assertEqual(rows[1].find('a').text, 'foo::3.txt') + # Test files without escaping + self.assertEqual(rows[2].find('a').get('href'), 'subdir.txt') + self.assertEqual(rows[2].find('a').text, 'subdir.txt') + + def test_topdir_parent_link(self): + '''Test index generation creates topdir parent link''' + with FileList() as fl: + fl.add(os.path.join(FIXTURE_DIR, 'logs/')) + ix = Indexer(fl) + ix.make_indexes( + create_parent_links=True, + create_topdir_parent_link=True) + + self.assert_files(fl, [ + ('', 'application/directory', None), + ('controller', 'application/directory', None), + ('zuul-info', 'application/directory', None), + ('job-output.json', 'application/json', None), + ('index.html', 
'text/html', None), + ('controller/subdir', 'application/directory', None), + ('controller/compressed.gz', 'text/plain', 'gzip'), + ('controller/cpu-load.svg', 'image/svg+xml', None), + ('controller/journal.xz', 'text/plain', 'xz'), + ('controller/service_log.txt', 'text/plain', None), + ('controller/syslog', 'text/plain', None), + ('controller/index.html', 'text/html', None), + ('controller/subdir/foo::3.txt', 'text/plain', None), + ('controller/subdir/subdir.txt', 'text/plain', None), + ('controller/subdir/index.html', 'text/html', None), + ('zuul-info/inventory.yaml', 'text/plain', None), + ('zuul-info/zuul-info.controller.txt', 'text/plain', None), + ('zuul-info/index.html', 'text/html', None), + ]) + + top_index = self.find_file(fl, 'index.html') + page = open(top_index.full_path).read() + page = BeautifulSoup(page, 'html.parser') + rows = page.find_all('tr')[1:] + + self.assertEqual(len(rows), 4) + + self.assertEqual(rows[0].find('a').get('href'), + '../index.html') + self.assertEqual(rows[0].find('a').text, '../') + + self.assertEqual(rows[1].find('a').get('href'), + 'controller/index.html') + self.assertEqual(rows[1].find('a').text, 'controller/') + + self.assertEqual(rows[2].find('a').get('href'), + 'zuul-info/index.html') + self.assertEqual(rows[2].find('a').text, 'zuul-info/') + + subdir_index = self.find_file(fl, 'controller/subdir/index.html') + page = open(subdir_index.full_path).read() + page = BeautifulSoup(page, 'html.parser') + rows = page.find_all('tr')[1:] + self.assertEqual(rows[0].find('a').get('href'), '../index.html') + self.assertEqual(rows[0].find('a').text, '../') + + # Test proper escaping of files with funny names + self.assertEqual(rows[1].find('a').get('href'), 'foo%3A%3A3.txt') + self.assertEqual(rows[1].find('a').text, 'foo::3.txt') + # Test files without escaping + self.assertEqual(rows[2].find('a').get('href'), 'subdir.txt') + self.assertEqual(rows[2].find('a').text, 'subdir.txt') + + def test_no_parent_links(self): + '''Test 
index generation creates topdir parent link''' + with FileList() as fl: + fl.add(os.path.join(FIXTURE_DIR, 'logs/')) + ix = Indexer(fl) + ix.make_indexes( + create_parent_links=False, + create_topdir_parent_link=False) + + self.assert_files(fl, [ + ('', 'application/directory', None), + ('controller', 'application/directory', None), + ('zuul-info', 'application/directory', None), + ('job-output.json', 'application/json', None), + ('index.html', 'text/html', None), + ('controller/subdir', 'application/directory', None), + ('controller/compressed.gz', 'text/plain', 'gzip'), + ('controller/cpu-load.svg', 'image/svg+xml', None), + ('controller/journal.xz', 'text/plain', 'xz'), + ('controller/service_log.txt', 'text/plain', None), + ('controller/syslog', 'text/plain', None), + ('controller/index.html', 'text/html', None), + ('controller/subdir/foo::3.txt', 'text/plain', None), + ('controller/subdir/subdir.txt', 'text/plain', None), + ('controller/subdir/index.html', 'text/html', None), + ('zuul-info/inventory.yaml', 'text/plain', None), + ('zuul-info/zuul-info.controller.txt', 'text/plain', None), + ('zuul-info/index.html', 'text/html', None), + ]) + + top_index = self.find_file(fl, 'index.html') + page = open(top_index.full_path).read() + page = BeautifulSoup(page, 'html.parser') + rows = page.find_all('tr')[1:] + + self.assertEqual(len(rows), 3) + + self.assertEqual(rows[0].find('a').get('href'), + 'controller/index.html') + self.assertEqual(rows[0].find('a').text, + 'controller/') + + self.assertEqual(rows[1].find('a').get('href'), + 'zuul-info/index.html') + self.assertEqual(rows[1].find('a').text, + 'zuul-info/') + + subdir_index = self.find_file(fl, 'controller/subdir/index.html') + page = open(subdir_index.full_path).read() + page = BeautifulSoup(page, 'html.parser') + rows = page.find_all('tr')[1:] + + # Test proper escaping of files with funny names + self.assertEqual(rows[0].find('a').get('href'), 'foo%3A%3A3.txt') + self.assertEqual(rows[0].find('a').text, 
'foo::3.txt') + # Test files without escaping + self.assertEqual(rows[1].find('a').get('href'), 'subdir.txt') + self.assertEqual(rows[1].find('a').text, 'subdir.txt') + + +class TestFileDetail(testtools.TestCase): + + def test_get_file_detail(self): + '''Test files info''' + path = os.path.join(FIXTURE_DIR, 'logs/job-output.json') + file_detail = FileDetail(path, '') + path_stat = os.stat(path) + self.assertEqual( + time.gmtime(path_stat[stat.ST_MTIME]), + file_detail.last_modified) + self.assertEqual(16, file_detail.size) + + def test_get_file_detail_missing_file(self): + '''Test files that go missing during a walk''' + + file_detail = FileDetail('missing/file/that/we/cant/find', '') + + self.assertEqual(time.gmtime(0), file_detail.last_modified) + self.assertEqual(0, file_detail.size) diff --git a/roles/upload-logs-s3/library/zuul_s3_upload.py b/roles/upload-logs-s3/library/zuul_s3_upload.py new file mode 100755 index 000000000..d6e9a9842 --- /dev/null +++ b/roles/upload-logs-s3/library/zuul_s3_upload.py @@ -0,0 +1,825 @@ +#!/usr/bin/env python3 +# +# Copyright 2014 Rackspace Australia +# Copyright 2018 Red Hat, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +# Make coding more python3-ish +from __future__ import (absolute_import, division, print_function) +__metaclass__ = type + + +""" +Utility to upload files to s3 +""" + +import argparse +import gzip +import io +import logging +import mimetypes +import os +try: + import queue as queuelib +except ImportError: + import Queue as queuelib +import shutil +import stat +import sys +import tempfile +import threading +import time +try: + import urllib.parse as urlparse +except ImportError: + import urllib as urlparse +import zlib +import collections + +import boto3 +from ansible.module_utils.basic import AnsibleModule + +try: + # Python 3.3+ + from collections.abc import Sequence +except ImportError: + from collections import Sequence + + +mimetypes.init() +mimetypes.add_type('text/plain', '.yaml') + +MAX_UPLOAD_THREADS = 24 +POST_ATTEMPTS = 3 + +# Map mime types to apache icons +APACHE_MIME_ICON_MAP = { + '_default': 'unknown.png', + 'application/gzip': 'compressed.png', + 'application/directory': 'folder.png', + 'text/html': 'text.png', + 'text/plain': 'text.png', +} + +# Map special file names to apache icons +APACHE_FILE_ICON_MAP = { + '..': 'back.png', +} + +# These icon files are from the Apache project and are in the public +# domain. 
+ICON_IMAGES = { + 'back.png': 'iVBORw0KGgoAAAANSUhEUgAAABQAAAAWCAMAAAD3n0w0AAAAElBMVEX/' + '///M//+ZmZlmZmYzMzMAAACei5rnAAAAAnRSTlP/AOW3MEoAAABWSURB' + 'VHjabdBBCgAhDEPRRpv7X3kwEMsQ//IRRC08urjRHbha5VLFUsVSxVI9' + 'lmDh5hMpHD6n0EgoiZG0DNINpnWlcVXaRix76e1/8dddcL6nG0Ri9gHj' + 'tgSXKYeLBgAAAABJRU5ErkJggg==', + 'compressed.png': 'iVBORw0KGgoAAAANSUhEUgAAABQAAAAWCAMAAAD3n0w0AAADAFBM' + 'VEX//////8z//5n//2b//zP//wD/zP//zMz/zJn/zGb/zDP/zAD/' + 'mf//mcz/mZn/mWb/mTP/mQD/Zv//Zsz/Zpn/Zmb/ZjP/ZgD/M///' + 'M8z/M5n/M2b/MzP/MwD/AP//AMz/AJn/AGb/ADP/AADM///M/8zM' + '/5nM/2bM/zPM/wDMzP/MzMzMzJnMzGbMzDPMzADMmf/MmczMmZnM' + 'mWbMmTPMmQDMZv/MZszMZpnMZmbMZjPMZgDMM//MM8zMM5nMM2bM' + 'MzPMMwDMAP/MAMzMAJnMAGbMADPMAACZ//+Z/8yZ/5mZ/2aZ/zOZ' + '/wCZzP+ZzMyZzJmZzGaZzDOZzACZmf+ZmcyZmZmZmWaZmTOZmQCZ' + 'Zv+ZZsyZZpmZZmaZZjOZZgCZM/+ZM8yZM5mZM2aZMzOZMwCZAP+Z' + 'AMyZAJmZAGaZADOZAABm//9m/8xm/5lm/2Zm/zNm/wBmzP9mzMxm' + 'zJlmzGZmzDNmzABmmf9mmcxmmZlmmWZmmTNmmQBmZv9mZsxmZplm' + 'ZmZmZjNmZgBmM/9mM8xmM5lmM2ZmMzNmMwBmAP9mAMxmAJlmAGZm' + 'ADNmAAAz//8z/8wz/5kz/2Yz/zMz/wAzzP8zzMwzzJkzzGYzzDMz' + 'zAAzmf8zmcwzmZkzmWYzmTMzmQAzZv8zZswzZpkzZmYzZjMzZgAz' + 'M/8zM8wzM5kzM2YzMzMzMwAzAP8zAMwzAJkzAGYzADMzAAAA//8A' + '/8wA/5kA/2YA/zMA/wAAzP8AzMwAzJkAzGYAzDMAzAAAmf8AmcwA' + 'mZkAmWYAmTMAmQAAZv8AZswAZpkAZmYAZjMAZgAAM/8AM8wAM5kA' + 'M2YAMzMAMwAAAP8AAMwAAJkAAGYAADPuAADdAAC7AACqAACIAAB3' + 'AABVAABEAAAiAAARAAAA7gAA3QAAuwAAqgAAiAAAdwAAVQAARAAA' + 'IgAAEQAAAO4AAN0AALsAAKoAAIgAAHcAAFUAAEQAACIAABHu7u7d' + '3d27u7uqqqqIiIh3d3dVVVVEREQiIiIREREAAAD7CIKZAAAAJXRS' + 'TlP///////////////////////////////////////////////8A' + 'P89CTwAAAGtJREFUeNp9z9ENgDAIhOEOco+dybVuEXasFMRDY/x5' + '+xJCO6Znu6kSx7BhXyjtKBWWNlwW88Loid7hFRKBXiIYCMfMEYUQ' + 'QohC3CjFA5nIjqx1CqlDLGR/EhM5O06yvin0ftGOyIS7lV14AsQN' + 'aR7rMEBYAAAAAElFTkSuQmCC', + 'folder.png': 'iVBORw0KGgoAAAANSUhEUgAAABQAAAAWCAMAAAD3n0w0AAAAElBMVEX/' + '////zJnM//+ZZjMzMzMAAADCEvqoAAAAA3RSTlP//wDXyg1BAAAASElE' + 'QVR42s3KQQ6AQAhDUaXt/a/sQDrRJu7c+NmQB0e99B3lnqjT6cYx6zSI' + 
'bV40n3D7psYMoBoz4w8/EdNYQsbGEjNxYSljXTEsA9O1pLTvAAAAAElF' + 'TkSuQmCC', + 'text.png': 'iVBORw0KGgoAAAANSUhEUgAAABQAAAAWCAMAAAD3n0w0AAAAD1BMVEX/' + '///M//+ZmZkzMzMAAABVsTOVAAAAAnRSTlP/AOW3MEoAAABISURBVHja' + 'tcrRCgAgCENRbf7/N7dKomGvngjhMsPLD4NdMPwia438NRIyxsaL/XQZ' + 'hyxpkC6zyjLXGVXnkhqWJWIIrOgeinECLlUCjBCqNQoAAAAASUVORK5C' + 'YII=', + 'unknown.png': 'iVBORw0KGgoAAAANSUhEUgAAABQAAAAWCAMAAAD3n0w0AAAAD1BMVEX/' + '///M//+ZmZkzMzMAAABVsTOVAAAAAnRSTlP/AOW3MEoAAABYSURBVHja' + 'ncvRDoAgDEPRruX/v1kmNHPBxMTLyzgD6FmsILg56g2hQnJkOco4yZhq' + 'tN5nYd5Zq0LsHblwxwP9GTCWsaGtoelANKzOlz/RfaLYUmLE6E28ALlN' + 'AupSdoFsAAAAAElFTkSuQmCC'} + + +# Begin vendored code +# This code is licensed under the Public Domain/CC0 and comes from +# https://github.com/leenr/gzip-stream/blob/master/gzip_stream.py +# Code was modified: +# removed type annotations to support python2. +# removed use of *, somearg for positional anonymous args. +# Default compression level to 9. + +class GZIPCompressedStream(io.RawIOBase): + def __init__(self, stream, compression_level=9): + assert 1 <= compression_level <= 9 + + self._compression_level = compression_level + self._stream = stream + + self._compressed_stream = io.BytesIO() + self._compressor = gzip.GzipFile( + mode='wb', + fileobj=self._compressed_stream, + compresslevel=compression_level + ) + + # because of the GZIP header written by `GzipFile.__init__`: + self._compressed_stream.seek(0) + + @property + def compression_level(self): + return self._compression_level + + @property + def stream(self): + return self._stream + + def readable(self): + return True + + def _read_compressed_into(self, b): + buf = self._compressed_stream.read(len(b)) + b[:len(buf)] = buf + return len(buf) + + def readinto(self, b): + b = memoryview(b) + + offset = 0 + size = len(b) + while offset < size: + offset += self._read_compressed_into(b[offset:]) + if offset < size: + # self._compressed_buffer now empty + if self._compressor.closed: + # nothing to compress anymore + break 
+ # compress next bytes + self._read_n_compress(size) + + return offset + + def _read_n_compress(self, size): + assert size > 0 + + data = self._stream.read(size) + + # rewind buffer to the start to free up memory + # (because anything currently in the buffer should be already + # streamed off the object) + self._compressed_stream.seek(0) + self._compressed_stream.truncate(0) + + if data: + self._compressor.write(data) + else: + # this will write final data (will flush zlib with Z_FINISH) + self._compressor.close() + + # rewind to the buffer start + self._compressed_stream.seek(0) + + def __repr__(self): + return ( + '{self.__class__.__name__}(' + '{self.stream!r}, ' + 'compression_level={self.compression_level!r}' + ')' + ).format(self=self) + +# End vendored code + + +def get_mime_icon(mime, filename=''): + icon = (APACHE_FILE_ICON_MAP.get(filename) or + APACHE_MIME_ICON_MAP.get(mime) or + APACHE_MIME_ICON_MAP['_default']) + return "data:image/png;base64,%s" % ICON_IMAGES[icon] + + +def retry_function(func): + for attempt in range(1, POST_ATTEMPTS + 1): + try: + return func() + except Exception: + if attempt >= POST_ATTEMPTS: + raise + else: + logging.exception("Error on attempt %d" % attempt) + time.sleep(attempt * 10) + + +def sizeof_fmt(num, suffix='B'): + # From http://stackoverflow.com/questions/1094841/ + # reusable-library-to-get-human-readable-version-of-file-size + for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']: + if abs(num) < 1024.0: + return "%3.1f%s%s" % (num, unit, suffix) + num /= 1024.0 + return "%.1f%s%s" % (num, 'Y', suffix) + + +class FileDetail(): + """ + Used to generate indexes with links or as the file path + to push to s3. + """ + + def __init__(self, full_path, relative_path, filename=None): + """ + Args: + full_path (str): The absolute path to the file on disk. + relative_path (str): The relative path from the artifacts source + used for links. + filename (str): An optional alternate filename in links. 
class FileList(Sequence):
    '''A collection of FileDetail objects

    This is a list-like group of FileDetail objects, intended to be
    used as a context manager around the upload process.  Temporary
    directories obtained through get_tempdir() are removed when the
    context exits.
    '''
    def __init__(self):
        # The list always starts with one entry that has no on-disk
        # path, an empty relative path and an empty filename.
        self.file_list = [FileDetail(None, '', '')]
        # Temporary directories handed out by get_tempdir().
        self.tempdirs = []

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        # Pop while removing so that each directory is deleted at most
        # once, even if the object is used as a context manager again.
        while self.tempdirs:
            shutil.rmtree(self.tempdirs.pop())

    def __getitem__(self, item):
        return self.file_list[item]

    def __len__(self):
        return len(self.file_list)

    def get_tempdir(self):
        '''Get a temporary directory

        Returns path to a private temporary directory which will be
        cleaned on exit
        '''
        tempdir = tempfile.mkdtemp(prefix='s-u-l-tmp')
        self.tempdirs.append(tempdir)
        return tempdir

    @staticmethod
    def _path_in_tree(root, path):
        '''Check whether path resolves to a location inside root

        Args:
            root (str): Resolved (realpath) directory that bounds the
                tree.
            path (str): Path to test; "~" is expanded and symlinks are
                resolved before comparing.

        Returns:
            bool: True if the resolved path is root itself or lies
            below it, False otherwise (a debug message is logged).
        '''
        full_path = os.path.realpath(os.path.abspath(
            os.path.expanduser(path)))
        # Compare against "root + separator" rather than the bare root
        # string, otherwise a sibling such as "/logs-other" would be
        # treated as being inside "/logs".
        prefix = root if root.endswith(os.sep) else root + os.sep
        if full_path != root and not full_path.startswith(prefix):
            logging.debug("Skipping path outside root: %s", path)
            return False
        return True

    def add(self, file_path):
        """Add a file, or a whole directory tree, to the list

        A regular file is added under its base name.  A directory is
        walked recursively.  If file_path does not end with '/', an
        entry for the directory itself is added first and all relative
        names start with the directory name; with a trailing '/', only
        the contents are added, relative to the directory.

        Entries that resolve to a location outside of the directory
        being added (for example through symlinks) are skipped.  A
        file_path that is neither an existing file nor an existing
        directory adds nothing.

        Args:
            file_path (str): File or directory to add.
        """

        # new_entries: FileDetail objects collected by this call
        new_entries = []

        if os.path.isfile(file_path):
            relative_path = os.path.basename(file_path)
            new_entries.append(FileDetail(file_path, relative_path))
        elif os.path.isdir(file_path):
            original_root = os.path.realpath(os.path.abspath(
                os.path.expanduser(file_path)))

            parent_dir = os.path.dirname(file_path)
            if not file_path.endswith('/'):
                filename = os.path.basename(file_path)
                relative_name = os.path.relpath(file_path, parent_dir)
                new_entries.append(FileDetail(file_path, relative_name,
                                              filename))
            # TODO: this will copy the result of symlinked files, but
            # it won't follow directory symlinks.  If we add that, we
            # should ensure that we don't loop.
            for path, folders, files in os.walk(file_path):
                # Sort in-place so that os.walk recurses in order.
                files.sort(key=lambda x: x.lower())
                folders.sort(key=lambda x: x.lower())

                for filename in folders:
                    full_path = os.path.join(path, filename)
                    if not self._path_in_tree(original_root, full_path):
                        continue
                    relative_name = os.path.relpath(full_path, parent_dir)
                    new_entries.append(FileDetail(full_path, relative_name,
                                                  filename))

                for filename in files:
                    full_path = os.path.join(path, filename)
                    if not self._path_in_tree(original_root, full_path):
                        continue
                    relative_name = os.path.relpath(full_path, parent_dir)
                    new_entries.append(FileDetail(full_path, relative_name))

        self.file_list += new_entries
Name | Last Modified | ' + output += 'Size | |
---|---|---|---|
' % ({ + 'm': file_details.mimetype, + 'i': get_mime_icon(file_details.mimetype, + file_details.filename), + })) + filename = file_details.filename + link_filename = filename + if file_details.folder: + filename += '/' + link_filename += '/index.html' + output += ' | %s | ' % ( + urlparse.quote(link_filename), + filename) + output += '%s | ' % time.asctime( + file_details.last_modified) + size = sizeof_fmt(file_details.size, suffix='') + output += '%s | ' % size + output += '