#!/usr/bin/env python3 # # Copyright 2014 Rackspace Australia # Copyright 2018-2019 Red Hat, Inc # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain # a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. # Make coding more python3-ish from __future__ import (absolute_import, division, print_function) __metaclass__ = type """ Generic utilities used for log upload. """ import gzip import io import logging import mimetypes import os import shutil import stat import tempfile import time try: import urllib.parse as urlparse except ImportError: import urllib as urlparse import zlib import collections try: # Python 3.3+ from collections.abc import Sequence except ImportError: from collections import Sequence if not mimetypes.inited: # We don't want to reinit and override any previously added types. mimetypes.init() mimetypes.add_type('text/plain', '.yaml') MAX_UPLOAD_THREADS = 24 POST_ATTEMPTS = 3 # Map mime types to apache icons APACHE_MIME_ICON_MAP = { '_default': 'unknown.png', 'application/gzip': 'compressed.png', 'application/directory': 'folder.png', 'text/html': 'text.png', 'text/plain': 'text.png', } # Map mime types to apache icons APACHE_FILE_ICON_MAP = { '..': 'back.png', } # These icon files are from the Apache project and are in the public # domain. ICON_IMAGES = { 'back.png': 'iVBORw0KGgoAAAANSUhEUgAAABQAAAAWCAMAAAD3n0w0AAAAElBMVEX/' '///M//+ZmZlmZmYzMzMAAACei5rnAAAAAnRSTlP/AOW3MEoAAABWSURB' 'VHjabdBBCgAhDEPRRpv7X3kwEMsQ//IRRC08urjRHbha5VLFUsVSxVI9' 'lmDh5hMpHD6n0EgoiZG0DNINpnWlcVXaRix76e1/8dddcL6nG0Ri9gHj' 'tgSXKYeLBgAAAABJRU5ErkJggg==', 'compressed.png': 'iVBORw0KGgoAAAANSUhEUgAAABQAAAAWCAMAAAD3n0w0AAADAFBM' 'VEX//////8z//5n//2b//zP//wD/zP//zMz/zJn/zGb/zDP/zAD/' 'mf//mcz/mZn/mWb/mTP/mQD/Zv//Zsz/Zpn/Zmb/ZjP/ZgD/M///' 'M8z/M5n/M2b/MzP/MwD/AP//AMz/AJn/AGb/ADP/AADM///M/8zM' '/5nM/2bM/zPM/wDMzP/MzMzMzJnMzGbMzDPMzADMmf/MmczMmZnM' 'mWbMmTPMmQDMZv/MZszMZpnMZmbMZjPMZgDMM//MM8zMM5nMM2bM' 'MzPMMwDMAP/MAMzMAJnMAGbMADPMAACZ//+Z/8yZ/5mZ/2aZ/zOZ' '/wCZzP+ZzMyZzJmZzGaZzDOZzACZmf+ZmcyZmZmZmWaZmTOZmQCZ' 'Zv+ZZsyZZpmZZmaZZjOZZgCZM/+ZM8yZM5mZM2aZMzOZMwCZAP+Z' 'AMyZAJmZAGaZADOZAABm//9m/8xm/5lm/2Zm/zNm/wBmzP9mzMxm' 'zJlmzGZmzDNmzABmmf9mmcxmmZlmmWZmmTNmmQBmZv9mZsxmZplm' 'ZmZmZjNmZgBmM/9mM8xmM5lmM2ZmMzNmMwBmAP9mAMxmAJlmAGZm' 'ADNmAAAz//8z/8wz/5kz/2Yz/zMz/wAzzP8zzMwzzJkzzGYzzDMz' 'zAAzmf8zmcwzmZkzmWYzmTMzmQAzZv8zZswzZpkzZmYzZjMzZgAz' 'M/8zM8wzM5kzM2YzMzMzMwAzAP8zAMwzAJkzAGYzADMzAAAA//8A' '/8wA/5kA/2YA/zMA/wAAzP8AzMwAzJkAzGYAzDMAzAAAmf8AmcwA' 'mZkAmWYAmTMAmQAAZv8AZswAZpkAZmYAZjMAZgAAM/8AM8wAM5kA' 'M2YAMzMAMwAAAP8AAMwAAJkAAGYAADPuAADdAAC7AACqAACIAAB3' 'AABVAABEAAAiAAARAAAA7gAA3QAAuwAAqgAAiAAAdwAAVQAARAAA' 'IgAAEQAAAO4AAN0AALsAAKoAAIgAAHcAAFUAAEQAACIAABHu7u7d' '3d27u7uqqqqIiIh3d3dVVVVEREQiIiIREREAAAD7CIKZAAAAJXRS' 'TlP///////////////////////////////////////////////8A' 'P89CTwAAAGtJREFUeNp9z9ENgDAIhOEOco+dybVuEXasFMRDY/x5' '+xJCO6Znu6kSx7BhXyjtKBWWNlwW88Loid7hFRKBXiIYCMfMEYUQ' 'QohC3CjFA5nIjqx1CqlDLGR/EhM5O06yvin0ftGOyIS7lV14AsQN' 'aR7rMEBYAAAAAElFTkSuQmCC', 'folder.png': 'iVBORw0KGgoAAAANSUhEUgAAABQAAAAWCAMAAAD3n0w0AAAAElBMVEX/' '////zJnM//+ZZjMzMzMAAADCEvqoAAAAA3RSTlP//wDXyg1BAAAASElE' 'QVR42s3KQQ6AQAhDUaXt/a/sQDrRJu7c+NmQB0e99B3lnqjT6cYx6zSI' 'bV40n3D7psYMoBoz4w8/EdNYQsbGEjNxYSljXTEsA9O1pLTvAAAAAElF' 'TkSuQmCC', 'text.png': 'iVBORw0KGgoAAAANSUhEUgAAABQAAAAWCAMAAAD3n0w0AAAAD1BMVEX/' '///M//+ZmZkzMzMAAABVsTOVAAAAAnRSTlP/AOW3MEoAAABISURBVHja' 'tcrRCgAgCENRbf7/N7dKomGvngjhMsPLD4NdMPwia438NRIyxsaL/XQZ' 'hyxpkC6zyjLXGVXnkhqWJWIIrOgeinECLlUCjBCqNQoAAAAASUVORK5C' 'YII=', 'unknown.png': 'iVBORw0KGgoAAAANSUhEUgAAABQAAAAWCAMAAAD3n0w0AAAAD1BMVEX/' '///M//+ZmZkzMzMAAABVsTOVAAAAAnRSTlP/AOW3MEoAAABYSURBVHja' 'ncvRDoAgDEPRruX/v1kmNHPBxMTLyzgD6FmsILg56g2hQnJkOco4yZhq' 'tN5nYd5Zq0LsHblwxwP9GTCWsaGtoelANKzOlz/RfaLYUmLE6E28ALlN' 'AupSdoFsAAAAAElFTkSuQmCC'} # Begin vendored code # This code is licensed under the Public Domain/CC0 and comes from # https://github.com/leenr/gzip-stream/blob/master/gzip_stream.py # Code was modified: # removed type annotations to support python2. # removed use of *, somearg for positional anonymous args. # Default compression level to 9. # # changed read method argument name from length to size and # added read method default value size=-1 for parent class compatibility class GZIPCompressedStream(io.RawIOBase): def __init__(self, stream, compression_level=9): assert 1 <= compression_level <= 9 self._compression_level = compression_level self._stream = stream self._compressed_stream = io.BytesIO() self._compressor = gzip.GzipFile( mode='wb', fileobj=self._compressed_stream, compresslevel=compression_level ) # because of the GZIP header written by `GzipFile.__init__`: self._compressed_stream.seek(0) self.count = 0 def read(self, size=-1): r = super().read(size) self.count += len(r) return r def tell(self): return self.count @property def compression_level(self): return self._compression_level @property def stream(self): return self._stream def readable(self): return True def _read_compressed_into(self, b): buf = self._compressed_stream.read(len(b)) b[:len(buf)] = buf return len(buf) def readinto(self, b): b = memoryview(b) offset = 0 size = len(b) while offset < size: offset += self._read_compressed_into(b[offset:]) if offset < size: # self._compressed_buffer now empty if self._compressor.closed: # nothing to compress anymore break # compress next bytes self._read_n_compress(size) return offset def _read_n_compress(self, size): assert size > 0 data = self._stream.read(size) # rewind buffer to the start to free up memory # (because anything currently in the buffer should be already # streamed off the object) self._compressed_stream.seek(0) self._compressed_stream.truncate(0) if data: self._compressor.write(data) else: # this will write final data (will flush zlib with Z_FINISH) self._compressor.close() # rewind to the buffer start self._compressed_stream.seek(0) def __repr__(self): return ( '{self.__class__.__name__}(' '{self.stream!r}, ' 'compression_level={self.compression_level!r}' ')' ).format(self=self) # End vendored code def get_mime_icon(mime, filename=''): icon = (APACHE_FILE_ICON_MAP.get(filename) or APACHE_MIME_ICON_MAP.get(mime) or APACHE_MIME_ICON_MAP['_default']) return "data:image/png;base64,%s" % ICON_IMAGES[icon] def retry_function(func): for attempt in range(1, POST_ATTEMPTS + 1): try: return func() except Exception: if attempt >= POST_ATTEMPTS: raise else: logging.exception("Error on attempt %d" % attempt) time.sleep(attempt * 10) def sizeof_fmt(num, suffix='B'): # From http://stackoverflow.com/questions/1094841/ # reusable-library-to-get-human-readable-version-of-file-size for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']: if abs(num) < 1024.0: return "%3.1f%s%s" % (num, unit, suffix) num /= 1024.0 return "%.1f%s%s" % (num, 'Y', suffix) class FileDetail(): """ Used to generate indexes with links or as the file path to push to storage. """ def __init__(self, full_path, relative_path, filename=None): """ Args: full_path (str): The absolute path to the file on disk. relative_path (str): The relative path from the artifacts source used for links. filename (str): An optional alternate filename in links. """ # Make FileNotFoundError exception to be compatible with python2 try: FileNotFoundError # noqa: F823 except NameError: FileNotFoundError = OSError self.full_path = full_path if filename is None: self.filename = os.path.basename(full_path) else: self.filename = filename self.relative_path = relative_path if self.full_path and os.path.isfile(self.full_path): mime_guess, encoding = mimetypes.guess_type(self.full_path) self.mimetype = mime_guess if mime_guess else 'text/plain' self.encoding = encoding self.folder = False else: self.mimetype = 'application/directory' self.encoding = None self.folder = True try: st = os.stat(self.full_path) self.last_modified = time.gmtime(st[stat.ST_MTIME]) self.size = st[stat.ST_SIZE] except (FileNotFoundError, TypeError): self.last_modified = time.gmtime(0) self.size = 0 def __repr__(self): t = 'Folder' if self.folder else 'File' return '<%s %s>' % (t, self.relative_path) class FileList(Sequence): '''A collection of FileDetail objects This is a list-like group of FileDetail objects, intended to be used as a context manager around the upload process. ''' def __init__(self): self.file_list = [] self.file_list.append(FileDetail(None, '', '')) self.tempdirs = [] def __enter__(self): return self def __exit__(self, type, value, traceback): for tempdir in self.tempdirs: shutil.rmtree(tempdir) def __getitem__(self, item): return self.file_list.__getitem__(item) def __len__(self): return self.file_list.__len__() def get_tempdir(self): '''Get a temporary directory Returns path to a private temporary directory which will be cleaned on exit ''' tempdir = tempfile.mkdtemp(prefix='s-u-l-tmp') self.tempdirs.append(tempdir) return tempdir @staticmethod def _path_in_tree(root, path): full_path = os.path.realpath(os.path.abspath( os.path.expanduser(path))) if not full_path.startswith(root): logging.debug("Skipping path outside root: %s" % (path,)) return False return True def add(self, file_path): """ Generate a list of files to upload to storage. Recurses through directories """ # file_list: A list of FileDetails to push to storage file_list = [] if os.path.isfile(file_path): relative_path = os.path.basename(file_path) file_list.append(FileDetail(file_path, relative_path)) elif os.path.isdir(file_path): original_root = os.path.realpath(os.path.abspath( os.path.expanduser(file_path))) parent_dir = os.path.dirname(file_path) if not file_path.endswith('/'): filename = os.path.basename(file_path) full_path = file_path relative_name = os.path.relpath(full_path, parent_dir) file_list.append(FileDetail(full_path, relative_name, filename)) # TODO: this will copy the result of symlinked files, but # it won't follow directory symlinks. If we add that, we # should ensure that we don't loop. for path, folders, files in os.walk(file_path): # Sort folder in-place so that we recurse in order. files.sort(key=lambda x: x.lower()) folders.sort(key=lambda x: x.lower()) # relative_path: The path between the given directory # and the one being currently walked. relative_path = os.path.relpath(path, parent_dir) for filename in folders: full_path = os.path.join(path, filename) if not self._path_in_tree(original_root, full_path): continue relative_name = os.path.relpath(full_path, parent_dir) file_list.append(FileDetail(full_path, relative_name, filename)) for filename in files: full_path = os.path.join(path, filename) if not self._path_in_tree(original_root, full_path): continue relative_name = os.path.relpath(full_path, parent_dir) file_detail = FileDetail(full_path, relative_name) file_list.append(file_detail) self.file_list += file_list class Indexer(): """Index a FileList Functions to generate indexes and other collated data for a FileList - make_indexes() : make index.html in folders """ def __init__(self, file_list): ''' Args: file_list (FileList): A FileList object with all files to be indexed. ''' assert isinstance(file_list, FileList) self.file_list = file_list def _make_index_file(self, folder_links, title, tempdir, append_footer): """Writes an index into a file for pushing""" for file_details in folder_links: # Do not generate an index file if one exists already. # This may be the case when uploading other machine generated # content like python coverage info. if self.index_filename == file_details.filename: return index_content = self._generate_log_index( folder_links, title, append_footer) fd = open(os.path.join(tempdir, self.index_filename), 'w') fd.write(index_content) return os.path.join(tempdir, self.index_filename) def _generate_log_index(self, folder_links, title, append_footer): """Create an index of logfiles and links to them""" output = '
Name | Last Modified | ' output += 'Size | |
---|---|---|---|
' % ({ 'm': file_details.mimetype, 'i': get_mime_icon(file_details.mimetype, file_details.filename), })) filename = file_details.filename link_filename = filename if file_details.folder: filename += '/' link_filename += '/index.html' output += ' | %s | ' % ( urlparse.quote(link_filename), filename) output += '%s | ' % time.asctime( file_details.last_modified) size = sizeof_fmt(file_details.size, suffix='') output += '%s | ' % size output += '