#!/usr/bin/env python # # Copyright (c) 2015-2016 IBM Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. # See the License for the specific language governing permissions and # limitations under the License. import sys import stat import os from pwd import getpwuid import logging import argparse import swiftclient from swiftclient.exceptions import ClientException import requests import getpass from functools import wraps from simplejson.scanner import JSONDecodeError """ Namespace reconciliation tool for SwiftOnHPSS. Depends on Keystone for authentication, preferably with the Keystone HPSS identity backend. Has not been tested with any other authentication service. WARNING: Do not run nstool on a regular Swift JBOD disk, or 'fun' things will happen to your metadata. LIMITATION: In order to save unnecessary file I/O on HPSS, it is highly recommended to add 'object_post_as_copy=false' to the configuration of the proxy server that will handle SwiftOnHPSS calls! LIMITATION: Only supports the Keystone Identity V2 API """ # TODO: pull OpenStack details from env vars def trace_function(f): @wraps(f) def wrap(*args, **kwargs): logging.info('entering %s' % f.__name__) logging.info('args: %s' % str(args)) logging.info('kwargs: %s' % str(kwargs)) result = f(*args, **kwargs) logging.info('result: %s' % str(result)) logging.info('exiting %s' % f.__name__) return result return wrap @trace_function def main(program_args): nstool = Reconciler(program_args) if program_args.device.endswith('/'): program_args.device = program_args.device[:-1] # Get and check authentication password = getpass.getpass(prompt='Keystone password for %s@admin: ' % program_args.username) try: admin_keystone_api, admin_token =\ nstool.auth_into_keystone(password, 'admin') except ValueError: logging.debug("Failed login!") print "Authentication failed." sys.exit(1) del password # Figure out what we're doing. target_account = program_args.account target_container = program_args.container # Start doing it. # pool = multiprocessing.Pool(processes=multiprocessing.cpu_count()-1) # Multiprocessing does not play nicely with the lazy loading that # keystoneclient does, so let's not mess with it for now. pool = None # Doing this convolution because if we scope into a project as root@admin, # and initialize it, the symlink always gets written to 'admin' because # SwiftOnHPSS only gets the name of the account currently scoped into, # even if it's not the same as the account we're modifying containers in. # So we just cheat and make ourselves a member of every project, and # then scope in as root into that account to initialize it. # TODO: make this use a multiprocessing pool # FIXME: this repeats itself, refactor to be less bad if program_args.account == "": for account in admin_keystone_api.list_tenants(): try: admin_keystone_api.grant_role_on_tenant(account['name'], program_args.username, '_member_') admin_keystone_api.grant_role_on_tenant(account['name'], program_args.username, 'admin') except KeyError: sys.exit(1) user_keystone_api = \ LightweightKeystoneAPI(admin_keystone_api.url, admin_keystone_api.username, admin_keystone_api.password, account['name'], admin_keystone_api.version) user_keystone_api.authenticate() nstool.initialize_account(keystone_api=user_keystone_api, target_account=account['name']) else: try: admin_keystone_api.grant_role_on_tenant(target_account, program_args.username, '_member_') admin_keystone_api.grant_role_on_tenant(target_account, program_args.username, 'admin') except KeyError: sys.exit(1) user_keystone_api = \ LightweightKeystoneAPI(admin_keystone_api.url, admin_keystone_api.username, admin_keystone_api.password, target_account, admin_keystone_api.version) user_keystone_api.authenticate() nstool.initialize_account(keystone_api=user_keystone_api, target_account=target_account) # Handle reconciling one or all containers. if target_container != "": swift_api = nstool.auth_into_swift(keystone_api=admin_keystone_api, target_account=target_account) nstool.reconcile_container(keystone_api=admin_keystone_api, swift_api=swift_api, target_account=target_account, target_container=target_container) elif target_account != "": nstool.reconcile_account(keystone_api=admin_keystone_api, target_account=target_account, pool=pool) else: # reconcile everything nstool.reconcile_all_accounts(keystone_api=admin_keystone_api, pool=pool) if pool: pool.join() pool.close() print "Done" def check_usage(): """ Checks the user arguments and parses them :returns: argument namespace :rtype: argparse.Namespace """ parser = argparse.ArgumentParser() parser.add_argument("-d", "--device", type=str, help="Swift device path", required=True) parser.add_argument('-a', "--account", type=str, help="Account to reconcile", default="") parser.add_argument('-c', '--container', type=str, help="Container to reconcile", default="") parser.add_argument("-k", "--keystone-url", type=str, help="Keystone url endpoint", required=True) # TODO: get this from Keystone parser.add_argument("-s", "--storage-url", type=str, help="Storage url endpoint", required=True) parser.add_argument("-u", "--username", type=str, help="Admin user (default: root)", default="root") parser.add_argument("-p", "--prefix", type=str, help="Auth prefix for account", required=True) parser.add_argument("-l", "--logfile", type=str, help="Location of log file", default='/var/log/swift/nstool.log') parser.add_argument("-g", "--verify-ssl", type=bool, help="Whether Keystone should verify \ SSL certificate (True or False)", default=True) parser.add_argument("-n", "--storage-policy-name", type=str, help="Swift storage policy name for SwiftOnHPSS", required=True) parser.add_argument("-v", "--verbose", help="Show debug traces", action="store_true") return parser.parse_args() class Reconciler(object): def __init__(self, args): self._args = args def _report(self, msg): if self._args.verbose: print msg @trace_function def auth_into_swift(self, keystone_api, target_account): """ Gets a Swift API client object that is authorized to make changes to the specified Swift account/tenant by name. :param LightweightKeystoneAPI keystone_api: Keystone API instance :param str target_account: Account name from Keystone we want to target :returns: Swift API instance that is authorized to alter the given account """ keystone_acct = keystone_api.get_tenant(target_account) account_url = "%s/%s_%s" % (self._args.storage_url, self._args.prefix, keystone_acct['id']) swift_api = \ swiftclient.client.Connection(preauthurl=account_url, preauthtoken=keystone_api.auth_token) return swift_api def auth_into_keystone(self, admin_password, target_project): """ Returns a Keystone client object and auth token if authentication is successful. :param str admin_password: Password to authenticate with :param str target_project: Project to scope into :returns: Keystone API client, and authorization token """ keystone_api = LightweightKeystoneAPI(url=self._args.keystone_url, username=self._args.username, password=admin_password, tenant_name=target_project, api_version="v2" ) if not keystone_api.authenticate(): raise ValueError('Could not authenticate into Keystone!') return keystone_api, keystone_api.auth_token @trace_function def list_hpss_containers(self, swift_api, target_account): """ Lists containers in Swift metadata that use the given storage policy name, for a given account. :param target_account: Account to look in :param swiftclient.client.Connection swift_api: Swift API client, authenticated for a given account :return: List of container dictionaries with this property """ hpss_containers = [] containers = swift_api.get_account(target_account)[1] for container in containers: storage_policy = swift_api.get_container()[0]['X-Storage-Policy'] if storage_policy == self._args.storage_policy_name: hpss_containers.append(container) return hpss_containers @trace_function def list_accounts(self, keystone_api): """ Convenience function to list all Keystone accounts. :param LightweightKeystoneAPI keystone_api: Keystone API client :returns: List of accounts in Keystone :rtype: List of Keystone tenant names """ return [tenant['name'] for tenant in keystone_api.list_tenants()] @trace_function def initialize_account(self, keystone_api, target_account): """ Initializes a single account in HPSS, or in other words creates the account directory in HPSS. :param LightweightKeystoneAPI root_keystone_api: Keystone API client for root user scoped into target account :param str target_account: Account name to initialize """ # TODO: decide if account needs initializing before here swift_api = self.auth_into_swift(keystone_api=keystone_api, target_account=target_account) temp_name = ".deleteme" print "Initializing account %s..." % target_account swift_api.put_container(temp_name, headers={'X-Storage-Policy': self._args.storage_policy_name}) swift_api.put_object(temp_name, '.deleteme', contents=None) @trace_function def reconcile_all_accounts(self, keystone_api, pool): """ Reconciles all accounts. :param LightweightKeystoneAPI keystone_api: Keystone API client :param multiprocessing.Pool pool: Process pool :returns: nothing """ all_accounts = self.list_accounts(keystone_api) print "Reconciling all accounts..." for account in all_accounts: self.reconcile_account(keystone_api, account, pool) @trace_function def reconcile_account(self, keystone_api, target_account, pool): """ Reconciles all the containers in a single account. :param LightweightKeystoneAPI keystone_api: Keystone API client :param str target_account: Account name with containers to reconcile :param multiprocessing.Pool pool: Process pool :returns: nothing """ swift_containers = [] account_directory = "%s/%s" % (self._args.device, target_account) print "Reconciling account %s" % target_account if not (os.path.isdir(account_directory) or os.path.islink(account_directory)): print "%s is not a directory" % account_directory return if target_account is None: print "Account '%s' does not exist" % target_account logging.exception("Account '%s' does not exist" % target_account) return swift_api = self.auth_into_swift(keystone_api=keystone_api, target_account=target_account) stats, containers = swift_api.get_account() for val in containers: swift_containers.append(val['name']) good_containers = \ self.clean_account(swift_api, target_account) # FIXME: Can we figure out a better way to deal with accounts with a # ridiculous number of containers? if len(good_containers) > 1000000: print "Account has too many containers, exiting..." return # If we're operating on all containers, spawn more worker processes for cont in good_containers: if pool: pool.apply_async(func=self.reconcile_container, args=(keystone_api, swift_api, target_account, cont)) else: self.reconcile_container(keystone_api, swift_api, target_account, cont) @trace_function def reconcile_container(self, keystone_api, swift_api, target_account, target_container): """ Entry point for worker processes. Makes changes to Swift containers depending on the changes made in HPSS :param LightweightKeystoneAPI keystone_api: Keystone API client :param swiftclient.Connection swift_api: Swift API client :param str target_account: Account name the target container belongs to :param str target_container: Container name that needs reconciling :returns: nothing """ swift_containers = [account['name'] for account in swift_api.get_account()[1]] # Check if container directory exists container_dir = "%s/%s/%s" % (self._args.device, target_account, target_container) if not os.path.isdir(container_dir): print "%s is not a directory" % container_dir logging.error("%s is not a directory" % container_dir) return # Check if container exists or else create it if target_container not in swift_containers \ and target_container is not "": print "Container %s does not exist, creating" % target_container try: swift_api.put_container( target_container, headers={'X-Storage-Policy': self._args.storage_policy_name}) except ClientException as e: print "Putting container %s went wrong" % target_container logging.exception("Putting container %s went wrong" % target_container) raise e print "Reconciling container %s/%s" % \ (target_account, target_container) # Make sure those objects get added into the Swift metadata DBs self.add_objects_from_hpss(swift_api, target_container, container_dir) # Clear out objects that exist only in Swift self.clean_container(swift_api, target_account, target_container) # TODO: make sure we don't clobber permissions already existing! # Set up permissions for the container in Swift afterwards self.configure_permissions(swift_api, keystone_api, target_account, target_container) def _get_object_names(self, container_dir): file_gen = os.walk(container_dir) objects = [] for path, directories, files in file_gen: for file in files: objects.append(os.path.relpath('%s/%s' % (path, file), container_dir)) return objects @trace_function def add_objects_from_hpss(self, swift_api, target_container, container_dir): """ Update object metadata on object creates, and returns a list of all the objects existing in the container from Swift. :param swiftclient.Connection swift_api: Swift API client :param str target_container: Container to add objects to :param str container_dir: Container directory to scan for objects in :returns: List of objects from Swift metadata in container """ # TODO: be smarter about what files we HEAD hpss_objects = self._get_object_names(container_dir) logging.debug('hpss_objects is %s' % str(hpss_objects)) for obj in hpss_objects: print "Adding object %s..." % obj try: swift_api.head_object(target_container, obj) except ClientException as err: fail_reason = "Updating object %s in container %s went wrong"\ % (obj, target_container) print fail_reason raise err # Get list of objects from this container try: container_stats, swift_objects =\ swift_api.get_container(target_container) logging.debug('swift_objects is %s' % str(swift_objects)) except ClientException as err: print "Get on container %s went wrong" % target_container raise err return swift_objects @trace_function def clean_account(self, swift_api, target_account): """ Deletes containers in Swift that do not exist on the HPSS filesystem, and returns a list of the containers that exist in HPSS. :param swiftclient.Connection swift_api: Swift API client :param str target_account: Account of the containers that need cleaning :returns: List of containers existing on HPSS mount point """ # Check containers in HPSS account_directory = "%s/%s" % (self._args.device, target_account) # Check containers in Swift swift_containers = [container['name'] for container in swift_api.get_account()[1]] try: hpss_containers = os.listdir(account_directory) except OSError as err: print "Unable to list files under directory: %s" % \ account_directory raise err # Delete containers that only exist in Swift, but not HPSS ghost_containers = list(set(swift_containers) - set(hpss_containers)) for target_container in ghost_containers: try: for obj in swift_api.get_container(target_container)[1]: try: swift_api.delete_object(target_container, obj['name']) except ClientException as e: if e.http_status == 404: pass swift_api.delete_container(target_container) except ClientException: print "Deleting container '%s' went wrong!" % target_container raise return hpss_containers @trace_function def clean_container(self, swift_api, target_account, target_container): """ Deletes Swift objects that do not exist in HPSS from the container :param swiftclient.Connection swift_api: Swift API client :param str target_account: Account that target container belongs to :param str target_container: Container that needs cleaning :returns: nothing """ container_path = "%s/%s/%s" % (self._args.device, target_account, target_container) hpss_objects = self._get_object_names(container_path) swift_objects = [obj['name'] for obj in swift_api.get_container(target_container)[1]] known_good_objects = [] swift_only_objects = list(set(swift_objects) - set(hpss_objects)) # If we have objects that only exist in the Swift metadata, # delete those objects. for target_obj in swift_only_objects: try: swift_api.delete_object(target_container, target_obj) except ClientException: container_stats, post_delete_dict =\ swift_api.get_container(target_container) for obj in post_delete_dict: known_good_objects.append(obj['name']) if target_obj in known_good_objects: fail_reason =\ "Deleting object %s in container %s went wrong"\ % (target_obj, target_container) logging.error(fail_reason) print fail_reason raise IOError(fail_reason) # TODO: clean up this code @trace_function def configure_permissions(self, swift_api, keystone_api, target_account, target_container): """ Configuring Swift Container ACLs :param swiftclient.Connection swift_api: Swift API client :param LightweightKeystoneAPI keystone_api: Keystone API client :param str target_account: Account that container belongs in :param str target_container: Container name :returns: nothing """ # TODO: figure out how to deal with files that have more restrictive # permissions than the container does path = "%s/%s/%s" % (self._args.device, target_account, target_container) # Obtain unix user permissions for path try: file_user = getpwuid(os.stat(path).st_uid).pw_name mode = os.stat(path).st_mode except KeyError as err: fail_reason = "Cannot find permissions for %s" % path print fail_reason logging.exception(fail_reason) raise err # Check if file user exists in Keystone try: keystone_users = [user['name'] for user in keystone_api.list_users()] except Exception as e: fail_reason = "Couldn't get user list" print fail_reason logging.exception(fail_reason) raise e if file_user not in keystone_users: fail_reason = \ "Cannot configure proper permissions for this path %s\ because user %s does not exist in keystone" % \ (path, file_user) print fail_reason logging.error(fail_reason) raise IOError(fail_reason) # Update container ACLs try: if mode & stat.S_IRUSR: headers = {"X-Read-Container": "%s:%s" % (target_account, file_user)} swift_api.post_container(target_container, headers) if mode & stat.S_IWUSR: headers = {"X-Write-Container": "%s:%s" % (target_account, file_user)} swift_api.post_container(target_container, headers) except requests.ConnectionError as err: print "Cannot configure ACLs due to metadata header issue" logging.exception( "Cannot configure ACLs due to metadata header issue") raise err # This only exists because the keystoneclient library is so massive that it has # to have a lazy-loading mechanism that ensures only one of it can be active, # so we can't have handles to multiple different Keystone scopes simultaneously class LightweightKeystoneAPI(object): MEMBER_ROLE_ID = '9fe2ff9ee4384b1894a90878d3e92bab' def __init__(self, url, username, password, tenant_name, api_version='v2'): self.url = url self.username = username self.password = password self.tenant_name = tenant_name self.auth_token = None self.version = api_version logging.debug('New LightweightKeystoneAPI instance created for %s@%s,' ' id = %s' % (self.username, self.tenant_name, hex(id(self)))) def _get_keystone_response(self, method, url, headers=None, json=None): resp = method(url=url, headers=headers, json=json) logging.debug('Response code: %s' % str(resp.status_code)) logging.debug('Response content: %s' % resp.content) if not (200 <= resp.status_code <= 299): raise IOError(resp.status_code, 'Request was unsuccessful') try: resp_json = resp.json() logging.debug('response json: %s' % resp_json) except JSONDecodeError: logging.error('Malformed response from Keystone') raise IOError('Malformed response from Keystone') return resp.headers, resp_json @trace_function def authenticate(self): if self.auth_token: return True if self.version == 'v2': url = '%s/tokens' % self.url creds = {'username': self.username, 'password': self.password} token_req = {'auth': {'tenantName': self.tenant_name, 'passwordCredentials': creds}} else: url = '%s/auth/tokens' % self.url domain = {'id': 'default'} creds = {'user': {'name': self.username, 'password': self.password, 'domain': domain}} scope = {'project': {'name': self.tenant_name, 'domain': domain}} token_req = {'auth': {'identity': {'methods': ['password'], 'password': creds}, 'scope': scope}} try: resp_headers, resp_json =\ self._get_keystone_response(requests.post, url, None, token_req) except IOError: return False if self.version == 'v2': self.auth_token = resp_json['access']['token']['id'] else: self.auth_token = resp_headers['X-Subject-Token'] logging.debug('New auth token: %s' % self.auth_token) return True @trace_function def list_users(self): headers = {'X-Auth-Token': self.auth_token} resp_headers, resp_json = \ self._get_keystone_response(requests.get, '%s/users' % self.url, headers) return resp_json['users'] @trace_function def list_tenants(self): if self.version == 'v2': url = '%s/tenants' % self.url else: url = '%s/projects' % self.url resp_headers, resp_json = \ self._get_keystone_response(requests.get, url, {'X-Auth-Token': self.auth_token}) if self.version == 'v2': return resp_json['tenants'] else: return resp_json['projects'] @trace_function def list_roles(self): if self.version == 'v2': url = '%s/OS-KSADM/roles' % self.url else: url = '%s/roles' % self.url resp_headers, resp_json =\ self._get_keystone_response(requests.get, url, {'X-Auth-Token': self.auth_token}) return resp_json['roles'] @trace_function def get_role(self, role_name): role_id = self._get_id_for_role_name(role_name) if self.version == 'v2': url = '%s/OS-KSADM/%s' % (self.url, role_id) else: url = '%s/roles/%s' % (self.url, role_id) resp_headers, resp_json = \ self._get_keystone_response(requests.get, url, {'X-Auth-Token': self.auth_token}) return resp_json['role'] @trace_function # TODO: write v3 version of this def get_tenant(self, target_tenant): url = '%s/tenants?name=%s' % (self.url, target_tenant) headers = {'X-Auth-Token': self.auth_token, 'name': target_tenant} logging.debug('url: %s' % url) logging.debug('headers: %s' % headers) resp_headers, resp_json = self._get_keystone_response(requests.get, url, headers) return resp_json['tenant'] @trace_function def _get_id_for_user_name(self, user_name): users = [user['id'] for user in self.list_users() if user['username'] == user_name] if len(users) == 0: raise KeyError('%s is not a user' % user_name) else: return users[0] @trace_function def _get_id_for_tenant_name(self, tenant_name): tenants = [tenant['id'] for tenant in self.list_tenants() if tenant['name'] == tenant_name] if len(tenants) == 0: raise KeyError('%s is not a tenant' % tenant_name) else: return tenants[0] @trace_function def _get_id_for_role_name(self, role_name): roles = [role['id'] for role in self.list_roles() if role['name'] == role_name] if len(roles) == 0: raise KeyError('%s is not a role' % role_name) else: return roles[0] @trace_function def grant_role_on_tenant(self, target_project, target_user, target_role): try: tenant_id = self._get_id_for_tenant_name(target_project) user_id = self._get_id_for_user_name(target_user) role_id = self._get_id_for_role_name(target_role) except KeyError as e: print "Could not grant role %s to %s@%s (reason: %s)" % \ (target_role, target_user, target_project, e.message) raise if self.version == 'v2': url = '%s/tenants/%s/users/%s/roles/OS-KSADM/%s' %\ (self.url, tenant_id, user_id, role_id) else: url = '%s/projects/%s/users/%s/roles/%s' %\ (self.url, tenant_id, user_id, role_id) try: self._get_keystone_response(requests.put, url, {'X-Auth-Token': self.auth_token}) except IOError as e: if e.errno == 409: # We must've already granted ourselves this role. Carry on. pass else: raise if __name__ == "__main__": _args = check_usage() if os.getuid() != 0: print 'swiftonhpss-nstool must be run as root' sys.exit(1) # Initiating Log File if _args.verbose: log_level = logging.DEBUG else: log_level = logging.ERROR logging.basicConfig(filename=_args.logfile, level=log_level) main(_args)