From 03de935048407492ae45942f80665973d1d8fc1b Mon Sep 17 00:00:00 2001 From: Ian Wienand Date: Thu, 13 Apr 2023 15:28:13 +1000 Subject: [PATCH] launch: refactor to work This was never consistently showing the host key and sshfp records upon launch. Upon digging, a number of things are going wrong. The socket.create_connection() check isn't waiting for the host to be up properly. This means the keyscans were not working, and we'd get blank return values [1]. We have an ssh_connect() routine, rework it to use that to probe. We add a close method to the sshclient so we can shut it down too. I don't know why the inventory output was in dns.py, as it's not really DNS. Move it to the main launch_node.py, and simplify it by using f-strings. While we're here, delimit the output a bit more and make white-space more consistent. This allows us to simplify dns.py and make it so it handles multiple domains. Since we're actually waiting for ssh to be up now, the keyscan works better and this outputs the information we want. A sample of this is https://paste.opendev.org/show/b1MjiTvYr4E03GTeP56w/ [1] ssh-keyscan has a very short timeout, and just returns blank if it doesn't get a response to its probes. We weren't checking its return code. Change-Id: I06995027a4b80133bdac91c263d7a92fd495493b --- launch/src/opendev_launch/dns.py | 78 +++++++++++++----------- launch/src/opendev_launch/launch_node.py | 53 ++++++++++++---- launch/src/opendev_launch/sshclient.py | 3 + launch/src/opendev_launch/sshfp.py | 54 ---------------- 4 files changed, 84 insertions(+), 104 deletions(-) delete mode 100755 launch/src/opendev_launch/sshfp.py diff --git a/launch/src/opendev_launch/dns.py b/launch/src/opendev_launch/dns.py index 453ecb934b..4befe8357a 100755 --- a/launch/src/opendev_launch/dns.py +++ b/launch/src/opendev_launch/dns.py @@ -1,8 +1,9 @@ #!/usr/bin/env python3 -# Launch a new OpenStack project infrastructure node.
+# Output DNS for a new host # Copyright (C) 2013 OpenStack Foundation +# Copyright (C) 2023 Red Hat, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,50 +20,53 @@ # limitations under the License. import argparse -from .sshfp import sshfp_print_records -from .ssh_knownhosts import generate_known_hosts +import subprocess +def print_sshfp_records(hostname, ip): + '''Given a hostname and and IP address, scan the IP address (hostname + not in dns yet) and return a bind string with sshfp records''' + p = ['ssh-keyscan', '-D', ip] + s = subprocess.run(p, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE).stdout.decode('utf-8') + fingerprints = [] + for line in s.split('\n'): + if not line: + continue + _, _, _, algo, key_type, fingerprint = line.split(' ') + # ssh-keygen on the host seems to return DSS/DSA keys, which + # aren't valid to log in and not shown by ssh-keyscan -D + # ... prune it. + if algo == '2': + continue + fingerprints.append( + (algo, key_type, fingerprint)) + + # sort by algo and key_type to keep it consistent + fingerprints = sorted(fingerprints, + key=lambda x: (x[0], x[1])) + + dns_hostname = hostname.split('.')[0] + for f in fingerprints: + print(f"{dns_hostname}\t\t\tIN\tSSHFP\t{f[0]} {f[1]} {f[2]}") def print_dns(cloud, server): ip4 = server.public_v4 ip6 = server.public_v6 + # note handle things like mirror.iad.rax.opendev.org + domain = '.'.join(server.name.split('.')[-2:]) + host = '.'.join(server.name.split('.')[0:-2]) - if server.name.endswith('opendev.org'): - print_dns_opendev(server.name.rsplit('.', 2)[0], ip4, ip6) + # openstack.org is the only domain we deal with that is still in + # RAX DNS + if (domain == "openstack.org"): + print("Add the following manually to openstack.org domain in RAX\n") else: - print("Login to manage.rackspace.com and setup DNS manually.") - - print_inventory_yaml(server, ip4, ip6) - - -def print_dns_opendev(name, 
ip4, ip6): - print("\n") - print("Put the following into zone-opendev.org:zones/opendev.org/zone.db") - print("{name} IN A {ip4}".format(name=name, ip4=ip4)) + print(f"Put the following into zone-{domain}:zones/{domain}/zone.db\n") + print(f"{host} IN A {ip4}") if ip6: - print("{name} IN AAAA {ip6}".format(name=name, ip6=ip6)) - sshfp_print_records(name, ip4) - - -def print_inventory_yaml(server, ip4, ip6): - known_hosts = generate_known_hosts(ip4) - - print("\n") - print("Put the following into system-config:inventory/base/hosts.yaml") - print("\n") - print(" {name}:".format(name=server.name)) - print(" ansible_host: {ip}".format(ip=ip4 or ip6)) - print(" location:") - print(" cloud: {cloud}".format(cloud=server.location['cloud'])) - print(" region_name: {region_name}".format( - region_name=server.location['region_name'])) - print(" public_v4: {ip4}".format(ip4=ip4)) - if ip6: - print(" public_v6: {ip6}".format(ip6=ip6)) - print(" host_keys:") - for (key, fingerprint) in known_hosts: - print(" - '%s %s'" % (key, fingerprint)) - + print(f"{host} IN AAAA {ip6}") + print_sshfp_records(server.name, ip4) def main(): parser = argparse.ArgumentParser() diff --git a/launch/src/opendev_launch/launch_node.py b/launch/src/opendev_launch/launch_node.py index a411294520..5d90a69c8d 100755 --- a/launch/src/opendev_launch/launch_node.py +++ b/launch/src/opendev_launch/launch_node.py @@ -32,6 +32,7 @@ import traceback from . import dns from . import rax_rdns from . import utils +from .ssh_knownhosts import generate_known_hosts import openstack import paramiko @@ -222,6 +223,7 @@ def bootstrap_server(server, key, name, volume_device, keep, try: ssh_client.ssh("reboot") + ssh_client.close() except Exception as e: # Some init system kill the connection too fast after reboot. # Deal with it by ignoring ssh errors when rebooting. 
@@ -230,19 +232,14 @@ def bootstrap_server(server, key, name, volume_device, keep, else: raise - timeout = 120 - start = time.perf_counter() - while True: - try: - print("Waiting for ssh...") - with socket.create_connection((host_ip, 22), timeout=5): - break - except OSError as e: - if time.perf_counter() - start >= timeout: - raise Exception("Host did not reboot within timeout") - time.sleep(10) - print("Host rebooted!") - + # Wait a bit and make sure we can ssh back in + print("Waiting 30 seconds for reboot") + time.sleep(30) + ssh_client = utils.ssh_connect(ip, 'root', ssh_kwargs, timeout=90) + if not ssh_client: + raise Exception("Failed to log into host") + ssh_client.close() + print("Host alive") def build_server(cloud, name, image, flavor, volume, keep, network, boot_from_volume, config_drive, @@ -330,6 +327,27 @@ def build_server(cloud, name, image, flavor, return server +def print_inventory_yaml(server): + ip4 = server.public_v4 + ip6 = server.public_v6 + cloud = server.location['cloud'] + region = server.location['region_name'] + + known_hosts = generate_known_hosts(ip4) + + print(f"Put the following into system-config:inventory/base/hosts.yaml") + print() + print(f" {server.name}:") + print(f" ansible_host: {ip4}") + print(f" location:") + print(f" cloud: {cloud}") + print(f" region_name: {region}") + print(f" public_v4: {ip4}") + if ip6: + print(f" public_v6: {ip6}") + print(f" host_keys:") + for (key, fingerprint) in known_hosts: + print(f" - {key} {fingerprint}") def main(): parser = argparse.ArgumentParser() @@ -432,9 +450,18 @@ def main(): options.timeout, options.ignore_ipv6, options.playbooks) if 'rax' in cloud.config.name: + print("Setting reverse DNS for RAX") rax_rdns.set_rax_reverse_dns(cloud, server, server.public_v4, server.public_v6) + + print() + print("-------- CONFIGURATION --------\n") dns.print_dns(cloud, server) + print() + print_inventory_yaml(server) + print() + print("-------------------------------") + print() print("If this is a 
server that is expected to send email (ask, review,") print("lists, etc) double check that the server's IPs are not listed on") print("the spamhaus pbl.\n") diff --git a/launch/src/opendev_launch/sshclient.py b/launch/src/opendev_launch/sshclient.py index 8ff86ca315..2af58e7ec6 100644 --- a/launch/src/opendev_launch/sshclient.py +++ b/launch/src/opendev_launch/sshclient.py @@ -67,3 +67,6 @@ class SSHClient(object): f = ftp.open(path, mode) yield f ftp.close() + + def close(self): + self.client.close() diff --git a/launch/src/opendev_launch/sshfp.py b/launch/src/opendev_launch/sshfp.py deleted file mode 100755 index 0e2aeba795..0000000000 --- a/launch/src/opendev_launch/sshfp.py +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import subprocess - -def generate_sshfp_records(hostname, ip): - '''Given a hostname and and IP address, scan the IP address (hostname - not in dns yet) and return a bind string with sshfp records''' - - p = ['ssh-keyscan', '-D', ip] - s = subprocess.run(p, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE).stdout.decode('utf-8') - fingerprints = [] - for line in s.split('\n'): - if not line: - continue - _, _, _, algo, key_type, fingerprint = line.split(' ') - # ssh-keygen on the host seems to return DSS/DSA keys, which - # aren't valid to log in and not shown by ssh-keyscan -D - # ... prune it. 
- if algo == '2': - continue - fingerprints.append( - (algo, key_type, fingerprint)) - - # sort by algo and key_type to keep it consistent - fingerprints = sorted(fingerprints, - key=lambda x: (x[0], x[1])) - - ret = '' - first = True - dns_hostname = hostname.split('.')[0] - for f in fingerprints: - ret += '%s%s\t\tIN\tSSHFP\t%s %s %s' % \ - ("\n" if not first else '', dns_hostname, f[0], f[1], f[2]) - first = False - return ret - - -def sshfp_print_records(hostname, ip): - print(generate_sshfp_records(hostname, ip)) - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("hostname", help="hostname") - parser.add_argument("ip", help="address to scan") - args = parser.parse_args() - - sshfp_print_records(args.hostname, args.ip) - -if __name__ == '__main__': - main()