launch: refactor to work

This was never consistently showing the host key and sshfp records
upon launch.

Upon digging, a number of things are going wrong.

The socket.create_connection() check isn't waiting for the host to be
up properly.  This means the keyscans were not working, and we'd get
blank return values [1].  We already have an ssh_connect() routine, so
rework the post-reboot check to use that to probe.  We add a close
method to the sshclient so we can shut the connection down too.

I don't know why the inventory output was in dns.py, as it's not
really DNS.  Move it to the main launch_node.py, and simplify it by
using f-strings.  While we're here, delimit the output a bit more
and make white-space more consistent.

This allows us to simplify dns.py and make it so it handles multiple
domains.

Since we're actually waiting for ssh to be up now, the keyscan works
better and this outputs the information we want.  A sample of this is

  https://paste.opendev.org/show/b1MjiTvYr4E03GTeP56w/

[1] ssh-keyscan has a very short timeout, and just returns blank if it
    doesn't get a response to its probes.  We weren't checking its
    return code.

Change-Id: I06995027a4b80133bdac91c263d7a92fd495493b
This commit is contained in:
Ian Wienand 2023-04-13 15:28:13 +10:00
parent 4a101da52a
commit 03de935048
No known key found for this signature in database
4 changed files with 84 additions and 104 deletions

View File

@ -1,8 +1,9 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# Launch a new OpenStack project infrastructure node. # Output DNS for a new host
# Copyright (C) 2013 OpenStack Foundation # Copyright (C) 2013 OpenStack Foundation
# Copyright (C) 2023 Red Hat, Inc.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
@ -19,50 +20,53 @@
# limitations under the License. # limitations under the License.
import argparse import argparse
from .sshfp import sshfp_print_records import subprocess
from .ssh_knownhosts import generate_known_hosts
def print_sshfp_records(hostname, ip):
    '''Scan an IP address with ssh-keyscan and print its SSHFP records.

    The IP address is scanned rather than the hostname because the
    hostname is not in DNS yet.  One bind-style SSHFP line is printed
    to stdout per fingerprint, sorted by algorithm and then by
    fingerprint type.  If the scan yields no usable records a warning
    is written to stderr instead, so an empty result is not mistaken
    for success.

    :param hostname: name of the host; only the part before the first
        dot is used as the record label
    :param ip: address handed to ``ssh-keyscan -D``
    '''
    import sys

    scan = subprocess.run(['ssh-keyscan', '-D', ip],
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
    fingerprints = []
    for line in scan.stdout.decode('utf-8').splitlines():
        fields = line.split()
        # Only the last three of six fields are used (algorithm,
        # fingerprint type, fingerprint).  Skip blank lines and
        # anything that does not have that shape instead of crashing
        # on the unpack.
        if len(fields) != 6:
            continue
        algo, key_type, fingerprint = fields[3:]
        # ssh-keygen on the host seems to return DSS/DSA keys, which
        # aren't valid to log in and not shown by ssh-keyscan -D
        # ... prune it.
        if algo == '2':
            continue
        fingerprints.append((algo, key_type, fingerprint))

    if not fingerprints:
        print(f"WARNING: no SSHFP records found for {ip} "
              f"(ssh-keyscan exit status {scan.returncode})",
              file=sys.stderr)
        return

    dns_hostname = hostname.split('.')[0]
    # sort by algo and key_type to keep it consistent
    for algo, key_type, fingerprint in sorted(
            fingerprints, key=lambda x: (x[0], x[1])):
        print(f"{dns_hostname}\t\t\tIN\tSSHFP\t"
              f"{algo} {key_type} {fingerprint}")
def print_dns(cloud, server): def print_dns(cloud, server):
ip4 = server.public_v4 ip4 = server.public_v4
ip6 = server.public_v6 ip6 = server.public_v6
# note handle things like mirror.iad.rax.opendev.org
domain = '.'.join(server.name.split('.')[-2:])
host = '.'.join(server.name.split('.')[0:-2])
if server.name.endswith('opendev.org'): # openstack.org is the only domain we deal with that is still in
print_dns_opendev(server.name.rsplit('.', 2)[0], ip4, ip6) # RAX DNS
if (domain == "openstack.org"):
print("Add the following manually to openstack.org domain in RAX\n")
else: else:
print("Login to manage.rackspace.com and setup DNS manually.") print(f"Put the following into zone-{domain}:zones/{domain}/zone.db\n")
print(f"{host} IN A {ip4}")
print_inventory_yaml(server, ip4, ip6)
def print_dns_opendev(name, ip4, ip6):
print("\n")
print("Put the following into zone-opendev.org:zones/opendev.org/zone.db")
print("{name} IN A {ip4}".format(name=name, ip4=ip4))
if ip6: if ip6:
print("{name} IN AAAA {ip6}".format(name=name, ip6=ip6)) print(f"{host} IN AAAA {ip6}")
sshfp_print_records(name, ip4) print_sshfp_records(server.name, ip4)
def print_inventory_yaml(server, ip4, ip6):
    '''Print an inventory entry for a server, ready to paste into YAML.

    The entry is written to stdout with nested indentation so that
    ``cloud`` and ``region_name`` sit under ``location`` and the host
    keys sit under ``host_keys``.

    :param server: object providing ``name`` and a ``location`` mapping
        with ``cloud`` and ``region_name`` keys
    :param ip4: IPv4 address; also the address scanned for host keys
    :param ip6: IPv6 address; used for ``ansible_host`` when ip4 is
        empty, and printed as ``public_v6`` when set
    '''
    known_hosts = generate_known_hosts(ip4)

    print("\n")
    print("Put the following into system-config:inventory/base/hosts.yaml")
    print("\n")
    print(f"    {server.name}:")
    print(f"      ansible_host: {ip4 or ip6}")
    print("      location:")
    print(f"        cloud: {server.location['cloud']}")
    print(f"        region_name: {server.location['region_name']}")
    print(f"      public_v4: {ip4}")
    if ip6:
        print(f"      public_v6: {ip6}")
    print("      host_keys:")
    for key, fingerprint in known_hosts:
        print(f"        - '{key} {fingerprint}'")
def main(): def main():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()

View File

@ -32,6 +32,7 @@ import traceback
from . import dns from . import dns
from . import rax_rdns from . import rax_rdns
from . import utils from . import utils
from .ssh_knownhosts import generate_known_hosts
import openstack import openstack
import paramiko import paramiko
@ -222,6 +223,7 @@ def bootstrap_server(server, key, name, volume_device, keep,
try: try:
ssh_client.ssh("reboot") ssh_client.ssh("reboot")
ssh_client.close()
except Exception as e: except Exception as e:
# Some init system kill the connection too fast after reboot. # Some init system kill the connection too fast after reboot.
# Deal with it by ignoring ssh errors when rebooting. # Deal with it by ignoring ssh errors when rebooting.
@ -230,19 +232,14 @@ def bootstrap_server(server, key, name, volume_device, keep,
else: else:
raise raise
timeout = 120 # Wait a bit and make sure we can ssh back in
start = time.perf_counter() print("Waiting 30 seconds for reboot")
while True: time.sleep(30)
try: ssh_client = utils.ssh_connect(ip, 'root', ssh_kwargs, timeout=90)
print("Waiting for ssh...") if not ssh_client:
with socket.create_connection((host_ip, 22), timeout=5): raise Exception("Failed to log into host")
break ssh_client.close()
except OSError as e: print("Host alive")
if time.perf_counter() - start >= timeout:
raise Exception("Host did not reboot within timeout")
time.sleep(10)
print("Host rebooted!")
def build_server(cloud, name, image, flavor, def build_server(cloud, name, image, flavor,
volume, keep, network, boot_from_volume, config_drive, volume, keep, network, boot_from_volume, config_drive,
@ -330,6 +327,27 @@ def build_server(cloud, name, image, flavor,
return server return server
def print_inventory_yaml(server):
    '''Print an inventory entry for a server, ready to paste into YAML.

    Reads the addresses and location from the server object, scans the
    IPv4 address for host keys via generate_known_hosts(), and writes
    the entry to stdout.  Indentation is nested so that ``cloud`` and
    ``region_name`` sit under ``location`` and the keys sit under
    ``host_keys``.

    :param server: object providing ``name``, ``public_v4``,
        ``public_v6`` and a ``location`` mapping with ``cloud`` and
        ``region_name`` keys
    '''
    ip4 = server.public_v4
    ip6 = server.public_v6
    cloud = server.location['cloud']
    region = server.location['region_name']

    known_hosts = generate_known_hosts(ip4)

    print("Put the following into system-config:inventory/base/hosts.yaml")
    print()
    print(f"    {server.name}:")
    print(f"      ansible_host: {ip4}")
    print("      location:")
    print(f"        cloud: {cloud}")
    print(f"        region_name: {region}")
    print(f"      public_v4: {ip4}")
    # public_v6 is optional; omit the key entirely when there is none
    if ip6:
        print(f"      public_v6: {ip6}")
    print("      host_keys:")
    for key, fingerprint in known_hosts:
        print(f"        - {key} {fingerprint}")
def main(): def main():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
@ -432,9 +450,18 @@ def main():
options.timeout, options.ignore_ipv6, options.timeout, options.ignore_ipv6,
options.playbooks) options.playbooks)
if 'rax' in cloud.config.name: if 'rax' in cloud.config.name:
print("Setting reverse DNS for RAX")
rax_rdns.set_rax_reverse_dns(cloud, server, rax_rdns.set_rax_reverse_dns(cloud, server,
server.public_v4, server.public_v6) server.public_v4, server.public_v6)
print()
print("-------- CONFIGURATION --------\n")
dns.print_dns(cloud, server) dns.print_dns(cloud, server)
print()
print_inventory_yaml(server)
print()
print("-------------------------------")
print()
print("If this is a server that is expected to send email (ask, review,") print("If this is a server that is expected to send email (ask, review,")
print("lists, etc) double check that the server's IPs are not listed on") print("lists, etc) double check that the server's IPs are not listed on")
print("the spamhaus pbl.\n") print("the spamhaus pbl.\n")

View File

@ -67,3 +67,6 @@ class SSHClient(object):
f = ftp.open(path, mode) f = ftp.open(path, mode)
yield f yield f
ftp.close() ftp.close()
def close(self):
    '''Close the client held in ``self.client``.'''
    connection = self.client
    connection.close()

View File

@ -1,54 +0,0 @@
#!/usr/bin/env python3
import argparse
import subprocess
def generate_sshfp_records(hostname, ip):
    '''Scan an IP address and return its SSHFP records as a bind string.

    The IP address is scanned rather than the hostname because the
    hostname is not in DNS yet.

    :param hostname: name of the host; only the part before the first
        dot is used as the record label
    :param ip: address handed to ``ssh-keyscan -D``
    :returns: one SSHFP line per fingerprint, joined with newlines and
        sorted by algorithm then fingerprint type; an empty string
        when the scan produced no usable records
    '''
    scan = subprocess.run(['ssh-keyscan', '-D', ip],
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
    fingerprints = []
    for line in scan.stdout.decode('utf-8').splitlines():
        fields = line.split()
        # Only the last three of six fields are used (algorithm,
        # fingerprint type, fingerprint).  Skip blank lines and
        # anything that does not have that shape instead of crashing
        # on the unpack.
        if len(fields) != 6:
            continue
        algo, key_type, fingerprint = fields[3:]
        # ssh-keygen on the host seems to return DSS/DSA keys, which
        # aren't valid to log in and not shown by ssh-keyscan -D
        # ... prune it.
        if algo == '2':
            continue
        fingerprints.append((algo, key_type, fingerprint))

    # sort by algo and key_type to keep it consistent
    fingerprints.sort(key=lambda x: (x[0], x[1]))

    dns_hostname = hostname.split('.')[0]
    return '\n'.join(
        '%s\t\tIN\tSSHFP\t%s %s %s' % (dns_hostname, algo, key_type, fp)
        for algo, key_type, fp in fingerprints)
def sshfp_print_records(hostname, ip):
    '''Print the string generate_sshfp_records() returns for this host.'''
    records = generate_sshfp_records(hostname, ip)
    print(records)
def main():
    '''Read hostname and address from the command line and print records.'''
    arg_parser = argparse.ArgumentParser()
    positionals = (
        ("hostname", "hostname"),
        ("ip", "address to scan"),
    )
    for arg_name, arg_help in positionals:
        arg_parser.add_argument(arg_name, help=arg_help)
    options = arg_parser.parse_args()
    sshfp_print_records(options.hostname, options.ip)


if __name__ == '__main__':
    main()