diff --git a/.zuul.yaml b/.zuul.yaml index 5000ada1fe..0b6b6e0749 100644 --- a/.zuul.yaml +++ b/.zuul.yaml @@ -439,6 +439,34 @@ - playbooks/group_vars/eavesdrop.yaml - testinfra/test_eavesdrop.py + +- job: + name: system-config-run-letsencrypt + parent: system-config-run + description: | + Run the playbook for letsencrypt key acquisition + nodeset: + nodes: + - name: bridge.openstack.org + label: ubuntu-bionic + - name: adns-letsencrypt.opendev.org + label: ubuntu-bionic + - name: letsencrypt01.opendev.org + label: ubuntu-bionic + - name: letsencrypt02.opendev.org + label: ubuntu-bionic + host-vars: + letsencrypt01.opendev.org: + host_copy_output: + '/var/log/acme.sh': logs + letsencrypt02.opendev.org: + host_copy_output: + '/var/log/acme.sh': logs + files: + - .zuul.yaml + - playbooks/group_vars/letsencrypt.yaml + - playbooks/roles/letsencrypt.* + - job: name: system-config-run-nodepool parent: system-config-run @@ -647,6 +675,7 @@ - name: system-config-build-image-gitea soft: true - system-config-run-zuul-preview + - system-config-run-letsencrypt - system-config-build-image-jinja-init - system-config-build-image-gitea-init - system-config-build-image-gitea @@ -673,6 +702,7 @@ - name: system-config-upload-image-gitea soft: true - system-config-run-zuul-preview + - system-config-run-letsencrypt - system-config-upload-image-jinja-init - system-config-upload-image-gitea-init - system-config-upload-image-gitea diff --git a/inventory/groups.yaml b/inventory/groups.yaml index 43610aefc0..394dd653ff 100644 --- a/inventory/groups.yaml +++ b/inventory/groups.yaml @@ -73,6 +73,8 @@ groups: - kdc[0-9]*.open*.org kubernetes: - opendev-k8s*.opendev.org +# letsencrypt: +# - TBD logstash: - logstash[0-9]*.open*.org logstash-worker: diff --git a/playbooks/base.yaml b/playbooks/base.yaml index 84b113c55b..5d66aeb601 100644 --- a/playbooks/base.yaml +++ b/playbooks/base.yaml @@ -91,3 +91,20 @@ roles: - install-docker - zuul-preview + +# This next section needs to happen in order. 
letsencrypt hosts +# export their TXT authentication records which are installed onto +# adns1, and then the hosts verify to issue/renew keys +- hosts: "letsencrypt:!disabled" + name: "Base: deploy and renew certificates" + roles: + - letsencrypt-acme-sh-install + - letsencrypt-request-certs +- hosts: "adns:!disabled" + name: "Install txt records" + roles: + - letsencrypt-install-txt-record +- hosts: "letsencrypt:!disabled" + name: "Create certs" + roles: + - letsencrypt-create-certs diff --git a/playbooks/roles/letsencrypt-acme-sh-install/README.rst b/playbooks/roles/letsencrypt-acme-sh-install/README.rst new file mode 100644 index 0000000000..76595607a0 --- /dev/null +++ b/playbooks/roles/letsencrypt-acme-sh-install/README.rst @@ -0,0 +1,9 @@ +Install acme.sh client + +This makes the `acme.sh <https://github.com/Neilpang/acme.sh>`__ +client available on the host. + +Additionally a ``driver.sh`` script is installed to run the +authentication procedure and parse output. + +**Role Variables** diff --git a/playbooks/roles/letsencrypt-acme-sh-install/files/driver.sh b/playbooks/roles/letsencrypt-acme-sh-install/files/driver.sh new file mode 100644 index 0000000000..0b84f2ac4d --- /dev/null +++ b/playbooks/roles/letsencrypt-acme-sh-install/files/driver.sh @@ -0,0 +1,76 @@ +#!/bin/bash + +ACME_SH=${ACME_SH:-/opt/acme.sh/acme.sh} +CERT_HOME=${CERT_HOME:-/etc/letsencrypt-certs} +CHALLENGE_ALIAS_DOMAIN=${CHALLENGE_ALIAS_DOMAIN:-acme.opendev.org.} +# Set to !0 to use letsencrypt staging rather than production requests +LETSENCRYPT_STAGING=${LETSENCRYPT_STAGING:-0} +LOG_FILE=${LOG_FILE:-/var/log/acme.sh/acme.sh.log} + +STAGING="" +if [[ ${LETSENCRYPT_STAGING} != 0 ]]; then + STAGING="--staging" +fi + +echo -e "\n--- start --- ${1} --- $(date -u '+%Y-%m-%dT%H:%M:%S%z') ---" >> ${LOG_FILE} + +if [[ ${1} == "issue" ]]; then + # Take output like: + # [Thu Feb 14 13:44:37 AEDT 2019] Domain: '_acme-challenge.test.opendev.org' + # [Thu Feb 14 13:44:37 AEDT 2019] TXT value: 'QjkChGcuqD7rl0jN8FNWkWNAISX1Zry_vE-9RxWF2pE' 
+ # + # and turn it into: + # + # _acme-challenge.test.opendev.org:QjkChGcuqD7rl0jN8FNWkWNAISX1Zry_vE-9RxWF2pE + # + # Ansible then splits this back into (domain, key) pairs. + shift; + for arg in "$@"; do + $ACME_SH ${STAGING} \ + --cert-home ${CERT_HOME} \ + --no-color \ + --yes-I-know-dns-manual-mode-enough-go-ahead-please \ + --issue \ + --dns \ + --challenge-alias ${CHALLENGE_ALIAS_DOMAIN} \ + $arg 2>&1 | tee -a ${LOG_FILE} | \ + grep -E 'Domain:|TXT value:' | cut -d"'" -f2 | paste -d':' - - + # shell magic ^ is + # - extract everything between ' ' + # - stick every two lines together, separated by a : + done +elif [[ ${1} == "renew" ]]; then + shift; + for arg in "$@"; do + $ACME_SH ${STAGING} \ + --cert-home ${CERT_HOME} \ + --no-color \ + --yes-I-know-dns-manual-mode-enough-go-ahead-please \ + --renew \ + $arg 2>&1 | tee -a ${LOG_FILE} + done +elif [[ ${1} == "selfsign" ]]; then + # For testing, simulate the key generation + shift; + for arg in "$@"; do + # TODO(ianw): Set SAN names from the other "-d" arguments?; + # it's a pita to parse. 
+ { + read -r -a domain_array <<< "$arg" + domain=${domain_array[1]} + mkdir -p ${CERT_HOME}/${domain} + cd ${CERT_HOME}/${domain} + echo "Creating certs in ${CERT_HOME}/${domain}" + openssl genrsa -out ${domain}.key 2048 + openssl rsa -in ${domain}.key -out ${domain}.key + openssl req -sha256 -new -key ${domain}.key -out ${domain}.csr -subj '/CN=localhost' + openssl x509 -req -sha256 -days 365 -in ${domain}.csr -signkey ${domain}.key -out ${domain}.cer + cp ${domain}.cer fullchain.cer + } | tee -a ${LOG_FILE} + done +else + echo "Unknown driver arg: $1" + exit 1 +fi + +echo "--- end --- $(date -u '+%Y-%m-%dT%H:%M:%S%z') ---" >> ${LOG_FILE} diff --git a/playbooks/roles/letsencrypt-acme-sh-install/tasks/main.yaml b/playbooks/roles/letsencrypt-acme-sh-install/tasks/main.yaml new file mode 100644 index 0000000000..cc4ef4ccf1 --- /dev/null +++ b/playbooks/roles/letsencrypt-acme-sh-install/tasks/main.yaml @@ -0,0 +1,23 @@ +- name: Install acme.sh client + git: + repo: https://github.com/Neilpang/acme.sh + dest: /opt/acme.sh + version: dev + +- name: Install driver script + copy: + src: driver.sh + dest: /opt/acme.sh/driver.sh + mode: 0755 + +- name: Setup log directory + file: + path: /var/log/acme.sh + state: directory + mode: 0755 + +- name: Setup log rotation + include_role: + name: logrotate + vars: + logrotate_file_name: /var/log/acme.sh/acme.sh.log \ No newline at end of file diff --git a/playbooks/roles/letsencrypt-create-certs/README.rst b/playbooks/roles/letsencrypt-create-certs/README.rst new file mode 100644 index 0000000000..d121aa8d48 --- /dev/null +++ b/playbooks/roles/letsencrypt-create-certs/README.rst @@ -0,0 +1,19 @@ +Generate letsencrypt certificates + +This must run after the ``letsencrypt-acme-sh-install``, +``letsencrypt-request-certs`` and ``letsencrypt-install-txt-record`` +roles. It will run the ``acme.sh`` process to create the certificates +on the host. + +**Role Variables** + +.. 
zuul:rolevar:: letsencrypt_test_only + + If set to True, will locally generate self-signed certificates in + the same locations the real script would, instead of contacting + letsencrypt. This is set during gate testing as the + authentication tokens are not available. + +.. zuul:rolevar:: letsencrypt_certs + + The same variable as described in ``letsencrypt-request-certs``. diff --git a/playbooks/roles/letsencrypt-create-certs/defaults/main.yaml b/playbooks/roles/letsencrypt-create-certs/defaults/main.yaml new file mode 100644 index 0000000000..79476abe0a --- /dev/null +++ b/playbooks/roles/letsencrypt-create-certs/defaults/main.yaml @@ -0,0 +1 @@ +letsencrypt_test_only: False diff --git a/playbooks/roles/letsencrypt-create-certs/tasks/acme.yaml b/playbooks/roles/letsencrypt-create-certs/tasks/acme.yaml new file mode 100644 index 0000000000..5ee9887ead --- /dev/null +++ b/playbooks/roles/letsencrypt-create-certs/tasks/acme.yaml @@ -0,0 +1,16 @@ +- name: 'Build arguments for letsencrypt acme.sh driver for: {{ item.key }}' + set_fact: + acme_args: '"{% for domain in item.value %}-d {{ domain }} {% endfor %}"' + +- name: 'Run acme.sh driver for {{ item.key }} certificate issue' + shell: + cmd: | + /opt/acme.sh/driver.sh {{ 'selfsign' if letsencrypt_test_only else 'renew' }} {{ acme_args }} + args: + chdir: /opt/acme.sh/ + register: acme_output + +- debug: + var: acme_output.stdout_lines + +# Keys generated! \ No newline at end of file diff --git a/playbooks/roles/letsencrypt-create-certs/tasks/main.yaml b/playbooks/roles/letsencrypt-create-certs/tasks/main.yaml new file mode 100644 index 0000000000..449e2f09c7 --- /dev/null +++ b/playbooks/roles/letsencrypt-create-certs/tasks/main.yaml @@ -0,0 +1,13 @@ +# NOTE(ianw): this var set for the host by the +# letsencrypt-request-certs role; running this when empty would be a +# no-op but we might as well skip it if we know this host hasn't +# requested anything to actually create/renew. 
+- name: Check for prerun state + fail: + msg: "acme_txt_required is not defined; was letsencrypt-request-certs run?" + when: acme_txt_required is not defined + +- name: Include ACME renewal + include_tasks: acme.yaml + loop: "{{ query('dict', letsencrypt_certs) }}" + when: acme_txt_required | length > 0 diff --git a/playbooks/roles/letsencrypt-install-txt-record/README.rst b/playbooks/roles/letsencrypt-install-txt-record/README.rst new file mode 100644 index 0000000000..186fa65e3e --- /dev/null +++ b/playbooks/roles/letsencrypt-install-txt-record/README.rst @@ -0,0 +1,19 @@ +Install authentication records for letsencrypt + +Install TXT records to the ``acme.opendev.org`` domain. This role +runs only on the adns server, and assumes ownership of the +``/var/lib/bind/zones/acme.opendev.org/zone.db`` file. After +installation the nameserver is refreshed. + +After this, ``letsencrypt-create-certs`` can run on each host to +provision the certificates. + +**Role Variables** + +.. zuul:rolevar:: acme_txt_required + + A per-host list of ``(domain, TXT value)`` pairs to be installed. This is + generated in a prior step on each host by the + ``letsencrypt-request-certs`` role. + + diff --git a/playbooks/roles/letsencrypt-install-txt-record/tasks/main.yaml b/playbooks/roles/letsencrypt-install-txt-record/tasks/main.yaml new file mode 100644 index 0000000000..db77d5dd8d --- /dev/null +++ b/playbooks/roles/letsencrypt-install-txt-record/tasks/main.yaml @@ -0,0 +1,35 @@ +- name: Make key list + set_fact: + acme_txt_keys: [] + +- name: Build key list + set_fact: + acme_txt_keys: '{{ acme_txt_keys }} + {{ hostvars[item]["acme_txt_required"] }}' + with_inventory_hostnames: letsencrypt + +- name: Final list + debug: + var: acme_txt_keys + +# NOTE(ianw): Most of the time, we won't have anything to actually do +# as we don't have new keys or renewals due. 
+- name: Deploy TXT records + block: + - name: Deploy new zone.db + template: + src: zone.db.j2 + dest: /var/lib/bind/zones/acme.opendev.org/zone.db + + - name: debug new file + slurp: + src: '/var/lib/bind/zones/acme.opendev.org/zone.db' + register: bind_zone_result + - debug: + msg: "{{ bind_zone_result['content'] | b64decode }}" + + - name: Ensure domain is valid + shell: named-checkzone acme.opendev.org /var/lib/bind/zones/acme.opendev.org/zone.db + + - name: Reload domain + shell: rndc reload acme.opendev.org + when: acme_txt_keys | length > 0 \ No newline at end of file diff --git a/playbooks/roles/letsencrypt-install-txt-record/templates/zone.db.j2 b/playbooks/roles/letsencrypt-install-txt-record/templates/zone.db.j2 new file mode 100644 index 0000000000..a888051fda --- /dev/null +++ b/playbooks/roles/letsencrypt-install-txt-record/templates/zone.db.j2 @@ -0,0 +1,17 @@ +; -*- mode: zone -*- +$ORIGIN acme.opendev.org. +$TTL 1m +@ IN SOA adns1.opendev.org. hostmaster.opendev.org. ( + {{ ansible_date_time.epoch }} ; serial number unixtime + 1h ; refresh (secondary checks for updates) + 10m ; retry (secondary retries failed axfr) + 10d ; expire (secondary ends serving old data) + 5m ) ; min ttl (cache time for failed lookups) +@ IN NS ns1.opendev.org. +@ IN NS ns2.opendev.org. + +; NOTE: DO NOT HAND EDIT. THESE KEYS ARE MANAGED BY ANSIBLE + +{% for key in acme_txt_keys %} +@ IN TXT "{{key[1]}}" +{% endfor %} diff --git a/playbooks/roles/letsencrypt-request-certs/README.rst b/playbooks/roles/letsencrypt-request-certs/README.rst new file mode 100644 index 0000000000..181f2e092e --- /dev/null +++ b/playbooks/roles/letsencrypt-request-certs/README.rst @@ -0,0 +1,53 @@ +Request certificates from letsencrypt + +The role requests certificates (or renews expiring certificates, which +is fundamentally the same thing) from letsencrypt for a host. This +requires the ``acme.sh`` tool and driver which should have been +installed by the ``letsencrypt-acme-sh-install`` role. 
+ +This role does not create the certificates. It will request the +certificates from letsencrypt and populate the authentication data +into the ``acme_txt_required`` variable. These values need to be +installed and activated on the DNS server by the +``letsencrypt-install-txt-record`` role; the +``letsencrypt-create-certs`` role will then finish the certificate +provision process. + +**Role Variables** + +.. zuul:rolevar:: letsencrypt_test_only + + Uses staging, rather than production, requests to letsencrypt + +.. zuul:rolevar:: letsencrypt_certs + + A host wanting a certificate should define a dictionary variable + ``letsencrypt_certs``. Each key in this dictionary is a separate + certificate to create (i.e. a host can create multiple separate + certificates). Each key should have a list of hostnames valid for + that certificate. The certificate will be named for the *first* + entry. + + For example: + + .. code-block:: yaml + + letsencrypt_certs: + main: + - hostname01.opendev.org + - hostname.opendev.org + secondary: + - foo.opendev.org + + will ultimately result in two certificates being provisioned on the + host in ``/etc/letsencrypt-certs/hostname01.opendev.org`` and + ``/etc/letsencrypt-certs/foo.opendev.org``. + + Note that each entry will require a ``CNAME`` pointing the ACME + challenge domain to the TXT record that will be created in the + signing domain. For the example above, the following records would need + to be pre-created:: + + _acme-challenge.hostname01.opendev.org. IN CNAME acme.opendev.org. + _acme-challenge.hostname.opendev.org. IN CNAME acme.opendev.org. + _acme-challenge.foo.opendev.org. IN CNAME acme.opendev.org. 
diff --git a/playbooks/roles/letsencrypt-request-certs/defaults/main.yaml b/playbooks/roles/letsencrypt-request-certs/defaults/main.yaml new file mode 100644 index 0000000000..b62bfec72c --- /dev/null +++ b/playbooks/roles/letsencrypt-request-certs/defaults/main.yaml @@ -0,0 +1 @@ +letsencrypt_test_only: False \ No newline at end of file diff --git a/playbooks/roles/letsencrypt-request-certs/tasks/acme.yaml b/playbooks/roles/letsencrypt-request-certs/tasks/acme.yaml new file mode 100644 index 0000000000..1c4672d4c6 --- /dev/null +++ b/playbooks/roles/letsencrypt-request-certs/tasks/acme.yaml @@ -0,0 +1,29 @@ +- name: 'Build arguments for letsencrypt acme.sh driver for: {{ cert.key }}' + set_fact: + # NOTE(ianw): note the domains are passed in one string (between + # ") as it makes argument parsing a little easier in the driver.sh + acme_args: '"{% for domain in cert.value %}-d {{ domain }} {% endfor %}"' + +- name: Run acme.sh driver for certificate issue + shell: + cmd: | + /opt/acme.sh/driver.sh issue {{ acme_args }} + args: + chdir: /opt/acme.sh/ + environment: + LETSENCRYPT_STAGING: '{{ "1" if letsencrypt_test_only else "0" }}' + register: acme_output + +- debug: + var: acme_output.stdout_lines + +# NOTE(ianw): The output is domain:key which we split into a tuple +# here. We don't make use of the domain part ATM; our default CNAME +# setup points "_acme-challenge.host.opendev.org" to just +# "acme.opendev.org" so we put all the keys into "top-level" TXT +# records directly at acme.opendev.org. letsencrypt doesn't care; it +# just follows the CNAME and enumerates all the TXT records in +# acme.opendev.org looking for one that matches. 
+- set_fact: + acme_txt_required: '{{ acme_txt_required + [(item.split(":")[0], item.split(":")[1])] }}' + loop: '{{ acme_output.stdout_lines }}' diff --git a/playbooks/roles/letsencrypt-request-certs/tasks/main.yaml b/playbooks/roles/letsencrypt-request-certs/tasks/main.yaml new file mode 100644 index 0000000000..50090e2297 --- /dev/null +++ b/playbooks/roles/letsencrypt-request-certs/tasks/main.yaml @@ -0,0 +1,25 @@ +- set_fact: + acme_txt_required: [] + +- name: Show cert list + debug: + var: letsencrypt_certs + +# Handle multiple certs for a single host; like +# +# letsencrypt_certs: +# main: +# hostname.opendev.org +# secondary: +# foo.opendev.org +# baz.opendev.org +# +# All required TXT keys are put into acme_txt_required + +- include_tasks: acme.yaml + loop: "{{ query('dict', letsencrypt_certs) }}" + loop_control: + loop_var: cert + +- debug: + var: acme_txt_required diff --git a/playbooks/zuul/run-base.yaml b/playbooks/zuul/run-base.yaml index 291221cdbb..372f5c9be5 100644 --- a/playbooks/zuul/run-base.yaml +++ b/playbooks/zuul/run-base.yaml @@ -65,7 +65,10 @@ - group_vars/registry.yaml - group_vars/gitea.yaml - group_vars/gitea-lb.yaml + - group_vars/letsencrypt.yaml - host_vars/bridge.openstack.org.yaml + - host_vars/letsencrypt01.opendev.org.yaml + - host_vars/letsencrypt02.opendev.org.yaml - name: Display group membership command: ansible localhost -m debug -a 'var=groups' - name: Run base.yaml diff --git a/playbooks/zuul/templates/gate-groups.yaml.j2 b/playbooks/zuul/templates/gate-groups.yaml.j2 index a8c8bfeb05..7f2c0b3b52 100644 --- a/playbooks/zuul/templates/gate-groups.yaml.j2 +++ b/playbooks/zuul/templates/gate-groups.yaml.j2 @@ -10,3 +10,7 @@ groups: docker: - bionic-docker + + letsencrypt: + - letsencrypt01.opendev.org + - letsencrypt02.opendev.org diff --git a/playbooks/zuul/templates/group_vars/letsencrypt.yaml.j2 b/playbooks/zuul/templates/group_vars/letsencrypt.yaml.j2 new file mode 100644 index 0000000000..1d315d24e9 --- /dev/null +++ 
b/playbooks/zuul/templates/group_vars/letsencrypt.yaml.j2 @@ -0,0 +1,4 @@ +# We don't want CI tests trying to really authenticate against +# letsencrypt; apart from just being unfriendly it might cause quota +# issues. +letsencrypt_test_only: True diff --git a/playbooks/zuul/templates/host_vars/letsencrypt01.opendev.org.yaml.j2 b/playbooks/zuul/templates/host_vars/letsencrypt01.opendev.org.yaml.j2 new file mode 100644 index 0000000000..f116c5219a --- /dev/null +++ b/playbooks/zuul/templates/host_vars/letsencrypt01.opendev.org.yaml.j2 @@ -0,0 +1,7 @@ +letsencrypt_certs: + main: + - letsencrypt01.opendev.org + - letsencrypt.opendev.org + - alias.opendev.org + secondary: + - someotherservice.opendev.org \ No newline at end of file diff --git a/playbooks/zuul/templates/host_vars/letsencrypt02.opendev.org.yaml.j2 b/playbooks/zuul/templates/host_vars/letsencrypt02.opendev.org.yaml.j2 new file mode 100644 index 0000000000..9ba000715a --- /dev/null +++ b/playbooks/zuul/templates/host_vars/letsencrypt02.opendev.org.yaml.j2 @@ -0,0 +1,4 @@ +letsencrypt_certs: + main: + - letsencrypt02.opendev.org + - letsencrypt.opendev.org diff --git a/testinfra/test_letsencrypt.py b/testinfra/test_letsencrypt.py new file mode 100644 index 0000000000..841a5c3da4 --- /dev/null +++ b/testinfra/test_letsencrypt.py @@ -0,0 +1,60 @@ +# Copyright 2019 Red Hat, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import pytest + +testinfra_hosts = ['adns-letsencrypt.opendev.org', + 'letsencrypt01.opendev.org', + 'letsencrypt02.opendev.org'] + + +def test_acme_zone(host): + if host.backend.get_hostname() != 'adns-letsencrypt.opendev.org': + pytest.skip() + acme_opendev_zone = host.file('/var/lib/bind/zones/acme.opendev.org/zone.db') + assert acme_opendev_zone.exists + + # On our test nodes, unbound is listening on 127.0.0.1:53; querying + # the default IPv4 address instead ensures the query hits bind + query_addr = host.ansible("setup")["ansible_facts"]["ansible_default_ipv4"]["address"] + cmd = host.run("dig -t txt acme.opendev.org @" + query_addr) + count = 0 + for line in cmd.stdout.split('\n'): + if line.startswith('acme.opendev.org. 60 IN TXT'): + count = count + 1 + if count != 6: + # NOTE(ianw): I'm sure there's more pytest-y ways to save this for + # debugging ... (6 is presumably 4 names on letsencrypt01 + 2 on letsencrypt02) + print(cmd.stdout) + assert count == 6, "Did not see required number of TXT records!" + +def test_certs_created(host): + if host.backend.get_hostname() == 'letsencrypt01.opendev.org': + domain_one = host.file( + '/etc/letsencrypt-certs/' + 'letsencrypt01.opendev.org/letsencrypt01.opendev.org.key') + assert domain_one.exists + domain_two = host.file( + '/etc/letsencrypt-certs/' + 'someotherservice.opendev.org/someotherservice.opendev.org.key') + assert domain_two.exists + + elif host.backend.get_hostname() == 'letsencrypt02.opendev.org': + domain_one = host.file( + '/etc/letsencrypt-certs/' + 'letsencrypt02.opendev.org/letsencrypt02.opendev.org.key') + assert domain_one.exists + + else: + pytest.skip()