From 119090208b899ca786a3180b7960955cbc9012d5 Mon Sep 17 00:00:00 2001 From: Tristan Cacqueray Date: Tue, 7 Apr 2020 16:08:06 +0000 Subject: [PATCH] Increase scheduler wait timeout and improve logs collection In some cases, the test node needs more time to pull the zuul image. This change increases the wait time to 8 minutes to prevent false positive failures. This change also: * adds a build artifact with the generated kubernetes resources. * redirects post commands output to logfiles to unclutter the job-output console. * replaces kubectl wait with rollout status. * waits for operator and nodepool-launcher deployments. Change-Id: I1c499bd11576f92b98511cd1ff180026b8aa70d8 --- .gitignore | 9 ++++ .zuul.yaml | 1 + playbooks/zuul-operator-functional/post.yaml | 11 ++--- .../zuul-operator-functional/pre-k8s.yaml | 6 ++- playbooks/zuul-operator-functional/run.yaml | 42 ++++++++++++++----- 5 files changed, 51 insertions(+), 18 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f14b57f --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +# Auto generated files +*.pem +*.crt +*.key +*.srl +*.csr +id_rsa +id_rsa.pub +*.patch diff --git a/.zuul.yaml b/.zuul.yaml index 2d810fe..7f8fec0 100644 --- a/.zuul.yaml +++ b/.zuul.yaml @@ -10,6 +10,7 @@ # We disable userland-proxy to enable scheduler deployement to connect to the gearman service # see: https://github.com/eclipse/che/issues/8134 docker_userland_proxy: false + container_runtime: docker - job: description: Operator integration tests with Kubernetes diff --git a/playbooks/zuul-operator-functional/post.yaml b/playbooks/zuul-operator-functional/post.yaml index b104c34..dcc500f 100644 --- a/playbooks/zuul-operator-functional/post.yaml +++ b/playbooks/zuul-operator-functional/post.yaml @@ -3,7 +3,8 @@ - collect-container-logs post_tasks: - name: Describe resources - command: "kubectl describe {{ item }}" + command: "bash -c 'kubectl describe {{ item }} > 
~/zuul-output/logs/describe-{{ item }}.txt'" + ignore_errors: yes loop: - pods - deployments @@ -12,10 +13,6 @@ - secrets - configmaps - - name: Grab scheduler logs - command: "kubectl logs statefulset/zuul-scheduler" - ignore_errors: yes - - - name: Grab executor logs - command: "kubectl logs statefulset/zuul-executor" + - name: Delete empty container logs + command: "find {{ ansible_user_dir }}/zuul-output/logs/ -type f -empty -delete" ignore_errors: yes diff --git a/playbooks/zuul-operator-functional/pre-k8s.yaml b/playbooks/zuul-operator-functional/pre-k8s.yaml index 670a0f4..3a21e48 100644 --- a/playbooks/zuul-operator-functional/pre-k8s.yaml +++ b/playbooks/zuul-operator-functional/pre-k8s.yaml @@ -14,4 +14,8 @@ buildset_registry_docker_user: root post_tasks: - name: check kubernetes connection - command: kubectl get pods + command: timeout 10s kubectl get pods + register: _api_ready + until: _api_ready.rc == 0 + retries: 6 + delay: 10 diff --git a/playbooks/zuul-operator-functional/run.yaml b/playbooks/zuul-operator-functional/run.yaml index 704ba73..cbc8886 100644 --- a/playbooks/zuul-operator-functional/run.yaml +++ b/playbooks/zuul-operator-functional/run.yaml @@ -1,11 +1,28 @@ - name: install and start zuul operator hosts: all tasks: + - name: Render default crd + when: + - not use_local_role | default(false) | bool + shell: | + set -e + JSON_TO_DHALL="{{ container_runtime }} run -v $(pwd)/conf:/conf:Z --rm --entrypoint json-to-dhall -i docker.io/zuul/zuul-operator" + DHALL_TO_YAML="{{ container_runtime }} run -v $(pwd)/conf:/conf:Z --rm --entrypoint dhall-to-yaml -i docker.io/zuul/zuul-operator" + JSON=$(python3 -c 'import yaml, json; print(json.dumps(yaml.safe_load(open("playbooks/files/cr_spec.yaml"))))') + INPUT=$(echo $JSON | $JSON_TO_DHALL '(/conf/zuul/input.dhall).Input.Type') + echo '(/conf/zuul/resources.dhall ('$INPUT')).List' | $DHALL_TO_YAML > ~/zuul-output/logs/cr_spec-resources.yaml + args: + executable: /bin/bash + chdir: "{{ 
zuul.projects['opendev.org/zuul/zuul-operator'].src_dir }}" + - name: Setup CRD command: make install args: chdir: "{{ zuul.projects['opendev.org/zuul/zuul-operator'].src_dir }}" + - name: Wait for operator deployment + command: timeout 8m kubectl rollout status deployment/zuul-operator + - name: Generate executor ssh key command: ssh-keygen -t rsa -m PEM -N '' -f 'id_rsa' -q -C 'zuul-executor' args: @@ -116,28 +133,33 @@ secretName: nodepool-kube-config key: kube.config - - - name: Wait maximum 4 minutes for the scheduler pod + - name: Wait maximum 4 minutes for the scheduler deployment shell: | for idx in $(seq 24); do date; - for res in statefulsets deployments pods; do echo == $res ==; kubectl get $res; done - kubectl get pod zuul-scheduler-0 2> /dev/null && break || : + kubectl get statefulset zuul-scheduler 2> /dev/null && break || : sleep 10; done - - name: Wait 2 minutes for the scheduler pod to be ready - command: kubectl wait --for=condition=Ready --timeout=120s pod/zuul-scheduler-0 + - name: Wait for scheduler deployment + command: timeout 10m kubectl rollout status statefulset/zuul-scheduler - - name: Wait 4 minutes for scheduler to settle + - name: Wait 8 minutes for scheduler to settle command: kubectl logs pod/zuul-scheduler-0 register: _scheduler_log until: "'Full reconfiguration complete' in _scheduler_log.stdout" delay: 10 - retries: 24 + retries: 48 - - name: Wait 2 minutes for the executor pod to be ready - command: kubectl wait --for=condition=Ready --timeout=120s pod/zuul-executor-0 + - name: Wait for executor deployment + command: timeout 10m kubectl rollout status statefulset/zuul-executor + + - name: Wait 8 minutes for launcher to settle + command: kubectl logs deployment/zuul-launcher + register: _launcher_log + until: "'Active requests' in _launcher_log.stdout" + delay: 10 + retries: 48 - name: Wait an extra 2 minutes for the services to settle pause: