From aea9bf355058a15e7ce7bb2649de3872e7041c89 Mon Sep 17 00:00:00 2001 From: Scott Shambarger Date: Thu, 13 May 2021 17:42:03 -0700 Subject: [PATCH] monasca-thresh: Fix topology submission to storm monasca-thresh currently runs a local copy of the storm to handle the threshold topology. However, it doesn't set up the environment correctly, and the executable fails, causing the container to continually restart. This patch updates the container command to correctly submit the topology to the running Apache storm. The container will exit after it finishes the submission, so the restart_policy is updated to on-failure; this way, if the storm is temporarily unavailable, the submission will be retried. (NOTE: further deploys will see the container as "changed" as it won't be running) Patch uses KOLLA_BOOTSTRAP to trigger the container to check if the topology is already submitted, and if so skips the submission command so the container doesn't fail. The config task now triggers a new reconfigure handler that spawns a one-shot container to replace any existing topology if the configuration has changed. Also, all the storm.* variables in storm.yml.j2 are removed as they were only needed for local mode and make submitted topologies fail to load when the storm is restarted (the referenced directories are not mounted on nimbus). 
Depends-On: https://review.opendev.org/c/openstack/kolla/+/792751 Closes-Bug: #1808805 Change-Id: Ib225d76076782d695c9387e1c2693bae9a4521d7 --- ansible/roles/monasca/defaults/main.yml | 1 + ansible/roles/monasca/handlers/main.yml | 33 +++++++++++++++++-- .../roles/monasca/tasks/check-containers.yml | 1 + ansible/roles/monasca/tasks/config.yml | 4 +-- ansible/roles/monasca/tasks/upgrade.yml | 13 ++++++++ .../monasca-thresh/monasca-thresh.json.j2 | 2 +- .../templates/monasca-thresh/storm.yml.j2 | 8 ----- .../notes/bug-1808805-3ebd9b0edceff170.yaml | 13 ++++++++ 8 files changed, 61 insertions(+), 14 deletions(-) create mode 100644 releasenotes/notes/bug-1808805-3ebd9b0edceff170.yaml diff --git a/ansible/roles/monasca/defaults/main.yml b/ansible/roles/monasca/defaults/main.yml index ab5c8f8d6b..4bc92bfe46 100644 --- a/ansible/roles/monasca/defaults/main.yml +++ b/ansible/roles/monasca/defaults/main.yml @@ -50,6 +50,7 @@ monasca_services: image: "{{ monasca_thresh_image_full }}" volumes: "{{ monasca_thresh_default_volumes + monasca_thresh_extra_volumes }}" dimensions: "{{ monasca_thresh_dimensions }}" + state: "exited" monasca-notification: container_name: monasca_notification group: monasca-notification diff --git a/ansible/roles/monasca/handlers/main.yml b/ansible/roles/monasca/handlers/main.yml index a69b8ef8b0..7a6e145d90 100644 --- a/ansible/roles/monasca/handlers/main.yml +++ b/ansible/roles/monasca/handlers/main.yml @@ -46,16 +46,43 @@ - name: Restart monasca-thresh container vars: - service_name: "monasca-thresh" - service: "{{ monasca_services[service_name] }}" + service: "{{ monasca_services['monasca-thresh'] }}" become: true kolla_docker: - action: "recreate_or_restart_container" + action: "start_container" common_options: "{{ docker_common_options }}" name: "{{ service.container_name }}" image: "{{ service.image }}" volumes: "{{ service.volumes }}" dimensions: "{{ service.dimensions }}" + detach: False + remove_on_exit: false + restart_policy: no + 
environment: + KOLLA_BOOTSTRAP: + run_once: True + delegate_to: "{{ groups[service.group]|first }}" + when: + - kolla_action != "config" + +- name: Resubmitting monasca-thresh topology + vars: + service: "{{ monasca_services['monasca-thresh'] }}" + become: true + kolla_docker: + action: "start_container" + common_options: "{{ docker_common_options }}" + name: "resubmit_{{ service.container_name }}" + image: "{{ service.image }}" + volumes: "{{ service.volumes }}" + dimensions: "{{ service.dimensions }}" + detach: False + restart_policy: no + environment: + KOLLA_BOOTSTRAP: + TOPOLOGY_REPLACE: + run_once: True + delegate_to: "{{ groups[service.group]|first }}" when: - kolla_action != "config" diff --git a/ansible/roles/monasca/tasks/check-containers.yml b/ansible/roles/monasca/tasks/check-containers.yml index e6f758ea2c..5bd0ee8ee9 100644 --- a/ansible/roles/monasca/tasks/check-containers.yml +++ b/ansible/roles/monasca/tasks/check-containers.yml @@ -9,6 +9,7 @@ pid_mode: "{{ item.value.pid_mode|default('') }}" volumes: "{{ item.value.volumes }}" dimensions: "{{ item.value.dimensions }}" + state: "{{ item.value.state | default('running') }}" when: - inventory_hostname in groups[item.value.group] - item.value.enabled | bool diff --git a/ansible/roles/monasca/tasks/config.yml b/ansible/roles/monasca/tasks/config.yml index fe391ff344..c6d5f58a38 100644 --- a/ansible/roles/monasca/tasks/config.yml +++ b/ansible/roles/monasca/tasks/config.yml @@ -268,7 +268,7 @@ - inventory_hostname in groups[service['group']] - service.enabled | bool notify: - - Restart monasca-thresh container + - Resubmitting monasca-thresh topology - name: Copying over monasca-thresh storm config vars: @@ -286,7 +286,7 @@ - inventory_hostname in groups[service['group']] - service.enabled | bool notify: - - Restart monasca-thresh container + - Resubmitting monasca-thresh topology - name: Copying over monasca-notification config vars: diff --git a/ansible/roles/monasca/tasks/upgrade.yml 
b/ansible/roles/monasca/tasks/upgrade.yml index ff9990bb1d..7dd63a3ba7 100644 --- a/ansible/roles/monasca/tasks/upgrade.yml +++ b/ansible/roles/monasca/tasks/upgrade.yml @@ -9,5 +9,18 @@ - import_tasks: bootstrap_service.yml +# NOTE(sshambar): We don't want pre-upgrade monasca-thresh instances +# running in local mode after an upgrade, so stop them. +# The first node will be replaced with the submission container in the +# handlers below. +- name: Stopping all monasca-thresh instances but the first node + become: true + kolla_docker: + action: "stop_container" + common_options: "{{ docker_common_options }}" + name: "{{ monasca_services['monasca-thresh']['container_name'] }}" + when: + - inventory_hostname != groups['monasca-thresh']|first + - name: Flush handlers meta: flush_handlers diff --git a/ansible/roles/monasca/templates/monasca-thresh/monasca-thresh.json.j2 b/ansible/roles/monasca/templates/monasca-thresh/monasca-thresh.json.j2 index bbcbb57ecb..b7e28e2fd8 100644 --- a/ansible/roles/monasca/templates/monasca-thresh/monasca-thresh.json.j2 +++ b/ansible/roles/monasca/templates/monasca-thresh/monasca-thresh.json.j2 @@ -1,5 +1,5 @@ { - "command": "/opt/storm/bin/storm jar /monasca-thresh-source/monasca-thresh-*/thresh/target/monasca-thresh-*-SNAPSHOT-shaded.jar -Djava.io.tmpdir=/var/lib/monasca-thresh/data monasca.thresh.ThresholdingEngine /etc/monasca/thresh-config.yml monasca-thresh local", + "command": "/opt/storm/bin/storm jar /monasca-thresh-source/monasca-thresh-*/thresh/target/monasca-thresh-*-SNAPSHOT-shaded.jar -Djava.io.tmpdir=/var/lib/monasca-thresh/data monasca.thresh.ThresholdingEngine /etc/monasca/thresh-config.yml monasca-thresh", "config_files": [ { "source": "{{ container_config_directory }}/thresh-config.yml", diff --git a/ansible/roles/monasca/templates/monasca-thresh/storm.yml.j2 b/ansible/roles/monasca/templates/monasca-thresh/storm.yml.j2 index d8fe3ddaa0..70aa8fd217 100644 --- a/ansible/roles/monasca/templates/monasca-thresh/storm.yml.j2 
+++ b/ansible/roles/monasca/templates/monasca-thresh/storm.yml.j2 @@ -1,9 +1 @@ -storm.local.dir: "/var/lib/monasca-thresh/data" -storm.log.dir: "/var/log/kolla/storm" -storm.workers.artifacts.dir: "/var/lib/monasca-thresh/worker-artifacts" nimbus.seeds: [{{ monasca_storm_nimbus_servers }}] -storm.zookeeper.port: {{ zookeeper_client_port }} -storm.zookeeper.servers: -{% for host in groups['zookeeper'] %} - - "{{ 'api' | kolla_address(host) }}" -{% endfor %} diff --git a/releasenotes/notes/bug-1808805-3ebd9b0edceff170.yaml b/releasenotes/notes/bug-1808805-3ebd9b0edceff170.yaml new file mode 100644 index 0000000000..11b758de43 --- /dev/null +++ b/releasenotes/notes/bug-1808805-3ebd9b0edceff170.yaml @@ -0,0 +1,13 @@ +--- +fixes: + - | + Fixes monasca-thresh to correctly submit the topology to Storm. + The previous container ran the topology in local mode (within the + container), and didn't use the Storm cloud. The new container + handles submitting the topology to Storm and also handles killing + and replacing the topology when its configuration has changed. + As a result, the monasca-thresh container is only used for + submission, and exits after that's completed. + The logs for the topology will now be available in the storm + worker-artifact logs. + `LP#1808805 <https://launchpad.net/bugs/1808805>`__