Merge "Add Jaeger tracing server"

This commit is contained in:
Zuul 2022-09-19 20:51:06 +00:00 committed by Gerrit Code Review
commit 5e150b7e74
15 changed files with 306 additions and 0 deletions

View File

@ -29,6 +29,7 @@ Major Systems
storyboard storyboard
kerberos kerberos
afs afs
tracing
translate translate
refstack refstack
codesearch codesearch

37
doc/source/tracing.rst Normal file
View File

@ -0,0 +1,37 @@
:title: Tracing
.. _tracing:
Tracing
#######
The Jaeger tracing server is installed on tracing.opendev.org. It is
intended to be used by Zuul, but may be used by other services in the
future. It displays information about Zuul operations in visual form.
At a Glance
===========
:Hosts:
* https://tracing.opendev.org
:Ansible:
* https://opendev.org/opendev/system-config
* :git_file:`playbooks/roles/jaeger`
* :git_file:`playbooks/service-tracing.yaml`
:Projects:
* https://www.jaegertracing.io/
* https://www.jaegertracing.io/docs/latest/getting-started/
:Bugs:
* https://storyboard.openstack.org/#!/project/748
Overview
========
Apache is configured as a reverse proxy and there is an internal
Badger database stored at ``/var/jaeger/badger``.
Zuul sends telemetry information to Jaeger via the gRPC protocol.
The internal CA (``zk-ca``) used to create ZooKeeper certs for Zuul is
also used to provide and validate client certificates for the gRPC
connection to Jaeger.

View File

@ -0,0 +1,12 @@
# Certificate for the tracing service; it covers the public service name
# and this host's own inventory name.
letsencrypt_certs:
  tracing-opendev-org-main:
    - tracing.opendev.org
    - "{{ inventory_hostname }}"

# Account used by the jaeger role for the data and TLS directories.
jaeger_user: jaeger
jaeger_group: jaeger
jaeger_uid: 10001
jaeger_gid: 10001

# gRPC: open port 4317 to hosts in the nodepool and zuul groups.
iptables_extra_allowed_groups:
  - protocol: tcp
    port: '4317'
    group: nodepool
  - protocol: tcp
    port: '4317'
    group: zuul

View File

@ -97,6 +97,7 @@ groups:
- review[0-9]*.opendev.org - review[0-9]*.opendev.org
- static[0-9]*.opendev.org - static[0-9]*.opendev.org
- storyboard[0-9]*.opendev.org - storyboard[0-9]*.opendev.org
- tracing[0-9]*.opendev.org
- translate[0-9]*.open*.org - translate[0-9]*.open*.org
- zuul[0-9]*.opendev.org - zuul[0-9]*.opendev.org
mailman: mailman:
@ -146,6 +147,7 @@ groups:
- storyboard[0-9]*.opendev.org - storyboard[0-9]*.opendev.org
storyboard-dev: storyboard-dev:
- storyboard-dev[0-9]*.opendev.org - storyboard-dev[0-9]*.opendev.org
# Hosts running the Jaeger tracing server. Written as a list of host
# patterns, like every other group in this file.
tracing:
  - tracing[0-9]*.opendev.org
translate-dev: translate-dev:
- translate-dev[0-9]*.open*.org - translate-dev[0-9]*.open*.org
translate: translate:
@ -165,6 +167,7 @@ groups:
- static[0-9]*.opendev.org - static[0-9]*.opendev.org
- storyboard-dev[0-9]*.opendev.org - storyboard-dev[0-9]*.opendev.org
- storyboard[0-9]*.opendev.org - storyboard[0-9]*.opendev.org
- tracing[0-9]*.opendev.org
- translate-dev[0-9]*.open*.org - translate-dev[0-9]*.open*.org
- translate[0-9]*.open*.org - translate[0-9]*.open*.org
zookeeper: zookeeper:

View File

@ -0,0 +1,2 @@
Run a Jaeger (tracing) server.

View File

@ -0,0 +1,4 @@
# Handler: reload (not restart) Apache. Tasks in this role trigger it with
# `notify: jaeger Reload apache2`.
- name: jaeger Reload apache2
  service:
    state: reloaded
    name: apache2

View File

@ -0,0 +1,87 @@
# Create the dedicated system group and user. Names and numeric ids come
# from the jaeger_user / jaeger_group / jaeger_uid / jaeger_gid variables.
- name: Create jaeger group
  group:
    name: "{{ jaeger_group }}"
    gid: "{{ jaeger_gid }}"
    system: true

- name: Create jaeger user
  user:
    name: "{{ jaeger_user }}"
    group: "{{ jaeger_group }}"
    uid: "{{ jaeger_uid }}"
    home: "/home/{{ jaeger_user }}"
    create_home: true
    shell: /bin/bash
    system: true
# Directory holding the rendered docker-compose file.
- name: Ensure docker-compose directory exists
  file:
    path: /etc/jaeger-docker
    state: directory

- name: Write docker-compose file
  template:
    dest: /etc/jaeger-docker/docker-compose.yaml
    src: docker-compose.yaml.j2

# Badger storage directory; the compose file bind-mounts it at /badger.
- name: Ensure data directory exists
  file:
    path: /var/jaeger/badger
    state: directory
    mode: "0750"
    owner: "{{ jaeger_user }}"
    group: "{{ jaeger_group }}"

# Reuse the zk-ca role with its cert directory pointed at /var/jaeger/tls,
# which the compose file bind-mounts at /tls.
- name: Generate GRPC TLS cert
  include_role:
    name: zk-ca
  vars:
    zk_ca_cert_dir: /var/jaeger/tls
    zk_ca_cert_dir_owner: "{{ jaeger_user }}"
    zk_ca_cert_dir_group: "{{ jaeger_group }}"
# Install Apache and enable the modules used by the tracing vhost
# (TLS termination, rewriting, reverse proxying, request headers).
- name: Install apache2
  apt:
    name:
      - apache2
      - apache2-utils
    state: present

- name: Apache modules
  apache2_module:
    state: present
    name: "{{ item }}"
  loop:
    - rewrite
    - proxy
    - proxy_http
    - ssl
    - headers

# The vhost replaces the default site; Apache is reloaded through the
# role's handler whenever the rendered file changes.
- name: Copy apache config
  template:
    src: tracing.vhost.j2
    dest: /etc/apache2/sites-enabled/000-default.conf
    owner: root
    group: root
    # Quoted so YAML keeps the mode as a string and does not parse it as
    # an octal integer.
    mode: "0644"
  notify: jaeger Reload apache2
# Fetch the image(s) named in the compose file, then (re)start the
# service in the background.
- name: Run docker-compose pull
  shell:
    chdir: /etc/jaeger-docker/
    cmd: docker-compose pull

- name: Run docker-compose up
  shell:
    chdir: /etc/jaeger-docker/
    cmd: docker-compose up -d

# Port 16686 is the local port the Apache vhost proxies to; give the
# container up to 60 seconds to start listening on it.
- name: Wait for jaeger to start
  wait_for:
    timeout: 60
    port: 16686

# Remove dangling images left behind after the pull.
- name: Run docker prune to cleanup unneeded images
  shell:
    cmd: docker image prune -f

View File

@ -0,0 +1,23 @@
# Compose file format 2 is the newest one supported by the docker-compose
# shipped in Ubuntu Xenial.
version: "2"

services:
  jaeger:
    image: docker.io/jaegertracing/all-in-one:latest
    restart: always
    network_mode: host
    environment:
      - COLLECTOR_OTLP_ENABLED=true
      # Storage: non-ephemeral Badger database under the /badger mount.
      - SPAN_STORAGE_TYPE=badger
      - BADGER_EPHEMERAL=false
      - BADGER_DIRECTORY_VALUE=/badger/data
      - BADGER_DIRECTORY_KEY=/badger/key
      - BADGER_SPAN_STORE_TTL=30d
      # gRPC collector TLS: server cert/key plus the CA used for client
      # certificates, all read from the /tls mount.
      - COLLECTOR_GRPC_TLS_ENABLED=true
      - COLLECTOR_GRPC_TLS_CERT=/tls/certs/cert.pem
      - COLLECTOR_GRPC_TLS_KEY=/tls/keys/key.pem
      - COLLECTOR_GRPC_TLS_CLIENT_CA=/tls/certs/cacert.pem
    volumes:
      # host path : container path
      - /var/jaeger/badger:/badger
      - /var/jaeger/tls:/tls

View File

@ -0,0 +1,57 @@
# Plain-HTTP virtual host: its only job is to redirect every request to
# the HTTPS site.
<VirtualHost *:80>
ServerName tracing.opendev.org
ServerAdmin webmaster@openstack.org
ErrorLog ${APACHE_LOG_DIR}/tracing-error.log
LogLevel warn
CustomLog ${APACHE_LOG_DIR}/tracing-access.log combined
Redirect / https://tracing.opendev.org/
</VirtualHost>
# HTTPS virtual host: terminates TLS and reverse-proxies all requests to
# the service listening on localhost:16686.
<VirtualHost *:443>
ServerName tracing.opendev.org
ServerAdmin webmaster@openstack.org
AllowEncodedSlashes On
ErrorLog ${APACHE_LOG_DIR}/tracing-ssl-error.log
LogLevel warn
CustomLog ${APACHE_LOG_DIR}/tracing-ssl-access.log combined
SSLEngine on
SSLProtocol All -SSLv2 -SSLv3
# Note: this list should ensure ciphers that provide forward secrecy
SSLCipherSuite ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:!AES256:!aNULL:!eNULL:!MD5:!DSS:!PSK:!SRP
SSLHonorCipherOrder on
# Certificate, key and chain are read from the letsencrypt-certs
# directory named after tracing.opendev.org.
SSLCertificateFile /etc/letsencrypt-certs/tracing.opendev.org/tracing.opendev.org.cer
SSLCertificateKeyFile /etc/letsencrypt-certs/tracing.opendev.org/tracing.opendev.org.key
SSLCertificateChainFile /etc/letsencrypt-certs/tracing.opendev.org/ca.cer
# Workarounds for old Internet Explorer versions (MSIE 2 through 6).
BrowserMatch "MSIE [2-6]" \
nokeepalive ssl-unclean-shutdown \
downgrade-1.0 force-response-1.0
# MSIE 7 and newer should be able to use keepalive
# (the character class [17-9] matches a version starting with 1, 7, 8 or 9)
BrowserMatch "MSIE [17-9]" ssl-unclean-shutdown
RewriteEngine on
# Do not rewrite the /server-status URL (though by default, this
# is only accessible from localhost). Connect to it with:
# ssh -L 8443:localhost:443 $HOSTNAME
# https://localhost:8443/server-status
RewriteRule ^/server-status$ /server-status [L]
# Forward everything to the local backend; retry=0 makes Apache retry a
# failed backend immediately instead of waiting out a back-off period.
ProxyPass / http://localhost:16686/ retry=0
ProxyPassReverse / http://localhost:16686/
# Pass the original Host header and request scheme through to the backend.
ProxyPreserveHost on
RequestHeader set "X-Forwarded-Proto" expr=%{REQUEST_SCHEME}
</VirtualHost>

View File

@ -253,6 +253,9 @@
- name: letsencrypt updated storyboard01-opendev-org-main - name: letsencrypt updated storyboard01-opendev-org-main
include_tasks: roles/letsencrypt-create-certs/handlers/restart_apache.yaml include_tasks: roles/letsencrypt-create-certs/handlers/restart_apache.yaml
- name: letsencrypt updated tracing-opendev-org-main
include_tasks: roles/letsencrypt-create-certs/handlers/restart_apache.yaml
- name: letsencrypt updated translate01-openstack-org-main - name: letsencrypt updated translate01-openstack-org-main
include_tasks: roles/letsencrypt-create-certs/handlers/restart_apache.yaml include_tasks: roles/letsencrypt-create-certs/handlers/restart_apache.yaml

View File

@ -0,0 +1,6 @@
# Configure the tracing hosts (skipping any in the "disabled" group):
# firewall rules first, then Docker, then the Jaeger service itself.
- name: "Base: configure tracing"
  hosts: "tracing:!disabled"
  roles:
    - iptables
    - install-docker
    - jaeger

25
testinfra/test_tracing.py Normal file
View File

@ -0,0 +1,25 @@
# Copyright 2022 Acme Gating, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
testinfra_hosts = ['tracing99.opendev.org']
def test_jaeger_listening(host):
    """Check that something is listening on local TCP port 16686."""
    assert host.socket("tcp://127.0.0.1:16686").is_listening
def test_tracing_http(host):
    """Check that curl can fetch the HTTPS front page of the test host."""
    result = host.run('curl https://tracing99.opendev.org')
    assert result.succeeded

View File

@ -302,6 +302,20 @@
- playbooks/roles/zuul-user/ - playbooks/roles/zuul-user/
- roles/openafs-client/ - roles/openafs-client/
# Production deployment job for the tracing service. It is triggered by
# changes to the playbook, the group vars, or any role the playbook uses.
- job:
    name: infra-prod-service-tracing
    parent: infra-prod-service-base
    description: Run service-tracing.yaml playbook.
    vars:
      playbook_name: service-tracing.yaml
    files:
      - inventory/base
      - playbooks/service-tracing.yaml
      - inventory/service/group_vars/tracing.yaml
      - playbooks/roles/jaeger/
      - playbooks/roles/install-docker/
      - playbooks/roles/iptables/
- job: - job:
name: infra-prod-service-borg-backup name: infra-prod-service-borg-backup
parent: infra-prod-service-base parent: infra-prod-service-base

View File

@ -83,6 +83,7 @@
- name: opendev-buildset-registry - name: opendev-buildset-registry
- name: system-config-build-image-refstack - name: system-config-build-image-refstack
soft: true soft: true
- system-config-run-tracing
- system-config-run-zookeeper: - system-config-run-zookeeper:
dependencies: dependencies:
- name: opendev-buildset-registry - name: opendev-buildset-registry
@ -225,6 +226,7 @@
- name: opendev-buildset-registry - name: opendev-buildset-registry
- name: system-config-upload-image-refstack - name: system-config-upload-image-refstack
soft: true soft: true
- system-config-run-tracing
- system-config-run-zookeeper: - system-config-run-zookeeper:
dependencies: dependencies:
- name: opendev-buildset-registry - name: opendev-buildset-registry
@ -499,6 +501,10 @@
soft: true soft: true
- name: system-config-promote-image-gerrit-3.5 - name: system-config-promote-image-gerrit-3.5
soft: true soft: true
- infra-prod-service-tracing: &infra-prod-service-tracing
dependencies:
- name: infra-prod-letsencrypt
soft: true
- infra-prod-service-zookeeper: &infra-prod-service-zookeeper - infra-prod-service-zookeeper: &infra-prod-service-zookeeper
dependencies: dependencies:
- name: infra-prod-letsencrypt - name: infra-prod-letsencrypt
@ -606,6 +612,7 @@
- infra-prod-service-registry: *infra-prod-service-registry - infra-prod-service-registry: *infra-prod-service-registry
- infra-prod-service-refstack: *infra-prod-service-refstack - infra-prod-service-refstack: *infra-prod-service-refstack
- infra-prod-service-review: *infra-prod-service-review - infra-prod-service-review: *infra-prod-service-review
- infra-prod-service-tracing: *infra-prod-service-tracing
- infra-prod-service-zookeeper: *infra-prod-service-zookeeper - infra-prod-service-zookeeper: *infra-prod-service-zookeeper
- infra-prod-service-zuul: *infra-prod-service-zuul - infra-prod-service-zuul: *infra-prod-service-zuul
- infra-prod-service-zuul-lb: *infra-prod-service-zuul-lb - infra-prod-service-zuul-lb: *infra-prod-service-zuul-lb

View File

@ -800,6 +800,31 @@
- playbooks/test-paste.yaml - playbooks/test-paste.yaml
- testinfra/test_paste.py - testinfra/test_paste.py
# Test job for the tracing service: a bridge node plus one tracing node,
# running the letsencrypt playbook and then the tracing service playbook.
- job:
    name: system-config-run-tracing
    parent: system-config-run
    description: |
      Run the playbook for the jaeger servers.
    nodeset:
      nodes:
        - name: bridge.openstack.org
          label: ubuntu-bionic
        - name: tracing99.opendev.org
          label: ubuntu-focal
    vars:
      run_playbooks:
        - playbooks/letsencrypt.yaml
        - playbooks/service-tracing.yaml
    # Changes to any of these paths trigger the job.
    files:
      - inventory/service/group_vars/tracing.yaml
      - playbooks/install-ansible.yaml
      - playbooks/letsencrypt.yaml
      - playbooks/service-tracing.yaml
      - playbooks/roles/jaeger/
      - playbooks/roles/install-docker/
      - playbooks/roles/iptables/
      - testinfra/test_tracing.py
- job: - job:
name: system-config-run-zookeeper name: system-config-run-zookeeper
parent: system-config-run parent: system-config-run