Autoscale replicas count based on gearman status

This commit is contained in:
Tristan Cacqueray 2019-04-08 06:08:31 +00:00
parent 5a75cb621f
commit 85c4e0f50a
6 changed files with 122 additions and 13 deletions

View File

@ -80,11 +80,13 @@ spec:
# Optional user-provided ssh key # Optional user-provided ssh key
sshsecretename: "" sshsecretename: ""
merger: merger:
instances: 0 min: 0
max: 10
executor: executor:
instances: 1 min: 1
max: 5
web: web:
instances: 1 min: 1
connections: [] connections: []
tenants: tenants:
- tenant: - tenant:

View File

@ -9,11 +9,14 @@ tenants:
sshsecretname: "{{ zuul_cluster_name }}-ssh-secret" sshsecretname: "{{ zuul_cluster_name }}-ssh-secret"
connections: [] connections: []
merger: merger:
instances: 0 min: 0
max: 5
executor: executor:
instances: 1 min: 1
max: 5
web: web:
instances: 1 min: 1
max: 1
namespace: "{{ meta.namespace|default('default') }}" namespace: "{{ meta.namespace|default('default') }}"
state: "present" state: "present"

View File

@ -0,0 +1,83 @@
#!/usr/bin/env python3
#
# Copyright 2019 Red Hat
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import math
import socket
from ansible.module_utils.basic import AnsibleModule
def gearman_status(host, port=4730, timeout=30):
    """Query a gearman server's administrative ``status`` command.

    Connects over TCP, sends ``status`` and parses every response line of the
    form ``<function> <queued> <running> <workers>`` until the line
    containing only ``.`` that terminates the listing.

    Args:
        host: Address of the gearman server.
        port: TCP port of the gearman server (defaults to 4730).
        timeout: Seconds to wait for the connection and for each read;
            ``None`` blocks indefinitely.

    Returns:
        A dict mapping each function name to
        ``{"queue": int, "running": int, "worker": int}``.

    Raises:
        ConnectionError: The server closed the connection before sending
            the terminating ``.`` line.
        OSError: The connection failed or timed out.
        ValueError: A response line did not have the expected four fields.
    """
    status = {}
    # Bytes received so far that do not yet form a complete line; a single
    # status line may be split across several recv() calls.
    pending = b""
    # The context manager closes the socket on every exit path.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as skt:
        skt.settimeout(timeout)
        skt.connect((host, port))
        skt.sendall(b"status\n")
        while True:
            data = skt.recv(4096)
            if not data:
                # Peer closed the connection. Accept a final "." that came
                # without a trailing newline, otherwise report truncation
                # instead of looping forever on empty reads.
                if pending.strip() == b".":
                    return status
                raise ConnectionError(
                    "gearman closed the connection before the end of status")
            pending += data
            # Everything before the last newline is complete; keep the rest.
            *lines, pending = pending.split(b"\n")
            for line in lines:
                line = line.strip()
                if line == b".":
                    return status
                if line == b"":
                    continue
                name, queue, running, worker = line.decode('ascii').split()
                status[name] = {
                    "queue": int(queue),
                    "running": int(running),
                    "worker": int(worker),
                }
def ansible_main():
    """Ansible module entry point: compute a replica count from gearman load.

    Module arguments:
        service: Name of the service to scale ("merger" and "executor" are
            load-driven; any other name yields a base count of 0).
        gearman: Address passed to ``gearman_status``.
        min: Lower bound for the returned count.
        max: Upper bound for the returned count (applied last).

    Exits with ``changed=False`` and ``count`` set to the clamped replica
    count, or fails the module if the gearman status cannot be fetched.
    """
    module = AnsibleModule(
        argument_spec={
            "service": {"required": True},
            "gearman": {"required": True},
            "min": {"required": True, "type": 'int'},
            "max": {"required": True, "type": 'int'},
        }
    )

    try:
        status = gearman_status(module.params.get('gearman'))
    except Exception as e:
        module.fail_json(msg="Couldn't get gearman status: %s" % e)

    params = module.params
    service = params.get('service')
    lower_bound = params.get('min')
    upper_bound = params.get('max')

    wanted = 0
    if service == "merger":
        # One merger per 5 queued or running jobs over all "merger:" functions.
        pending = sum(
            stat["queue"] + stat["running"]
            for name, stat in status.items()
            if name.startswith("merger:")
        )
        wanted = math.ceil(pending / 5)
    elif service == "executor":
        # One executor per 10 queued or running "executor:execute" jobs.
        stat = status.get("executor:execute")
        if stat:
            load = stat["queue"] + stat["running"]
            wanted = math.ceil(load / 10)

    # Raise to the minimum first, then cap at the maximum.
    at_least_min = max(wanted, lower_bound)
    clamped = min(at_least_min, upper_bound)
    module.exit_json(changed=False, count=int(clamped))
# Run the module logic only when this file is executed as a script.
if __name__ == "__main__":
    ansible_main()

View File

@ -1,3 +1,13 @@
- name: Get autoscale count
autoscale_gearman:
service: "{{ deployment_name }}"
gearman: "{{ gearman_service.spec.clusterIP|default(None) }}"
min: "{{ deployment_conf.min|default(0) }}"
max: "{{ deployment_conf.max|default(1) }}"
register: autoscale
when: gearman_service is defined
# TODO: ensure graceful scale-down of service's replicas
- name: Create Deployment - name: Create Deployment
k8s: k8s:
state: "{{ state }}" state: "{{ state }}"
@ -13,7 +23,7 @@
annotations: annotations:
configHash: "" configHash: ""
spec: spec:
replicas: "{{ deployment_replicas|default(1) }}" replicas: "{{ autoscale.count|default(deployment_conf.min) }}"
selector: selector:
matchLabels: matchLabels:
app: "{{ zuul_cluster_name }}-{{ deployment_name }}" app: "{{ zuul_cluster_name }}-{{ deployment_name }}"

View File

@ -12,6 +12,8 @@
- containerPort: 4730 - containerPort: 4730
protocol: "TCP" protocol: "TCP"
deployment_config: "{{ zuul_configmap_name }}-scheduler" deployment_config: "{{ zuul_configmap_name }}-scheduler"
deployment_conf:
min: 1
include_tasks: "./create_deployment.yaml" include_tasks: "./create_deployment.yaml"
register: sched_deployment register: sched_deployment
@ -24,6 +26,13 @@
protocol: TCP protocol: TCP
include_tasks: "./create_service.yaml" include_tasks: "./create_service.yaml"
- name: Wait for Service
set_fact:
gearman_service: "{{ lookup('k8s', api_version='v1', kind='Service', namespace=namespace, resource_name=zuul_cluster_name + '-scheduler') }}"
until: gearman_service
retries: 5
delay: 10
- name: Reload scheduler - name: Reload scheduler
include_tasks: "./reload_scheduler.yaml" include_tasks: "./reload_scheduler.yaml"
when: when:
@ -33,19 +42,19 @@
- name: Merger Deployment - name: Merger Deployment
vars: vars:
deployment_name: merger deployment_name: merger
deployment_replicas: "{{ merger.instances }}" deployment_conf: "{{ merger }}"
include_tasks: "./create_deployment.yaml" include_tasks: "./create_deployment.yaml"
- name: Executor Deployment - name: Executor Deployment
vars: vars:
deployment_name: executor deployment_name: executor
deployment_replicas: "{{ executor.instances }}" deployment_conf: "{{ executor }}"
include_tasks: "./create_deployment.yaml" include_tasks: "./create_deployment.yaml"
- name: Web Deployment - name: Web Deployment
vars: vars:
deployment_name: web deployment_name: web
deployment_replicas: "{{ web.instances }}" deployment_conf: "{{ web }}"
deployment_ports: deployment_ports:
- containerPort: 9000 - containerPort: 9000
protocol: "TCP" protocol: "TCP"

View File

@ -4,10 +4,12 @@ metadata:
name: example-zuul name: example-zuul
spec: spec:
merger: merger:
instances: 0 min: 0
max: 10
executor: executor:
instances: 1 min: 1
max: 5
web: web:
instances: 1 min: 1
connections: [] connections: []
tenants: [] tenants: []