Merge "Update the collectd plugins to be more robust."

This commit is contained in:
Jenkins 2017-07-13 11:49:22 +00:00 committed by Gerrit Code Review
commit 462e506150
2 changed files with 77 additions and 37 deletions

View File

@ -29,37 +29,43 @@ def configure(configobj):
collectd.info('gnocchi_status: Interval: {}'.format(INTERVAL))
collectd.register_read(read, INTERVAL)
def read(data=None):
    """Collectd read callback: dispatch Gnocchi storage summary gauges.

    Fetches the Gnocchi status and dispatches two ``gauge`` values under the
    ``gnocchi_status`` plugin, with type instances ``measures`` and
    ``metrics``, both read from ``status['storage']['summary']``.

    Any exception raised while creating the client, fetching the status or
    reading the response is reported through ``collectd.error`` and is not
    propagated. A warning is logged when the whole read takes longer than
    ``INTERVAL`` seconds.

    :param data: unused by this callback.
    """
    starttime = time.time()
    try:
        # The client is built inside the ``try`` so that a failure there is
        # logged the same way as a failed status request.
        gnocchi = client.Client(session=keystone_session)
        summary = gnocchi.status.get()['storage']['summary']

        # One gauge per summary counter, dispatched in a fixed order.
        for type_instance in ('measures', 'metrics'):
            metric = collectd.Values()
            metric.plugin = 'gnocchi_status'
            metric.interval = INTERVAL
            metric.type = 'gauge'
            metric.type_instance = type_instance
            metric.values = [summary[type_instance]]
            metric.dispatch()
    except Exception as err:
        collectd.error(
            'gnocchi_status: Exception getting status: {}'
            .format(err))

    # Flag reads that overrun the configured collection interval.
    timediff = time.time() - starttime
    if timediff > INTERVAL:
        collectd.warning(
            'gnocchi_status: Took: {} > {}'
            .format(round(timediff, 2), INTERVAL))
def create_keystone_session():
    """Build a Keystone session from the module-level OpenStack credentials.

    Uses the ``os_username``, ``os_password``, ``os_tenant`` and
    ``os_auth_url`` module globals to create a ``v2.Password`` auth plugin
    and wraps it in a ``session.Session``.

    :returns: the ``session.Session`` created with that auth plugin.
    """
    auth = v2.Password(
        username=os_username,
        password=os_password,
        tenant_name=os_tenant,
        auth_url=os_auth_url)
    return session.Session(auth=auth)
@ -67,10 +73,11 @@ os_username = os.environ.get('OS_USERNAME')
os_password = os.environ.get('OS_PASSWORD')
# Accept either the tenant or the project variable for the tenant name.
os_tenant = os.environ.get('OS_TENANT_NAME')
if os_tenant is None:
    os_tenant = os.environ.get('OS_PROJECT_NAME')
os_auth_url = os.environ.get('OS_AUTH_URL')

# Never write the secret itself to the collectd log; only report whether a
# password was supplied at all.
collectd.info(
    'gnocchi_status: Connecting with user={}, password={}, tenant={}, '
    'auth_url={}'.format(
        os_username,
        '<unset>' if os_password is None else '<redacted>',
        os_tenant,
        os_auth_url))

keystone_session = create_keystone_session()

View File

@ -10,12 +10,16 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Collectd python plugin to read rabbitmq metrics from rabbitmq management plugin."""
"""Collectd python plugin to read rabbitmq metrics from rabbitmq management
plugin.
"""
from pyrabbit.api import Client
from pyrabbit.http import HTTPError
import collectd
import os
import time
def configure(configobj):
global INTERVAL
global cl
@ -27,20 +31,28 @@ def configure(configobj):
port = int(config['port'][0])
username = config['username'][0]
password = config['password'][0]
queues_to_count = config['message_count']
queues_to_count = []
if 'message_count' in config:
queues_to_count = config['message_count']
collectd.info('rabbitmq_monitoring: Interval: {}'.format(INTERVAL))
cl = Client('{}:{}'.format(host, port), username, password)
collectd.info('rabbitmq_monitoring: Connecting to: {}:{} as user:{} password:{}'.format(host, port, username, password))
collectd.info('rabbitmq_monitoring: Counting messages on: {}'.format(queues_to_count))
collectd.info(
'rabbitmq_monitoring: Connecting to: {}:{} as user:{} password:{}'
.format(host, port, username, password))
collectd.info(
'rabbitmq_monitoring: Counting messages on: {}'
.format(queues_to_count))
collectd.register_read(read, INTERVAL)
def read(data=None):
starttime = time.time()
overview = cl.get_overview()
# Object counts
for metric_instance in ['channels', 'connections', 'consumers', 'exchanges', 'queues']:
for metric_instance in \
['channels', 'connections', 'consumers', 'exchanges', 'queues']:
metric = collectd.Values()
metric.plugin = 'rabbitmq_monitoring'
metric.interval = INTERVAL
@ -50,7 +62,8 @@ def read(data=None):
metric.dispatch()
# Aggregated Queue message stats
for metric_instance in ['messages', 'messages_ready', 'messages_unacknowledged']:
for metric_instance in \
['messages', 'messages_ready', 'messages_unacknowledged']:
metric = collectd.Values()
metric.plugin = 'rabbitmq_monitoring'
metric.interval = INTERVAL
@ -64,13 +77,20 @@ def read(data=None):
metric.interval = INTERVAL
metric.type = 'gauge'
metric.type_instance = 'queue_total-{}-rate'.format(metric_instance)
metric.values = [overview['queue_totals']['{}_details'.format(metric_instance)]['rate']]
metric.values = \
[
overview['queue_totals']['{}_details'.format(metric_instance)]
['rate']
]
metric.dispatch()
# Aggregated Message Stats
for metric_instance in ['ack', 'confirm', 'deliver', 'deliver_get', 'deliver_no_ack', 'get',
'get_no_ack', 'publish', 'publish_in', 'publish_out', 'redeliver',
'return_unroutable']:
for metric_instance in \
[
'ack', 'confirm', 'deliver', 'deliver_get', 'deliver_no_ack',
'get', 'get_no_ack', 'publish', 'publish_in', 'publish_out',
'redeliver', 'return_unroutable'
]:
metric = collectd.Values()
metric.plugin = 'rabbitmq_monitoring'
metric.interval = INTERVAL
@ -84,12 +104,22 @@ def read(data=None):
metric.interval = INTERVAL
metric.type = 'gauge'
metric.type_instance = 'message_total-{}-rate'.format(metric_instance)
metric.values = [overview['message_stats']['{}_details'.format(metric_instance)]['rate']]
metric.values = \
[
overview['message_stats']['{}_details'.format(metric_instance)]
['rate']
]
metric.dispatch()
# Configurable per-queue message counts
for queue_name in queues_to_count:
messages_detail = cl.get_messages('/', queue_name)
messages_detail = None
try:
messages_detail = cl.get_messages('/', queue_name)
except HTTPError as err:
collectd.error(
'Error Opening Queue [{}] details: {}'
.format(queue_name, err))
if messages_detail is None:
count = 0
else:
@ -104,7 +134,10 @@ def read(data=None):
timediff = time.time() - starttime
if timediff > INTERVAL:
collectd.warning('rabbitmq_monitoring: Took: {} > {}'.format(round(timediff, 2),
INTERVAL))
collectd.warning(
'rabbitmq_monitoring: Took: {} > {}'.format(
round(timediff, 2),
INTERVAL)
)
collectd.register_config(configure)