Merge "Update the collectd plugins to be more robust."

This commit is contained in:
Jenkins 2017-07-13 11:49:22 +00:00 committed by Gerrit Code Review
commit 462e506150
2 changed files with 77 additions and 37 deletions

View File

@@ -29,12 +29,13 @@ def configure(configobj):
collectd.info('gnocchi_status: Interval: {}'.format(INTERVAL)) collectd.info('gnocchi_status: Interval: {}'.format(INTERVAL))
collectd.register_read(read, INTERVAL) collectd.register_read(read, INTERVAL)
def read(data=None): def read(data=None):
starttime = time.time() starttime = time.time()
gnocchi = client.Client(session=keystone_session) gnocchi = client.Client(session=keystone_session)
try:
status = gnocchi.status.get() status = gnocchi.status.get()
metric = collectd.Values() metric = collectd.Values()
metric.plugin = 'gnocchi_status' metric.plugin = 'gnocchi_status'
metric.interval = INTERVAL metric.interval = INTERVAL
@@ -50,16 +51,21 @@ def read(data=None):
metric.type_instance = 'metrics' metric.type_instance = 'metrics'
metric.values = [status['storage']['summary']['metrics']] metric.values = [status['storage']['summary']['metrics']]
metric.dispatch() metric.dispatch()
except Exception as err:
collectd.error(
'gnocchi_status: Exception getting status: {}'
.format(err))
timediff = time.time() - starttime timediff = time.time() - starttime
if timediff > INTERVAL: if timediff > INTERVAL:
collectd.warning('gnocchi_status: Took: {} > {}'.format(round(timediff, 2), collectd.warning(
INTERVAL)) 'gnocchi_status: Took: {} > {}'
.format(round(timediff, 2), INTERVAL))
def create_keystone_session(): def create_keystone_session():
auth = v2.Password(username=os_username, auth = v2.Password(
password=os_password, username=os_username, password=os_password, tenant_name=os_tenant,
tenant_name=os_tenant,
auth_url=os_auth_url) auth_url=os_auth_url)
return session.Session(auth=auth) return session.Session(auth=auth)
@@ -70,7 +76,8 @@ if os_tenant is None:
os_tenant = os.environ.get('OS_PROJECT_NAME') os_tenant = os.environ.get('OS_PROJECT_NAME')
os_auth_url = os.environ.get('OS_AUTH_URL') os_auth_url = os.environ.get('OS_AUTH_URL')
collectd.info('gnocchi_status: Connecting with user={}, password={}, tenant/project={}, ' collectd.info(
'gnocchi_status: Connecting with user={}, password={}, tenant={}, '
'auth_url={}'.format(os_username, os_password, os_tenant, os_auth_url)) 'auth_url={}'.format(os_username, os_password, os_tenant, os_auth_url))
keystone_session = create_keystone_session() keystone_session = create_keystone_session()

View File

@@ -10,12 +10,16 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Collectd python plugin to read rabbitmq metrics from rabbitmq management plugin.""" """Collectd python plugin to read rabbitmq metrics from rabbitmq management
plugin.
"""
from pyrabbit.api import Client from pyrabbit.api import Client
from pyrabbit.http import HTTPError
import collectd import collectd
import os import os
import time import time
def configure(configobj): def configure(configobj):
global INTERVAL global INTERVAL
global cl global cl
@@ -27,20 +31,28 @@ def configure(configobj):
port = int(config['port'][0]) port = int(config['port'][0])
username = config['username'][0] username = config['username'][0]
password = config['password'][0] password = config['password'][0]
queues_to_count = []
if 'message_count' in config:
queues_to_count = config['message_count'] queues_to_count = config['message_count']
collectd.info('rabbitmq_monitoring: Interval: {}'.format(INTERVAL)) collectd.info('rabbitmq_monitoring: Interval: {}'.format(INTERVAL))
cl = Client('{}:{}'.format(host, port), username, password) cl = Client('{}:{}'.format(host, port), username, password)
collectd.info('rabbitmq_monitoring: Connecting to: {}:{} as user:{} password:{}'.format(host, port, username, password)) collectd.info(
collectd.info('rabbitmq_monitoring: Counting messages on: {}'.format(queues_to_count)) 'rabbitmq_monitoring: Connecting to: {}:{} as user:{} password:{}'
.format(host, port, username, password))
collectd.info(
'rabbitmq_monitoring: Counting messages on: {}'
.format(queues_to_count))
collectd.register_read(read, INTERVAL) collectd.register_read(read, INTERVAL)
def read(data=None): def read(data=None):
starttime = time.time() starttime = time.time()
overview = cl.get_overview() overview = cl.get_overview()
# Object counts # Object counts
for metric_instance in ['channels', 'connections', 'consumers', 'exchanges', 'queues']: for metric_instance in \
['channels', 'connections', 'consumers', 'exchanges', 'queues']:
metric = collectd.Values() metric = collectd.Values()
metric.plugin = 'rabbitmq_monitoring' metric.plugin = 'rabbitmq_monitoring'
metric.interval = INTERVAL metric.interval = INTERVAL
@@ -50,7 +62,8 @@ def read(data=None):
metric.dispatch() metric.dispatch()
# Aggregated Queue message stats # Aggregated Queue message stats
for metric_instance in ['messages', 'messages_ready', 'messages_unacknowledged']: for metric_instance in \
['messages', 'messages_ready', 'messages_unacknowledged']:
metric = collectd.Values() metric = collectd.Values()
metric.plugin = 'rabbitmq_monitoring' metric.plugin = 'rabbitmq_monitoring'
metric.interval = INTERVAL metric.interval = INTERVAL
@@ -64,13 +77,20 @@ def read(data=None):
metric.interval = INTERVAL metric.interval = INTERVAL
metric.type = 'gauge' metric.type = 'gauge'
metric.type_instance = 'queue_total-{}-rate'.format(metric_instance) metric.type_instance = 'queue_total-{}-rate'.format(metric_instance)
metric.values = [overview['queue_totals']['{}_details'.format(metric_instance)]['rate']] metric.values = \
[
overview['queue_totals']['{}_details'.format(metric_instance)]
['rate']
]
metric.dispatch() metric.dispatch()
# Aggregated Message Stats # Aggregated Message Stats
for metric_instance in ['ack', 'confirm', 'deliver', 'deliver_get', 'deliver_no_ack', 'get', for metric_instance in \
'get_no_ack', 'publish', 'publish_in', 'publish_out', 'redeliver', [
'return_unroutable']: 'ack', 'confirm', 'deliver', 'deliver_get', 'deliver_no_ack',
'get', 'get_no_ack', 'publish', 'publish_in', 'publish_out',
'redeliver', 'return_unroutable'
]:
metric = collectd.Values() metric = collectd.Values()
metric.plugin = 'rabbitmq_monitoring' metric.plugin = 'rabbitmq_monitoring'
metric.interval = INTERVAL metric.interval = INTERVAL
@@ -84,12 +104,22 @@ def read(data=None):
metric.interval = INTERVAL metric.interval = INTERVAL
metric.type = 'gauge' metric.type = 'gauge'
metric.type_instance = 'message_total-{}-rate'.format(metric_instance) metric.type_instance = 'message_total-{}-rate'.format(metric_instance)
metric.values = [overview['message_stats']['{}_details'.format(metric_instance)]['rate']] metric.values = \
[
overview['message_stats']['{}_details'.format(metric_instance)]
['rate']
]
metric.dispatch() metric.dispatch()
# Configurable per-queue message counts # Configurable per-queue message counts
for queue_name in queues_to_count: for queue_name in queues_to_count:
messages_detail = None
try:
messages_detail = cl.get_messages('/', queue_name) messages_detail = cl.get_messages('/', queue_name)
except HTTPError as err:
collectd.error(
'Error Opening Queue [{}] details: {}'
.format(queue_name, err))
if messages_detail is None: if messages_detail is None:
count = 0 count = 0
else: else:
@@ -104,7 +134,10 @@ def read(data=None):
timediff = time.time() - starttime timediff = time.time() - starttime
if timediff > INTERVAL: if timediff > INTERVAL:
collectd.warning('rabbitmq_monitoring: Took: {} > {}'.format(round(timediff, 2), collectd.warning(
INTERVAL)) 'rabbitmq_monitoring: Took: {} > {}'.format(
round(timediff, 2),
INTERVAL)
)
collectd.register_config(configure) collectd.register_config(configure)