Revert "Create NRPE check to verify ceph daemons versions"
This reverts commit dfbda68e1add1e8a31ef0e14c043b584532fcd03.

Reason for revert:

The Ceph version check does not take into account which user executes the NRPE check. It fails to find a keyring to execute the command because the check is run by a non-root user.

$ juju run-action --wait nrpe/0 run-nrpe-check name=check-ceph-daemons-versions
unit-nrpe-0:
  UnitId: nrpe/0
  id: "20"
  results:
    Stderr: |
      2023-02-01T03:03:09.556+0000 7f4677361700 -1 auth: unable to find a keyring on /etc/ceph/ceph.client.admin.keyring,/etc/ceph/ceph.keyring,/etc/ceph/keyring,/etc/ceph/keyring.bin: (2) No such file or directory
      2023-02-01T03:03:09.556+0000 7f4677361700 -1 AuthRegistry(0x7f467005f540) no keyring found at /etc/ceph/ceph.client.admin.keyring,/etc/ceph/ceph.keyring,/etc/ceph/keyring,/etc/ceph/keyring.bin, disabling cephx
      2023-02-01T03:03:09.556+0000 7f4677361700 -1 auth: unable to find a keyring on /etc/ceph/ceph.client.admin.keyring,/etc/ceph/ceph.keyring,/etc/ceph/keyring,/etc/ceph/keyring.bin: (2) No such file or directory
      2023-02-01T03:03:09.556+0000 7f4677361700 -1 AuthRegistry(0x7f4670064d88) no keyring found at /etc/ceph/ceph.client.admin.keyring,/etc/ceph/ceph.keyring,/etc/ceph/keyring,/etc/ceph/keyring.bin, disabling cephx
      2023-02-01T03:03:09.560+0000 7f4677361700 -1 auth: unable to find a keyring on /etc/ceph/ceph.client.admin.keyring,/etc/ceph/ceph.keyring,/etc/ceph/keyring,/etc/ceph/keyring.bin: (2) No such file or directory
      2023-02-01T03:03:09.560+0000 7f4677361700 -1 AuthRegistry(0x7f4677360000) no keyring found at /etc/ceph/ceph.client.admin.keyring,/etc/ceph/ceph.keyring,/etc/ceph/keyring,/etc/ceph/keyring.bin, disabling cephx
      [errno 2] RADOS object not found (error connecting to the cluster)
    check-output: 'UNKNOWN: could not determine OSDs versions, error: Command ''[''ceph'', ''versions'']'' returned non-zero exit status 1.'
  status: completed
  timing:
    completed: 2023-02-01 03:03:10 +0000 UTC
    enqueued: 2023-02-01 03:03:09 +0000 UTC
    started: 2023-02-01 03:03:09 +0000 UTC

Related-Bug: #1943628
Change-Id: I84b306e84661e6664e8a69fa93dfdb02fa4f1e7e
This commit is contained in:
parent
87600a9c31
commit
c9389a8cd0
@ -4,8 +4,6 @@ resume-health:
|
||||
description: "Resume ceph health operations across the entire ceph cluster"
|
||||
get-health:
|
||||
description: "Output the current cluster health reported by `ceph health`"
|
||||
get-versions-report:
|
||||
description: "Outputs running daemon versions for all cluster members"
|
||||
create-cache-tier:
|
||||
description: "Create a new cache tier"
|
||||
params:
|
||||
|
@ -23,11 +23,6 @@ from charmhelpers.contrib.storage.linux.ceph import pool_set, \
|
||||
set_pool_quota, snapshot_pool, remove_pool_snapshot
|
||||
|
||||
|
||||
class CephReportError(Exception):
    """Signal a critical failure while building a Ceph report."""
|
||||
|
||||
|
||||
def list_pools():
|
||||
"""Return a list of all Ceph pools."""
|
||||
try:
|
||||
@ -37,52 +32,6 @@ def list_pools():
|
||||
action_fail(str(e))
|
||||
|
||||
|
||||
def _get_daemon_version(daemon_type, daemon_id):
    """Return the version string reported by one Ceph daemon.

    Runs ``ceph tell <daemon_type>.<daemon_id> version`` and returns the
    'version' key of its JSON output.

    :param daemon_type: daemon kind used as the ``ceph tell`` target prefix
        (for example 'osd' or 'mon')
    :param daemon_id: identifier of the daemon as listed by ``ceph node ls``
    :raises CephReportError: if the command exits with a non-zero status
    """
    daemon = "{}.{}".format(daemon_type, daemon_id)
    try:
        output = check_output(
            ['ceph', 'tell', daemon, 'version']).decode('UTF-8')
    except CalledProcessError as e:
        # Flag the action as failed for every daemon type, not only mons,
        # and keep the original error attached as the cause.
        action_fail(str(e))
        raise CephReportError(
            "Getting {} version fail".format(daemon)) from e
    return json.loads(output)['version']


def get_versions_report():
    """
    Return a mapping of hosts and their related ceph daemon versions.

    The hosts and daemons are read from ``ceph node ls``; each OSD and each
    mon is then asked for its version. OSD hosts are reported first, then
    mon hosts; a host running both kinds gets a single combined list.

    :returns: JSON document (4-space indented string) mapping each host
        name to the list of versions of its OSD and mon daemons
    :raises CephReportError: if the node list or any daemon version
        cannot be retrieved
    """
    report = {}
    try:
        output = check_output(['ceph', 'node', 'ls']).decode('UTF-8')
    except CalledProcessError as e:
        action_fail(str(e))
        raise CephReportError("Getting nodes list fail") from e
    nodes_list = json.loads(output)

    # Same processing for both daemon kinds; the order (osd, then mon)
    # determines the key order of the resulting report.
    for daemon_type in ('osd', 'mon'):
        for host, daemons in nodes_list[daemon_type].items():
            versions = report.setdefault(host, [])
            for daemon_id in daemons:
                versions.append(_get_daemon_version(daemon_type, daemon_id))

    return json.dumps(report, indent=4)
|
||||
|
||||
|
||||
def pool_get():
|
||||
"""
|
||||
Returns a key from a pool using 'ceph osd pool get'.
|
||||
|
@ -1 +0,0 @@
|
||||
get_versions_report.py
|
@ -1,26 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# Copyright 2022 Canonical Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from ceph_ops import get_versions_report, CephReportError
|
||||
from charmhelpers.core.hookenv import log, action_set, action_fail
|
||||
|
||||
if __name__ == '__main__':
    # Action entry point: publish the versions report, or fail the action
    # with the reason when the report cannot be built.
    try:
        versions_report = get_versions_report()
        action_set({'message': versions_report})
    except CephReportError as err:
        log(err)
        failure = "get versions report failed with message: {}".format(
            str(err))
        action_fail(failure)
|
@ -86,32 +86,6 @@ def get_ceph_version():
|
||||
return out_version
|
||||
|
||||
|
||||
def get_daemons_versions():
    """
    Collect the distinct daemon versions seen by the cluster.

    Runs ``ceph versions`` and parses the keys of the 'overall' section
    of its JSON output.

    :returns: set of tuples of integers, one tuple per distinct version
              encountered in the cluster
    :raises: UnknownError
    """
    try:
        raw_output = subprocess.check_output(['ceph', 'versions'])
        tree = raw_output.decode('UTF-8')
    except subprocess.CalledProcessError as e:
        raise UnknownError(
            "UNKNOWN: could not determine OSDs versions, error: {}".format(e))
    overall = json.loads(tree)['overall']

    daemons_versions = set()
    for banner in overall.keys():
        # The third space-separated token of each key is the dotted
        # version number, e.g. "ceph version 16.2.7 (...) pacific (stable)".
        dotted = banner.split(' ')[2]
        daemons_versions.add(tuple(int(part) for part in dotted.split('.')))
    return daemons_versions
|
||||
|
||||
|
||||
def get_status_and_messages(status_data):
|
||||
"""
|
||||
Used to get general status of a Ceph cluster as well as a list of
|
||||
@ -161,50 +135,6 @@ def check_ceph_status(args):
|
||||
"""
|
||||
|
||||
status_critical = False
|
||||
# if it is just --check_daemons_versions_consistency,
|
||||
# deal with it and ignore overall health
|
||||
if args.check_daemons_versions_consistency:
|
||||
daemons_versions = get_daemons_versions()
|
||||
# we check that the osds have same versions
|
||||
num_of_versions = len(daemons_versions)
|
||||
if num_of_versions == 1:
|
||||
message_ok = "OK: All versions alligned"
|
||||
return message_ok
|
||||
else:
|
||||
# version diverged
|
||||
# we check if major release are the same
|
||||
# by parsing version number in the daemon_version set
|
||||
# and keeping major version number or coverting the minor
|
||||
# version number if major version is 0
|
||||
num_of_releases = set(map(lambda x: x[0], daemons_versions))
|
||||
if len(num_of_releases) == 1:
|
||||
msg = 'WARNING: Components minor versions diverged.'
|
||||
'Run get-versions-report to know more'
|
||||
raise WarnError(msg)
|
||||
else:
|
||||
# Releases diverged
|
||||
major, _minor, _patch = get_ceph_version()
|
||||
release_versions_diff = list(map(lambda x: major - x,
|
||||
num_of_releases))
|
||||
if max(release_versions_diff) >= 2:
|
||||
msg = "CRITICAL: A component is " \
|
||||
"{} version behind osd leader" \
|
||||
". Run get-versions-report to know more".format(
|
||||
max(release_versions_diff))
|
||||
raise CriticalError(msg)
|
||||
if min(release_versions_diff) <= -1:
|
||||
msg = "CRITICAL: A component is " \
|
||||
"{} version ahead osd leader" \
|
||||
". Run get-versions-report to know more".format(
|
||||
abs(min(release_versions_diff)))
|
||||
raise CriticalError(msg)
|
||||
if max(release_versions_diff) == 1:
|
||||
msg = "WARNING: A component is " \
|
||||
"{} version behind osd leader" \
|
||||
". Run get-versions-report to know more".format(
|
||||
max(release_versions_diff))
|
||||
raise WarnError(msg)
|
||||
|
||||
if args.status_file:
|
||||
check_file_freshness(args.status_file)
|
||||
with open(args.status_file) as f:
|
||||
@ -357,11 +287,6 @@ def parse_args(args):
|
||||
dest='check_num_osds', default=False,
|
||||
action='store_true',
|
||||
help="Check whether all OSDs are up and in")
|
||||
parser.add_argument('--check_daemons_versions_consistency',
|
||||
dest='check_daemons_versions_consistency',
|
||||
default=False,
|
||||
action='store_true',
|
||||
help="Check all OSDs versions")
|
||||
|
||||
return parser.parse_args(args)
|
||||
|
||||
|
@ -1185,14 +1185,6 @@ def update_nrpe_config():
|
||||
description='Check whether all OSDs are up and in',
|
||||
check_cmd=check_cmd
|
||||
)
|
||||
if is_leader():
|
||||
check_cmd = 'check_ceph_status.py -f {}' \
|
||||
' --check_daemons_versions'.format(STATUS_FILE)
|
||||
nrpe_setup.add_check(
|
||||
shortname='ceph_daemons_versions',
|
||||
description='Check wheter all ceph daemons versions are alligned',
|
||||
check_cmd=check_cmd
|
||||
)
|
||||
nrpe_setup.write()
|
||||
|
||||
|
||||
|
@ -1,35 +0,0 @@
|
||||
{
|
||||
"mon": {
|
||||
"juju-c8b0a2-3-lxd-0": [
|
||||
"juju-c8b0a2-3-lxd-0"
|
||||
],
|
||||
"juju-c8b0a2-4-lxd-0": [
|
||||
"juju-c8b0a2-4-lxd-0"
|
||||
],
|
||||
"juju-c8b0a2-5-lxd-0": [
|
||||
"juju-c8b0a2-5-lxd-0"
|
||||
]
|
||||
},
|
||||
"osd": {
|
||||
"aware-bee": [
|
||||
1
|
||||
],
|
||||
"grand-ape": [
|
||||
0
|
||||
],
|
||||
"lucky-muskox": [
|
||||
2
|
||||
]
|
||||
},
|
||||
"mgr": {
|
||||
"juju-c8b0a2-3-lxd-0": [
|
||||
"juju-c8b0a2-3-lxd-0"
|
||||
],
|
||||
"juju-c8b0a2-4-lxd-0": [
|
||||
"juju-c8b0a2-4-lxd-0"
|
||||
],
|
||||
"juju-c8b0a2-5-lxd-0": [
|
||||
"juju-c8b0a2-5-lxd-0"
|
||||
]
|
||||
}
|
||||
}
|
@ -1,15 +0,0 @@
|
||||
{
|
||||
"mon": {
|
||||
"ceph version 16.2.7 (dd0603118f56ab514f133c8d2e3adfc983942503) pacific (stable)": 3
|
||||
},
|
||||
"mgr": {
|
||||
"ceph version 16.2.7 (dd0603118f56ab514f133c8d2e3adfc983942503) pacific (stable)": 3
|
||||
},
|
||||
"osd": {
|
||||
"ceph version 16.2.7 (dd0603118f56ab514f133c8d2e3adfc983942503) pacific (stable)": 2
|
||||
},
|
||||
"mds": {},
|
||||
"overall": {
|
||||
"ceph version 16.2.7 (dd0603118f56ab514f133c8d2e3adfc983942503) pacific (stable)": 8
|
||||
}
|
||||
}
|
@ -1,19 +0,0 @@
|
||||
{
|
||||
"mon": {
|
||||
"ceph version 15.2.16 (d46a73d6d0a67a79558054a3a5a72cb561724974) octopus (stable)": 1,
|
||||
"ceph version 17.2.0 (43e2e60a7559d3f46c9d53f1ca875fd499a1e35e) quincy (stable)": 2
|
||||
},
|
||||
"mgr": {
|
||||
"ceph version 15.2.16 (d46a73d6d0a67a79558054a3a5a72cb561724974) octopus (stable)": 3
|
||||
},
|
||||
"osd": {
|
||||
"ceph version 16.2.7 (dd0603118f56ab514f133c8d2e3adfc983942503) pacific (stable)": 3,
|
||||
"ceph version 17.2.0 (43e2e60a7559d3f46c9d53f1ca875fd499a1e35e) quincy (stable)": 2
|
||||
},
|
||||
"mds": {},
|
||||
"overall": {
|
||||
"ceph version 15.2.16 (d46a73d6d0a67a79558054a3a5a72cb561724974) octopus (stable)": 4,
|
||||
"ceph version 16.2.7 (dd0603118f56ab514f133c8d2e3adfc983942503) pacific (stable)": 3,
|
||||
"ceph version 17.2.0 (43e2e60a7559d3f46c9d53f1ca875fd499a1e35e) quincy (stable)": 4
|
||||
}
|
||||
}
|
@ -13,7 +13,6 @@
|
||||
import json
|
||||
import sys
|
||||
import unittest.mock as mock
|
||||
from subprocess import CalledProcessError
|
||||
|
||||
from test_utils import CharmTestCase
|
||||
|
||||
@ -49,45 +48,6 @@ class OpsTestCase(CharmTestCase):
|
||||
"action_fail",
|
||||
"open"])
|
||||
|
||||
def test_get_version_report_ok(self):
    """Check the report produced when every ceph command succeeds.

    The first mocked ``check_output`` call returns the node list fixture;
    every later call returns the same version payload.
    """
    def _call_rslt():
        with open('unit_tests/ceph_ls_node.json') as f:
            tree = f.read()
        yield tree.encode('UTF-8')
        while True:
            yield ('{'
                   ' "version": "16.2.7",'
                   ' "release": "pacific",'
                   ' "release_type": "stable"'
                   '}').encode('UTF-8')
    self.check_output.side_effect = _call_rslt()
    result = actions.get_versions_report()
    # The report is serialised with a 4-space JSON indent, so host keys
    # sit at 4 spaces and their version entries at 8.
    self.assertEqual('{\n'
                     '    "aware-bee": [\n'
                     '        "16.2.7"\n'
                     '    ],\n'
                     '    "grand-ape": [\n'
                     '        "16.2.7"\n'
                     '    ],\n'
                     '    "lucky-muskox": [\n'
                     '        "16.2.7"\n'
                     '    ],\n'
                     '    "juju-c8b0a2-3-lxd-0": [\n'
                     '        "16.2.7"\n'
                     '    ],\n'
                     '    "juju-c8b0a2-4-lxd-0": [\n'
                     '        "16.2.7"\n'
                     '    ],\n'
                     '    "juju-c8b0a2-5-lxd-0": [\n'
                     '        "16.2.7"\n'
                     '    ]\n'
                     '}', result)
|
||||
|
||||
def test_get_version_report_fail(self):
    """A failing ceph command must surface as CephReportError."""
    self.check_output.side_effect = CalledProcessError(1, 'ceph node ls')
    with self.assertRaises(actions.CephReportError):
        actions.get_versions_report()
|
||||
|
||||
@mock.patch('socket.gethostname')
|
||||
def test_get_quorum_status(self, mock_hostname):
|
||||
mock_hostname.return_value = 'mockhost'
|
||||
|
@ -17,7 +17,6 @@ import os
|
||||
import sys
|
||||
|
||||
from unittest.mock import patch
|
||||
from subprocess import CalledProcessError
|
||||
|
||||
# import the module we want to test
|
||||
os.sys.path.insert(1, os.path.join(sys.path[0], 'files/nagios'))
|
||||
@ -26,90 +25,6 @@ import check_ceph_status
|
||||
|
||||
@patch('subprocess.check_output')
|
||||
class NagiosTestCase(unittest.TestCase):
|
||||
def test_get_daemons_versions_alligned(self, mock_subprocess):
    """The aligned fixture must parse to a single version tuple."""
    with open('unit_tests/ceph_versions_alligned.json', 'rb') as fixture:
        mock_subprocess.return_value = fixture.read()
    found_versions = check_ceph_status.get_daemons_versions()
    self.assertEqual(found_versions, {(16, 2, 7)})
|
||||
|
||||
def test_get_daemons_versions_diverged(self, mock_subprocess):
    """The diverged fixture must parse to all three version tuples."""
    with open('unit_tests/ceph_versions_diverged.json', 'rb') as fixture:
        mock_subprocess.return_value = fixture.read()
    found_versions = check_ceph_status.get_daemons_versions()
    expected_versions = {(16, 2, 7), (17, 2, 0), (15, 2, 16)}
    self.assertEqual(found_versions, expected_versions)
|
||||
|
||||
def test_get_daemons_versions_exeption(self, mock_subprocess):
    """A failing 'ceph versions' call must raise UnknownError."""
    mock_subprocess.side_effect = CalledProcessError(1, 'ceph versions')
    with self.assertRaises(check_ceph_status.UnknownError):
        check_ceph_status.get_daemons_versions()
|
||||
|
||||
# Version Alligned
|
||||
@patch('check_ceph_status.get_daemons_versions')
def test_versions_alligned(self, mock_daemons_versions, mock_subprocess):
    """A single daemon version must produce the OK message."""
    local_version = ('ceph version 16.2.7 '
                     '(dd0603118f56ab514f133c8d2e3adfc983942503)')
    mock_subprocess.return_value = local_version.encode('UTF-8')
    mock_daemons_versions.return_value = {(16, 2, 7)}
    args = check_ceph_status.parse_args(
        ['--check_daemons_versions_consistency'])
    message = check_ceph_status.check_ceph_status(args)
    self.assertRegex(message, r"^OK: All versions alligned$")
|
||||
|
||||
# Minor version diverged
|
||||
@patch('check_ceph_status.get_daemons_versions')
def test_min_versions_diverged(self, mock_daemons_versions,
                               mock_subprocess):
    """Versions differing only below the major number must warn."""
    local_version = ('ceph version 16.2.7 '
                     '(dd0603118f56ab514f133c8d2e3adfc983942503)')
    mock_subprocess.return_value = local_version.encode('UTF-8')
    mock_daemons_versions.return_value = {(16, 2, 7), (16, 1, 7)}
    args = check_ceph_status.parse_args(
        ['--check_daemons_versions_consistency'])
    with self.assertRaises(check_ceph_status.WarnError):
        check_ceph_status.check_ceph_status(args)
|
||||
|
||||
# Major version ahead
|
||||
@patch('check_ceph_status.get_daemons_versions')
def test_one_version_ahead(self, mock_daemons_versions, mock_subprocess):
    """A daemon one major release ahead of the local one is critical."""
    local_version = ('ceph version 16.2.7 '
                     '(dd0603118f56ab514f133c8d2e3adfc983942503)')
    mock_subprocess.return_value = local_version.encode('UTF-8')
    mock_daemons_versions.return_value = {(16, 2, 7), (17, 2, 0)}
    args = check_ceph_status.parse_args(
        ['--check_daemons_versions_consistency'])
    with self.assertRaises(check_ceph_status.CriticalError):
        check_ceph_status.check_ceph_status(args)
|
||||
|
||||
# Two major version ahead
|
||||
@patch('check_ceph_status.get_daemons_versions')
def test_two_version_ahead(self, mock_daemons_versions, mock_subprocess):
    """A daemon two major releases ahead of the local one is critical."""
    local_version = ('ceph version 15.2.16 '
                     '(d46a73d6d0a67a79558054a3a5a72cb561724974)')
    mock_subprocess.return_value = local_version.encode('UTF-8')
    mock_daemons_versions.return_value = {(15, 2, 16), (17, 2, 0)}
    args = check_ceph_status.parse_args(
        ['--check_daemons_versions_consistency'])
    with self.assertRaises(check_ceph_status.CriticalError):
        check_ceph_status.check_ceph_status(args)
|
||||
|
||||
# Major version behind
|
||||
@patch('check_ceph_status.get_daemons_versions')
def test_version_behind(self, mock_daemons_versions, mock_subprocess):
    """A daemon one major release behind the local one must warn."""
    local_version = ('ceph version 16.2.7 '
                     '(dd0603118f56ab514f133c8d2e3adfc983942503)')
    mock_subprocess.return_value = local_version.encode('UTF-8')
    mock_daemons_versions.return_value = {(15, 2, 16), (16, 2, 7)}
    args = check_ceph_status.parse_args(
        ['--check_daemons_versions_consistency'])
    with self.assertRaises(check_ceph_status.WarnError):
        check_ceph_status.check_ceph_status(args)
|
||||
|
||||
# Two major version behind
|
||||
@patch('check_ceph_status.get_daemons_versions')
def test_two_version_behind(self, mock_daemons_versions, mock_subprocess):
    """A daemon two major releases behind the local one is critical."""
    local_version = ('ceph version 17.2.0 '
                     '(43e2e60a7559d3f46c9d53f1ca875fd499a1e35e)')
    mock_subprocess.return_value = local_version.encode('UTF-8')
    mock_daemons_versions.return_value = {(15, 2, 16), (17, 2, 0)}
    args = check_ceph_status.parse_args(
        ['--check_daemons_versions_consistency'])
    with self.assertRaises(check_ceph_status.CriticalError):
        check_ceph_status.check_ceph_status(args)
|
||||
|
||||
def test_get_ceph_version(self, mock_subprocess):
|
||||
mock_subprocess.return_value = 'ceph version 10.2.9 ' \
|
||||
|
Loading…
x
Reference in New Issue
Block a user