84f1557566
Currently the Ceph helm tests pass even when the deployed Ceph cluster is
unhealthy. This change expands the cluster status testing logic to pass
when all PGs are active and to fail if any PG is inactive.

The PG autoscaler is currently causing unhealthy Ceph clusters to be
deployed, so this change also disables it. It should be re-enabled once
those issues are resolved.

Change-Id: Iea1ff5006fc00e4570cf67c6af5ef6746a538058
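For context, a minimal sketch of the kind of check the expanded test logic
performs, assuming jq is available in the test image (illustrative only,
not the chart's exact helm-tests script):

    #!/bin/bash
    # Fail if any PG reports a state that does not begin with "active"
    # (e.g. "peering", "undersized+degraded").
    set -e

    pg_states="$(ceph -s -f json | jq -r '.pgmap.pgs_by_state[].state_name')"

    rc=0
    for state in ${pg_states}; do
      # Healthy PG states always start with "active" (active+clean,
      # active+clean+scrubbing, ...).
      if [[ "${state}" != active* ]]; then
        echo "Inactive PG state found: ${state}"
        rc=1
      fi
    done

    if [[ ${rc} -ne 0 ]]; then
      echo "Some PGs are inactive; failing the test"
      exit 1
    fi
    echo "All PGs are active"
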
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Default values for ceph-client.
# This is a YAML-formatted file.
# Declare name/value pairs to be passed into your templates.
#   name: value

---
deployment:
  ceph: true

release_group: null

images:
  pull_policy: IfNotPresent
  tags:
    ceph_bootstrap: 'docker.io/openstackhelm/ceph-daemon:ubuntu_bionic-20200521'
    ceph_config_helper: 'docker.io/openstackhelm/ceph-config-helper:ubuntu_bionic-20200521'
    ceph_mds: 'docker.io/openstackhelm/ceph-daemon:ubuntu_bionic-20200521'
    ceph_mgr: 'docker.io/openstackhelm/ceph-daemon:ubuntu_bionic-20200521'
    ceph_rbd_pool: 'docker.io/openstackhelm/ceph-config-helper:ubuntu_bionic-20200521'
    dep_check: 'quay.io/airshipit/kubernetes-entrypoint:v1.0.0'
    image_repo_sync: 'docker.io/docker:17.07.0'
  local_registry:
    active: false
    exclude:
      - dep_check
      - image_repo_sync

labels:
  job:
    node_selector_key: openstack-control-plane
    node_selector_value: enabled
  test:
    node_selector_key: openstack-control-plane
    node_selector_value: enabled
  mds:
    node_selector_key: ceph-mds
    node_selector_value: enabled
  mgr:
    node_selector_key: ceph-mgr
    node_selector_value: enabled
  checkdns:
    node_selector_key: ceph-mon
    node_selector_value: enabled

pod:
  security_context:
    checkdns:
      pod:
        runAsUser: 65534
      container:
        checkdns:
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
    mds:
      pod:
        runAsUser: 65534
      container:
        init_dirs:
          runAsUser: 0
          readOnlyRootFilesystem: true
        mds:
          runAsUser: 0
          readOnlyRootFilesystem: true
    mgr:
      pod:
        runAsUser: 65534
      container:
        init_dirs:
          runAsUser: 0
          readOnlyRootFilesystem: true
        mgr:
          runAsUser: 0
          readOnlyRootFilesystem: true
    bootstrap:
      pod:
        runAsUser: 65534
      container:
        bootstrap:
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
    rbd_pool:
      pod:
        runAsUser: 65534
      container:
        rbd_pool:
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
    test:
      pod:
        runAsUser: 65534
      container:
        test:
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
  dns_policy: "ClusterFirstWithHostNet"
  replicas:
    mds: 2
    mgr: 2
  lifecycle:
    upgrades:
      deployments:
        pod_replacement_strategy: RollingUpdate
        revision_history: 3
        rolling_update:
          max_surge: 25%
          max_unavailable: 25%
  updateStrategy:
    mgr:
      type: Recreate
  affinity:
    anti:
      type:
        default: preferredDuringSchedulingIgnoredDuringExecution
      topologyKey:
        default: kubernetes.io/hostname
      weight:
        default: 10
  resources:
    enabled: false
    mds:
      requests:
        memory: "10Mi"
        cpu: "250m"
      limits:
        memory: "50Mi"
        cpu: "500m"
    mgr:
      requests:
        memory: "5Mi"
        cpu: "250m"
      limits:
        memory: "50Mi"
        cpu: "500m"
    checkdns:
      requests:
        memory: "5Mi"
        cpu: "250m"
      limits:
        memory: "50Mi"
        cpu: "500m"
    jobs:
      bootstrap:
        limits:
          memory: "1024Mi"
          cpu: "2000m"
        requests:
          memory: "128Mi"
          cpu: "500m"
      image_repo_sync:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
      rbd_pool:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
      tests:
        requests:
          memory: "10Mi"
          cpu: "250m"
        limits:
          memory: "50Mi"
          cpu: "500m"
  tolerations:
    checkdns:
      tolerations:
        - effect: NoExecute
          key: node.kubernetes.io/not-ready
          operator: Exists
          tolerationSeconds: 60
        - effect: NoExecute
          key: node.kubernetes.io/unreachable
          operator: Exists
          tolerationSeconds: 60
    mds:
      tolerations:
        - effect: NoExecute
          key: node.kubernetes.io/not-ready
          operator: Exists
          tolerationSeconds: 60
        - effect: NoExecute
          key: node.kubernetes.io/unreachable
          operator: Exists
          tolerationSeconds: 60
    mgr:
      tolerations:
        - effect: NoExecute
          key: node.kubernetes.io/not-ready
          operator: Exists
          tolerationSeconds: 60
        - effect: NoExecute
          key: node.kubernetes.io/unreachable
          operator: Exists
          tolerationSeconds: 60

secrets:
  keyrings:
    mon: ceph-mon-keyring
    mds: ceph-bootstrap-mds-keyring
    osd: ceph-bootstrap-osd-keyring
    rgw: ceph-bootstrap-rgw-keyring
    mgr: ceph-bootstrap-mgr-keyring
    admin: ceph-client-admin-keyring

network:
  public: 192.168.0.0/16
  cluster: 192.168.0.0/16

jobs:
  ceph_defragosds:
    # Execute on the 1st of each month
    cron: "0 0 1 * *"
    history:
      # Number of successful jobs to keep
      successJob: 1
      # Number of failed jobs to keep
      failJob: 1
    concurrency:
      # Skip a new job if the previous job is still active
      execPolicy: Forbid
    startingDeadlineSecs: 60
  pool_checkPGs:
    # Execute every 15 minutes
    cron: "*/15 * * * *"
    history:
      # Number of successful jobs to keep
      successJob: 1
      # Number of failed jobs to keep
      failJob: 1
    concurrency:
      # Skip a new job if the previous job is still active
      execPolicy: Forbid
    startingDeadlineSecs: 60

conf:
  features:
    mds: true
    mgr: true
    pg_autoscaler: false
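    # NOTE: the PG autoscaler is currently causing unhealthy Ceph clusters to
    # be deployed, so it is disabled here; it should be re-enabled once those
    # issues are resolved (see the commit message above).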
  cluster_flags:
    # List of flags to set or unset, separated by spaces
    set: ""
    unset: ""
  pool:
    # NOTE(portdirect): this drives a simple approximation of
    # https://ceph.com/pgcalc/; the `target.osd` key should be set to match the
    # expected number of OSDs in the cluster, and `target.pg_per_osd` should be
    # set to match the desired number of placement groups on each OSD.
    crush:
      # NOTE(portdirect): to use RBD devices with Ubuntu 16.04's 4.4.x series
      # kernel, this should be set to `hammer`
      tunables: null
    target:
      # NOTE(portdirect): we arbitrarily set the default number of expected
      # OSDs to 5 to match the number of nodes in the OSH gate (used only for
      # helm tests).
      osd: 5
      # Used only by the helm tests: the deployment is allowed to proceed once
      # at least this percentage of OSDs is up and running (with `target.osd: 5`,
      # roughly 4 of 5 OSDs).
      required_percent_of_osds: 75
      pg_per_osd: 100
      protected: true
      # NOTE(st053q): target quota should be set to the overall cluster full
      # percentage to be tolerated as a quota (percent full to allow in order
      # to tolerate some level of failure)
      quota: 100
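      # A rough worked example of the approximation above (the exact rounding
      # is done by the pool management job): with `target.osd: 5` and
      # `pg_per_osd: 100` the cluster PG budget is about 500 PGs; a pool with
      # `percent_total_data: 40` and `replication: 3` then gets roughly
      # (500 * 0.40) / 3 ~= 67 PGs, rounded to a nearby power of two (64).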
    default:
      # NOTE(supamatt): Accepted values are taken from the `crush_rules` list.
      crush_rule: replicated_rule
    crush_rules:
      # NOTE(supamatt): Device classes must remain undefined if all OSDs use
      # the same type of backing disk (i.e., all HDD or all SSD).
      - name: same_host
        crush_rule: create-simple
        failure_domain: osd
        device_class:
      - name: replicated_rule
        crush_rule: create-simple
        failure_domain: host
        device_class:
      - name: rack_replicated_rule
        crush_rule: create-simple
        failure_domain: rack
        device_class:
      # - name: replicated_rule-ssd
      #   crush_rule: create-replicated
      #   failure_domain: host
      #   device_class: ssd
      # - name: replicated_rule-hdd
      #   crush_rule: create-replicated
      #   failure_domain: host
      #   device_class: hdd
      # - name: rack_replicated_rule-ssd
      #   crush_rule: create-replicated
      #   failure_domain: rack
      #   device_class: ssd
      # - name: rack_replicated_rule-hdd
      #   crush_rule: create-replicated
      #   failure_domain: rack
      #   device_class: hdd
      # - name: row_replicated_rule
      #   crush_rule: create-simple
      #   failure_domain: row
      #   device_class:

    # NOTE(portdirect): this section describes the pools that will be managed by
    # the ceph pool management job, as it tunes the pgs and crush rule, based on
    # the above.
    spec:
      # RBD pool
      - name: rbd
        application: rbd
        replication: 3
        percent_total_data: 40
        # NOTE(supamatt): By default the crush rules used to create each pool will be
        # taken from the pool default `crush_rule` unless a pool specific `crush_rule`
        # is specified. The rule MUST exist for it to be defined here.
        # crush_rule: replicated_rule
      # CephFS pools
      - name: cephfs_metadata
        application: cephfs
        replication: 3
        percent_total_data: 5
      - name: cephfs_data
        application: cephfs
        replication: 3
        percent_total_data: 10
      # RadosGW pools
      - name: .rgw.root
        application: rgw
        replication: 3
        percent_total_data: 0.1
      - name: default.rgw.control
        application: rgw
        replication: 3
        percent_total_data: 0.1
      - name: default.rgw.data.root
        application: rgw
        replication: 3
        percent_total_data: 0.1
      - name: default.rgw.gc
        application: rgw
        replication: 3
        percent_total_data: 0.1
      - name: default.rgw.log
        application: rgw
        replication: 3
        percent_total_data: 0.1
      - name: default.rgw.intent-log
        application: rgw
        replication: 3
        percent_total_data: 0.1
      - name: default.rgw.meta
        application: rgw
        replication: 3
        percent_total_data: 0.1
      - name: default.rgw.usage
        application: rgw
        replication: 3
        percent_total_data: 0.1
      - name: default.rgw.users.keys
        application: rgw
        replication: 3
        percent_total_data: 0.1
      - name: default.rgw.users.email
        application: rgw
        replication: 3
        percent_total_data: 0.1
      - name: default.rgw.users.swift
        application: rgw
        replication: 3
        percent_total_data: 0.1
      - name: default.rgw.users.uid
        application: rgw
        replication: 3
        percent_total_data: 0.1
      - name: default.rgw.buckets.extra
        application: rgw
        replication: 3
        percent_total_data: 0.1
      - name: default.rgw.buckets.index
        application: rgw
        replication: 3
        percent_total_data: 3
      - name: default.rgw.buckets.data
        application: rgw
        replication: 3
        percent_total_data: 34.8

ceph:
  global:
    # auth
    cephx: true
    cephx_require_signatures: false
    cephx_cluster_require_signatures: true
    cephx_service_require_signatures: false
    objecter_inflight_op_bytes: "1073741824"
    objecter_inflight_ops: 10240
    debug_ms: "0/0"
    log_file: /dev/stdout
    mon_cluster_log_file: /dev/stdout
  osd:
    osd_mkfs_type: xfs
    osd_mkfs_options_xfs: -f -i size=2048
    osd_max_object_name_len: 256
    ms_bind_port_min: 6800
    ms_bind_port_max: 7100

dependencies:
  dynamic:
    common:
      local_image_registry:
        jobs:
          - ceph-client-image-repo-sync
        services:
          - endpoint: node
            service: local_image_registry
  static:
    bootstrap:
      jobs: null
      services:
        - endpoint: internal
          service: ceph_mon
    cephfs_client_key_generator:
      jobs: null
    mds:
      jobs:
        - ceph-storage-keys-generator
        - ceph-mds-keyring-generator
        - ceph-rbd-pool
      services:
        - endpoint: internal
          service: ceph_mon
    mgr:
      jobs:
        - ceph-storage-keys-generator
        - ceph-mgr-keyring-generator
      services:
        - endpoint: internal
          service: ceph_mon
    pool_checkpgs:
      jobs:
        - ceph-rbd-pool
      services:
        - endpoint: internal
          service: ceph_mgr
    checkdns:
      services:
        - endpoint: internal
          service: ceph_mon
    namespace_client_key_cleaner:
      jobs: null
    namespace_client_key_generator:
      jobs: null
    rbd_pool:
      services:
        - endpoint: internal
          service: ceph_mon
        - endpoint: internal
          service: ceph_mgr
    image_repo_sync:
      services:
        - endpoint: internal
          service: local_image_registry
    tests:
      jobs:
        - ceph-rbd-pool
        - ceph-mgr-keyring-generator
      services:
        - endpoint: internal
          service: ceph_mon
        - endpoint: internal
          service: ceph_mgr

bootstrap:
  enabled: false
  script: |
    ceph -s
    function ensure_pool () {
      # Create the pool only if it does not already exist
      ceph osd pool stats $1 || ceph osd pool create $1 $2
      # Pool application tags exist on Luminous and later; count OSDs
      # reporting a recent release before enabling the tag
      local test_version=$(ceph tell osd.* version | egrep -c "nautilus|mimic|luminous" | xargs echo)
      if [[ ${test_version} -gt 0 ]]; then
        ceph osd pool application enable $1 $3
      fi
    }
    #ensure_pool volumes 8 cinder

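# For example, to have the bootstrap job create a 'volumes' pool with 8
# placement groups tagged for cinder, set `enabled: true` above and uncomment
# the ensure_pool call (arguments: pool name, pg count, application tag).
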
# Uncomment below to enable mgr modules
# For a list of available modules:
#   http://docs.ceph.com/docs/master/mgr/
# This overrides mgr_initial_modules (default: restful, status)
# Any module not listed here will be disabled
ceph_mgr_enabled_modules:
  - restful
  - status
  - prometheus
  - balancer
  - iostat
  - pg_autoscaler
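# NOTE: listing pg_autoscaler above only loads the mgr module; whether
# autoscaling is actually applied to the pools is gated separately by
# conf.features.pg_autoscaler, which is currently false.
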
# You can configure your mgr modules below. Each module has its own set of
# key/value pairs. Refer to the doc above for more info. For example:
ceph_mgr_modules_config:
  # balancer:
  #   active: 1
  # prometheus:
  #   server_port: 9283
  #   server_addr: 0.0.0.0
  # dashboard:
  #   port: 7000
  # localpool:
  #   failure_domain: host
  #   subtree: rack
  #   pg_num: "128"
  #   num_rep: "3"
  #   min_size: "2"

endpoints:
  cluster_domain_suffix: cluster.local
  local_image_registry:
    name: docker-registry
    namespace: docker-registry
    hosts:
      default: localhost
      internal: docker-registry
      node: localhost
    host_fqdn_override:
      default: null
    port:
      registry:
        node: 5000
  ceph_mon:
    namespace: null
    hosts:
      default: ceph-mon
      discovery: ceph-mon-discovery
    host_fqdn_override:
      default: null
    port:
      mon:
        default: 6789
      mon_msgr2:
        default: 3300
  ceph_mgr:
    namespace: null
    hosts:
      default: ceph-mgr
    host_fqdn_override:
      default: null
    port:
      mgr:
        default: 7000
      metrics:
        default: 9283
    scheme:
      default: http

monitoring:
  prometheus:
    enabled: true
    ceph_mgr:
      scrape: true
      port: 9283

manifests:
  configmap_bin: true
  configmap_test_bin: true
  configmap_etc: true
  deployment_mds: true
  deployment_mgr: true
  deployment_checkdns: true
  job_bootstrap: false
  job_cephfs_client_key: true
  job_image_repo_sync: true
  job_rbd_pool: true
  service_mgr: true
  helm_tests: true
  cronjob_checkPGs: true
  cronjob_defragosds: true
...