openstack-helm-infra/ceph-client/values.yaml
Parsons, Cliff (cp769u) f20eff164f Allow Ceph RBD pool job to leave failed pods
This patchset will add the capability to configure the
Ceph RBD pool job to leave failed pods behind for debugging
purposes, if it is desired. Default is to not leave them
behind, which is the current behavior.

Change-Id: Ife63b73f89996d59b75ec617129818068b060d1c
2021-03-29 19:38:55 +00:00

625 lines
17 KiB
YAML

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Default values for ceph-client.
# This is a YAML-formatted file.
# Declare name/value pairs to be passed into your templates.
# name: value
---
deployment:
ceph: true
release_group: null
images:
pull_policy: IfNotPresent
tags:
ceph_bootstrap: 'docker.io/openstackhelm/ceph-daemon:change_770201_ubuntu_bionic-20210113'
ceph_config_helper: 'docker.io/openstackhelm/ceph-config-helper:change_770201_ubuntu_bionic-20210113'
ceph_mds: 'docker.io/openstackhelm/ceph-daemon:change_770201_ubuntu_bionic-20210113'
ceph_mgr: 'docker.io/openstackhelm/ceph-daemon:change_770201_ubuntu_bionic-20210113'
ceph_rbd_pool: 'docker.io/openstackhelm/ceph-config-helper:change_770201_ubuntu_bionic-20210113'
dep_check: 'quay.io/airshipit/kubernetes-entrypoint:v1.0.0'
image_repo_sync: 'docker.io/docker:17.07.0'
local_registry:
active: false
exclude:
- dep_check
- image_repo_sync
labels:
job:
node_selector_key: openstack-control-plane
node_selector_value: enabled
test:
node_selector_key: openstack-control-plane
node_selector_value: enabled
mds:
node_selector_key: ceph-mds
node_selector_value: enabled
mgr:
node_selector_key: ceph-mgr
node_selector_value: enabled
checkdns:
node_selector_key: ceph-mon
node_selector_value: enabled
pod:
security_context:
checkdns:
pod:
runAsUser: 65534
container:
checkdns:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
mds:
pod:
runAsUser: 65534
container:
init_dirs:
runAsUser: 0
readOnlyRootFilesystem: true
mds:
runAsUser: 64045
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
mgr:
pod:
runAsUser: 65534
container:
init_dirs:
runAsUser: 0
readOnlyRootFilesystem: true
mgr:
runAsUser: 64045
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
bootstrap:
pod:
runAsUser: 65534
container:
bootstrap:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
rbd_pool:
pod:
runAsUser: 65534
container:
rbd_pool:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
test:
pod:
runAsUser: 65534
container:
test:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
dns_policy: "ClusterFirstWithHostNet"
replicas:
mds: 2
mgr: 2
lifecycle:
upgrades:
deployments:
pod_replacement_strategy: RollingUpdate
revision_history: 3
rolling_update:
max_surge: 25%
max_unavailable: 25%
updateStrategy:
mgr:
type: Recreate
affinity:
anti:
type:
default: preferredDuringSchedulingIgnoredDuringExecution
topologyKey:
default: kubernetes.io/hostname
weight:
default: 10
resources:
enabled: false
mds:
requests:
memory: "10Mi"
cpu: "250m"
limits:
memory: "50Mi"
cpu: "500m"
mgr:
requests:
memory: "5Mi"
cpu: "250m"
limits:
memory: "50Mi"
cpu: "500m"
checkdns:
requests:
memory: "5Mi"
cpu: "250m"
limits:
memory: "50Mi"
cpu: "500m"
jobs:
bootstrap:
limits:
memory: "1024Mi"
cpu: "2000m"
requests:
memory: "128Mi"
cpu: "500m"
image_repo_sync:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
rbd_pool:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
tests:
requests:
memory: "10Mi"
cpu: "250m"
limits:
memory: "50Mi"
cpu: "500m"
tolerations:
checkdns:
tolerations:
- effect: NoExecute
key: node.kubernetes.io/not-ready
operator: Exists
tolerationSeconds: 60
- effect: NoExecute
key: node.kubernetes.io/unreachable
operator: Exists
tolerationSeconds: 60
mds:
tolerations:
- effect: NoExecute
key: node.kubernetes.io/not-ready
operator: Exists
tolerationSeconds: 60
- effect: NoExecute
key: node.kubernetes.io/unreachable
operator: Exists
tolerationSeconds: 60
mgr:
tolerations:
- effect: NoExecute
key: node.kubernetes.io/not-ready
operator: Exists
tolerationSeconds: 60
- effect: NoExecute
key: node.kubernetes.io/unreachable
operator: Exists
tolerationSeconds: 60
secrets:
keyrings:
mon: ceph-mon-keyring
mds: ceph-bootstrap-mds-keyring
osd: ceph-bootstrap-osd-keyring
rgw: ceph-bootstrap-rgw-keyring
mgr: ceph-bootstrap-mgr-keyring
admin: ceph-client-admin-keyring
network:
public: 192.168.0.0/16
cluster: 192.168.0.0/16
jobs:
ceph_defragosds:
# Execute the 1st of each month
cron: "0 0 1 * *"
history:
# Number of successful job to keep
successJob: 1
# Number of failed job to keep
failJob: 1
concurrency:
# Skip new job if previous job still active
execPolicy: Forbid
startingDeadlineSecs: 60
pool_checkPGs:
# Execute every 15 minutes
cron: "*/15 * * * *"
history:
# Number of successful job to keep
successJob: 1
# Number of failed job to keep
failJob: 1
concurrency:
# Skip new job if previous job still active
execPolicy: Forbid
startingDeadlineSecs: 60
rbd_pool:
restartPolicy: OnFailure
conf:
features:
mds: true
mgr: true
pg_autoscaler: true
cluster_flags:
# List of flags to set or unset separated by spaces
set: ""
unset: ""
pool:
# NOTE(portdirect): this drives a simple approximation of
# https://ceph.com/pgcalc/, the `target.osd` key should be set to match the
# expected number of osds in a cluster, and the `target.pg_per_osd` should be
# set to match the desired number of placement groups on each OSD.
crush:
# NOTE(portdirect): to use RBD devices with Ubuntu 16.04's 4.4.x series
# kernel this should be set to `hammer`
tunables: null
target:
# NOTE(portdirect): arbitrarily we set the default number of expected OSD's to 5
# to match the number of nodes in the OSH gate.
osd: 5
# This the number of OSDs expected in the final state. This is to allow the above
# target to be smaller initially in the event of a partial deployment. This way
# helm tests can still pass at deployment time and pool quotas can be set based on
# the expected final state (actual target quota = final_osd / osd * quota).
final_osd: 5
# This is just for helm tests to proceed the deployment if we have mentioned % of
# osds are up and running.
required_percent_of_osds: 75
pg_per_osd: 100
# NOTE(bw6938): When pools are created with the autoscaler enabled, a pg_num_min
# value specifies the minimum value of pg_num that the autoscaler will target.
# That default was recently changed from 8 to 32 which severely limits the number
# of pools in a small cluster per https://github.com/rook/rook/issues/5091. This change
# overrides the default pg_num_min value of 32 with a value of 8, matching the default
# pg_num value of 8.
pg_num_min: 8
protected: true
# NOTE(st053q): target quota should be set to the overall cluster full percentage
# to be tolerated as a quota (percent full to allow in order to tolerate some
# level of failure)
# Set target quota to "0" (must be quoted) to remove quotas for all pools
quota: 100
default:
# NOTE(supamatt): Accepted values are taken from `crush_rules` list.
crush_rule: replicated_rule
crush_rules:
# NOTE(supamatt): Device classes must remain undefined if all OSDs are the
# same device type of backing disks (ie, all HDD or all SDD).
- name: same_host
crush_rule: create-simple
failure_domain: osd
device_class:
- name: replicated_rule
crush_rule: create-simple
failure_domain: host
device_class:
- name: rack_replicated_rule
crush_rule: create-simple
failure_domain: rack
device_class:
# - name: replicated_rule-ssd
# crush_rule: create-replicated
# failure_domain: host
# device_class: sdd
# - name: replicated_rule-hdd
# crush_rule: create-replicated
# failure_domain: host
# device_class: hdd
# - name: rack_replicated_rule-ssd
# crush_rule: create-replicated
# failure_domain: rack
# device_class: ssd
# - name: rack_replicated_rule-hdd
# crush_rule: create-replicated
# failure_domain: rack
# device_class: hdd
# - name: row_replicated_rule
# crush_rule: create-simple
# failure_domain: row
# device_class:
# NOTE(portdirect): this section describes the pools that will be managed by
# the ceph pool management job, as it tunes the pgs and crush rule, based on
# the above.
spec:
# Health metrics pool
- name: device_health_metrics
application: mgr_devicehealth
replication: 1
percent_total_data: 5
# RBD pool
- name: rbd
application: rbd
replication: 3
percent_total_data: 40
# Example of 100 GiB pool_quota for rbd pool (no pool quota if absent)
# May be specified in TiB, TB, GiB, GB, MiB, MB, KiB, KB, or bytes
# NOTE: This should always be a string value to avoid Helm issues with large integers
# pool_quota: "100GiB"
# NOTE(supamatt): By default the crush rules used to create each pool will be
# taken from the pool default `crush_rule` unless a pool specific `crush_rule`
# is specified. The rule MUST exist for it to be defined here.
# crush_rule: replicated_rule
# CephFS pools
- name: cephfs_metadata
application: cephfs
replication: 3
percent_total_data: 5
- name: cephfs_data
application: cephfs
replication: 3
percent_total_data: 10
# RadosGW pools
- name: .rgw.root
application: rgw
replication: 3
percent_total_data: 0.1
- name: default.rgw.control
application: rgw
replication: 3
percent_total_data: 0.1
- name: default.rgw.data.root
application: rgw
replication: 3
percent_total_data: 0.1
- name: default.rgw.gc
application: rgw
replication: 3
percent_total_data: 0.1
- name: default.rgw.log
application: rgw
replication: 3
percent_total_data: 0.1
- name: default.rgw.intent-log
application: rgw
replication: 3
percent_total_data: 0.1
- name: default.rgw.meta
application: rgw
replication: 3
percent_total_data: 0.1
- name: default.rgw.usage
application: rgw
replication: 3
percent_total_data: 0.1
- name: default.rgw.users.keys
application: rgw
replication: 3
percent_total_data: 0.1
- name: default.rgw.users.email
application: rgw
replication: 3
percent_total_data: 0.1
- name: default.rgw.users.swift
application: rgw
replication: 3
percent_total_data: 0.1
- name: default.rgw.users.uid
application: rgw
replication: 3
percent_total_data: 0.1
- name: default.rgw.buckets.extra
application: rgw
replication: 3
percent_total_data: 0.1
- name: default.rgw.buckets.index
application: rgw
replication: 3
percent_total_data: 3
- name: default.rgw.buckets.data
application: rgw
replication: 3
percent_total_data: 29
ceph:
global:
# auth
cephx: true
cephx_require_signatures: false
cephx_cluster_require_signatures: true
cephx_service_require_signatures: false
objecter_inflight_op_bytes: "1073741824"
objecter_inflight_ops: 10240
debug_ms: "0/0"
log_file: /dev/stdout
mon_cluster_log_file: /dev/stdout
osd:
osd_mkfs_type: xfs
osd_mkfs_options_xfs: -f -i size=2048
osd_max_object_name_len: 256
ms_bind_port_min: 6800
ms_bind_port_max: 7100
dependencies:
dynamic:
common:
local_image_registry:
jobs:
- ceph-client-image-repo-sync
services:
- endpoint: node
service: local_image_registry
static:
bootstrap:
jobs: null
services:
- endpoint: internal
service: ceph_mon
cephfs_client_key_generator:
jobs: null
mds:
jobs:
- ceph-storage-keys-generator
- ceph-mds-keyring-generator
- ceph-rbd-pool
services:
- endpoint: internal
service: ceph_mon
mgr:
jobs:
- ceph-storage-keys-generator
- ceph-mgr-keyring-generator
services:
- endpoint: internal
service: ceph_mon
pool_checkpgs:
jobs:
- ceph-rbd-pool
services:
- endpoint: internal
service: ceph_mgr
checkdns:
services:
- endpoint: internal
service: ceph_mon
namespace_client_key_cleaner:
jobs: null
namespace_client_key_generator:
jobs: null
rbd_pool:
services:
- endpoint: internal
service: ceph_mon
- endpoint: internal
service: ceph_mgr
image_repo_sync:
services:
- endpoint: internal
service: local_image_registry
tests:
jobs:
- ceph-rbd-pool
- ceph-mgr-keyring-generator
services:
- endpoint: internal
service: ceph_mon
- endpoint: internal
service: ceph_mgr
bootstrap:
enabled: false
script: |
ceph -s
function ensure_pool () {
ceph osd pool stats $1 || ceph osd pool create $1 $2
if [[ $(ceph mon versions | awk '/version/{print $3}' | cut -d. -f1) -ge 12 ]]; then
ceph osd pool application enable $1 $3
fi
}
#ensure_pool volumes 8 cinder
# Uncomment below to enable mgr modules
# For a list of available modules:
# http://docs.ceph.com/docs/master/mgr/
# This overrides mgr_initial_modules (default: restful, status)
# Any module not listed here will be disabled
ceph_mgr_enabled_modules:
- restful
- status
- prometheus
- balancer
- iostat
- pg_autoscaler
# You can configure your mgr modules
# below. Each module has its own set
# of key/value. Refer to the doc
# above for more info. For example:
ceph_mgr_modules_config:
# balancer:
# active: 1
# prometheus:
# server_port: 9283
# server_addr: 0.0.0.0
# dashboard:
# port: 7000
# localpool:
# failure_domain: host
# subtree: rack
# pg_num: "128"
# num_rep: "3"
# min_size: "2"
endpoints:
cluster_domain_suffix: cluster.local
local_image_registry:
name: docker-registry
namespace: docker-registry
hosts:
default: localhost
internal: docker-registry
node: localhost
host_fqdn_override:
default: null
port:
registry:
node: 5000
ceph_mon:
namespace: null
hosts:
default: ceph-mon
discovery: ceph-mon-discovery
host_fqdn_override:
default: null
port:
mon:
default: 6789
mon_msgr2:
default: 3300
ceph_mgr:
namespace: null
hosts:
default: ceph-mgr
host_fqdn_override:
default: null
port:
mgr:
default: 7000
metrics:
default: 9283
scheme:
default: http
monitoring:
prometheus:
enabled: true
ceph_mgr:
scrape: true
port: 9283
manifests:
configmap_bin: true
configmap_test_bin: true
configmap_etc: true
deployment_mds: true
deployment_mgr: true
deployment_checkdns: true
job_bootstrap: false
job_cephfs_client_key: true
job_image_repo_sync: true
job_rbd_pool: true
service_mgr: true
helm_tests: true
cronjob_checkPGs: true
cronjob_defragosds: true
...