Matthew Heler e1a3819a0d [CEPH] Support a troubleshooting option to reset PG metadata
Ceph upstream bug https://tracker.ceph.com/issues/21142 is
impacting the availability of our sites in the pipeline. Add an
option to reset the past-interval metadata on an OSD's PGs to
resolve this issue if it occurs.

Change-Id: I1fe0bee6ce8aa402c241f1ad457bbf532945a530
2018-12-18 23:26:18 -06:00
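
The option is exposed through the chart values as conf.storage.osd_pg_interval_fix
(see the values.yaml below). A minimal sketch of enabling it, assuming the chart is
deployed with Helm and using an illustrative override file name:

    # pg-interval-fix.yaml (hypothetical override file)
    conf:
      storage:
        osd_pg_interval_fix: "true"

This could be applied with something like
`helm upgrade ceph-osd ./ceph-osd --values=pg-interval-fix.yaml`, where the release
name and chart path are placeholders for your deployment.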

# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Default values for ceph-osd.
# This is a YAML-formatted file.
# Declare name/value pairs to be passed into your templates.
# name: value
images:
  pull_policy: IfNotPresent
  tags:
    ceph_osd: 'docker.io/ceph/daemon:tag-build-master-luminous-ubuntu-16.04'
    ceph_bootstrap: 'docker.io/ceph/daemon:tag-build-master-luminous-ubuntu-16.04'
    ceph_config_helper: 'docker.io/port/ceph-config-helper:v1.10.3'
    dep_check: 'quay.io/stackanetes/kubernetes-entrypoint:v0.3.1'
    image_repo_sync: docker.io/docker:17.07.0
  local_registry:
    active: false
    exclude:
      - dep_check
      - image_repo_sync
labels:
  job:
    node_selector_key: openstack-control-plane
    node_selector_value: enabled
  osd:
    node_selector_key: ceph-osd
    node_selector_value: enabled
pod:
  dns_policy: "ClusterFirstWithHostNet"
  affinity:
    anti:
      type:
        default: preferredDuringSchedulingIgnoredDuringExecution
      topologyKey:
        default: kubernetes.io/hostname
  resources:
    enabled: false
    osd:
      requests:
        memory: "2Gi"
        cpu: "1000m"
      limits:
        memory: "5Gi"
        cpu: "2000m"
    tests:
      requests:
        memory: "10Mi"
        cpu: "250m"
      limits:
        memory: "50Mi"
        cpu: "500m"
    jobs:
      image_repo_sync:
        requests:
          memory: "128Mi"
          cpu: "100m"
        limits:
          memory: "1024Mi"
          cpu: "2000m"
secrets:
  keyrings:
    osd: ceph-bootstrap-osd-keyring
    admin: ceph-client-admin-keyring
network:
  public: 192.168.0.0/16
  cluster: 192.168.0.0/16
conf:
  ceph:
    global:
      # auth
      cephx: true
      cephx_require_signatures: false
      cephx_cluster_require_signatures: true
      cephx_service_require_signatures: false
      objecter_inflight_op_bytes: "1073741824"
      objecter_inflight_ops: 10240
      debug_ms: "0/0"
      mon_osd_down_out_interval: 1800
    osd:
      osd_mkfs_type: xfs
      osd_mkfs_options_xfs: -f -i size=2048
      osd_max_object_name_len: 256
      ms_bind_port_min: 6800
      ms_bind_port_max: 7100
      osd_snap_trim_priority: 1
      osd_snap_trim_sleep: 0.1
      osd_pg_max_concurrent_snap_trims: 1
      filestore_merge_threshold: -10
      filestore_split_multiple: 12
      filestore_max_sync_interval: 10
      osd_scrub_begin_hour: 22
      osd_scrub_end_hour: 4
      osd_scrub_during_recovery: false
      osd_scrub_sleep: 0.1
      osd_scrub_chunk_min: 1
      osd_scrub_chunk_max: 4
      osd_scrub_load_threshold: 10.0
      osd_deep_scrub_stride: "1048576"
      osd_scrub_priority: 1
      osd_recovery_op_priority: 1
      osd_recovery_max_active: 1
      osd_mount_options_xfs: "rw,noatime,largeio,inode64,swalloc,logbufs=8,logbsize=256k,allocsize=4M"
      osd_journal_size: 10240
      osd_crush_update_on_start: false
  storage:
    # NOTE(supamatt): By default, host-based buckets are used for failure domains. Any `failure_domain` defined must
    # match the failure domain used by the CRUSH rules for your pools. For example, with a CRUSH rule of
    # rack_replicated_rule you would specify "rack" as the `failure_domain` to use.
    # `failure_domain`: Set the CRUSH bucket type for your OSD to reside in. The supported CRUSH configuration
    # is listed here: http://docs.ceph.com/docs/luminous/rados/operations/crush-map/
    # `failure_domain_by_hostname`: Specify the portion of the hostname to use for your failure domain bucket name.
    # `failure_domain_name`: Manually set the failure domain bucket name. This configuration option should only be used
    # when using host-based overrides.
    failure_domain: "host"
    failure_domain_by_hostname: "false"
    failure_domain_name: "false"
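    # Example (illustrative only): if your pools use a rack_replicated_rule CRUSH rule, you would set
    # `failure_domain: "rack"` here and name each host's rack bucket through the per-host overrides
    # shown further below.
    # failure_domain: "rack"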
    # NOTE(supamatt): Configurable option to reset the past interval time of a PG.
    # This works around an open bug in Ceph Luminous releases: https://tracker.ceph.com/issues/21142
    osd_pg_interval_fix: "false"
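    # Example (illustrative only): set the value below to apply the workaround cluster wide, or enable it
    # for individual hosts through the overrides section shown further down.
    # osd_pg_interval_fix: "true"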
    # NOTE(portdirect): for homogeneous clusters the `osd` key can be used to
    # define OSD pods that will be deployed across the cluster.
    # When specifying a whole disk (e.g. /dev/sdf) for journals, the ceph-osd chart
    # will create the needed partitions for each OSD.
    osd:
      - data:
          type: directory
          location: /var/lib/openstack-helm/ceph/osd/osd-one
        journal:
          type: directory
          location: /var/lib/openstack-helm/ceph/osd/journal-one
      # - data:
      #     type: block-logical
      #     location: /dev/sde
      #   journal:
      #     type: block-logical
      #     location: /dev/sdf
      # - data:
      #     type: block-logical
      #     location: /dev/sdg
      #   journal:
      #     type: directory
      #     location: /var/lib/openstack-helm/ceph/osd/journal-sdg
# NOTE(portdirect): for heterogeneous clusters the overrides section can be used to define
# OSD pods that will be deployed on specific nodes.
# overrides:
#   ceph_osd:
#     hosts:
#       - name: host1.fqdn
#         conf:
#           storage:
#             osd_pg_interval_fix: "true"
#             failure_domain_name: "rack1"
#             osd:
#               - data:
#                   type: directory
#                   location: /var/lib/openstack-helm/ceph/osd/data-three
#                 journal:
#                   type: directory
#                   location: /var/lib/openstack-helm/ceph/osd/journal-three
dependencies:
  dynamic:
    common:
      local_image_registry:
        jobs:
          - ceph-osd-image-repo-sync
        services:
          - endpoint: node
            service: local_image_registry
  static:
    osd:
      jobs:
        - ceph-storage-keys-generator
        - ceph-osd-keyring-generator
      services:
        - endpoint: internal
          service: ceph_mon
    image_repo_sync:
      services:
        - endpoint: internal
          service: local_image_registry
    tests:
      services:
        - endpoint: internal
          service: ceph_mon
bootstrap:
  enabled: false
  script: |
    ceph -s
    function ensure_pool () {
      ceph osd pool stats $1 || ceph osd pool create $1 $2
      local test_luminous=$(ceph tell osd.* version | egrep -c "12.2|luminous" | xargs echo)
      if [[ ${test_luminous} -gt 0 ]]; then
        ceph osd pool application enable $1 $3
      fi
    }
    #ensure_pool volumes 8 cinder
endpoints:
  cluster_domain_suffix: cluster.local
  local_image_registry:
    name: docker-registry
    namespace: docker-registry
    hosts:
      default: localhost
      internal: docker-registry
      node: localhost
    host_fqdn_override:
      default: null
    port:
      registry:
        node: 5000
  ceph_mon:
    namespace: null
    hosts:
      default: ceph-mon
      discovery: ceph-mon-discovery
    host_fqdn_override:
      default: null
    port:
      mon:
        default: 6789
manifests:
  configmap_bin: true
  configmap_etc: true
  configmap_test_bin: true
  daemonset_osd: true
  job_image_repo_sync: true
  helm_tests: true