swift/etc/container-server.conf-sample
Matthew Oliver, 2641814010: Add sharder daemon, manage_shard_ranges tool and probe tests
The sharder daemon visits container dbs and when necessary executes
the sharding workflow on the db.

The workflow is, in overview:

- perform an audit of the container for sharding purposes.

- move any misplaced objects that do not belong in the container
  to their correct shard.

- move shard ranges from FOUND state to CREATED state by creating
  shard containers.

- move shard ranges from CREATED to CLEAVED state by cleaving objects
  to shard dbs and replicating those dbs. By default this is done in
  batches of 2 shard ranges per visit.

Additionally, when the auto_shard option is True (NOT yet recommended
in production), the sharder will identify shard ranges for containers
that have exceeded the threshold for sharding, and will also manage
the sharding and shrinking of shard containers.

The manage_shard_ranges tool provides a means to manually identify
shard ranges and merge them into a container in order to trigger
sharding. This is currently the recommended way to shard a container.
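
For illustration, a typical invocation against a container db on one of its
primary nodes might look like this (the db path is a placeholder and the
exact subcommands may vary between releases):

    swift-manage-shard-ranges <path_to_container_db> find_and_replace 500000 --enable

This scans the container's object rows for shard ranges of roughly 500000
objects each, merges those ranges into the container db, and marks the
container as ready for the sharder daemon to act on.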

Co-Authored-By: Alistair Coles <alistairncoles@gmail.com>
Co-Authored-By: Tim Burke <tim.burke@gmail.com>
Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com>

Change-Id: I7f192209d4d5580f5a0aa6838f9f04e436cf6b1f

[DEFAULT]
# bind_ip = 0.0.0.0
bind_port = 6201
# bind_timeout = 30
# backlog = 4096
# user = swift
# swift_dir = /etc/swift
# devices = /srv/node
# mount_check = true
# disable_fallocate = false
#
# Use an integer to override the number of pre-forked processes that will
# accept connections.
# workers = auto
#
# Maximum concurrent requests per worker
# max_clients = 1024
#
# This is a comma separated list of hosts allowed in the X-Container-Sync-To
# field for containers. This is the old style of container sync. It is
# strongly recommended to use the new style of a separate
# container-sync-realms.conf -- see container-sync-realms.conf-sample
# allowed_sync_hosts = 127.0.0.1
#
# You can specify default log routing here if you want:
# log_name = swift
# log_facility = LOG_LOCAL0
# log_level = INFO
# log_address = /dev/log
# The following caps the length of log lines to the value given; no limit if
# set to 0, the default.
# log_max_line_length = 0
#
# comma separated list of functions to call to set up custom log handlers.
# functions get passed: conf, name, log_to_console, log_route, fmt, logger,
# adapted_logger
# log_custom_handlers =
#
# If set, log_udp_host will override log_address
# log_udp_host =
# log_udp_port = 514
#
# You can enable StatsD logging here:
# log_statsd_host =
# log_statsd_port = 8125
# log_statsd_default_sample_rate = 1.0
# log_statsd_sample_rate_factor = 1.0
# log_statsd_metric_prefix =
#
# If you don't mind the extra disk space used as overhead, you can turn this
# on to preallocate disk space for SQLite databases to decrease fragmentation.
# db_preallocation = off
#
# eventlet_debug = false
#
# You can set fallocate_reserve to the number of bytes or percentage of disk
# space you'd like fallocate to reserve, whether there is space for the given
# file size or not. Percentage will be used if the value ends with a '%'.
# fallocate_reserve = 1%
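#
# For example, the reserve can also be given as an absolute number of bytes
# rather than a percentage; the value below is only an illustration (10 GiB):
# fallocate_reserve = 10737418240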
#
# You can set scheduling priority of processes. Niceness values range from -20
# (most favorable to the process) to 19 (least favorable to the process).
# nice_priority =
#
# You can set I/O scheduling class and priority of processes. I/O niceness
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
# 0 to 7. The higher the value, the lower the I/O priority of the process.
# Works only with ionice_class.
# ionice_class =
# ionice_priority =
#
# The prefix used for hidden auto-created accounts, for example accounts in
# which shard containers are created. Defaults to '.'.
# auto_create_account_prefix = .
[pipeline:main]
pipeline = healthcheck recon container-server
[app:container-server]
use = egg:swift#container
# You can override the default log routing for this app here:
# set log_name = container-server
# set log_facility = LOG_LOCAL0
# set log_level = INFO
# set log_requests = true
# set log_address = /dev/log
#
# node_timeout = 3
# conn_timeout = 0.5
# allow_versions = false
# auto_create_account_prefix = .
#
# Configure this parameter to control how this server handles replication verbs.
# To handle all verbs, including replication verbs, do not specify
# "replication_server" (this is the default). To only handle replication,
# set to a True value (e.g. "True" or "1"). To handle only non-replication
# verbs, set to "False". Unless you have a separate replication network, you
# should not specify any value for "replication_server".
# replication_server = false
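#
# For example, a deployment with a dedicated replication network might run a
# second container-server instance, from a separate (hypothetical) config file
# bound to the replication ip and port from the ring, with:
# replication_server = true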
#
# You can set scheduling priority of processes. Niceness values range from -20
# (most favorable to the process) to 19 (least favorable to the process).
# nice_priority =
#
# You can set I/O scheduling class and priority of processes. I/O niceness
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
# 0 to 7. The higher the value, the lower the I/O priority of the process.
# Works only with ionice_class.
# ionice_class =
# ionice_priority =
[filter:healthcheck]
use = egg:swift#healthcheck
# An optional filesystem path which, if present, will cause the healthcheck
# URL to return "503 Service Unavailable" with a body of "DISABLED BY FILE"
# disable_path =
[filter:recon]
use = egg:swift#recon
# recon_cache_path = /var/cache/swift
[container-replicator]
# You can override the default log routing for this app here (don't use set!):
# log_name = container-replicator
# log_facility = LOG_LOCAL0
# log_level = INFO
# log_address = /dev/log
#
# Maximum number of database rows that will be sync'd in a single HTTP
# replication request. Databases with less than or equal to this number of
# differing rows will always be sync'd using an HTTP replication request rather
# than using rsync.
# per_diff = 1000
#
# Maximum number of HTTP replication requests attempted on each replication
# pass for any one container. This caps how long the replicator will spend
# trying to sync a given database per pass so the other databases don't get
# starved.
# max_diffs = 100
#
# Number of replication workers to spawn.
# concurrency = 8
#
# Time in seconds to wait between replication passes
# interval = 30
# run_pause is deprecated, use interval instead
# run_pause = 30
#
# node_timeout = 10
# conn_timeout = 0.5
#
# The replicator also performs reclamation
# reclaim_age = 604800
#
# Allow rsync to compress data which is transmitted to the destination node
# during sync. However, this only applies when the destination node is in
# a different region than the local one.
# rsync_compress = no
#
# Format of the rsync module where the replicator will send data. See
# etc/rsyncd.conf-sample for some usage examples.
# rsync_module = {replication_ip}::container
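#
# For the default module name above, a matching entry in the rsync daemon
# configuration (see etc/rsyncd.conf-sample; the path below is an assumption
# for a typical deployment) might look like:
# [container]
# path = /srv/node
# read only = False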
#
# recon_cache_path = /var/cache/swift
#
# You can set scheduling priority of processes. Niceness values range from -20
# (most favorable to the process) to 19 (least favorable to the process).
# nice_priority =
#
# You can set I/O scheduling class and priority of processes. I/O niceness
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
# 0 to 7. The higher the value, the lower the I/O priority of the process.
# Works only with ionice_class.
# ionice_class =
# ionice_priority =
#
# The handoffs_only mode option is for special-case emergency
# situations such as full disks in the cluster. This option SHOULD NOT
# BE ENABLED except in emergencies. When handoffs_only mode is enabled
# the replicator will *only* replicate from handoff nodes to primary
# nodes and will not sync primary nodes with other primary nodes.
#
# This has two main effects: first, the replicator becomes much more
# effective at removing misplaced databases, thereby freeing up disk
# space at a much faster pace than normal. Second, the replicator does
# not sync data between primary nodes, so out-of-sync account and
# container listings will not resolve while handoffs_only is enabled.
#
# This mode is intended to allow operators to temporarily sacrifice
# consistency in order to gain faster rebalancing, such as during a
# capacity addition with nearly-full disks. It is not intended for
# long-term use.
#
# handoffs_only = no
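#
# For example, during an emergency such as a capacity addition with nearly-full
# disks, an operator might temporarily set:
# handoffs_only = yes
# and revert it to 'no' once the handoff partitions have drained.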
[container-updater]
# You can override the default log routing for this app here (don't use set!):
# log_name = container-updater
# log_facility = LOG_LOCAL0
# log_level = INFO
# log_address = /dev/log
#
# interval = 300
# concurrency = 4
# node_timeout = 3
# conn_timeout = 0.5
#
# Send at most this many container updates per second
# containers_per_second = 50
#
# slowdown will sleep the given number of seconds between containers.
# Deprecated; use containers_per_second instead.
# slowdown = 0.01
#
# Seconds to suppress updating an account that has generated an error
# account_suppression_time = 60
#
# recon_cache_path = /var/cache/swift
#
# You can set scheduling priority of processes. Niceness values range from -20
# (most favorable to the process) to 19 (least favorable to the process).
# nice_priority =
#
# You can set I/O scheduling class and priority of processes. I/O niceness
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
# 0 to 7. The higher the value, the lower the I/O priority of the process.
# Works only with ionice_class.
# ionice_class =
# ionice_priority =
[container-auditor]
# You can override the default log routing for this app here (don't use set!):
# log_name = container-auditor
# log_facility = LOG_LOCAL0
# log_level = INFO
# log_address = /dev/log
#
# Will audit each container at most once per interval
# interval = 1800
#
# containers_per_second = 200
# recon_cache_path = /var/cache/swift
#
# You can set scheduling priority of processes. Niceness values range from -20
# (most favorable to the process) to 19 (least favorable to the process).
# nice_priority =
#
# You can set I/O scheduling class and priority of processes. I/O niceness
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
# 0 to 7. The higher the value, the lower the I/O priority of the process.
# Works only with ionice_class.
# ionice_class =
# ionice_priority =
[container-sync]
# You can override the default log routing for this app here (don't use set!):
# log_name = container-sync
# log_facility = LOG_LOCAL0
# log_level = INFO
# log_address = /dev/log
#
# If you need to use an HTTP Proxy, set it here; defaults to no proxy.
# You can also set this to a comma separated list of HTTP Proxies and they will
# be randomly used (simple load balancing).
# sync_proxy = http://10.1.1.1:8888,http://10.1.1.2:8888
#
# Will sync each container at most once per interval
# interval = 300
#
# Maximum amount of time to spend syncing each container per pass
# container_time = 60
#
# Maximum amount of time in seconds for the connection attempt
# conn_timeout = 5
# Server errors from requests will be retried by default
# request_tries = 3
#
# Internal client config file path
# internal_client_conf_path = /etc/swift/internal-client.conf
#
# You can set scheduling priority of processes. Niceness values range from -20
# (most favorable to the process) to 19 (least favorable to the process).
# nice_priority =
#
# You can set I/O scheduling class and priority of processes. I/O niceness
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
# 0 to 7. The higher the value, the lower the I/O priority of the process.
# Works only with ionice_class.
# ionice_class =
# ionice_priority =
# Note: Put the xprofile filter at the beginning of the pipeline to profile all
# middleware, but it is safer to put it after healthcheck.
[filter:xprofile]
use = egg:swift#xprofile
# This option enables you to switch profilers; the chosen profiler should
# inherit from the standard Python profiler. Currently supported values
# include 'cProfile' and 'eventlet.green.profile'.
# profile_module = eventlet.green.profile
#
# This prefix will be used to combine process ID and timestamp to name the
# profile data file. Make sure the executing user has permission to write
# into this path (missing path segments will be created, if necessary).
# If you enable profiling in more than one type of daemon, you must override
# it with a unique value like: /var/log/swift/profile/container.profile
# log_filename_prefix = /tmp/log/swift/profile/default.profile
#
# The profile data will be dumped to local disk at this interval, using the
# naming rule above.
# dump_interval = 5.0
#
# Be careful: this option makes the profiler dump data into timestamped files,
# which means lots of files will pile up in the directory.
# dump_timestamp = false
#
# This is the URL path used to access the mini web UI.
# path = /__profile__
#
# Clear the data when the wsgi server shuts down.
# flush_at_shutdown = false
#
# unwind the iterator of applications
# unwind = false
[container-sharder]
# You can override the default log routing for this app here (don't use set!):
# log_name = container-sharder
# log_facility = LOG_LOCAL0
# log_level = INFO
# log_address = /dev/log
#
# Container sharder specific settings
#
# If the auto_shard option is true then the sharder will automatically select
# containers to shard, scan for shard ranges, and select shards to shrink.
# The default is false.
# Warning: auto-sharding is still under development and should not be used in
# production; do not set this option to true in a production cluster.
# auto_shard = false
#
# When auto-sharding is enabled, shard_container_threshold defines the object
# count at which a container with container-sharding enabled will start to
# shard. shard_container_threshold also indirectly determines the initial
# nominal size of shard containers, which is shard_container_threshold // 2, as
# well as determining the thresholds for shrinking and merging shard
# containers.
# shard_container_threshold = 1000000
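#
# For example, with the default value of 1000000 a container with sharding
# enabled will start to shard once it holds 1000000 objects, and each of its
# shard containers will initially be sized at roughly 500000 objects
# (shard_container_threshold // 2).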
#
# When auto-sharding is enabled, shard_shrink_point defines the object count
# below which a 'donor' shard container will be considered for shrinking into
# another 'acceptor' shard container. shard_shrink_point is a percentage of
# shard_container_threshold, e.g. the default value of 5 means 5% of the
# shard_container_threshold.
# shard_shrink_point = 5
#
# When auto-sharding is enabled, shard_shrink_merge_point defines the maximum
# allowed size of an acceptor shard container after having a donor merged into
# it. shard_shrink_merge_point is a percentage of shard_container_threshold,
# e.g. the default value of 75 means that the projected sum of a donor object
# count and acceptor count must be less than 75% of shard_container_threshold
# for the donor to be allowed to merge into the acceptor.
#
# For example, if the shard_container_threshold is 1 million,
# shard_shrink_point is 5, and shard_shrink_merge_point is 75 then a shard will
# be considered for shrinking if it has less than or equal to 50 thousand
# objects but will only merge into an acceptor if the combined object count
# would be less than or equal to 750 thousand objects.
# shard_shrink_merge_point = 75
#
# When auto-sharding is enabled, shard_scanner_batch_size defines the maximum
# number of shard ranges that will be found each time the sharder daemon visits
# a sharding container. If necessary the sharder daemon will continue to search
# for more shard ranges each time it visits the container.
# shard_scanner_batch_size = 10
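#
# For example, with the default shard_container_threshold a container holding
# 10000000 objects would be split into roughly 20 shard ranges of about 500000
# objects each; with shard_scanner_batch_size = 10 the sharder would find the
# first 10 ranges on one visit and the remaining ranges on a later visit.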
#
# cleave_batch_size defines the number of shard ranges that will be cleaved
# each time the sharder daemon visits a sharding container.
# cleave_batch_size = 2
#
# cleave_row_batch_size defines the size of batches of object rows read from a
# sharding container and merged to a shard container during cleaving.
# cleave_row_batch_size = 10000
#
# Defines the number of successfully replicated shard dbs required when
# cleaving a previously uncleaved shard range before the sharder will progress
# to the next shard range. The value should be less than or equal to the
# container ring replica count. The default of 'auto' causes the container ring
# quorum value to be used. This option only applies to the container-sharder
# replication and does not affect the number of shard container replicas that
# will eventually be replicated by the container-replicator.
# shard_replication_quorum = auto
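#
# For example, with 3 container ring replicas the ring quorum is 2, so with
# the default of 'auto' the sharder requires 2 successful shard db
# replications before moving on to the next shard range.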
#
# Defines the number of successfully replicated shard dbs required when
# cleaving a shard range that has been previously cleaved on another node
# before the sharder will progress to the next shard range. The value should be
# less than or equal to the container ring replica count. The default of 'auto'
# causes the shard_replication_quorum value to be used. This option only
# applies to the container-sharder replication and does not affect the number
# of shard container replicas that will eventually be replicated by the
# container-replicator.
# existing_shard_replication_quorum = auto
#
# The sharder uses an internal client to create and make requests to
# containers. The absolute path to the client config file can be configured.
# internal_client_conf_path = /etc/swift/internal-client.conf
#
# The number of times the internal client will retry requests.
# request_tries = 3
#
# Each time the sharder dumps stats to the recon cache file it includes a list
# of containers that appear to need sharding but are not yet sharding. By
# default this list is limited to the top 5 containers, ordered by object
# count. The limit may be changed by setting recon_candidates_limit to an
# integer value. A negative value implies no limit.
# recon_candidates_limit = 5
#
# Large databases tend to take a while to work with, but we want to make sure
# we write down our progress. Use a larger-than-normal broker timeout to make
# us less likely to bomb out on a LockTimeout.
# broker_timeout = 60
#
# Time in seconds to wait between sharder cycles
# interval = 30
#
# The container-sharder accepts the following configuration options as defined
# in the container-replicator section:
#
# per_diff = 1000
# max_diffs = 100
# concurrency = 8
# node_timeout = 10
# conn_timeout = 0.5
# reclaim_age = 604800
# rsync_compress = no
# rsync_module = {replication_ip}::container
# recon_cache_path = /var/cache/swift
#