swift/etc/object-server.conf-sample
Tim Burke abfa6bee72 relinker: Parallelize per disk
Add a new option, workers, that works more or less like the same option
from background daemons. Disks will be distributed across N worker
sub-processes so we can make the best use of the I/O available.

While we're at it, log final stats at warning if there were errors.

Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com>
Change-Id: I039d2b8861f69a64bd9d2cdf68f1f534c236b2ba
2021-04-05 12:15:56 -07:00

641 lines
25 KiB
Plaintext

[DEFAULT]
# bind_ip = 0.0.0.0
bind_port = 6200
# keep_idle = 600
# bind_timeout = 30
# backlog = 4096
# user = swift
# swift_dir = /etc/swift
# devices = /srv/node
# mount_check = true
# disable_fallocate = false
# expiring_objects_container_divisor = 86400
# expiring_objects_account_name = expiring_objects
#
# Use an integer to override the number of pre-forked processes that will
# accept connections. NOTE: if servers_per_port is set, this setting is
# ignored.
# workers = auto
#
# Make object-server run this many worker processes per unique port of "local"
# ring devices across all storage policies. The default value of 0 disables this
# feature.
# servers_per_port = 0
#
# Maximum concurrent requests per worker
# max_clients = 1024
#
# You can specify default log routing here if you want:
# log_name = swift
# log_facility = LOG_LOCAL0
# log_level = INFO
# log_address = /dev/log
# The following caps the length of log lines to the value given; no limit if
# set to 0, the default.
# log_max_line_length = 0
#
# Hashing algorithm for log anonymization. Must be one of algorithms supported
# by Python's hashlib.
# log_anonymization_method = MD5
#
# Salt added during log anonymization
# log_anonymization_salt =
#
# Template used to format logs. All words surrounded by curly brackets
# will be substituted with the appropriate values
# log_format = {remote_addr} - - [{time.d}/{time.b}/{time.Y}:{time.H}:{time.M}:{time.S} +0000] "{method} {path}" {status} {content_length} "{referer}" "{txn_id}" "{user_agent}" {trans_time:.4f} "{additional_info}" {pid} {policy_index}
#
# comma separated list of functions to call to setup custom log handlers.
# functions get passed: conf, name, log_to_console, log_route, fmt, logger,
# adapted_logger
# log_custom_handlers =
#
# If set, log_udp_host will override log_address
# log_udp_host =
# log_udp_port = 514
#
# You can enable StatsD logging here:
# log_statsd_host =
# log_statsd_port = 8125
# log_statsd_default_sample_rate = 1.0
# log_statsd_sample_rate_factor = 1.0
# log_statsd_metric_prefix =
#
# eventlet_debug = false
#
# You can set fallocate_reserve to the number of bytes or percentage of disk
# space you'd like fallocate to reserve, whether there is space for the given
# file size or not. Percentage will be used if the value ends with a '%'.
# fallocate_reserve = 1%
#
# Time to wait while attempting to connect to another backend node.
# conn_timeout = 0.5
# Time to wait while sending each chunk of data to another backend node.
# node_timeout = 3
# Time to wait while sending a container update on object update.
# container_update_timeout = 1.0
# Time to wait while receiving each chunk of data from a client or another
# backend node.
# client_timeout = 60.0
#
# network_chunk_size = 65536
# disk_chunk_size = 65536
#
# Reclamation of tombstone files is performed primarily by the replicator and
# the reconstructor but the object-server and object-auditor also reference
# this value - it should be the same for all object services in the cluster,
# and not greater than the container services reclaim_age
# reclaim_age = 604800
#
# You can set scheduling priority of processes. Niceness values range from -20
# (most favorable to the process) to 19 (least favorable to the process).
# nice_priority =
#
# You can set I/O scheduling class and priority of processes. I/O niceness
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
# 0 to 7. The higher the value, the lower the I/O priority of the process.
# Work only with ionice_class.
# ionice_class =
# ionice_priority =
[pipeline:main]
pipeline = healthcheck recon object-server
[app:object-server]
use = egg:swift#object
# You can override the default log routing for this app here:
# set log_name = object-server
# set log_facility = LOG_LOCAL0
# set log_level = INFO
# set log_requests = true
# set log_address = /dev/log
#
# max_upload_time = 86400
#
# slow is the total amount of seconds an object PUT/DELETE request takes at
# least. If it is faster, the object server will sleep this amount of time minus
# the already passed transaction time. This is only useful for simulating slow
# devices on storage nodes during testing and development.
# slow = 0
#
# Objects smaller than this are not evicted from the buffercache once read
# keep_cache_size = 5242880
#
# If true, objects for authenticated GET requests may be kept in buffer cache
# if small enough
# keep_cache_private = false
#
# on PUTs, sync data every n MB
# mb_per_sync = 512
#
# Comma separated list of headers that can be set in metadata on an object.
# This list is in addition to X-Object-Meta-* headers and cannot include
# Content-Type, etag, Content-Length, or deleted
# allowed_headers = Content-Disposition, Content-Encoding, X-Delete-At, X-Object-Manifest, X-Static-Large-Object, Cache-Control, Content-Language, Expires, X-Robots-Tag
# The number of threads in eventlet's thread pool. Most IO will occur
# in the object server's main thread, but certain "heavy" IO
# operations will occur in separate IO threads, managed by eventlet.
#
# The default value is auto, whose actual value is dependent on the
# servers_per_port value:
#
# - When servers_per_port is zero, the default value of
# eventlet_tpool_num_threads is empty, which uses eventlet's default
# (currently 20 threads).
#
# - When servers_per_port is nonzero, the default value of
# eventlet_tpool_num_threads is 1.
#
# But you may override this value to any integer value.
#
# Note that this value is threads per object-server process, so to
# compute the total number of IO threads on a node, you must multiply
# this by the number of object-server processes on the node.
#
# eventlet_tpool_num_threads = auto
# You can disable REPLICATE and SSYNC handling (default is to allow it). When
# deploying a cluster with a separate replication network, you'll want multiple
# object-server processes running: one for client-driven traffic and another
# for replication traffic. The server handling client-driven traffic may set
# this to false. If there is only one object-server process, leave this as
# true.
# replication_server = true
#
# Set to restrict the number of concurrent incoming SSYNC requests
# Set to 0 for unlimited
# Note that SSYNC requests are only used by the object reconstructor or the
# object replicator when configured to use ssync.
# replication_concurrency = 4
#
# Set to restrict the number of concurrent incoming SSYNC requests per
# device; set to 0 for unlimited requests per device. This can help control
# I/O to each device. This does not override replication_concurrency described
# above, so you may need to adjust both parameters depending on your hardware
# or network capacity.
# replication_concurrency_per_device = 1
#
# Number of seconds to wait for an existing replication device lock before
# giving up.
# replication_lock_timeout = 15
#
# These next two settings control when the SSYNC subrequest handler will
# abort an incoming SSYNC attempt. An abort will occur if there are at
# least threshold number of failures and the value of failures / successes
# exceeds the ratio. The defaults of 100 and 1.0 means that at least 100
# failures have to occur and there have to be more failures than successes for
# an abort to occur.
# replication_failure_threshold = 100
# replication_failure_ratio = 1.0
#
# Use splice() for zero-copy object GETs. This requires Linux kernel
# version 3.0 or greater. If you set "splice = yes" but the kernel
# does not support it, error messages will appear in the object server
# logs at startup, but your object servers should continue to function.
#
# splice = no
#
# You can set scheduling priority of processes. Niceness values range from -20
# (most favorable to the process) to 19 (least favorable to the process).
# nice_priority =
#
# You can set I/O scheduling class and priority of processes. I/O niceness
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
# 0 to 7. The higher the value, the lower the I/O priority of the process.
# Work only with ionice_class.
# ionice_class =
# ionice_priority =
[filter:healthcheck]
use = egg:swift#healthcheck
# An optional filesystem path, which if present, will cause the healthcheck
# URL to return "503 Service Unavailable" with a body of "DISABLED BY FILE"
# disable_path =
[filter:recon]
use = egg:swift#recon
#recon_cache_path = /var/cache/swift
#recon_lock_path = /var/lock
[object-replicator]
# You can override the default log routing for this app here (don't use set!):
# log_name = object-replicator
# log_facility = LOG_LOCAL0
# log_level = INFO
# log_address = /dev/log
#
# daemonize = on
#
# Time in seconds to wait between replication passes
# interval = 30
# run_pause is deprecated, use interval instead
# run_pause = 30
#
# Number of concurrent replication jobs to run. This is per-process,
# so replicator_workers=W and concurrency=C will result in W*C
# replication jobs running at once.
# concurrency = 1
#
# Number of worker processes to use. No matter how big this number is,
# at most one worker per disk will be used. 0 means no forking; all work
# is done in the main process.
# replicator_workers = 0
#
# stats_interval = 300
#
# default is rsync, alternative is ssync
# sync_method = rsync
#
# max duration of a partition rsync
# rsync_timeout = 900
#
# bandwidth limit for rsync in kB/s. 0 means unlimited
# rsync_bwlimit = 0
#
# passed to rsync for io op timeout
# rsync_io_timeout = 30
#
# Allow rsync to compress data which is transmitted to destination node
# during sync. However, this is applicable only when destination node is in
# a different region than the local one.
# NOTE: Objects that are already compressed (for example: .tar.gz, .mp3) might
# slow down the syncing process.
# rsync_compress = no
#
# Format of the rsync module where the replicator will send data. See
# etc/rsyncd.conf-sample for some usage examples.
# rsync_module = {replication_ip}::object
#
# node_timeout = <whatever's in the DEFAULT section or 10>
# max duration of an http request; this is for REPLICATE finalization calls and
# so should be longer than node_timeout
# http_timeout = 60
#
# attempts to kill all workers if nothing replicates for lockup_timeout seconds
# lockup_timeout = 1800
#
# ring_check_interval = 15
# recon_cache_path = /var/cache/swift
#
# limits how long rsync error log lines are
# 0 means to log the entire line
# rsync_error_log_line_length = 0
#
# handoffs_first and handoff_delete are options for a special case
# such as disk full in the cluster. These two options SHOULD NOT BE
# CHANGED, except for such an extreme situations. (e.g. disks filled up
# or are about to fill up. Anyway, DO NOT let your drives fill up)
# handoffs_first is the flag to replicate handoffs prior to canonical
# partitions. It allows to force syncing and deleting handoffs quickly.
# If set to a True value(e.g. "True" or "1"), partitions
# that are not supposed to be on the node will be replicated first.
# handoffs_first = False
#
# handoff_delete is the number of replicas which are ensured in swift.
# If the number less than the number of replicas is set, object-replicator
# could delete local handoffs even if all replicas are not ensured in the
# cluster. Object-replicator would remove local handoff partition directories
# after syncing partition when the number of successful responses is greater
# than or equal to this number. By default(auto), handoff partitions will be
# removed when it has successfully replicated to all the canonical nodes.
# handoff_delete = auto
#
# You can set scheduling priority of processes. Niceness values range from -20
# (most favorable to the process) to 19 (least favorable to the process).
# nice_priority =
#
# You can set I/O scheduling class and priority of processes. I/O niceness
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
# 0 to 7. The higher the value, the lower the I/O priority of the process.
# Work only with ionice_class.
# ionice_class =
# ionice_priority =
[object-reconstructor]
# You can override the default log routing for this app here (don't use set!):
# Unless otherwise noted, each setting below has the same meaning as described
# in the [object-replicator] section, however these settings apply to the EC
# reconstructor
#
# log_name = object-reconstructor
# log_facility = LOG_LOCAL0
# log_level = INFO
# log_address = /dev/log
#
# daemonize = on
#
# Time in seconds to wait between reconstruction passes
# interval = 30
# run_pause is deprecated, use interval instead
# run_pause = 30
#
# Maximum number of worker processes to spawn. Each worker will handle a
# subset of devices. Devices will be assigned evenly among the workers so that
# workers cycle at similar intervals (which can lead to fewer workers than
# requested). You can not have more workers than devices. If you have no
# devices only a single worker is spawned.
# reconstructor_workers = 0
#
# concurrency = 1
# stats_interval = 300
# node_timeout = 10
# http_timeout = 60
# lockup_timeout = 1800
# ring_check_interval = 15
# recon_cache_path = /var/cache/swift
# The handoffs_only mode option is for special case emergency situations during
# rebalance such as disk full in the cluster. This option SHOULD NOT BE
# CHANGED, except for extreme situations. When handoffs_only mode is enabled
# the reconstructor will *only* revert fragments from handoff nodes to primary
# nodes and will not sync primary nodes with neighboring primary nodes. This
# will force the reconstructor to sync and delete handoffs' fragments more
# quickly and minimize the time of the rebalance by limiting the number of
# rebuilds. The handoffs_only option is only for temporary use and should be
# disabled as soon as the emergency situation has been resolved. When
# handoffs_only is not set, the deprecated handoffs_first option will be
# honored as a synonym, but may be ignored in a future release.
# handoffs_only = False
#
# The default strategy for unmounted drives will stage rebuilt data on a
# handoff node until updated rings are deployed. Because fragments are rebuilt
# on offset handoffs based on fragment index and the proxy limits how deep it
# will search for EC frags we restrict how many nodes we'll try. Setting to 0
# will disable rebuilds to handoffs and only rebuild fragments for unmounted
# devices to mounted primaries after a ring change.
# Setting to -1 means "no limit".
# rebuild_handoff_node_count = 2
#
# You can set scheduling priority of processes. Niceness values range from -20
# (most favorable to the process) to 19 (least favorable to the process).
# nice_priority =
#
# You can set I/O scheduling class and priority of processes. I/O niceness
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
# 0 to 7. The higher the value, the lower the I/O priority of the process.
# Work only with ionice_class.
# ionice_class =
# ionice_priority =
#
# When upgrading from liberasurecode<=1.5.0, you may want to continue writing
# legacy CRCs until all nodes are upgraded and capabale of reading fragments
# with zlib CRCs. liberasurecode>=1.6.2 checks for the environment variable
# LIBERASURECODE_WRITE_LEGACY_CRC; if set (value doesn't matter), it will use
# its legacy CRC. Set this option to true or false to ensure the environment
# variable is or is not set. Leave the option blank or absent to not touch
# the environment (default). For more information, see
# https://bugs.launchpad.net/liberasurecode/+bug/1886088
# write_legacy_ec_crc =
[object-updater]
# You can override the default log routing for this app here (don't use set!):
# log_name = object-updater
# log_facility = LOG_LOCAL0
# log_level = INFO
# log_address = /dev/log
#
# interval = 300
# node_timeout = <whatever's in the DEFAULT section or 10>
#
# updater_workers controls how many processes the object updater will
# spawn, while concurrency controls how many async_pending records
# each updater process will operate on at any one time. With
# concurrency=C and updater_workers=W, there will be up to W*C
# async_pending records being processed at once.
# concurrency = 8
# updater_workers = 1
#
# Send at most this many object updates per second
# objects_per_second = 50
#
# slowdown will sleep that amount between objects. Deprecated; use
# objects_per_second instead.
# slowdown = 0.01
#
# Log stats (at INFO level) every report_interval seconds. This
# logging is per-process, so with concurrency > 1, the logs will
# contain one stats log per worker process every report_interval
# seconds.
# report_interval = 300
#
# recon_cache_path = /var/cache/swift
#
# You can set scheduling priority of processes. Niceness values range from -20
# (most favorable to the process) to 19 (least favorable to the process).
# nice_priority =
#
# You can set I/O scheduling class and priority of processes. I/O niceness
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
# 0 to 7. The higher the value, the lower the I/O priority of the process.
# Work only with ionice_class.
# ionice_class =
# ionice_priority =
[object-auditor]
# You can override the default log routing for this app here (don't use set!):
# log_name = object-auditor
# log_facility = LOG_LOCAL0
# log_level = INFO
# log_address = /dev/log
#
# Time in seconds to wait between auditor passes
# interval = 30
#
# You can set the disk chunk size that the auditor uses making it larger if
# you like for more efficient local auditing of larger objects
# disk_chunk_size = 65536
# files_per_second = 20
# concurrency = 1
# bytes_per_second = 10000000
# log_time = 3600
# zero_byte_files_per_second = 50
# recon_cache_path = /var/cache/swift
# Takes a comma separated list of ints. If set, the object auditor will
# increment a counter for every object whose size is <= to the given break
# points and report the result after a full scan.
# object_size_stats =
#
# You can set scheduling priority of processes. Niceness values range from -20
# (most favorable to the process) to 19 (least favorable to the process).
# nice_priority =
#
# You can set I/O scheduling class and priority of processes. I/O niceness
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
# 0 to 7. The higher the value, the lower the I/O priority of the process.
# Work only with ionice_class.
# ionice_class =
# ionice_priority =
# The auditor will cleanup old rsync tempfiles after they are "old
# enough" to delete. You can configure the time elapsed in seconds
# before rsync tempfiles will be unlinked, or the default value of
# "auto" try to use object-replicator's rsync_timeout + 900 and fallback
# to 86400 (1 day).
# rsync_tempfile_timeout = auto
# A comma-separated list of watcher entry points. This lets operators
# programmatically see audited objects.
#
# The entry point group name is "swift.object_audit_watcher". If your
# setup.py has something like this:
#
# entry_points={'swift.object_audit_watcher': [
# 'some_watcher = some_module:Watcher']}
#
# then you would enable it with "watchers = some_package#some_watcher".
# For example, the built-in reference implementation is enabled as
# "watchers = swift#dark_data".
#
# watchers =
# Watcher-specific parameters can he added after "object-auditor:watcher:"
# like the following (note that entry points are qualified by package#):
#
# [object-auditor:watcher:swift#dark_data]
# action=log
[object-expirer]
# If this true, this expirer will execute tasks from legacy expirer task queue,
# at least one object server should run with dequeue_from_legacy = true
# dequeue_from_legacy = false
#
# Note: Be careful not to enable ``dequeue_from_legacy`` on too many expirers
# as all legacy tasks are stored in a single hidden account and the same hidden
# containers. On a large cluster one may inadvertently make the
# acccount/container server for the hidden too busy.
#
# Note: the processes and process options can only be used in conjunction with
# notes using `dequeue_from_legacy = true`. These options are ignored on nodes
# with `dequeue_from_legacy = false`.
#
# processes is how many parts to divide the legacy work into, one part per
# process that will be doing the work
# processes set 0 means that a single legacy process will be doing all the work
# processes can also be specified on the command line and will override the
# config value
# processes = 0
#
# process is which of the parts a particular legacy process will work on
# process can also be specified on the command line and will override the config
# value
# process is "zero based", if you want to use 3 processes, you should run
# processes with process set to 0, 1, and 2
# process = 0
#
# internal_client_conf_path = /etc/swift/internal-client.conf
#
# You can override the default log routing for this app here (don't use set!):
# log_name = object-expirer
# log_facility = LOG_LOCAL0
# log_level = INFO
# log_address = /dev/log
#
# interval = 300
#
# report_interval = 300
#
# request_tries is the number of times the expirer's internal client will
# attempt any given request in the event of failure. The default is 3.
# request_tries = 3
#
# concurrency is the level of concurrency to use to do the work, this value
# must be set to at least 1
# concurrency = 1
#
# deletes can be ratelimited to prevent the expirer from overwhelming the cluster
# tasks_per_second = 50.0
#
# The expirer will re-attempt expiring if the source object is not available
# up to reclaim_age seconds before it gives up and deletes the entry in the
# queue.
# reclaim_age = 604800
#
# recon_cache_path = /var/cache/swift
#
# You can set scheduling priority of processes. Niceness values range from -20
# (most favorable to the process) to 19 (least favorable to the process).
# nice_priority =
#
# You can set I/O scheduling class and priority of processes. I/O niceness
# class values are realtime, best-effort and idle. I/O niceness
# priority is a number which goes from 0 to 7. The higher the value, the lower
# the I/O priority of the process. Work only with ionice_class.
# ionice_class =
# ionice_priority =
#
# Note: Put it at the beginning of the pipleline to profile all middleware. But
# it is safer to put this after healthcheck.
[filter:xprofile]
use = egg:swift#xprofile
# This option enable you to switch profilers which should inherit from python
# standard profiler. Currently the supported value can be 'cProfile',
# 'eventlet.green.profile' etc.
# profile_module = eventlet.green.profile
#
# This prefix will be used to combine process ID and timestamp to name the
# profile data file. Make sure the executing user has permission to write
# into this path (missing path segments will be created, if necessary).
# If you enable profiling in more than one type of daemon, you must override
# it with an unique value like: /var/log/swift/profile/object.profile
# log_filename_prefix = /tmp/log/swift/profile/default.profile
#
# the profile data will be dumped to local disk based on above naming rule
# in this interval.
# dump_interval = 5.0
#
# Be careful, this option will enable profiler to dump data into the file with
# time stamp which means there will be lots of files piled up in the directory.
# dump_timestamp = false
#
# This is the path of the URL to access the mini web UI.
# path = /__profile__
#
# Clear the data when the wsgi server shutdown.
# flush_at_shutdown = false
#
# unwind the iterator of applications
# unwind = false
[object-relinker]
# You can override the default log routing for this app here (don't use set!):
# log_name = object-relinker
# log_facility = LOG_LOCAL0
# log_level = INFO
# log_address = /dev/log
#
# Start up to this many sub-processes to process disks in parallel. Each disk
# will be handled by at most one child process. By default, one process is
# spawned per disk.
# workers = auto
#
# Target this many relinks/cleanups per second for each worker, to reduce the
# likelihood that the added I/O from a partition-power increase impacts
# client traffic. Use zero for unlimited.
# files_per_second = 0.0
#
# Maximum number of partition power locations to check for a valid link target
# if the relinker encounters an existing tombstone, but with different inode,
# in the next partition power location. If the relinker fails to make a link
# because a different tombstone already exists in the next partition power
# location then it will try to validate that the existing tombstone links to a
# valid target in the current partition power location, or previous partition
# power locations, in descending order. This option limits the number of
# partition power locations searched, including the current partition power,
# and should be a whole number. A value of 0 implies that no validation is
# attempted, and an error is logged, when an existing tombstone prevents link
# creation. A value of 1 implies that an existing link is accepted if it links
# to a tombstone in the current partition power location. The default value of
# 2 implies that an existing link is acceptable if it links to a tombstone in
# the current or previous partition power locations. Increased values may be
# useful if previous partition power increases have failed to cleanup
# tombstones from their old locations, causing duplicate tombstones with
# different inodes to be relinked to the next partition power location.
# link_check_limit = 2