ae6300af86
Add a new tunable, `stale_worker_timeout`, defaulting to 86400 (i.e. 24 hours). Once this time elapses following a reload, the manager process will issue SIGKILLs to any remaining stale workers. This gives operators a way to configure a limit for how long old code and configs may still be running in their cluster. To enable this, the temporary reload child (which waits for the reload to complete then closes the accept socket on all the old workers) has grown the ability to send state to the re-exec'ed manager. Currently, this is limited to just the set of pre-re-exec child PIDs and their reload times, though it was designed to be reasonably extensible. This allows the new manager to recognize stale workers as they exit instead of logging Ignoring wait() result from unknown PID ... With the improved knowledge of subprocesses, we can kick the log level for the above message up from info to warning; we no longer expect it to trigger in practice. Drive-by: Add logging to ServersPerPortStrategy.register_worker_exit that's comparable to what WorkersStrategy does. Change-Id: I8227939d04fda8db66fb2f131f2c71ce8741c7d9
798 lines
33 KiB
Plaintext
798 lines
33 KiB
Plaintext
[DEFAULT]
|
|
# bind_ip = 0.0.0.0
|
|
bind_port = 6200
|
|
# keep_idle = 600
|
|
# bind_timeout = 30
|
|
# backlog = 4096
|
|
# user = swift
|
|
# swift_dir = /etc/swift
|
|
# devices = /srv/node
|
|
# mount_check = true
|
|
# disable_fallocate = false
|
|
# expiring_objects_container_divisor = 86400
|
|
# expiring_objects_account_name = expiring_objects
|
|
#
|
|
# Use an integer to override the number of pre-forked processes that will
|
|
# accept connections. NOTE: if servers_per_port is set, this setting is
|
|
# ignored.
|
|
# workers = auto
|
|
#
|
|
# Make object-server run this many worker processes per unique port of "local"
|
|
# ring devices across all storage policies. The default value of 0 disables this
|
|
# feature.
|
|
# servers_per_port = 0
|
|
#
|
|
# If running in a container, servers_per_port may not be able to use the
|
|
# bind_ip to lookup the ports in the ring. You may instead override the port
|
|
# lookup in the ring using the ring_ip. Any devices/ports associated with the
|
|
# ring_ip will be used when listening on the configured bind_ip address.
|
|
# ring_ip = <bind_ip>
|
|
#
|
|
# Maximum concurrent requests per worker
|
|
# max_clients = 1024
|
|
#
|
|
# You can specify default log routing here if you want:
|
|
# log_name = swift
|
|
# log_facility = LOG_LOCAL0
|
|
# log_level = INFO
|
|
# log_address = /dev/log
|
|
# The following caps the length of log lines to the value given; no limit if
|
|
# set to 0, the default.
|
|
# log_max_line_length = 0
|
|
#
|
|
# Hashing algorithm for log anonymization. Must be one of algorithms supported
|
|
# by Python's hashlib.
|
|
# log_anonymization_method = MD5
|
|
#
|
|
# Salt added during log anonymization
|
|
# log_anonymization_salt =
|
|
#
|
|
# Template used to format logs. All words surrounded by curly brackets
|
|
# will be substituted with the appropriate values
|
|
# log_format = {remote_addr} - - [{time.d}/{time.b}/{time.Y}:{time.H}:{time.M}:{time.S} +0000] "{method} {path}" {status} {content_length} "{referer}" "{txn_id}" "{user_agent}" {trans_time:.4f} "{additional_info}" {pid} {policy_index}
|
|
#
|
|
# comma separated list of functions to call to setup custom log handlers.
|
|
# functions get passed: conf, name, log_to_console, log_route, fmt, logger,
|
|
# adapted_logger
|
|
# log_custom_handlers =
|
|
#
|
|
# If set, log_udp_host will override log_address
|
|
# log_udp_host =
|
|
# log_udp_port = 514
|
|
#
|
|
# You can enable StatsD logging here:
|
|
# log_statsd_host =
|
|
# log_statsd_port = 8125
|
|
# log_statsd_default_sample_rate = 1.0
|
|
# log_statsd_sample_rate_factor = 1.0
|
|
# log_statsd_metric_prefix =
|
|
#
|
|
# eventlet_debug = false
|
|
#
|
|
# You can set fallocate_reserve to the number of bytes or percentage of disk
|
|
# space you'd like fallocate to reserve, whether there is space for the given
|
|
# file size or not. Percentage will be used if the value ends with a '%'.
|
|
# fallocate_reserve = 1%
|
|
#
|
|
# Time to wait while attempting to connect to another backend node.
|
|
# conn_timeout = 0.5
|
|
# Time to wait while sending each chunk of data to another backend node.
|
|
# node_timeout = 3
|
|
# Time to wait while sending a container update on object update.
|
|
# container_update_timeout = 1.0
|
|
# Time to wait while receiving each chunk of data from a client or another
|
|
# backend node.
|
|
# client_timeout = 60.0
|
|
#
|
|
# network_chunk_size = 65536
|
|
# disk_chunk_size = 65536
|
|
#
|
|
# Reclamation of tombstone files is performed primarily by the replicator and
|
|
# the reconstructor but the object-server and object-auditor also reference
|
|
# this value - it should be the same for all object services in the cluster,
|
|
# and not greater than the container services reclaim_age
|
|
# reclaim_age = 604800
|
|
#
|
|
# Non-durable data files may also get reclaimed if they are older than
|
|
# reclaim_age, but not if the time they were written to disk (i.e. mtime) is
|
|
# less than commit_window seconds ago. The commit_window also prevents the
|
|
# reconstructor removing recently written non-durable data files from a handoff
|
|
# node after reverting them to a primary. This gives the object-server a window
|
|
# in which to finish a concurrent PUT on a handoff and mark the data durable. A
|
|
# commit_window greater than zero is strongly recommended to avoid unintended
|
|
# removal of data files that were about to become durable; commit_window should
|
|
# be much less than reclaim_age.
|
|
# commit_window = 60.0
|
|
#
|
|
# You can set scheduling priority of processes. Niceness values range from -20
|
|
# (most favorable to the process) to 19 (least favorable to the process).
|
|
# nice_priority =
|
|
#
|
|
# You can set I/O scheduling class and priority of processes. I/O niceness
|
|
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
|
|
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
|
|
# 0 to 7. The higher the value, the lower the I/O priority of the process.
|
|
# Work only with ionice_class.
|
|
# ionice_class =
|
|
# ionice_priority =
|
|
|
|
[pipeline:main]
|
|
pipeline = healthcheck recon backend_ratelimit object-server
|
|
|
|
[app:object-server]
|
|
use = egg:swift#object
|
|
# You can override the default log routing for this app here:
|
|
# set log_name = object-server
|
|
# set log_facility = LOG_LOCAL0
|
|
# set log_level = INFO
|
|
# set log_requests = true
|
|
# set log_address = /dev/log
|
|
#
|
|
# max_upload_time = 86400
|
|
#
|
|
# slow is the total amount of seconds an object PUT/DELETE request takes at
|
|
# least. If it is faster, the object server will sleep this amount of time minus
|
|
# the already passed transaction time. This is only useful for simulating slow
|
|
# devices on storage nodes during testing and development.
|
|
# slow = 0
|
|
#
|
|
# Objects smaller than this are not evicted from the buffercache once read
|
|
# keep_cache_size = 5242880
|
|
#
|
|
# If true, objects for authenticated GET requests may be kept in buffer cache
|
|
# if small enough
|
|
# keep_cache_private = false
|
|
#
|
|
# If true, SLO object's manifest file for GET requests may be kept in buffer cache
|
|
# if smaller than 'keep_cache_size'. And this config will only matter when
|
|
# 'keep_cache_private' is false.
|
|
# keep_cache_slo_manifest = false
|
|
#
|
|
# cooperative_period defines how frequently object server GET/PUT requests will
|
|
# perform the cooperative yielding during iterating the disk chunks. For
|
|
# example, value of '5' will insert one sleep() after every 5 disk_chunk_size
|
|
# chunk reads/writes. A value of '0' (the default) will turn off cooperative
|
|
# yielding.
|
|
# cooperative_period = 0
|
|
#
|
|
# By default, the object-server will always validate the MD5 of object data
|
|
# while streaming a complete object response. Occasionally this is identified
|
|
# as a CPU bottleneck, consuming as much as 40% of the CPU time of the
|
|
# object-server. Since range-request-heavy clients don't get these integrity
|
|
# checks, it seems reasonable to give operators a chance to tune it down and
|
|
# instead rely on the object-auditor to detect and quarantine corrupted objects.
|
|
# etag_validate_pct = 100
|
|
#
|
|
# on PUTs, sync data every n MB
|
|
# mb_per_sync = 512
|
|
#
|
|
# Comma separated list of headers that can be set in metadata on an object.
|
|
# This list is in addition to X-Object-Meta-* headers and cannot include
|
|
# Content-Type, etag, Content-Length, or deleted
|
|
# allowed_headers = Content-Disposition, Content-Encoding, X-Delete-At, X-Object-Manifest, X-Static-Large-Object, Cache-Control, Content-Language, Expires, X-Robots-Tag
|
|
|
|
# The number of threads in eventlet's thread pool. Most IO will occur
|
|
# in the object server's main thread, but certain "heavy" IO
|
|
# operations will occur in separate IO threads, managed by eventlet.
|
|
#
|
|
# The default value is auto, whose actual value is dependent on the
|
|
# servers_per_port value:
|
|
#
|
|
# - When servers_per_port is zero, the default value of
|
|
# eventlet_tpool_num_threads is empty, which uses eventlet's default
|
|
# (currently 20 threads).
|
|
#
|
|
# - When servers_per_port is nonzero, the default value of
|
|
# eventlet_tpool_num_threads is 1.
|
|
#
|
|
# But you may override this value to any integer value.
|
|
#
|
|
# Note that this value is threads per object-server process, so to
|
|
# compute the total number of IO threads on a node, you must multiply
|
|
# this by the number of object-server processes on the node.
|
|
#
|
|
# eventlet_tpool_num_threads = auto
|
|
|
|
# You can disable REPLICATE and SSYNC handling (default is to allow it). When
|
|
# deploying a cluster with a separate replication network, you'll want multiple
|
|
# object-server processes running: one for client-driven traffic and another
|
|
# for replication traffic. The server handling client-driven traffic may set
|
|
# this to false. If there is only one object-server process, leave this as
|
|
# true.
|
|
# replication_server = true
|
|
#
|
|
# Set to restrict the number of concurrent incoming SSYNC requests
|
|
# Set to 0 for unlimited
|
|
# Note that SSYNC requests are only used by the object reconstructor or the
|
|
# object replicator when configured to use ssync.
|
|
# replication_concurrency = 4
|
|
#
|
|
# Set to restrict the number of concurrent incoming SSYNC requests per
|
|
# device; set to 0 for unlimited requests per device. This can help control
|
|
# I/O to each device. This does not override replication_concurrency described
|
|
# above, so you may need to adjust both parameters depending on your hardware
|
|
# or network capacity.
|
|
# replication_concurrency_per_device = 1
|
|
#
|
|
# Number of seconds to wait for an existing replication device lock before
|
|
# giving up.
|
|
# replication_lock_timeout = 15
|
|
#
|
|
# These next two settings control when the SSYNC subrequest handler will
|
|
# abort an incoming SSYNC attempt. An abort will occur if there are at
|
|
# least threshold number of failures and the value of failures / successes
|
|
# exceeds the ratio. The defaults of 100 and 1.0 means that at least 100
|
|
# failures have to occur and there have to be more failures than successes for
|
|
# an abort to occur.
|
|
# replication_failure_threshold = 100
|
|
# replication_failure_ratio = 1.0
|
|
#
|
|
# Use splice() for zero-copy object GETs. This requires Linux kernel
|
|
# version 3.0 or greater. If you set "splice = yes" but the kernel
|
|
# does not support it, error messages will appear in the object server
|
|
# logs at startup, but your object servers should continue to function.
|
|
#
|
|
# splice = no
|
|
#
|
|
# You can set scheduling priority of processes. Niceness values range from -20
|
|
# (most favorable to the process) to 19 (least favorable to the process).
|
|
# nice_priority =
|
|
#
|
|
# You can set I/O scheduling class and priority of processes. I/O niceness
|
|
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
|
|
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
|
|
# 0 to 7. The higher the value, the lower the I/O priority of the process.
|
|
# Work only with ionice_class.
|
|
# ionice_class =
|
|
# ionice_priority =
|
|
#
|
|
# When reloading servers with SIGUSR1, workers running with old config/code
|
|
# are allowed some time to finish serving in-flight requests. Use this to
|
|
# configure the grace period (in seconds), after which the reloaded server
|
|
# will issue SIGKILLs to remaining stale workers.
|
|
# stale_worker_timeout = 86400
|
|
|
|
[filter:healthcheck]
|
|
use = egg:swift#healthcheck
|
|
# An optional filesystem path, which if present, will cause the healthcheck
|
|
# URL to return "503 Service Unavailable" with a body of "DISABLED BY FILE"
|
|
# disable_path =
|
|
|
|
[filter:recon]
|
|
use = egg:swift#recon
|
|
#recon_cache_path = /var/cache/swift
|
|
#recon_lock_path = /var/lock
|
|
|
|
[filter:backend_ratelimit]
|
|
use = egg:swift#backend_ratelimit
|
|
# Config options can optionally be loaded from a separate config file. Config
|
|
# options in this section will be used unless the same option is found in the
|
|
# config file, in which case the config file option will be used. See the
|
|
# backend-ratelimit.conf-sample file for details of available config options.
|
|
# backend_ratelimit_conf_path = /etc/swift/backend-ratelimit.conf
|
|
|
|
# The minimum interval between attempts to reload any config file at
|
|
# backend_ratelimit_conf_path while the server is running. A value of 0 means
|
|
# that the file is loaded at start-up but not subsequently reloaded. Note that
|
|
# config options in this section are never reloaded after start-up.
|
|
# config_reload_interval = 60
|
|
|
|
[object-replicator]
|
|
# You can override the default log routing for this app here (don't use set!):
|
|
# log_name = object-replicator
|
|
# log_facility = LOG_LOCAL0
|
|
# log_level = INFO
|
|
# log_address = /dev/log
|
|
#
|
|
# daemonize = on
|
|
#
|
|
# Time in seconds to wait between replication passes
|
|
# interval = 30.0
|
|
# run_pause is deprecated, use interval instead
|
|
# run_pause = 30.0
|
|
#
|
|
# Number of concurrent replication jobs to run. This is per-process,
|
|
# so replicator_workers=W and concurrency=C will result in W*C
|
|
# replication jobs running at once.
|
|
# concurrency = 1
|
|
#
|
|
# Number of worker processes to use. No matter how big this number is,
|
|
# at most one worker per disk will be used. 0 means no forking; all work
|
|
# is done in the main process.
|
|
# replicator_workers = 0
|
|
#
|
|
# stats_interval = 300.0
|
|
#
|
|
# default is rsync, alternative is ssync
|
|
# sync_method = rsync
|
|
#
|
|
# max duration of a partition rsync
|
|
# rsync_timeout = 900
|
|
#
|
|
# bandwidth limit for rsync in kB/s. 0 means unlimited. rsync 3.2.2 and later
|
|
# accept suffixed values like 10M or 1.5G; see the --bwlimit option for rsync(1)
|
|
# rsync_bwlimit = 0
|
|
#
|
|
# passed to rsync for both io op timeout and connection timeout
|
|
# rsync_io_timeout = 30
|
|
#
|
|
# Allow rsync to compress data which is transmitted to destination node
|
|
# during sync. However, this is applicable only when destination node is in
|
|
# a different region than the local one.
|
|
# NOTE: Objects that are already compressed (for example: .tar.gz, .mp3) might
|
|
# slow down the syncing process.
|
|
# rsync_compress = no
|
|
#
|
|
# Format of the rsync module where the replicator will send data. See
|
|
# etc/rsyncd.conf-sample for some usage examples.
|
|
# rsync_module = {replication_ip}::object
|
|
#
|
|
# node_timeout = <whatever's in the DEFAULT section or 10>
|
|
# max duration of an http request; this is for REPLICATE finalization calls and
|
|
# so should be longer than node_timeout
|
|
# http_timeout = 60
|
|
#
|
|
# attempts to kill all workers if nothing replicates for lockup_timeout seconds
|
|
# lockup_timeout = 1800
|
|
#
|
|
# ring_check_interval = 15.0
|
|
# recon_cache_path = /var/cache/swift
|
|
#
|
|
# By default, per-file rsync transfers are logged at debug if successful and
|
|
# error on failure. During large rebalances (which both increase the number
|
|
# of diskfiles transferred and increases the likelihood of failures), this
|
|
# can overwhelm log aggregation while providing little useful insights.
|
|
# Change this to false to disable per-file logging.
|
|
# log_rsync_transfers = true
|
|
#
|
|
# limits how long rsync error log lines are
|
|
# 0 means to log the entire line
|
|
# rsync_error_log_line_length = 0
|
|
#
|
|
# handoffs_first and handoff_delete are options for a special case
|
|
# such as disk full in the cluster. These two options SHOULD NOT BE
|
|
# CHANGED, except for such extreme situations. (e.g. disks filled up
|
|
# or are about to fill up. Anyway, DO NOT let your drives fill up)
|
|
# handoffs_first is the flag to replicate handoffs prior to canonical
|
|
# partitions. It allows to force syncing and deleting handoffs quickly.
|
|
# If set to a True value (e.g. "True" or "1"), partitions
|
|
# that are not supposed to be on the node will be replicated first.
|
|
# handoffs_first = False
|
|
#
|
|
# handoff_delete is the number of replicas which are ensured in swift.
|
|
# If a number less than the number of replicas is set, object-replicator
|
|
# could delete local handoffs even if all replicas are not ensured in the
|
|
# cluster. Object-replicator would remove local handoff partition directories
|
|
# after syncing partition when the number of successful responses is greater
|
|
# than or equal to this number. By default(auto), handoff partitions will be
|
|
# removed when it has successfully replicated to all the canonical nodes.
|
|
# handoff_delete = auto
|
|
#
|
|
# You can set scheduling priority of processes. Niceness values range from -20
|
|
# (most favorable to the process) to 19 (least favorable to the process).
|
|
# nice_priority =
|
|
#
|
|
# You can set I/O scheduling class and priority of processes. I/O niceness
|
|
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
|
|
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
|
|
# 0 to 7. The higher the value, the lower the I/O priority of the process.
|
|
# Work only with ionice_class.
|
|
# ionice_class =
|
|
# ionice_priority =
|
|
|
|
[object-reconstructor]
|
|
# You can override the default log routing for this app here (don't use set!):
|
|
# Unless otherwise noted, each setting below has the same meaning as described
|
|
# in the [object-replicator] section, however these settings apply to the EC
|
|
# reconstructor
|
|
#
|
|
# log_name = object-reconstructor
|
|
# log_facility = LOG_LOCAL0
|
|
# log_level = INFO
|
|
# log_address = /dev/log
|
|
#
|
|
# daemonize = on
|
|
#
|
|
# Time in seconds to wait between reconstruction passes
|
|
# interval = 30.0
|
|
# run_pause is deprecated, use interval instead
|
|
# run_pause = 30.0
|
|
#
|
|
# Maximum number of worker processes to spawn. Each worker will handle a
|
|
# subset of devices. Devices will be assigned evenly among the workers so that
|
|
# workers cycle at similar intervals (which can lead to fewer workers than
|
|
# requested). You can not have more workers than devices. If you have no
|
|
# devices only a single worker is spawned.
|
|
# reconstructor_workers = 0
|
|
#
|
|
# concurrency = 1
|
|
# stats_interval = 300.0
|
|
# node_timeout = 10
|
|
# http_timeout = 60
|
|
# lockup_timeout = 1800
|
|
# ring_check_interval = 15.0
|
|
# recon_cache_path = /var/cache/swift
|
|
#
|
|
# The handoffs_only mode option is for special case emergency situations during
|
|
# rebalance such as disk full in the cluster. This option SHOULD NOT BE
|
|
# CHANGED, except for extreme situations. When handoffs_only mode is enabled
|
|
# the reconstructor will *only* revert fragments from handoff nodes to primary
|
|
# nodes and will not sync primary nodes with neighboring primary nodes. This
|
|
# will force the reconstructor to sync and delete handoffs' fragments more
|
|
# quickly and minimize the time of the rebalance by limiting the number of
|
|
# rebuilds. The handoffs_only option is only for temporary use and should be
|
|
# disabled as soon as the emergency situation has been resolved. When
|
|
# handoffs_only is not set, the deprecated handoffs_first option will be
|
|
# honored as a synonym, but may be ignored in a future release.
|
|
# handoffs_only = False
|
|
#
|
|
# The default strategy for unmounted drives will stage rebuilt data on a
|
|
# handoff node until updated rings are deployed. Because fragments are rebuilt
|
|
# on offset handoffs based on fragment index and the proxy limits how deep it
|
|
# will search for EC frags we restrict how many nodes we'll try. Setting to 0
|
|
# will disable rebuilds to handoffs and only rebuild fragments for unmounted
|
|
# devices to mounted primaries after a ring change.
|
|
# Setting to -1 means "no limit".
|
|
# rebuild_handoff_node_count = 2
|
|
#
|
|
# By default the reconstructor attempts to revert all objects from handoff
|
|
# partitions in a single batch using a single SSYNC request. In exceptional
|
|
# circumstances max_objects_per_revert can be used to temporarily limit the
|
|
# number of objects reverted by each reconstructor revert type job. If more
|
|
# than max_objects_per_revert are available in a sender's handoff partition,
|
|
# the remaining objects will remain in the handoff partition and will not be
|
|
# reverted until the next time the reconstructor visits that handoff partition
|
|
# i.e. with this option set, a single cycle of the reconstructor may not
|
|
# completely revert all handoff partitions. The option has no effect on
|
|
# reconstructor sync type jobs between primary partitions. A value of 0 (the
|
|
# default) means there is no limit.
|
|
# max_objects_per_revert = 0
|
|
#
|
|
# You can set scheduling priority of processes. Niceness values range from -20
|
|
# (most favorable to the process) to 19 (least favorable to the process).
|
|
# nice_priority =
|
|
#
|
|
# You can set I/O scheduling class and priority of processes. I/O niceness
|
|
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
|
|
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
|
|
# 0 to 7. The higher the value, the lower the I/O priority of the process.
|
|
# Work only with ionice_class.
|
|
# ionice_class =
|
|
# ionice_priority =
|
|
#
|
|
# When upgrading from liberasurecode<=1.5.0, you may want to continue writing
|
|
# legacy CRCs until all nodes are upgraded and capable of reading fragments
|
|
# with zlib CRCs. liberasurecode>=1.6.2 checks for the environment variable
|
|
# LIBERASURECODE_WRITE_LEGACY_CRC; if set (value doesn't matter), it will use
|
|
# its legacy CRC. Set this option to true or false to ensure the environment
|
|
# variable is or is not set. Leave the option blank or absent to not touch
|
|
# the environment (default). For more information, see
|
|
# https://bugs.launchpad.net/liberasurecode/+bug/1886088
|
|
# write_legacy_ec_crc =
|
|
#
|
|
# When attempting to reconstruct a missing fragment on another node from a
|
|
# fragment on the local node, the reconstructor may fail to fetch sufficient
|
|
# fragments to reconstruct the missing fragment. This may be because most or
|
|
# all of the remote fragments have been deleted, and the local fragment is
|
|
# stale, in which case the reconstructor will never succeed in reconstructing
|
|
# the apparently missing fragment and will log errors. If the object's
|
|
# tombstones have been reclaimed then the stale fragment will never be deleted
|
|
# (see https://bugs.launchpad.net/swift/+bug/1655608). If an operator suspects
|
|
# that stale fragments have been re-introduced to the cluster and is seeing
|
|
# error logs similar to those in the bug report, then the quarantine_threshold
|
|
# option may be set to a value greater than zero. This enables the
|
|
# reconstructor to quarantine the stale fragments when it fails to fetch more
|
|
# than the quarantine_threshold number of fragments (including the stale
|
|
# fragment) during an attempt to reconstruct. For example, setting the
|
|
# quarantine_threshold to 1 would cause a fragment to be quarantined if no
|
|
# other fragments can be fetched. The value may be reset to zero after the
|
|
# reconstructor has run on all affected nodes and the error logs are no longer
|
|
# seen.
|
|
# Note: the quarantine_threshold applies equally to all policies, but for each
|
|
# policy it is effectively capped at (ec_ndata - 1) so that a fragment is never
|
|
# quarantined when sufficient fragments exist to reconstruct the object.
|
|
# quarantine_threshold = 0
|
|
#
|
|
# Fragments are not quarantined until they are older than
|
|
# quarantine_age, which defaults to the value of reclaim_age.
|
|
# quarantine_age =
|
|
#
|
|
# Sets the maximum number of nodes to which requests will be made before
|
|
# quarantining a fragment. You can use '* replicas' at the end to have it use
|
|
# the number given times the number of replicas for the ring being used for the
|
|
# requests. The minimum number of nodes to which requests are made is the
|
|
# number of replicas for the policy minus 1 (the node on which the fragment is
|
|
# to be rebuilt). The minimum is only exceeded if request_node_count is
|
|
# greater, and only for the purposes of quarantining.
|
|
# request_node_count = 2 * replicas
|
|
|
|
[object-updater]
|
|
# You can override the default log routing for this app here (don't use set!):
|
|
# log_name = object-updater
|
|
# log_facility = LOG_LOCAL0
|
|
# log_level = INFO
|
|
# log_address = /dev/log
|
|
#
|
|
# interval = 300.0
|
|
# node_timeout = <whatever's in the DEFAULT section or 10>
|
|
#
|
|
# updater_workers controls how many processes the object updater will
|
|
# spawn, while concurrency controls how many async_pending records
|
|
# each updater process will operate on at any one time. With
|
|
# concurrency=C and updater_workers=W, there will be up to W*C
|
|
# async_pending records being processed at once.
|
|
# concurrency = 8
|
|
# updater_workers = 1
|
|
#
|
|
# Send at most this many object updates per second
|
|
# objects_per_second = 50
|
|
#
|
|
# Send at most this many object updates per bucket per second. The value must
|
|
# be a float greater than or equal to 0. Set to 0 for unlimited.
|
|
# max_objects_per_container_per_second = 0
|
|
#
|
|
# The per_container ratelimit implementation uses a hashring to constrain
|
|
# memory requirements. Orders of magnitude more buckets will use (nominally)
|
|
# more memory, but will ratelimit smaller groups of containers. The value must
|
|
# be an integer greater than 0.
|
|
# per_container_ratelimit_buckets = 1000
|
|
#
|
|
# Updates that cannot be sent due to per-container rate-limiting may be
|
|
# deferred and re-tried at the end of the updater cycle. This option constrains
|
|
# the size of the in-memory data structure used to store deferred updates.
|
|
# Must be an integer value greater than or equal to 0.
|
|
# max_deferred_updates = 10000
|
|
#
|
|
# Maximum number of oldest async pending timestamps to track for each
|
|
# account-container pair.
|
|
# async_tracker_max_entries = 100
|
|
# Maximum number of oldest async pending timestamps to dump to recon cache.
|
|
# async_tracker_dump_count = 5
|
|
#
|
|
# slowdown will sleep that amount between objects. Deprecated; use
|
|
# objects_per_second instead.
|
|
# slowdown = 0.01
|
|
#
|
|
# Log stats (at INFO level) every report_interval seconds. This
|
|
# logging is per-process, so with concurrency > 1, the logs will
|
|
# contain one stats log per worker process every report_interval
|
|
# seconds.
|
|
# report_interval = 300.0
|
|
#
|
|
# recon_cache_path = /var/cache/swift
|
|
#
|
|
# You can set scheduling priority of processes. Niceness values range from -20
|
|
# (most favorable to the process) to 19 (least favorable to the process).
|
|
# nice_priority =
|
|
#
|
|
# You can set I/O scheduling class and priority of processes. I/O niceness
|
|
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
|
|
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
|
|
# 0 to 7. The higher the value, the lower the I/O priority of the process.
|
|
# Work only with ionice_class.
|
|
# ionice_class =
|
|
# ionice_priority =
|
|
|
|
[object-auditor]
|
|
# You can override the default log routing for this app here (don't use set!):
|
|
# log_name = object-auditor
|
|
# log_facility = LOG_LOCAL0
|
|
# log_level = INFO
|
|
# log_address = /dev/log
|
|
#
|
|
# Time in seconds to wait between auditor passes
|
|
# interval = 30.0
|
|
#
|
|
# You can set the disk chunk size that the auditor uses making it larger if
|
|
# you like for more efficient local auditing of larger objects
|
|
# disk_chunk_size = 65536
|
|
# files_per_second = 20
|
|
# concurrency = 1
|
|
# bytes_per_second = 10000000
|
|
# log_time = 3600
|
|
# zero_byte_files_per_second = 50
|
|
# recon_cache_path = /var/cache/swift
|
|
|
|
# Takes a comma separated list of ints. If set, the object auditor will
|
|
# increment a counter for every object whose size is <= the given break
|
|
# points and report the result after a full scan.
|
|
# object_size_stats =
|
|
#
|
|
# You can set scheduling priority of processes. Niceness values range from -20
|
|
# (most favorable to the process) to 19 (least favorable to the process).
|
|
# nice_priority =
|
|
#
|
|
# You can set I/O scheduling class and priority of processes. I/O niceness
|
|
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
|
|
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
|
|
# 0 to 7. The higher the value, the lower the I/O priority of the process.
|
|
# Work only with ionice_class.
|
|
# ionice_class =
|
|
# ionice_priority =
|
|
|
|
# The auditor will cleanup old rsync tempfiles after they are "old
|
|
# enough" to delete. You can configure the time elapsed in seconds
|
|
# before rsync tempfiles will be unlinked, or the default value of
|
|
# "auto" will try to use object-replicator's rsync_timeout + 900 and fall back
|
|
# to 86400 (1 day).
|
|
# rsync_tempfile_timeout = auto
|
|
|
|
# A comma-separated list of watcher entry points. This lets operators
|
|
# programmatically see audited objects.
|
|
#
|
|
# The entry point group name is "swift.object_audit_watcher". If your
|
|
# setup.py has something like this:
|
|
#
|
|
# entry_points={'swift.object_audit_watcher': [
|
|
# 'some_watcher = some_module:Watcher']}
|
|
#
|
|
# then you would enable it with "watchers = some_package#some_watcher".
|
|
# For example, the built-in reference implementation is enabled as
|
|
# "watchers = swift#dark_data".
|
|
#
|
|
# watchers =
|
|
|
|
# Watcher-specific parameters can be added in a section with a name
|
|
# [object-auditor:watcher:some_package#some_watcher]. The following
|
|
# example uses the built-in reference watcher.
|
|
#
|
|
# [object-auditor:watcher:swift#dark_data]
|
|
#
|
|
# Action type can be 'log' (default), 'delete', or 'quarantine'.
|
|
# action=log
|
|
#
|
|
# The watcher ignores the objects younger than certain minimum age.
|
|
# This prevents spurious actions upon fresh objects while container
|
|
# listings eventually settle.
|
|
# grace_age=604800
|
|
|
|
[object-expirer]
|
|
# If this is true, this expirer will execute tasks from the legacy expirer task queue,
|
|
# at least one object server should run with dequeue_from_legacy = true
|
|
# dequeue_from_legacy = false
|
|
#
|
|
# Note: Be careful not to enable ``dequeue_from_legacy`` on too many expirers
|
|
# as all legacy tasks are stored in a single hidden account and the same hidden
|
|
# containers. On a large cluster one may inadvertently make the
|
|
# account/container servers for the hidden account too busy.
|
|
#
|
|
# Note: the processes and process options can only be used in conjunction with
|
|
# nodes using `dequeue_from_legacy = true`. These options are ignored on nodes
|
|
# with `dequeue_from_legacy = false`.
|
|
#
|
|
# processes is how many parts to divide the legacy work into, one part per
|
|
# process that will be doing the work
|
|
# processes set to 0 means that a single legacy process will do all the work
|
|
# processes can also be specified on the command line and will override the
|
|
# config value
|
|
# processes = 0
|
|
#
|
|
# process is which of the parts a particular legacy process will work on
|
|
# process can also be specified on the command line and will override the config
|
|
# value
|
|
# process is "zero based", if you want to use 3 processes, you should run
|
|
# processes with process set to 0, 1, and 2
|
|
# process = 0
|
|
#
|
|
# internal_client_conf_path = /etc/swift/internal-client.conf
|
|
#
|
|
# You can override the default log routing for this app here (don't use set!):
|
|
# log_name = object-expirer
|
|
# log_facility = LOG_LOCAL0
|
|
# log_level = INFO
|
|
# log_address = /dev/log
|
|
#
|
|
# interval = 300.0
|
|
#
|
|
# report_interval = 300.0
|
|
#
|
|
# request_tries is the number of times the expirer's internal client will
|
|
# attempt any given request in the event of failure. The default is 3.
|
|
# request_tries = 3
|
|
#
|
|
# concurrency is the level of concurrency to use to do the work, this value
|
|
# must be set to at least 1
|
|
# concurrency = 1
|
|
#
|
|
# deletes can be ratelimited to prevent the expirer from overwhelming the cluster
|
|
# tasks_per_second = 50.0
|
|
#
|
|
# The expirer will re-attempt expiring if the source object is not available
|
|
# up to reclaim_age seconds before it gives up and deletes the entry in the
|
|
# queue.
|
|
# reclaim_age = 604800
|
|
|
|
# Number of task objects to cache before processing. With many nodes it may
|
|
# take some time to fill a larger cache_size but may also have a better chance
|
|
# to distribute DELETEs to multiple target containers.
|
|
# round_robin_task_cache_size = 100000
|
|
|
|
# recon_cache_path = /var/cache/swift
|
|
#
|
|
# You can set scheduling priority of processes. Niceness values range from -20
|
|
# (most favorable to the process) to 19 (least favorable to the process).
|
|
# nice_priority =
|
|
#
|
|
# You can set I/O scheduling class and priority of processes. I/O niceness
|
|
# class values are realtime, best-effort and idle. I/O niceness
|
|
# priority is a number which goes from 0 to 7. The higher the value, the lower
|
|
# the I/O priority of the process. Works only with ionice_class.
|
|
# ionice_class =
|
|
# ionice_priority =
|
|
#
|
|
# The expirer can delay the reaping of expired objects on disk (and in
|
|
# container listings) with an account level or container level delay_reaping
|
|
# time.
|
|
# After the delay_reaping time has passed objects will be reaped as normal.
|
|
# You may configure this delay_reaping value in seconds with dynamic config
|
|
# option names prefixed with delay_reaping_<ACCT> for account level delays
|
|
# and delay_reaping_<ACCT>/<CNTR> for container level delays.
|
|
# Special characters in <ACCT> or <CNTR> should be quoted.
|
|
# The delay_reaping value should be a float value greater than or equal to
|
|
# zero.
|
|
# A container level delay_reaping does not require an account level
|
|
# delay_reaping but overrides the account level delay_reaping for the same
|
|
# account if it exists.
|
|
# For example:
|
|
# delay_reaping_AUTH_test = 300.0
|
|
# delay_reaping_AUTH_test2 = 86400.0
|
|
# delay_reaping_AUTH_test/test = 400.0
|
|
# delay_reaping_AUTH_test/test2 = 600.0
|
|
# delay_reaping_AUTH_test/special%0Achars%3Dshould%20be%20quoted = 1200.0
|
|
# N.B. By default no delay_reaping value is configured for any accounts or
|
|
# containers.
|
|
|
|
# Note: Put it at the beginning of the pipeline to profile all middleware. But
|
|
# it is safer to put this after healthcheck. Not intended for production
|
|
# environments!
|
|
[filter:xprofile]
|
|
use = egg:swift#xprofile
|
|
# This option lets you switch profilers, which should inherit from Python's
|
|
# standard profiler. Currently the supported value can be 'cProfile',
|
|
# 'eventlet.green.profile' etc.
|
|
# profile_module = eventlet.green.profile
|
|
#
|
|
# This prefix will be used to combine process ID and timestamp to name the
|
|
# profile data file. Make sure the executing user has permission to write
|
|
# into this path (missing path segments will be created, if necessary).
|
|
# If you enable profiling in more than one type of daemon, you must override
|
|
# it with a unique value like: /var/log/swift/profile/object.profile
|
|
# log_filename_prefix = /tmp/log/swift/profile/default.profile
|
|
#
|
|
# The profile data will be dumped to local disk, named per the rule above,
|
|
# at this interval.
|
|
# dump_interval = 5.0
|
|
#
|
|
# Be careful, this option will enable profiler to dump data into the file with
|
|
# time stamp which means there will be lots of files piled up in the directory.
|
|
# dump_timestamp = false
|
|
#
|
|
# This is the path of the URL to access the mini web UI.
|
|
# path = /__profile__
|
|
#
|
|
# Clear the data when the wsgi server shuts down.
|
|
# flush_at_shutdown = false
|
|
#
|
|
# unwind the iterator of applications
|
|
# unwind = false
|
|
|
|
[object-relinker]
|
|
# You can override the default log routing for this app here (don't use set!):
|
|
# log_name = object-relinker
|
|
# log_facility = LOG_LOCAL0
|
|
# log_level = INFO
|
|
# log_address = /dev/log
|
|
#
|
|
# Start up to this many sub-processes to process disks in parallel. Each disk
|
|
# will be handled by at most one child process. By default, one process is
|
|
# spawned per disk.
|
|
# workers = auto
|
|
#
|
|
# Target this many relinks/cleanups per second for each worker, to reduce the
|
|
# likelihood that the added I/O from a partition-power increase impacts
|
|
# client traffic. Use zero for unlimited.
|
|
# files_per_second = 0.0
|
|
#
|
|
# stats_interval = 300.0
|
|
# recon_cache_path = /var/cache/swift
|