f64c00b00a
The object updater has five different stats, but its logging only told you two of them (successes and failures), and it only told you after finishing all the async_pendings for a device. If you have a cluster that's been sick and has millions upon millions of async_pendings laying around, then your object-updaters are frustratingly silent. I've seen one cluster with around 8 million async_pendings per disk where the object-updaters only emitted stats every 12 hours. Yes, if you have StatsD logging set up properly, you can go look at your graphs and get real-time feedback on what it's doing. If you don't have that, all you get is a frustrating silence. Now, the object updater tells you all of its stats (successes, failures, quarantines due to bad pickles, unlinks, and errors), and it tells you incremental progress every five minutes. The logging at the end of a pass remains and has been expanded to also include all stats. Also included is a small change to what counts as an error: unmounted drives no longer do. The goal is that only abnormal things count as errors, like permission problems, malformed filenames, and so on. These are things that should never happen, but if they do, may require operator intervention. Drives fail, so logging an error upon encountering an unmounted drive is not useful. Change-Id: Idbddd507f0b633d14dffb7a9834fce93a10359ab
471 lines
18 KiB
Plaintext
471 lines
18 KiB
Plaintext
[DEFAULT]
|
|
# bind_ip = 0.0.0.0
|
|
bind_port = 6200
|
|
# bind_timeout = 30
|
|
# backlog = 4096
|
|
# user = swift
|
|
# swift_dir = /etc/swift
|
|
# devices = /srv/node
|
|
# mount_check = true
|
|
# disable_fallocate = false
|
|
# expiring_objects_container_divisor = 86400
|
|
# expiring_objects_account_name = expiring_objects
|
|
#
|
|
# Use an integer to override the number of pre-forked processes that will
|
|
# accept connections. NOTE: if servers_per_port is set, this setting is
|
|
# ignored.
|
|
# workers = auto
|
|
#
|
|
# Make object-server run this many worker processes per unique port of "local"
|
|
# ring devices across all storage policies. The default value of 0 disables this
|
|
# feature.
|
|
# servers_per_port = 0
|
|
#
|
|
# Maximum concurrent requests per worker
|
|
# max_clients = 1024
|
|
#
|
|
# You can specify default log routing here if you want:
|
|
# log_name = swift
|
|
# log_facility = LOG_LOCAL0
|
|
# log_level = INFO
|
|
# log_address = /dev/log
|
|
# The following caps the length of log lines to the value given; no limit if
|
|
# set to 0, the default.
|
|
# log_max_line_length = 0
|
|
#
|
|
# comma separated list of functions to call to setup custom log handlers.
|
|
# functions get passed: conf, name, log_to_console, log_route, fmt, logger,
|
|
# adapted_logger
|
|
# log_custom_handlers =
|
|
#
|
|
# If set, log_udp_host will override log_address
|
|
# log_udp_host =
|
|
# log_udp_port = 514
|
|
#
|
|
# You can enable StatsD logging here:
|
|
# log_statsd_host =
|
|
# log_statsd_port = 8125
|
|
# log_statsd_default_sample_rate = 1.0
|
|
# log_statsd_sample_rate_factor = 1.0
|
|
# log_statsd_metric_prefix =
|
|
#
|
|
# eventlet_debug = false
|
|
#
|
|
# You can set fallocate_reserve to the number of bytes or percentage of disk
|
|
# space you'd like fallocate to reserve, whether there is space for the given
|
|
# file size or not. Percentage will be used if the value ends with a '%'.
|
|
# fallocate_reserve = 1%
|
|
#
|
|
# Time to wait while attempting to connect to another backend node.
|
|
# conn_timeout = 0.5
|
|
# Time to wait while sending each chunk of data to another backend node.
|
|
# node_timeout = 3
|
|
# Time to wait while sending a container update on object update.
|
|
# container_update_timeout = 1.0
|
|
# Time to wait while receiving each chunk of data from a client or another
|
|
# backend node.
|
|
# client_timeout = 60
|
|
#
|
|
# network_chunk_size = 65536
|
|
# disk_chunk_size = 65536
|
|
#
|
|
# Reclamation of tombstone files is performed primarily by the replicator and
|
|
# the reconstructor but the object-server and object-auditor also reference
|
|
# this value - it should be the same for all object services in the cluster,
|
|
# and not greater than the container services reclaim_age
|
|
# reclaim_age = 604800
|
|
#
|
|
# You can set scheduling priority of processes. Niceness values range from -20
|
|
# (most favorable to the process) to 19 (least favorable to the process).
|
|
# nice_priority =
|
|
#
|
|
# You can set I/O scheduling class and priority of processes. I/O niceness
|
|
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
|
|
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
|
|
# 0 to 7. The higher the value, the lower the I/O priority of the process.
|
|
# Works only with ionice_class.
|
|
# ionice_class =
|
|
# ionice_priority =
|
|
|
|
[pipeline:main]
|
|
pipeline = healthcheck recon object-server
|
|
|
|
[app:object-server]
|
|
use = egg:swift#object
|
|
# You can override the default log routing for this app here:
|
|
# set log_name = object-server
|
|
# set log_facility = LOG_LOCAL0
|
|
# set log_level = INFO
|
|
# set log_requests = true
|
|
# set log_address = /dev/log
|
|
#
|
|
# max_upload_time = 86400
|
|
#
|
|
# slow is the minimum total number of seconds an object PUT/DELETE request
|
|
# takes. If it is faster, the object server will sleep this amount of time minus
|
|
# the already passed transaction time. This is only useful for simulating slow
|
|
# devices on storage nodes during testing and development.
|
|
# slow = 0
|
|
#
|
|
# Objects smaller than this are not evicted from the buffercache once read
|
|
# keep_cache_size = 5242880
|
|
#
|
|
# If true, objects for authenticated GET requests may be kept in buffer cache
|
|
# if small enough
|
|
# keep_cache_private = false
|
|
#
|
|
# on PUTs, sync data every n MB
|
|
# mb_per_sync = 512
|
|
#
|
|
# Comma separated list of headers that can be set in metadata on an object.
|
|
# This list is in addition to X-Object-Meta-* headers and cannot include
|
|
# Content-Type, etag, Content-Length, or deleted
|
|
# allowed_headers = Content-Disposition, Content-Encoding, X-Delete-At, X-Object-Manifest, X-Static-Large-Object
|
|
#
|
|
# auto_create_account_prefix = .
|
|
#
|
|
|
|
# The number of threads in eventlet's thread pool. Most IO will occur
|
|
# in the object server's main thread, but certain "heavy" IO
|
|
# operations will occur in separate IO threads, managed by eventlet.
|
|
#
|
|
# The default value is auto, whose actual value is dependent on the
|
|
# servers_per_port value:
|
|
#
|
|
# - When servers_per_port is zero, the default value of
|
|
# eventlet_tpool_num_threads is empty, which uses eventlet's default
|
|
# (currently 20 threads).
|
|
#
|
|
# - When servers_per_port is nonzero, the default value of
|
|
# eventlet_tpool_num_threads is 1.
|
|
#
|
|
# But you may override this value to any integer value.
|
|
#
|
|
# Note that this value is threads per object-server process, so to
|
|
# compute the total number of IO threads on a node, you must multiply
|
|
# this by the number of object-server processes on the node.
|
|
#
|
|
# eventlet_tpool_num_threads = auto
|
|
|
|
# Configure parameter for creating specific server
|
|
# To handle all verbs, including replication verbs, do not specify
|
|
# "replication_server" (this is the default). To only handle replication,
|
|
# set to a True value (e.g. "True" or "1"). To handle only non-replication
|
|
# verbs, set to "False". Unless you have a separate replication network, you
|
|
# should not specify any value for "replication_server".
|
|
# replication_server = false
|
|
#
|
|
# Set to restrict the number of concurrent incoming SSYNC requests
|
|
# Set to 0 for unlimited
|
|
# Note that SSYNC requests are only used by the object reconstructor or the
|
|
# object replicator when configured to use ssync.
|
|
# replication_concurrency = 4
|
|
#
|
|
# Set to restrict the number of concurrent incoming SSYNC requests per
|
|
# device; set to 0 for unlimited requests per device. This can help control
|
|
# I/O to each device. This does not override replication_concurrency described
|
|
# above, so you may need to adjust both parameters depending on your hardware
|
|
# or network capacity.
|
|
# replication_concurrency_per_device = 1
|
|
#
|
|
# Number of seconds to wait for an existing replication device lock before
|
|
# giving up.
|
|
# replication_lock_timeout = 15
|
|
#
|
|
# These next two settings control when the SSYNC subrequest handler will
|
|
# abort an incoming SSYNC attempt. An abort will occur if there are at
|
|
# least threshold number of failures and the value of failures / successes
|
|
# exceeds the ratio. The defaults of 100 and 1.0 mean that at least 100
|
|
# failures have to occur and there have to be more failures than successes for
|
|
# an abort to occur.
|
|
# replication_failure_threshold = 100
|
|
# replication_failure_ratio = 1.0
|
|
#
|
|
# Use splice() for zero-copy object GETs. This requires Linux kernel
|
|
# version 3.0 or greater. If you set "splice = yes" but the kernel
|
|
# does not support it, error messages will appear in the object server
|
|
# logs at startup, but your object servers should continue to function.
|
|
#
|
|
# splice = no
|
|
#
|
|
# You can set scheduling priority of processes. Niceness values range from -20
|
|
# (most favorable to the process) to 19 (least favorable to the process).
|
|
# nice_priority =
|
|
#
|
|
# You can set I/O scheduling class and priority of processes. I/O niceness
|
|
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
|
|
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
|
|
# 0 to 7. The higher the value, the lower the I/O priority of the process.
|
|
# Works only with ionice_class.
|
|
# ionice_class =
|
|
# ionice_priority =
|
|
|
|
[filter:healthcheck]
|
|
use = egg:swift#healthcheck
|
|
# An optional filesystem path, which if present, will cause the healthcheck
|
|
# URL to return "503 Service Unavailable" with a body of "DISABLED BY FILE"
|
|
# disable_path =
|
|
|
|
[filter:recon]
|
|
use = egg:swift#recon
|
|
#recon_cache_path = /var/cache/swift
|
|
#recon_lock_path = /var/lock
|
|
|
|
[object-replicator]
|
|
# You can override the default log routing for this app here (don't use set!):
|
|
# log_name = object-replicator
|
|
# log_facility = LOG_LOCAL0
|
|
# log_level = INFO
|
|
# log_address = /dev/log
|
|
#
|
|
# daemonize = on
|
|
#
|
|
# Time in seconds to wait between replication passes
|
|
# interval = 30
|
|
# run_pause is deprecated, use interval instead
|
|
# run_pause = 30
|
|
#
|
|
# concurrency = 1
|
|
# stats_interval = 300
|
|
#
|
|
# default is rsync, alternative is ssync
|
|
# sync_method = rsync
|
|
#
|
|
# max duration of a partition rsync
|
|
# rsync_timeout = 900
|
|
#
|
|
# bandwidth limit for rsync in kB/s. 0 means unlimited
|
|
# rsync_bwlimit = 0
|
|
#
|
|
# passed to rsync for io op timeout
|
|
# rsync_io_timeout = 30
|
|
#
|
|
# Allow rsync to compress data which is transmitted to destination node
|
|
# during sync. However, this is applicable only when destination node is in
|
|
# a different region than the local one.
|
|
# NOTE: Objects that are already compressed (for example: .tar.gz, .mp3) might
|
|
# slow down the syncing process.
|
|
# rsync_compress = no
|
|
#
|
|
# Format of the rsync module where the replicator will send data. See
|
|
# etc/rsyncd.conf-sample for some usage examples.
|
|
# rsync_module = {replication_ip}::object
|
|
#
|
|
# node_timeout = <whatever's in the DEFAULT section or 10>
|
|
# max duration of an http request; this is for REPLICATE finalization calls and
|
|
# so should be longer than node_timeout
|
|
# http_timeout = 60
|
|
#
|
|
# attempts to kill all workers if nothing replicates for lockup_timeout seconds
|
|
# lockup_timeout = 1800
|
|
#
|
|
# ring_check_interval = 15
|
|
# recon_cache_path = /var/cache/swift
|
|
#
|
|
# limits how long rsync error log lines are
|
|
# 0 means to log the entire line
|
|
# rsync_error_log_line_length = 0
|
|
#
|
|
# handoffs_first and handoff_delete are options for a special case
|
|
# such as disk full in the cluster. These two options SHOULD NOT BE
|
|
# CHANGED, except for such extreme situations. (e.g. disks filled up
|
|
# or are about to fill up. Anyway, DO NOT let your drives fill up)
|
|
# handoffs_first is the flag to replicate handoffs prior to canonical
|
|
# partitions. It allows forcing handoffs to be synced and deleted quickly.
|
|
# If set to a True value (e.g. "True" or "1"), partitions
|
|
# that are not supposed to be on the node will be replicated first.
|
|
# handoffs_first = False
|
|
#
|
|
# handoff_delete is the number of replicas which are ensured in swift.
|
|
# If a number less than the number of replicas is set, object-replicator
|
|
# could delete local handoffs even if all replicas are not ensured in the
|
|
# cluster. Object-replicator would remove local handoff partition directories
|
|
# after syncing partition when the number of successful responses is greater
|
|
# than or equal to this number. By default (auto), handoff partitions will be
|
|
# removed when they have successfully replicated to all the canonical nodes.
|
|
# handoff_delete = auto
|
|
#
|
|
# You can set scheduling priority of processes. Niceness values range from -20
|
|
# (most favorable to the process) to 19 (least favorable to the process).
|
|
# nice_priority =
|
|
#
|
|
# You can set I/O scheduling class and priority of processes. I/O niceness
|
|
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
|
|
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
|
|
# 0 to 7. The higher the value, the lower the I/O priority of the process.
|
|
# Works only with ionice_class.
|
|
# ionice_class =
|
|
# ionice_priority =
|
|
|
|
[object-reconstructor]
|
|
# You can override the default log routing for this app here (don't use set!):
|
|
# Unless otherwise noted, each setting below has the same meaning as described
|
|
# in the [object-replicator] section, however these settings apply to the EC
|
|
# reconstructor
|
|
#
|
|
# log_name = object-reconstructor
|
|
# log_facility = LOG_LOCAL0
|
|
# log_level = INFO
|
|
# log_address = /dev/log
|
|
#
|
|
# daemonize = on
|
|
#
|
|
# Time in seconds to wait between reconstruction passes
|
|
# interval = 30
|
|
# run_pause is deprecated, use interval instead
|
|
# run_pause = 30
|
|
#
|
|
# Maximum number of worker processes to spawn. Each worker will handle a
|
|
# subset of devices. Devices will be assigned evenly among the workers so that
|
|
# workers cycle at similar intervals (which can lead to fewer workers than
|
|
# requested). You cannot have more workers than devices. If you have no
|
|
# devices only a single worker is spawned.
|
|
# reconstructor_workers = 0
|
|
#
|
|
# concurrency = 1
|
|
# stats_interval = 300
|
|
# node_timeout = 10
|
|
# http_timeout = 60
|
|
# lockup_timeout = 1800
|
|
# ring_check_interval = 15
|
|
# recon_cache_path = /var/cache/swift
|
|
# The handoffs_only mode option is for special case emergency situations during
|
|
# rebalance such as disk full in the cluster. This option SHOULD NOT BE
|
|
# CHANGED, except for extreme situations. When handoffs_only mode is enabled
|
|
# the reconstructor will *only* revert fragments from handoff nodes to primary
|
|
# nodes and will not sync primary nodes with neighboring primary nodes. This
|
|
# will force the reconstructor to sync and delete handoffs' fragments more
|
|
# quickly and minimize the time of the rebalance by limiting the number of
|
|
# rebuilds. The handoffs_only option is only for temporary use and should be
|
|
# disabled as soon as the emergency situation has been resolved. When
|
|
# handoffs_only is not set, the deprecated handoffs_first option will be
|
|
# honored as a synonym, but may be ignored in a future release.
|
|
# handoffs_only = False
|
|
#
|
|
# You can set scheduling priority of processes. Niceness values range from -20
|
|
# (most favorable to the process) to 19 (least favorable to the process).
|
|
# nice_priority =
|
|
#
|
|
# You can set I/O scheduling class and priority of processes. I/O niceness
|
|
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
|
|
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
|
|
# 0 to 7. The higher the value, the lower the I/O priority of the process.
|
|
# Works only with ionice_class.
|
|
# ionice_class =
|
|
# ionice_priority =
|
|
|
|
[object-updater]
|
|
# You can override the default log routing for this app here (don't use set!):
|
|
# log_name = object-updater
|
|
# log_facility = LOG_LOCAL0
|
|
# log_level = INFO
|
|
# log_address = /dev/log
|
|
#
|
|
# interval = 300
|
|
# concurrency = 1
|
|
# node_timeout = <whatever's in the DEFAULT section or 10>
|
|
#
|
|
# Send at most this many object updates per second
|
|
# objects_per_second = 50
|
|
#
|
|
# slowdown will sleep that amount between objects. Deprecated; use
|
|
# objects_per_second instead.
|
|
# slowdown = 0.01
|
|
#
|
|
# Log stats (at INFO level) every report_interval seconds. This
|
|
# logging is per-process, so with concurrency > 1, the logs will
|
|
# contain one stats log per worker process every report_interval
|
|
# seconds.
|
|
# report_interval = 300
|
|
#
|
|
# recon_cache_path = /var/cache/swift
|
|
#
|
|
# You can set scheduling priority of processes. Niceness values range from -20
|
|
# (most favorable to the process) to 19 (least favorable to the process).
|
|
# nice_priority =
|
|
#
|
|
# You can set I/O scheduling class and priority of processes. I/O niceness
|
|
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
|
|
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
|
|
# 0 to 7. The higher the value, the lower the I/O priority of the process.
|
|
# Works only with ionice_class.
|
|
# ionice_class =
|
|
# ionice_priority =
|
|
|
|
[object-auditor]
|
|
# You can override the default log routing for this app here (don't use set!):
|
|
# log_name = object-auditor
|
|
# log_facility = LOG_LOCAL0
|
|
# log_level = INFO
|
|
# log_address = /dev/log
|
|
#
|
|
# Time in seconds to wait between auditor passes
|
|
# interval = 30
|
|
#
|
|
# You can set the disk chunk size that the auditor uses, making it larger if
|
|
# you like for more efficient local auditing of larger objects
|
|
# disk_chunk_size = 65536
|
|
# files_per_second = 20
|
|
# concurrency = 1
|
|
# bytes_per_second = 10000000
|
|
# log_time = 3600
|
|
# zero_byte_files_per_second = 50
|
|
# recon_cache_path = /var/cache/swift
|
|
|
|
# Takes a comma separated list of ints. If set, the object auditor will
|
|
# increment a counter for every object whose size is <= the given break
|
|
# points and report the result after a full scan.
|
|
# object_size_stats =
|
|
#
|
|
# You can set scheduling priority of processes. Niceness values range from -20
|
|
# (most favorable to the process) to 19 (least favorable to the process).
|
|
# nice_priority =
|
|
#
|
|
# You can set I/O scheduling class and priority of processes. I/O niceness
|
|
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
|
|
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
|
|
# 0 to 7. The higher the value, the lower the I/O priority of the process.
|
|
# Works only with ionice_class.
|
|
# ionice_class =
|
|
# ionice_priority =
|
|
|
|
# The auditor will cleanup old rsync tempfiles after they are "old
|
|
# enough" to delete. You can configure the time elapsed in seconds
|
|
# before rsync tempfiles will be unlinked, or the default value of
|
|
# "auto" tries to use object-replicator's rsync_timeout + 900 and falls back
|
|
# to 86400 (1 day).
|
|
# rsync_tempfile_timeout = auto
|
|
|
|
# Note: Put it at the beginning of the pipeline to profile all middleware. But
|
|
# it is safer to put this after healthcheck.
|
|
[filter:xprofile]
|
|
use = egg:swift#xprofile
|
|
# This option enables you to switch profilers, which should inherit from Python's
|
|
# standard profiler. Currently the supported value can be 'cProfile',
|
|
# 'eventlet.green.profile' etc.
|
|
# profile_module = eventlet.green.profile
|
|
#
|
|
# This prefix will be used to combine process ID and timestamp to name the
|
|
# profile data file. Make sure the executing user has permission to write
|
|
# into this path (missing path segments will be created, if necessary).
|
|
# If you enable profiling in more than one type of daemon, you must override
|
|
# it with a unique value like: /var/log/swift/profile/object.profile
|
|
# log_filename_prefix = /tmp/log/swift/profile/default.profile
|
|
#
|
|
# the profile data will be dumped to local disk based on above naming rule
|
|
# in this interval.
|
|
# dump_interval = 5.0
|
|
#
|
|
# Be careful, this option will enable the profiler to dump data into the file with
|
|
# time stamp which means there will be lots of files piled up in the directory.
|
|
# dump_timestamp = false
|
|
#
|
|
# This is the path of the URL to access the mini web UI.
|
|
# path = /__profile__
|
|
#
|
|
# Clear the data when the wsgi server shuts down.
|
|
# flush_at_shutdown = false
|
|
#
|
|
# unwind the iterator of applications
|
|
# unwind = false
|