merging in stats stuff

David Goetz 2010-10-13 11:53:07 -07:00
commit 5ec2003d5d
41 changed files with 2699 additions and 67 deletions

bin/st

@ -1132,6 +1132,11 @@ def st_stat(options, args):
if not args:
try:
headers = conn.head_account()
if options.verbose > 1:
options.print_queue.put('''
StorageURL: %s
Auth Token: %s
'''.strip('\n') % (conn.url, conn.token))
container_count = int(headers.get('x-account-container-count', 0))
object_count = int(headers.get('x-account-object-count', 0))
bytes_used = int(headers.get('x-account-bytes-used', 0))
@ -1397,8 +1402,10 @@ Example:
'''.strip('\n') % globals())
parser.add_option('-s', '--snet', action='store_true', dest='snet',
default=False, help='Use SERVICENET internal network')
parser.add_option('-q', '--quiet', action='store_false', dest='verbose',
default=True, help='Suppress status output')
parser.add_option('-v', '--verbose', action='count', dest='verbose',
default=1, help='Print more info')
parser.add_option('-q', '--quiet', action='store_const', dest='verbose',
const=0, default=1, help='Suppress status output')
parser.add_option('-a', '--all', action='store_true', dest='yes_all',
default=False, help='Indicate that you really want the '
'whole account for commands that require --all in such '

bin/swift-account-stats-logger Executable file

@ -0,0 +1,27 @@
#!/usr/bin/python
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from swift.stats.account_stats import AccountStat
from swift.common import utils
if __name__ == '__main__':
if len(sys.argv) < 2:
print "Usage: swift-account-stats-logger CONFIG_FILE"
sys.exit()
stats_conf = utils.readconf(sys.argv[1], 'log-processor-stats')
stats = AccountStat(stats_conf).run(once=True)

bin/swift-bench Executable file

@ -0,0 +1,132 @@
#!/usr/bin/python
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
import sys
import signal
import uuid
from optparse import OptionParser
from swift.common.bench import BenchController
from swift.common.utils import readconf, NamedLogger
# The defaults should be sufficient to run swift-bench on a SAIO
CONF_DEFAULTS = {
'auth': '',
'user': '',
'key': '',
'object_sources': '',
'put_concurrency': '10',
'get_concurrency': '10',
'del_concurrency': '10',
'concurrency': '',
'object_size': '1',
'num_objects': '1000',
'num_gets': '10000',
'delete': 'yes',
'container_name': uuid.uuid4().hex,
'use_proxy': 'yes',
'url': '',
'account': '',
'devices': 'sdb1',
'log_level': 'INFO',
'timeout': '10',
}
SAIO_DEFAULTS = {
'auth': 'http://saio:11000/v1.0',
'user': 'test:tester',
'key': 'testing',
}
if __name__ == '__main__':
usage = "usage: %prog [OPTIONS] [CONF_FILE]"
usage += """\n\nConf file with SAIO defaults:
[bench]
auth = http://saio:11000/v1.0
user = test:tester
key = testing
concurrency = 10
object_size = 1
num_objects = 1000
num_gets = 10000
delete = yes
"""
parser = OptionParser(usage=usage)
parser.add_option('', '--saio', dest='saio', action='store_true',
default=False, help='Run benchmark with SAIO defaults')
parser.add_option('-A', '--auth', dest='auth',
help='URL for obtaining an auth token')
parser.add_option('-U', '--user', dest='user',
help='User name for obtaining an auth token')
parser.add_option('-K', '--key', dest='key',
help='Key for obtaining an auth token')
parser.add_option('-u', '--url', dest='url',
help='Storage URL')
parser.add_option('-c', '--concurrency', dest='concurrency',
help='Number of concurrent connections to use')
parser.add_option('-s', '--object-size', dest='object_size',
help='Size of objects to PUT (in bytes)')
parser.add_option('-n', '--num-objects', dest='num_objects',
help='Number of objects to PUT')
parser.add_option('-g', '--num-gets', dest='num_gets',
help='Number of GET operations to perform')
parser.add_option('-x', '--no-delete', dest='delete', action='store_false',
help='If set, will not delete the objects created')
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
options, args = parser.parse_args()
if options.saio:
CONF_DEFAULTS.update(SAIO_DEFAULTS)
if args:
conf = args[0]
if not os.path.exists(conf):
sys.exit("No such conf file: %s" % conf)
conf = readconf(conf, 'bench', log_name='swift-bench',
defaults=CONF_DEFAULTS)
else:
conf = CONF_DEFAULTS
parser.set_defaults(**conf)
options, _ = parser.parse_args()
if options.concurrency != '':
options.put_concurrency = options.concurrency
options.get_concurrency = options.concurrency
options.del_concurrency = options.concurrency
def sigterm(signum, frame):
sys.exit('Termination signal received.')
signal.signal(signal.SIGTERM, sigterm)
logger = logging.getLogger()
logger.setLevel({
'debug': logging.DEBUG,
'info': logging.INFO,
'warning': logging.WARNING,
'error': logging.ERROR,
'critical': logging.CRITICAL}.get(
options.log_level.lower(), logging.INFO))
loghandler = logging.StreamHandler()
logformat = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
loghandler.setFormatter(logformat)
logger.addHandler(loghandler)
logger = NamedLogger(logger, 'swift-bench')
controller = BenchController(logger, options)
controller.run()

bin/swift-log-stats-collector Executable file

@ -0,0 +1,27 @@
#!/usr/bin/python
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from swift.stats.log_processor import LogProcessorDaemon
from swift.common import utils
if __name__ == '__main__':
if len(sys.argv) < 2:
print "Usage: swift-log-stats-collector CONFIG_FILE"
sys.exit()
conf = utils.readconf(sys.argv[1], log_name='log-stats-collector')
stats = LogProcessorDaemon(conf).run(once=True)

bin/swift-log-uploader Executable file

@ -0,0 +1,31 @@
#!/usr/bin/python
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from swift.stats.log_uploader import LogUploader
from swift.common import utils
if __name__ == '__main__':
if len(sys.argv) < 3:
print "Usage: swift-log-uploader CONFIG_FILE plugin"
sys.exit()
uploader_conf = utils.readconf(sys.argv[1], 'log-processor')
plugin = sys.argv[2]
section_name = 'log-processor-%s' % plugin
plugin_conf = utils.readconf(sys.argv[1], section_name)
uploader_conf.update(plugin_conf)
uploader = LogUploader(uploader_conf, plugin).run(once=True)


@ -47,7 +47,7 @@ If you need more throughput to either Account or Container Services, they may
each be deployed to their own servers. For example you might use faster (but
more expensive) SAS or even SSD drives to get faster disk I/O to the databases.
Load balancing and network design is left as an excercise to the reader,
Load balancing and network design is left as an exercise to the reader,
but this is a very important part of the cluster, so time should be spent
designing the network for a Swift cluster.
@ -59,7 +59,7 @@ Preparing the Ring
The first step is to determine the number of partitions that will be in the
ring. We recommend that there be a minimum of 100 partitions per drive to
insure even distribution accross the drives. A good starting point might be
insure even distribution across the drives. A good starting point might be
to figure out the maximum number of drives the cluster will contain, and then
multiply by 100, and then round up to the nearest power of two.
@ -154,8 +154,8 @@ Option Default Description
------------------ ---------- ---------------------------------------------
swift_dir /etc/swift Swift configuration directory
devices /srv/node Parent directory of where devices are mounted
mount_check true Weather or not check if the devices are
mounted to prevent accidently writing
mount_check true Whether or not check if the devices are
mounted to prevent accidentally writing
to the root device
bind_ip 0.0.0.0 IP Address for server to bind to
bind_port 6000 Port for server to bind to
@ -173,7 +173,7 @@ use paste.deploy entry point for the object
log_name object-server Label used when logging
log_facility LOG_LOCAL0 Syslog log facility
log_level INFO Logging level
log_requests True Weather or not to log each request
log_requests True Whether or not to log each request
user swift User to run as
node_timeout 3 Request timeout to external services
conn_timeout 0.5 Connection timeout to external services
@ -193,7 +193,7 @@ Option Default Description
log_name object-replicator Label used when logging
log_facility LOG_LOCAL0 Syslog log facility
log_level INFO Logging level
daemonize yes Weather or not to run replication as a
daemonize yes Whether or not to run replication as a
daemon
run_pause 30 Time in seconds to wait between
replication passes
@ -249,9 +249,9 @@ The following configuration options are available:
Option Default Description
------------------ ---------- --------------------------------------------
swift_dir /etc/swift Swift configuration directory
devices /srv/node Parent irectory of where devices are mounted
mount_check true Weather or not check if the devices are
mounted to prevent accidently writing
devices /srv/node Parent directory of where devices are mounted
mount_check true Whether or not check if the devices are
mounted to prevent accidentally writing
to the root device
bind_ip 0.0.0.0 IP Address for server to bind to
bind_port 6001 Port for server to bind to
@ -339,8 +339,8 @@ Option Default Description
------------------ ---------- ---------------------------------------------
swift_dir /etc/swift Swift configuration directory
devices /srv/node Parent directory or where devices are mounted
mount_check true Weather or not check if the devices are
mounted to prevent accidently writing
mount_check true Whether or not check if the devices are
mounted to prevent accidentally writing
to the root device
bind_ip 0.0.0.0 IP Address for server to bind to
bind_port 6002 Port for server to bind to
@ -353,7 +353,7 @@ user swift User to run as
================== ============== ==========================================
Option Default Description
------------------ -------------- ------------------------------------------
use paste.deploy entry point for the account
use Entry point for paste.deploy for the account
server. For most cases, this should be
`egg:swift#account`.
log_name account-server Label used when logging
@ -412,6 +412,11 @@ conn_timeout 0.5 Connection timeout to external services
Proxy Server Configuration
--------------------------
An example Proxy Server configuration can be found at
etc/proxy-server.conf-sample in the source code repository.
The following configuration options are available:
[DEFAULT]
============================ =============== =============================
@ -432,7 +437,7 @@ key_file Path to the ssl .key
============================ =============== =============================
Option Default Description
---------------------------- --------------- -----------------------------
use paste.deploy entry point for
use Entry point for paste.deploy for
the proxy server. For most
cases, this should be
`egg:swift#proxy`.
@ -443,10 +448,10 @@ log_headers True If True, log headers in each
request
recheck_account_existence 60 Cache timeout in seconds to
send memcached for account
existance
existence
recheck_container_existence 60 Cache timeout in seconds to
send memcached for container
existance
existence
object_chunk_size 65536 Chunk size to read from
object servers
client_chunk_size 65536 Chunk size to read from
@ -474,7 +479,7 @@ rate_limit_account_whitelist Comma separated list of
rate limit
rate_limit_account_blacklist Comma separated list of
account name hashes to block
completly
completely
============================ =============== =============================
[auth]
@ -482,7 +487,7 @@ rate_limit_account_blacklist Comma separated list of
============ =================================== ========================
Option Default Description
------------ ----------------------------------- ------------------------
use paste.deploy entry point
use Entry point for paste.deploy
to use for auth. To
use the swift dev auth,
set to:
@ -500,7 +505,7 @@ Memcached Considerations
------------------------
Several of the Services rely on Memcached for caching certain types of
lookups, such as auth tokens, and container/account existance. Swift does
lookups, such as auth tokens, and container/account existence. Swift does
not do any caching of actual object data. Memcached should be able to run
on any servers that have available RAM and CPU. At Rackspace, we run
Memcached on the proxy servers. The `memcache_servers` config option
@ -526,7 +531,7 @@ Most services support either a worker or concurrency value in the settings.
This allows the services to make effective use of the cores available. A good
starting point to set the concurrency level for the proxy and storage services
to 2 times the number of cores available. If more than one service is
sharing a server, then some experimentaiton may be needed to find the best
sharing a server, then some experimentation may be needed to find the best
balance.
At Rackspace, our Proxy servers have dual quad core processors, giving us 8
@ -548,7 +553,7 @@ Filesystem Considerations
-------------------------
Swift is designed to be mostly filesystem agnostic--the only requirement
beeing that the filesystem supports extended attributes (xattrs). After
being that the filesystem supports extended attributes (xattrs). After
thorough testing with our use cases and hardware configurations, XFS was
the best all-around choice. If you decide to use a filesystem other than
XFS, we highly recommend thorough testing.
@ -611,5 +616,5 @@ Logging Considerations
Swift is set up to log directly to syslog. Every service can be configured
with the `log_facility` option to set the syslog log facility destination.
It is recommended to use syslog-ng to route the logs to specific log
We recommended using syslog-ng to route the logs to specific log
files locally on the server and also to remote log collecting servers.


@ -7,9 +7,7 @@ Instructions for setting up a dev VM
------------------------------------
This documents setting up a virtual machine for doing Swift development. The
virtual machine will emulate running a four node Swift cluster. It assumes
you're using *VMware Fusion 3* on *Mac OS X Snow Leopard*, but should give a
good idea what to do on other environments.
virtual machine will emulate running a four node Swift cluster.
* Get the *Ubuntu 10.04 LTS (Lucid Lynx)* server image:
@ -17,20 +15,9 @@ good idea what to do on other environments.
- Ubuntu Live/Install: http://cdimage.ubuntu.com/releases/10.04/release/ubuntu-10.04-dvd-amd64.iso (4.1 GB)
- Ubuntu Mirrors: https://launchpad.net/ubuntu/+cdmirrors
* Create guest virtual machine:
#. `Continue without disc`
#. `Use operating system installation disc image file`, pick the .iso
from above.
#. Select `Linux` and `Ubuntu 64-bit`.
#. Fill in the *Linux Easy Install* details.
#. `Customize Settings`, name the image whatever you want
(`SAIO` for instance.)
#. When the `Settings` window comes up, select `Hard Disk`, create an
extra disk (the defaults are fine).
#. Start the virtual machine up and wait for the easy install to
finish.
* Create guest virtual machine from the Ubuntu image (if you are going to use
a separate partition for swift data, be sure to add another device when
creating the VM)
* As root on guest (you'll have to log in as you, then `sudo su -`):
#. `apt-get install python-software-properties`
@ -41,11 +28,22 @@ good idea what to do on other environments.
python-xattr sqlite3 xfsprogs python-webob python-eventlet
python-greenlet python-pastedeploy`
#. Install anything else you want, like screen, ssh, vim, etc.
#. `fdisk /dev/sdb` (set up a single partition)
#. `mkfs.xfs -i size=1024 /dev/sdb1`
#. If you would like to use another partition for storage:
#. `fdisk /dev/sdb` (set up a single partition)
#. `mkfs.xfs -i size=1024 /dev/sdb1`
#. Edit `/etc/fstab` and add
`/dev/sdb1 /mnt/sdb1 xfs noatime,nodiratime,nobarrier,logbufs=8 0 0`
#. If you would like to use a loopback device instead of another partition:
#. `dd if=/dev/zero of=/srv/swift-disk bs=1024 count=0 seek=1000000`
(modify seek to make a larger or smaller partition)
#. `mkfs.xfs -i size=1024 /srv/swift-disk`
#. Edit `/etc/fstab` and add
`/srv/swift-disk /mnt/sdb1 xfs loop,noatime,nodiratime,nobarrier,logbufs=8 0 0`
#. `mkdir /mnt/sdb1`
#. Edit `/etc/fstab` and add
`/dev/sdb1 /mnt/sdb1 xfs noatime,nodiratime,nobarrier,logbufs=8 0 0`
#. `mount /mnt/sdb1`
#. `mkdir /mnt/sdb1/1 /mnt/sdb1/2 /mnt/sdb1/3 /mnt/sdb1/4 /mnt/sdb1/test`
#. `chown <your-user-name>:<your-group-name> /mnt/sdb1/*`
@ -56,7 +54,7 @@ good idea what to do on other environments.
#. Add to `/etc/rc.local` (before the `exit 0`)::
mkdir /var/run/swift
chown <your-user-name>:<your-user-name> /var/run/swift
chown <your-user-name>:<your-group-name> /var/run/swift
#. Create /etc/rsyncd.conf::
@ -64,7 +62,7 @@ good idea what to do on other environments.
gid = <Your group name>
log file = /var/log/rsyncd.log
pid file = /var/run/rsyncd.pid
address = 127.0.0.1
[account6012]
max connections = 25
@ -472,6 +470,11 @@ good idea what to do on other environments.
sudo service rsyslog restart
sudo service memcached restart
.. note::
If you are using a loopback device, substitute `/dev/sdb1` above with
`/srv/swift-disk`
#. Create `~/bin/remakerings`::
#!/bin/bash


@ -24,6 +24,7 @@ Overview:
overview_reaper
overview_auth
overview_replication
overview_stats
rate_limiting
Development:


@ -0,0 +1,184 @@
==================
Swift stats system
==================
The swift stats system is composed of three parts: log creation, log
uploading, and log processing. The system handles two types of logs (access
and account stats), but it can be extended to handle other types of logs.
---------
Log Types
---------
***********
Access logs
***********
Access logs are the proxy server logs. Rackspace uses syslog-ng to redirect
the proxy log output to an hourly log file. For example, a proxy request that
is made on August 4, 2010 at 12:37 gets logged in a file named 2010080412.
This allows easy log rotation and easy per-hour log processing.
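
A minimal sketch of that naming convention, assuming the `%Y%m%d%H` strftime
pattern used by the sample configs in this commit::

    import time

    # a request logged at 2010-08-04 12:37 UTC lands in a file named '2010080412'
    hourly_name = time.strftime('%Y%m%d%H', time.gmtime(1280925420))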
******************
Account stats logs
******************
Account stats logs are generated by a stats system process.
swift-account-stats-logger runs on each account server (via cron) and walks
the filesystem looking for account databases. When an account database is
found, the logger selects the account hash, bytes_used, container_count, and
object_count. These values are then written out as one line in a csv file. One
csv file is produced for every run of swift-account-stats-logger. This means
that, system wide, one csv file is produced for every storage node. Rackspace
runs the account stats logger every hour. Therefore, in a cluster of ten
account servers, ten csv files are produced every hour. Also, every account
will have one entry for every replica in the system. On average, there will be
three copies of each account in the aggregate of all account stat csv files
created in one system-wide run.
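
As an illustration, one row of such a csv holds the account, container count,
object count, and bytes used (the values below are invented), matching the
`"%s",%d,%d,%d` line format that `swift/stats/account_stats.py` writes later
in this commit::

    "AUTH_d5167ecb5b2f64c9e2c0dd8f1e542136",3,512,1024000000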
----------------------
Log Processing plugins
----------------------
The swift stats system is written to allow a plugin to be defined for every
log type. Swift includes plugins for both access logs and storage stats logs.
Each plugin is responsible for defining, in a config section, where the logs
are stored on disk, where the logs will be stored in swift (account and
container), the filename format of the logs on disk, the location of the
plugin class definition, and any plugin-specific config values.
The plugin class definition defines three methods. The constructor must accept
one argument (the dict representation of the plugin's config section). The
process method must accept an iterator, and the account, container, and object
name of the log. The keylist_mapping accepts no parameters.
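
A skeleton of what such a plugin class might look like (the class itself is
hypothetical; the real plugins shipped in this commit are
`AccessLogProcessor` and `StatsLogProcessor`)::

    class ExampleLogProcessor(object):
        """Hypothetical plugin illustrating the expected interface."""

        def __init__(self, conf):
            # conf is the dict form of this plugin's config section
            self.conf = conf

        def process(self, obj_stream, account, container, object_name):
            # consume the log lines from obj_stream and return a dict of
            # data keyed on (account, year, month, day, hour)
            return {}

        def keylist_mapping(self):
            # map final csv keys to the keys produced by process()
            return {}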
-------------
Log Uploading
-------------
swift-log-uploader accepts a config file and a plugin name. It finds the log
files on disk according to the plugin config section and uploads them to the
swift cluster. This means one uploader process will run on each proxy server
node and each account server node. To avoid uploading partially-written log
files, the uploader skips any file whose mtime is less than two hours old.
Rackspace runs this process once an hour via cron.
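
A rough sketch of that age check (the two-hour window corresponds to the
`new_log_cutoff = 7200` default shown in the log-processing.conf-sample; the
helper name here is illustrative)::

    import os
    import time

    NEW_LOG_CUTOFF = 7200  # seconds

    def old_enough_to_upload(path, cutoff=NEW_LOG_CUTOFF):
        # skip files modified within the cutoff so partially-written
        # logs are never uploaded
        return time.time() - os.path.getmtime(path) >= cutoff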
--------------
Log Processing
--------------
swift-log-stats-collector accepts a config file and generates a csv that is
uploaded to swift. It loads all plugins defined in the config file, generates
a list of all log files in swift that need to be processed, and passes an
iterable of the log file data to the appropriate plugin's process method. The
process method returns a dictionary of data in the log file keyed on (account,
year, month, day, hour). The log-stats-collector process then combines all
dictionaries from all calls to a process method into one dictionary. Key
collisions within each (account, year, month, day, hour) dictionary are
summed. Finally, the summed dictionary is mapped to the final csv values with
each plugin's keylist_mapping method.
The resulting csv file has one line per (account, year, month, day, hour) for
all log files processed in that run of swift-log-stats-collector.
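
Not the actual implementation, but the collation step can be pictured roughly
like this (names are illustrative)::

    def collate(per_plugin_results):
        # per_plugin_results: iterable of dicts keyed on
        # (account, year, month, day, hour) -> {stat_key: count}
        aggregated = {}
        for result in per_plugin_results:
            for aggr_key, counts in result.items():
                bucket = aggregated.setdefault(aggr_key, {})
                for stat_key, value in counts.items():
                    # collisions within one hourly bucket are summed
                    bucket[stat_key] = bucket.get(stat_key, 0) + value
        return aggregated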
================================
Running the stats system on SAIO
================================
#. Create a swift account to use for storing stats information, and note the
account hash. The hash will be used in config files.
#. Install syslog-ng::
sudo apt-get install syslog-ng
#. Add the following to the end of `/etc/syslog-ng/syslog-ng.conf`::
# Added for swift logging
destination df_local1 { file("/var/log/swift/proxy.log" owner(<username>) group(<groupname>)); };
destination df_local1_err { file("/var/log/swift/proxy.error" owner(<username>) group(<groupname>)); };
destination df_local1_hourly { file("/var/log/swift/hourly/$YEAR$MONTH$DAY$HOUR" owner(<username>) group(<groupname>)); };
filter f_local1 { facility(local1) and level(info); };
filter f_local1_err { facility(local1) and not level(info); };
# local1.info -/var/log/swift/proxy.log
# write to local file and to remote log server
log {
source(s_all);
filter(f_local1);
destination(df_local1);
destination(df_local1_hourly);
};
# local1.error -/var/log/swift/proxy.error
# write to local file and to remote log server
log {
source(s_all);
filter(f_local1_err);
destination(df_local1_err);
};
#. Restart syslog-ng
#. Create the log directories::
mkdir /var/log/swift/hourly
mkdir /var/log/swift/stats
chown -R <username>:<groupname> /var/log/swift
#. Create `/etc/swift/log-processor.conf`::
[log-processor]
swift_account = <your-stats-account-hash>
user = <your-user-name>
[log-processor-access]
swift_account = <your-stats-account-hash>
container_name = log_data
log_dir = /var/log/swift/hourly/
source_filename_format = %Y%m%d%H
class_path = swift.stats.access_processor.AccessLogProcessor
user = <your-user-name>
[log-processor-stats]
swift_account = <your-stats-account-hash>
container_name = account_stats
log_dir = /var/log/swift/stats/
source_filename_format = %Y%m%d%H_*
class_path = swift.stats.stats_processor.StatsLogProcessor
account_server_conf = /etc/swift/account-server/1.conf
user = <your-user-name>
#. Add the following under [app:proxy-server] in `/etc/swift/proxy-server.conf`::
log_facility = LOG_LOCAL1
#. Create a `cron` job to run once per hour to create the stats logs. In
`/etc/cron.d/swift-stats-log-creator`::
0 * * * * <your-user-name> swift-account-stats-logger /etc/swift/log-processor.conf
#. Create a `cron` job to run once per hour to upload the stats logs. In
`/etc/cron.d/swift-stats-log-uploader`::
10 * * * * <your-user-name> swift-log-uploader /etc/swift/log-processor.conf stats
#. Create a `cron` job to run once per hour to upload the access logs. In
`/etc/cron.d/swift-access-log-uploader`::
5 * * * * <your-user-name> swift-log-uploader /etc/swift/log-processor.conf access
#. Create a `cron` job to run once per hour to process the logs. In
`/etc/cron.d/swift-stats-processor`::
30 * * * * <your-user-name> swift-log-stats-collector /etc/swift/log-processor.conf
After running for a few hours, you should start to see .csv files in the
log_processing_data container in the swift stats account that was created
earlier. This file will have one entry per account per hour for each account
with activity in that hour. One .csv file should be produced per hour. Note
that the stats will be delayed by at least two hours by default. This can be
changed with the new_log_cutoff variable in the config file. See
`log-processing.conf-sample` for more details.


@ -0,0 +1,39 @@
# plugin section format is named "log-processor-<plugin>"
[log-processor]
swift_account = AUTH_7abbc116-8a07-4b63-819d-02715d3e0f31
# container_name = log_processing_data
# proxy_server_conf = /etc/swift/proxy-server.conf
# log_facility = LOG_LOCAL0
# log_level = INFO
# lookback_hours = 120
# lookback_window = 120
# user = swift
[log-processor-access]
# log_dir = /var/log/swift/
swift_account = AUTH_7abbc116-8a07-4b63-819d-02715d3e0f31
container_name = log_data
source_filename_format = access-%Y%m%d%H
# new_log_cutoff = 7200
# unlink_log = True
class_path = swift.stats.access_processor.AccessLogProcessor
# service ips is for client ip addresses that should be counted as servicenet
# service_ips =
# load balancer private ips is for load balancer ip addresses that should be
# counted as servicenet
# lb_private_ips =
# server_name = proxy
# user = swift
# warn_percent = 0.8
[log-processor-stats]
# log_dir = /var/log/swift/
swift_account = AUTH_7abbc116-8a07-4b63-819d-02715d3e0f31
container_name = account_stats
source_filename_format = stats-%Y%m%d%H_*
# new_log_cutoff = 7200
# unlink_log = True
class_path = swift.stats.stats_processor.StatsLogProcessor
# account_server_conf = /etc/swift/account-server.conf
# user = swift


@ -74,7 +74,11 @@ setup(
'bin/swift-object-server',
'bin/swift-object-updater', 'bin/swift-proxy-server',
'bin/swift-ring-builder', 'bin/swift-stats-populate',
'bin/swift-stats-report'
'bin/swift-stats-report',
'bin/swift-bench',
'bin/swift-log-uploader',
'bin/swift-log-stats-collector',
'bin/swift-account-stats-logger',
],
entry_points={
'paste.app_factory': [

swift/common/bench.py Normal file

@ -0,0 +1,236 @@
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import uuid
import time
import random
from urlparse import urlparse
from contextlib import contextmanager
import eventlet.pools
from eventlet.green.httplib import CannotSendRequest
from swift.common.utils import TRUE_VALUES
from swift.common import client
from swift.common import direct_client
class ConnectionPool(eventlet.pools.Pool):
def __init__(self, url, size):
self.url = url
eventlet.pools.Pool.__init__(self, size, size)
def create(self):
return client.http_connection(self.url)
class Bench(object):
def __init__(self, logger, conf, names):
self.logger = logger
self.user = conf.user
self.key = conf.key
self.auth_url = conf.auth
self.use_proxy = conf.use_proxy in TRUE_VALUES
if self.use_proxy:
url, token = client.get_auth(self.auth_url, self.user, self.key)
self.token = token
self.account = url.split('/')[-1]
if conf.url == '':
self.url = url
else:
self.url = conf.url
else:
self.token = 'SlapChop!'
self.account = conf.account
self.url = conf.url
self.ip, self.port = self.url.split('/')[2].split(':')
self.container_name = conf.container_name
self.object_size = int(conf.object_size)
self.object_sources = conf.object_sources
self.files = []
if self.object_sources:
self.object_sources = self.object_sources.split()
self.files = [file(f, 'rb').read() for f in self.object_sources]
self.put_concurrency = int(conf.put_concurrency)
self.get_concurrency = int(conf.get_concurrency)
self.del_concurrency = int(conf.del_concurrency)
self.total_objects = int(conf.num_objects)
self.total_gets = int(conf.num_gets)
self.timeout = int(conf.timeout)
self.devices = conf.devices.split()
self.names = names
self.conn_pool = ConnectionPool(self.url,
max(self.put_concurrency, self.get_concurrency,
self.del_concurrency))
def _log_status(self, title):
total = time.time() - self.beginbeat
self.logger.info('%s %s [%s failures], %.01f/s' % (
self.complete, title, self.failures,
(float(self.complete) / total),
))
@contextmanager
def connection(self):
try:
hc = self.conn_pool.get()
try:
yield hc
except CannotSendRequest:
self.logger.info("CannotSendRequest. Skipping...")
try:
hc.close()
except:
pass
self.failures += 1
hc = self.conn_pool.create()
finally:
self.conn_pool.put(hc)
def run(self):
pool = eventlet.GreenPool(self.concurrency)
events = []
self.beginbeat = self.heartbeat = time.time()
self.heartbeat -= 13 # just to get the first report quicker
self.failures = 0
self.complete = 0
for i in xrange(self.total):
pool.spawn_n(self._run, i)
pool.waitall()
self._log_status(self.msg + ' **FINAL**')
def _run(self, thread):
return
class BenchController(object):
def __init__(self, logger, conf):
self.logger = logger
self.conf = conf
self.names = []
self.delete = conf.delete in TRUE_VALUES
self.gets = int(conf.num_gets)
def run(self):
puts = BenchPUT(self.logger, self.conf, self.names)
puts.run()
if self.gets:
gets = BenchGET(self.logger, self.conf, self.names)
gets.run()
if self.delete:
dels = BenchDELETE(self.logger, self.conf, self.names)
dels.run()
class BenchDELETE(Bench):
def __init__(self, logger, conf, names):
Bench.__init__(self, logger, conf, names)
self.concurrency = self.del_concurrency
self.total = len(names)
self.msg = 'DEL'
def _run(self, thread):
if time.time() - self.heartbeat >= 15:
self.heartbeat = time.time()
self._log_status('DEL')
device, partition, name = self.names.pop()
with self.connection() as conn:
try:
if self.use_proxy:
client.delete_object(self.url, self.token,
self.container_name, name, http_conn=conn)
else:
node = {'ip': self.ip, 'port': self.port, 'device': device}
direct_client.direct_delete_object(node, partition,
self.account, self.container_name, name)
except client.ClientException, e:
self.logger.debug(str(e))
self.failures += 1
self.complete += 1
class BenchGET(Bench):
def __init__(self, logger, conf, names):
Bench.__init__(self, logger, conf, names)
self.concurrency = self.get_concurrency
self.total = self.total_gets
self.msg = 'GETS'
def _run(self, thread):
if time.time() - self.heartbeat >= 15:
self.heartbeat = time.time()
self._log_status('GETS')
device, partition, name = random.choice(self.names)
with self.connection() as conn:
try:
if self.use_proxy:
client.get_object(self.url, self.token,
self.container_name, name, http_conn=conn)
else:
node = {'ip': self.ip, 'port': self.port, 'device': device}
direct_client.direct_get_object(node, partition,
self.account, self.container_name, name)
except client.ClientException, e:
self.logger.debug(str(e))
self.failures += 1
self.complete += 1
class BenchPUT(Bench):
def __init__(self, logger, conf, names):
Bench.__init__(self, logger, conf, names)
self.concurrency = self.put_concurrency
self.total = self.total_objects
self.msg = 'PUTS'
if self.use_proxy:
with self.connection() as conn:
client.put_container(self.url, self.token,
self.container_name, http_conn=conn)
def _run(self, thread):
if time.time() - self.heartbeat >= 15:
self.heartbeat = time.time()
self._log_status('PUTS')
name = uuid.uuid4().hex
if self.object_sources:
source = random.choice(self.files)
else:
source = '0' * self.object_size
device = random.choice(self.devices)
partition = str(random.randint(1, 3000))
with self.connection() as conn:
try:
if self.use_proxy:
client.put_object(self.url, self.token,
self.container_name, name, source,
content_length=len(source), http_conn=conn)
else:
node = {'ip': self.ip, 'port': self.port, 'device': device}
direct_client.direct_put_object(node, partition,
self.account, self.container_name, name, source,
content_length=len(source))
except client.ClientException, e:
self.logger.debug(str(e))
self.failures += 1
self.names.append((device, partition, name))
self.complete += 1


@ -18,7 +18,7 @@ Cloud Files client library used internally
"""
import socket
from cStringIO import StringIO
from httplib import HTTPConnection, HTTPException, HTTPSConnection
from httplib import HTTPException, HTTPSConnection
from re import compile, DOTALL
from tokenize import generate_tokens, STRING, NAME, OP
from urllib import quote as _quote, unquote
@ -29,6 +29,8 @@ try:
except:
from time import sleep
from swift.common.bufferedhttp \
import BufferedHTTPConnection as HTTPConnection
def quote(value, safe='/'):
"""


@ -0,0 +1,73 @@
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import zlib
import struct
class CompressingFileReader(object):
'''
Wraps a file object and provides a read method that returns gzip'd data.
One warning: if read is called with a small value, the data returned may
be bigger than the value. In this case, the "compressed" data will be
bigger than the original data. To solve this, use a bigger read buffer.
An example use case:
Given an uncompressed file on disk, provide a way to read compressed data
without buffering the entire file data in memory. Using this class, an
uncompressed log file could be uploaded as compressed data with chunked
transfer encoding.
gzip header and footer code taken from the python stdlib gzip module
:param file_obj: File object to read from
:param compresslevel: compression level
'''
def __init__(self, file_obj, compresslevel=9):
self._f = file_obj
self._compressor = zlib.compressobj(compresslevel,
zlib.DEFLATED,
-zlib.MAX_WBITS,
zlib.DEF_MEM_LEVEL,
0)
self.done = False
self.first = True
self.crc32 = 0
self.total_size = 0
def read(self, *a, **kw):
if self.done:
return ''
x = self._f.read(*a, **kw)
if x:
self.crc32 = zlib.crc32(x, self.crc32) & 0xffffffffL
self.total_size += len(x)
compressed = self._compressor.compress(x)
if not compressed:
compressed = self._compressor.flush(zlib.Z_SYNC_FLUSH)
else:
compressed = self._compressor.flush(zlib.Z_FINISH)
crc32 = struct.pack("<L", self.crc32 & 0xffffffffL)
size = struct.pack("<L", self.total_size & 0xffffffffL)
footer = crc32 + size
compressed += footer
self.done = True
if self.first:
self.first = False
header = '\037\213\010\000\000\000\000\000\002\377'
compressed = header + compressed
return compressed
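
A usage sketch for the reader above, streaming an uncompressed log as gzip'd
chunks without buffering the whole file in memory (the path is hypothetical):

    compressed_size = 0
    with open('/var/log/swift/hourly/2010080412', 'rb') as raw:
        reader = CompressingFileReader(raw)
        while True:
            # each read returns a piece of one valid gzip stream; the final
            # piece carries the gzip footer (crc32 and uncompressed size)
            chunk = reader.read(65536)
            if not chunk:
                break
            compressed_size += len(chunk)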


@ -34,12 +34,15 @@ class Daemon(object):
"""Override this to run forever"""
raise NotImplementedError('run_forever not implemented')
def run(self, once=False):
def run(self, once=False, capture_stdout=True, capture_stderr=True):
"""Run the daemon"""
# log uncaught exceptions
sys.excepthook = lambda *exc_info: \
self.logger.critical('UNCAUGHT EXCEPTION', exc_info=exc_info)
sys.stdout = sys.stderr = utils.LoggerFileObject(self.logger)
if capture_stdout:
sys.stdout = utils.LoggerFileObject(self.logger)
if capture_stderr:
sys.stderr = utils.LoggerFileObject(self.logger)
utils.drop_privileges(self.conf.get('user', 'swift'))
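
With the new flags, a one-shot command-line run can keep its own output on the
console; for example (a hedged illustration, not taken from this commit's
scripts):

    # run a single pass but leave stdout untouched, so anything the tool
    # prints still reaches the terminal instead of the logger
    AccountStat(stats_conf).run(once=True, capture_stdout=False)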


@ -230,6 +230,62 @@ def direct_get_object(node, part, account, container, obj, conn_timeout=5,
return resp_headers, object_body
def direct_put_object(node, part, account, container, name, contents,
content_length=None, etag=None, content_type=None,
headers=None, conn_timeout=5, response_timeout=15,
resp_chunk_size=None):
"""
Put object directly to the object server.
:param node: node dictionary from the ring
:param part: partition the container is on
:param account: account name
:param container: container name
:param name: object name
:param contents: a string to read object data from
:param content_length: value to send as content-length header
:param etag: etag of contents
:param content_type: value to send as content-type header
:param headers: additional headers to include in the request
:param conn_timeout: timeout in seconds for establishing the connection
:param response_timeout: timeout in seconds for getting the response
:param chunk_size: if defined, chunk size of data to send.
:returns: etag from the server response
"""
# TODO: Add chunked puts
path = '/%s/%s/%s' % (account, container, name)
if headers is None:
headers = {}
if etag:
headers['ETag'] = etag.strip('"')
if content_length is not None:
headers['Content-Length'] = str(content_length)
if content_type is not None:
headers['Content-Type'] = content_type
else:
headers['Content-Type'] = 'application/octet-stream'
if not contents:
headers['Content-Length'] = '0'
headers['X-Timestamp'] = normalize_timestamp(time())
with Timeout(conn_timeout):
conn = http_connect(node['ip'], node['port'], node['device'], part,
'PUT', path, headers=headers)
conn.send(contents)
with Timeout(response_timeout):
resp = conn.getresponse()
resp.read()
if resp.status < 200 or resp.status >= 300:
raise ClientException(
'Object server %s:%s direct PUT %s gave status %s' %
(node['ip'], node['port'],
repr('/%s/%s%s' % (node['device'], part, path)),
resp.status),
http_host=node['ip'], http_port=node['port'],
http_device=node['device'], http_status=resp.status,
http_reason=resp.reason)
return resp.getheader('etag').strip('"')
def direct_delete_object(node, part, account, container, obj,
conn_timeout=5, response_timeout=15, headers={}):
"""


@ -0,0 +1,210 @@
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import webob
from urllib import quote, unquote
from json import loads as json_loads
from swift.common.compressing_file_reader import CompressingFileReader
from swift.proxy.server import BaseApplication
class MemcacheStub(object):
def get(self, *a, **kw):
return None
def set(self, *a, **kw):
return None
def incr(self, *a, **kw):
return 0
def delete(self, *a, **kw):
return None
def set_multi(self, *a, **kw):
return None
def get_multi(self, *a, **kw):
return []
class InternalProxy(object):
"""
Set up a private instance of a proxy server that allows normal requests
to be made without having to actually send the request to the proxy.
This also doesn't log the requests to the normal proxy logs.
:param proxy_server_conf: proxy server configuration dictionary
:param logger: logger to log requests to
:param retries: number of times to retry each request
"""
def __init__(self, proxy_server_conf=None, logger=None, retries=0):
self.upload_app = BaseApplication(proxy_server_conf,
memcache=MemcacheStub(),
logger=logger)
self.retries = retries
def upload_file(self, source_file, account, container, object_name,
compress=True, content_type='application/x-gzip',
etag=None):
"""
Upload a file to cloud files.
:param source_file: path to or file like object to upload
:param account: account to upload to
:param container: container to upload to
:param object_name: name of object being uploaded
:param compress: if True, compresses object as it is uploaded
:param content_type: content-type of object
:param etag: etag for object to check successful upload
:returns: True if successful, False otherwise
"""
target_name = '/v1/%s/%s/%s' % (account, container, object_name)
# create the container
if not self.create_container(account, container):
return False
# upload the file to the account
req = webob.Request.blank(target_name,
environ={'REQUEST_METHOD': 'PUT'},
headers={'Transfer-Encoding': 'chunked'})
if compress:
if hasattr(source_file, 'read'):
compressed_file = CompressingFileReader(source_file)
else:
compressed_file = CompressingFileReader(
open(source_file, 'rb'))
req.body_file = compressed_file
else:
if not hasattr(source_file, 'read'):
source_file = open(source_file, 'rb')
req.body_file = source_file
req.account = account
req.content_type = content_type
req.content_length = None # to make sure we send chunked data
if etag:
req.etag = etag
resp = self.upload_app.handle_request(
self.upload_app.update_request(req))
tries = 1
while (resp.status_int < 200 or resp.status_int > 299) \
and tries <= self.retries:
resp = self.upload_app.handle_request(
self.upload_app.update_request(req))
tries += 1
if not (200 <= resp.status_int < 300):
return False
return True
def get_object(self, account, container, object_name):
"""
Get object.
:param account: account name object is in
:param container: container name object is in
:param object_name: name of object to get
:returns: iterator for object data
"""
req = webob.Request.blank('/v1/%s/%s/%s' %
(account, container, object_name),
environ={'REQUEST_METHOD': 'GET'})
req.account = account
resp = self.upload_app.handle_request(
self.upload_app.update_request(req))
tries = 1
while (resp.status_int < 200 or resp.status_int > 299) \
and tries <= self.retries:
resp = self.upload_app.handle_request(
self.upload_app.update_request(req))
tries += 1
return resp.status_int, resp.app_iter
def create_container(self, account, container):
"""
Create container.
:param account: account name to put the container in
:param container: container name to create
:returns: True if successful, otherwise False
"""
req = webob.Request.blank('/v1/%s/%s' % (account, container),
environ={'REQUEST_METHOD': 'PUT'})
req.account = account
resp = self.upload_app.handle_request(
self.upload_app.update_request(req))
tries = 1
while (resp.status_int < 200 or resp.status_int > 299) \
and tries <= self.retries:
resp = self.upload_app.handle_request(
self.upload_app.update_request(req))
tries += 1
return 200 <= resp.status_int < 300
def get_container_list(self, account, container, marker=None, limit=None,
prefix=None, delimiter=None, full_listing=True):
"""
Get container listing.
:param account: account name for the container
:param container: container name to get the listing of
:param marker: marker query
:param limit: limit to query
:param prefix: prefix query
:param delimiter: delimiter for query
:param full_listing: if True, make enough requests to get all listings
:returns: list of objects
"""
if full_listing:
rv = []
listing = self.get_container_list(account, container, marker,
limit, prefix, delimiter, full_listing=False)
while listing:
rv.extend(listing)
if not delimiter:
marker = listing[-1]['name']
else:
marker = listing[-1].get('name', listing[-1].get('subdir'))
listing = self.get_container_list(account, container, marker,
limit, prefix, delimiter, full_listing=False)
return rv
path = '/v1/%s/%s' % (account, container)
qs = 'format=json'
if marker:
qs += '&marker=%s' % quote(marker)
if limit:
qs += '&limit=%d' % limit
if prefix:
qs += '&prefix=%s' % quote(prefix)
if delimiter:
qs += '&delimiter=%s' % quote(delimiter)
path += '?%s' % qs
req = webob.Request.blank(path, environ={'REQUEST_METHOD': 'GET'})
req.account = account
resp = self.upload_app.handle_request(
self.upload_app.update_request(req))
tries = 1
while (resp.status_int < 200 or resp.status_int > 299) \
and tries <= self.retries:
resp = self.upload_app.handle_request(
self.upload_app.update_request(req))
tries += 1
if resp.status_int == 204:
return []
if 200 <= resp.status_int < 300:
return json_loads(resp.body)
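
A sketch of how InternalProxy might be driven (the config path, account hash,
and log name are placeholders, and loading the proxy config through
paste.deploy is an assumption on my part):

    from paste.deploy import appconfig
    from swift.common.internal_proxy import InternalProxy

    proxy_conf = appconfig('config:/etc/swift/proxy-server.conf',
                           name='proxy-server')
    proxy = InternalProxy(proxy_conf)
    # upload an hourly access log, compressing it on the way in
    uploaded = proxy.upload_file('/var/log/swift/hourly/2010080412',
                                 '<stats-account-hash>', 'log_data',
                                 '2010080412')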


@ -553,30 +553,42 @@ def cache_from_env(env):
return item_from_env(env, 'swift.cache')
def readconf(conf, section_name, log_name=None):
def readconf(conf, section_name=None, log_name=None, defaults=None):
"""
Read config file and return config items as a dict
:param conf: path to config file
:param section_name: config section to read
:param section_name: config section to read (will return all sections if
not defined)
:param log_name: name to be used with logging (will use section_name if
not defined)
:param defaults: dict of default values to pre-populate the config with
:returns: dict of config items
"""
c = ConfigParser()
if defaults is None:
defaults = {}
c = ConfigParser(defaults)
if not c.read(conf):
print "Unable to read config file %s" % conf
sys.exit(1)
if c.has_section(section_name):
conf = dict(c.items(section_name))
else:
print "Unable to find %s config section in %s" % (section_name, conf)
sys.exit(1)
if "log_name" not in conf:
if log_name is not None:
conf['log_name'] = log_name
if section_name:
if c.has_section(section_name):
conf = dict(c.items(section_name))
else:
conf['log_name'] = section_name
print "Unable to find %s config section in %s" % (section_name,
conf)
sys.exit(1)
if "log_name" not in conf:
if log_name is not None:
conf['log_name'] = log_name
else:
conf['log_name'] = section_name
else:
conf = {}
for s in c.sections():
conf.update({s: dict(c.items(s))})
if 'log_name' not in conf:
conf['log_name'] = log_name
return conf
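
Two call styles the reworked readconf now supports (paths are illustrative;
the second mirrors bin/swift-log-stats-collector above):

    from swift.common.utils import readconf

    # one section, pre-populated with defaults (as bin/swift-bench does)
    bench_conf = readconf('/etc/swift/bench.conf', 'bench',
                          defaults={'put_concurrency': '10'})

    # no section name: returns a dict of every section, keyed by section name
    all_conf = readconf('/etc/swift/log-processor.conf',
                        log_name='log-stats-collector')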


@ -95,7 +95,7 @@ def run_wsgi(conf_file, app_section, *args, **kwargs): # pragma: no cover
retry_until = time.time() + 30
while not sock and time.time() < retry_until:
try:
sock = listen(bind_addr)
sock = listen(bind_addr, backlog=int(conf.get('backlog', 4096)))
if 'cert_file' in conf:
sock = ssl.wrap_socket(sock, certfile=conf['cert_file'],
keyfile=conf['key_file'])

swift/stats/__init__.py Normal file


@ -0,0 +1,239 @@
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
from urllib import unquote
import copy
from swift.common.utils import split_path, get_logger
month_map = '_ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()
class AccessLogProcessor(object):
"""Transform proxy server access logs"""
def __init__(self, conf):
self.server_name = conf.get('server_name', 'proxy')
self.lb_private_ips = [x.strip() for x in \
conf.get('lb_private_ips', '').split(',')\
if x.strip()]
self.service_ips = [x.strip() for x in \
conf.get('service_ips', '').split(',')\
if x.strip()]
self.warn_percent = float(conf.get('warn_percent', '0.8'))
self.logger = get_logger(conf)
def log_line_parser(self, raw_log):
'''given a raw access log line, return a dict of the good parts'''
d = {}
try:
(_,
server,
client_ip,
lb_ip,
timestamp,
method,
request,
http_version,
code,
referrer,
user_agent,
auth_token,
bytes_in,
bytes_out,
etag,
trans_id,
headers,
processing_time) = (unquote(x) for x in raw_log[16:].split(' '))
except ValueError:
self.logger.debug('Bad line data: %s' % repr(raw_log))
return {}
if server != self.server_name:
# incorrect server name in log line
self.logger.debug('Bad server name: found "%s" expected "%s"' \
% (server, self.server_name))
return {}
(version,
account,
container_name,
object_name) = split_path(request, 2, 4, True)
if container_name is not None:
container_name = container_name.split('?', 1)[0]
if object_name is not None:
object_name = object_name.split('?', 1)[0]
account = account.split('?', 1)[0]
query = None
if '?' in request:
request, query = request.split('?', 1)
args = query.split('&')
# Count each query argument. This is used later to aggregate
# the number of format, prefix, etc. queries.
for q in args:
if '=' in q:
k, v = q.split('=', 1)
else:
k = q
# Certain keys will get summed in stats reporting
# (format, path, delimiter, etc.). Save a "1" here
# to indicate that this request is 1 request for
# its respective key.
d[k] = 1
d['client_ip'] = client_ip
d['lb_ip'] = lb_ip
d['method'] = method
d['request'] = request
if query:
d['query'] = query
d['http_version'] = http_version
d['code'] = code
d['referrer'] = referrer
d['user_agent'] = user_agent
d['auth_token'] = auth_token
d['bytes_in'] = bytes_in
d['bytes_out'] = bytes_out
d['etag'] = etag
d['trans_id'] = trans_id
d['processing_time'] = processing_time
day, month, year, hour, minute, second = timestamp.split('/')
d['day'] = day
month = ('%02s' % month_map.index(month)).replace(' ', '0')
d['month'] = month
d['year'] = year
d['hour'] = hour
d['minute'] = minute
d['second'] = second
d['tz'] = '+0000'
d['account'] = account
d['container_name'] = container_name
d['object_name'] = object_name
d['bytes_out'] = int(d['bytes_out'].replace('-', '0'))
d['bytes_in'] = int(d['bytes_in'].replace('-', '0'))
d['code'] = int(d['code'])
return d
def process(self, obj_stream, account, container, object_name):
'''generate hourly groupings of data from one access log file'''
hourly_aggr_info = {}
total_lines = 0
bad_lines = 0
for line in obj_stream:
line_data = self.log_line_parser(line)
total_lines += 1
if not line_data:
bad_lines += 1
continue
account = line_data['account']
container_name = line_data['container_name']
year = line_data['year']
month = line_data['month']
day = line_data['day']
hour = line_data['hour']
bytes_out = line_data['bytes_out']
bytes_in = line_data['bytes_in']
method = line_data['method']
code = int(line_data['code'])
object_name = line_data['object_name']
client_ip = line_data['client_ip']
op_level = None
if not container_name:
op_level = 'account'
elif container_name and not object_name:
op_level = 'container'
elif object_name:
op_level = 'object'
aggr_key = (account, year, month, day, hour)
d = hourly_aggr_info.get(aggr_key, {})
if line_data['lb_ip'] in self.lb_private_ips:
source = 'service'
else:
source = 'public'
if line_data['client_ip'] in self.service_ips:
source = 'service'
d[(source, 'bytes_out')] = d.setdefault((
source, 'bytes_out'), 0) + bytes_out
d[(source, 'bytes_in')] = d.setdefault((source, 'bytes_in'), 0) + \
bytes_in
d['format_query'] = d.setdefault('format_query', 0) + \
line_data.get('format', 0)
d['marker_query'] = d.setdefault('marker_query', 0) + \
line_data.get('marker', 0)
d['prefix_query'] = d.setdefault('prefix_query', 0) + \
line_data.get('prefix', 0)
d['delimiter_query'] = d.setdefault('delimiter_query', 0) + \
line_data.get('delimiter', 0)
path = line_data.get('path', 0)
d['path_query'] = d.setdefault('path_query', 0) + path
code = '%dxx' % (code / 100)
key = (source, op_level, method, code)
d[key] = d.setdefault(key, 0) + 1
hourly_aggr_info[aggr_key] = d
if bad_lines > (total_lines * self.warn_percent):
name = '/'.join([account, container, object_name])
self.logger.warning('I found a bunch of bad lines in %s '\
'(%d bad, %d total)' % (name, bad_lines, total_lines))
return hourly_aggr_info
def keylist_mapping(self):
source_keys = 'service public'.split()
level_keys = 'account container object'.split()
verb_keys = 'GET PUT POST DELETE HEAD COPY'.split()
code_keys = '2xx 4xx 5xx'.split()
keylist_mapping = {
# <db key> : <row key> or <set of row keys>
'service_bw_in': ('service', 'bytes_in'),
'service_bw_out': ('service', 'bytes_out'),
'public_bw_in': ('public', 'bytes_in'),
'public_bw_out': ('public', 'bytes_out'),
'account_requests': set(),
'container_requests': set(),
'object_requests': set(),
'service_request': set(),
'public_request': set(),
'ops_count': set(),
}
for verb in verb_keys:
keylist_mapping[verb] = set()
for code in code_keys:
keylist_mapping[code] = set()
for source in source_keys:
for level in level_keys:
for verb in verb_keys:
for code in code_keys:
keylist_mapping['account_requests'].add(
(source, 'account', verb, code))
keylist_mapping['container_requests'].add(
(source, 'container', verb, code))
keylist_mapping['object_requests'].add(
(source, 'object', verb, code))
keylist_mapping['service_request'].add(
('service', level, verb, code))
keylist_mapping['public_request'].add(
('public', level, verb, code))
keylist_mapping[verb].add(
(source, level, verb, code))
keylist_mapping[code].add(
(source, level, verb, code))
keylist_mapping['ops_count'].add(
(source, level, verb, code))
return keylist_mapping


@ -0,0 +1,111 @@
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import time
from paste.deploy import appconfig
import shutil
import hashlib
from swift.account.server import DATADIR as account_server_data_dir
from swift.common.db import AccountBroker
from swift.common.internal_proxy import InternalProxy
from swift.common.utils import renamer, get_logger, readconf, mkdirs
from swift.common.constraints import check_mount
from swift.common.daemon import Daemon
class AccountStat(Daemon):
"""
Extract storage stats from account databases on the account
storage nodes
"""
def __init__(self, stats_conf):
super(AccountStat, self).__init__(stats_conf)
target_dir = stats_conf.get('log_dir', '/var/log/swift')
account_server_conf_loc = stats_conf.get('account_server_conf',
'/etc/swift/account-server.conf')
server_conf = appconfig('config:%s' % account_server_conf_loc,
name='account-server')
filename_format = stats_conf['source_filename_format']
if filename_format.count('*') > 1:
raise Exception('source filename format should have at max one *')
self.filename_format = filename_format
self.target_dir = target_dir
mkdirs(self.target_dir)
self.devices = server_conf.get('devices', '/srv/node')
self.mount_check = server_conf.get('mount_check', 'true').lower() in \
('true', 't', '1', 'on', 'yes', 'y')
self.logger = get_logger(stats_conf, 'swift-account-stats-logger')
def run_once(self):
self.logger.info("Gathering account stats")
start = time.time()
self.find_and_process()
self.logger.info("Gathering account stats complete (%0.2f minutes)" %
((time.time() - start) / 60))
def find_and_process(self):
src_filename = time.strftime(self.filename_format)
working_dir = os.path.join(self.target_dir, '.stats_tmp')
shutil.rmtree(working_dir, ignore_errors=True)
mkdirs(working_dir)
tmp_filename = os.path.join(working_dir, src_filename)
hasher = hashlib.md5()
with open(tmp_filename, 'wb') as statfile:
# csv has the following columns:
# Account Name, Container Count, Object Count, Bytes Used
for device in os.listdir(self.devices):
if self.mount_check and not check_mount(self.devices, device):
self.logger.error("Device %s is not mounted, skipping." %
device)
continue
accounts = os.path.join(self.devices,
device,
account_server_data_dir)
if not os.path.exists(accounts):
self.logger.debug("Path %s does not exist, skipping." %
accounts)
continue
for root, dirs, files in os.walk(accounts, topdown=False):
for filename in files:
if filename.endswith('.db'):
db_path = os.path.join(root, filename)
broker = AccountBroker(db_path)
if not broker.is_deleted():
(account_name,
_, _, _,
container_count,
object_count,
bytes_used,
_, _) = broker.get_info()
line_data = '"%s",%d,%d,%d\n' % (
account_name, container_count,
object_count, bytes_used)
statfile.write(line_data)
hasher.update(line_data)
file_hash = hasher.hexdigest()
hash_index = src_filename.find('*')
if hash_index < 0:
# if there is no * in the target filename, the uploader probably
# won't work because we are crafting a filename that doesn't
# fit the pattern
src_filename = '_'.join([src_filename, file_hash])
else:
parts = src_filename[:hash_index], src_filename[hash_index + 1:]
src_filename = ''.join([parts[0], file_hash, parts[1]])
renamer(tmp_filename, os.path.join(self.target_dir, src_filename))
shutil.rmtree(working_dir, ignore_errors=True)

View File

@@ -0,0 +1,424 @@
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ConfigParser import ConfigParser
import zlib
import time
import datetime
import cStringIO
import collections
from paste.deploy import appconfig
import multiprocessing
import Queue
import cPickle
import hashlib
from swift.common.internal_proxy import InternalProxy
from swift.common.exceptions import ChunkReadTimeout
from swift.common.utils import get_logger, readconf
from swift.common.daemon import Daemon
class BadFileDownload(Exception):
pass
class LogProcessor(object):
"""Load plugins, process logs"""
def __init__(self, conf, logger):
if isinstance(logger, tuple):
self.logger = get_logger(*logger)
else:
self.logger = logger
self.conf = conf
self._internal_proxy = None
# load the processing plugins
self.plugins = {}
plugin_prefix = 'log-processor-'
for section in (x for x in conf if x.startswith(plugin_prefix)):
plugin_name = section[len(plugin_prefix):]
plugin_conf = conf.get(section, {})
self.plugins[plugin_name] = plugin_conf
class_path = self.plugins[plugin_name]['class_path']
import_target, class_name = class_path.rsplit('.', 1)
module = __import__(import_target, fromlist=[import_target])
klass = getattr(module, class_name)
self.plugins[plugin_name]['instance'] = klass(plugin_conf)
self.logger.debug('Loaded plugin "%s"' % plugin_name)
@property
def internal_proxy(self):
if self._internal_proxy is None:
stats_conf = self.conf.get('log-processor', {})
proxy_server_conf_loc = stats_conf.get('proxy_server_conf',
'/etc/swift/proxy-server.conf')
proxy_server_conf = appconfig(
'config:%s' % proxy_server_conf_loc,
name='proxy-server')
self._internal_proxy = InternalProxy(proxy_server_conf,
self.logger,
retries=3)
return self._internal_proxy
def process_one_file(self, plugin_name, account, container, object_name):
self.logger.info('Processing %s/%s/%s with plugin "%s"' % (account,
container,
object_name,
plugin_name))
# get an iter of the object data
compressed = object_name.endswith('.gz')
stream = self.get_object_data(account, container, object_name,
compressed=compressed)
# look up the correct plugin and send the stream to it
return self.plugins[plugin_name]['instance'].process(stream,
account,
container,
object_name)
def get_data_list(self, start_date=None, end_date=None,
listing_filter=None):
total_list = []
for plugin_name, data in self.plugins.items():
account = data['swift_account']
container = data['container_name']
listing = self.get_container_listing(account,
container,
start_date,
end_date)
for object_name in listing:
# The items in this list end up being passed as positional
# parameters to process_one_file.
x = (plugin_name, account, container, object_name)
if not listing_filter or x not in listing_filter:
total_list.append(x)
return total_list
def get_container_listing(self, swift_account, container_name,
start_date=None, end_date=None,
listing_filter=None):
'''
Get a container listing, filtered by start_date, end_date, and
listing_filter. Dates, if given, should be in YYYYMMDDHH format
'''
search_key = None
if start_date is not None:
date_parts = []
try:
year, start_date = start_date[:4], start_date[4:]
if year:
date_parts.append(year)
month, start_date = start_date[:2], start_date[2:]
if month:
date_parts.append(month)
day, start_date = start_date[:2], start_date[2:]
if day:
date_parts.append(day)
hour, start_date = start_date[:2], start_date[2:]
if hour:
date_parts.append(hour)
except IndexError:
pass
else:
search_key = '/'.join(date_parts)
end_key = None
if end_date is not None:
date_parts = []
try:
year, end_date = end_date[:4], end_date[4:]
if year:
date_parts.append(year)
month, end_date = end_date[:2], end_date[2:]
if month:
date_parts.append(month)
day, end_date = end_date[:2], end_date[2:]
if day:
date_parts.append(day)
hour, end_date = end_date[:2], end_date[2:]
if hour:
date_parts.append(hour)
except IndexError:
pass
else:
end_key = '/'.join(date_parts)
container_listing = self.internal_proxy.get_container_list(
swift_account,
container_name,
marker=search_key)
results = []
if container_listing is not None:
if listing_filter is None:
listing_filter = set()
for item in container_listing:
name = item['name']
if end_key and name > end_key:
break
if name not in listing_filter:
results.append(name)
return results
def get_object_data(self, swift_account, container_name, object_name,
compressed=False):
'''reads an object and yields its lines'''
code, o = self.internal_proxy.get_object(swift_account,
container_name,
object_name)
if code < 200 or code >= 300:
return
last_part = ''
# magic in the following zlib.decompressobj argument is courtesy of
# Python decompressing gzip chunk-by-chunk
# http://stackoverflow.com/questions/2423866
d = zlib.decompressobj(16 + zlib.MAX_WBITS)
try:
for chunk in o:
if compressed:
try:
chunk = d.decompress(chunk)
except zlib.error:
self.logger.debug('Bad compressed data for %s/%s/%s' %
(swift_account,
container_name,
object_name))
raise BadFileDownload() # bad compressed data
parts = chunk.split('\n')
parts[0] = last_part + parts[0]
for part in parts[:-1]:
yield part
last_part = parts[-1]
if last_part:
yield last_part
except ChunkReadTimeout:
raise BadFileDownload()
def generate_keylist_mapping(self):
keylist = {}
for plugin in self.plugins:
plugin_keylist = self.plugins[plugin]['instance'].keylist_mapping()
if not plugin_keylist:
continue
for k, v in plugin_keylist.items():
o = keylist.get(k)
if o:
if isinstance(o, set):
if isinstance(v, set):
o.update(v)
else:
o.update([v])
else:
o = set(o)
if isinstance(v, set):
o.update(v)
else:
o.update([v])
else:
o = v
keylist[k] = o
return keylist
class LogProcessorDaemon(Daemon):
"""
Gather raw log data and farm out the processing to generate a CSV that is
uploaded to swift.
"""
def __init__(self, conf):
c = conf.get('log-processor')
super(LogProcessorDaemon, self).__init__(c)
self.total_conf = conf
self.logger = get_logger(c)
self.log_processor = LogProcessor(conf, self.logger)
self.lookback_hours = int(c.get('lookback_hours', '120'))
self.lookback_window = int(c.get('lookback_window',
str(self.lookback_hours)))
self.log_processor_account = c['swift_account']
self.log_processor_container = c.get('container_name',
'log_processing_data')
self.worker_count = int(c.get('worker_count', '1'))
def run_once(self):
self.logger.info("Beginning log processing")
start = time.time()
if self.lookback_hours == 0:
lookback_start = None
lookback_end = None
else:
delta_hours = datetime.timedelta(hours=self.lookback_hours)
lookback_start = datetime.datetime.now() - delta_hours
lookback_start = lookback_start.strftime('%Y%m%d%H')
if self.lookback_window == 0:
lookback_end = None
else:
delta_window = datetime.timedelta(hours=self.lookback_window)
lookback_end = datetime.datetime.now() - \
delta_hours + \
delta_window
lookback_end = lookback_end.strftime('%Y%m%d%H')
self.logger.debug('lookback_start: %s' % lookback_start)
self.logger.debug('lookback_end: %s' % lookback_end)
try:
# Note: this file (or data set) will grow without bound.
# In practice, if it becomes a problem (say, after many months of
# running), one could manually prune the file to remove older
# entries. Automatically pruning on each run could be dangerous.
# There is not a good way to determine when an old entry should be
# pruned (lookback_hours could be set to anything and could change)
processed_files_stream = self.log_processor.get_object_data(
self.log_processor_account,
self.log_processor_container,
'processed_files.pickle.gz',
compressed=True)
buf = '\n'.join(x for x in processed_files_stream)
if buf:
already_processed_files = cPickle.loads(buf)
else:
already_processed_files = set()
except Exception:
already_processed_files = set()
self.logger.debug('found %d processed files' % \
len(already_processed_files))
logs_to_process = self.log_processor.get_data_list(lookback_start,
lookback_end,
already_processed_files)
self.logger.info('loaded %d files to process' % len(logs_to_process))
if not logs_to_process:
self.logger.info("Log processing done (%0.2f minutes)" %
((time.time() - start) / 60))
return
# map
processor_args = (self.total_conf, self.logger)
results = multiprocess_collate(processor_args, logs_to_process,
self.worker_count)
#reduce
aggr_data = {}
processed_files = already_processed_files
for item, data in results:
# since item contains the plugin and the log name, new plugins will
# "reprocess" the file and the results will be in the final csv.
processed_files.add(item)
for k, d in data.items():
existing_data = aggr_data.get(k, {})
for i, j in d.items():
current = existing_data.get(i, 0)
# merging strategy for key collisions is addition
# processing plugins need to realize this
existing_data[i] = current + j
aggr_data[k] = existing_data
# group
# reduce a large number of keys in aggr_data[k] to a small number of
# output keys
keylist_mapping = self.log_processor.generate_keylist_mapping()
final_info = collections.defaultdict(dict)
for account, data in aggr_data.items():
for key, mapping in keylist_mapping.items():
if isinstance(mapping, (list, set)):
value = 0
for k in mapping:
try:
value += data[k]
except KeyError:
pass
else:
try:
value = data[mapping]
except KeyError:
value = 0
final_info[account][key] = value
# output
sorted_keylist_mapping = sorted(keylist_mapping)
columns = 'data_ts,account,' + ','.join(sorted_keylist_mapping)
out_buf = [columns]
for (account, year, month, day, hour), d in final_info.items():
data_ts = '%s/%s/%s %s:00:00' % (year, month, day, hour)
row = [data_ts]
row.append('%s' % account)
for k in sorted_keylist_mapping:
row.append('%s' % d[k])
out_buf.append(','.join(row))
out_buf = '\n'.join(out_buf)
h = hashlib.md5(out_buf).hexdigest()
upload_name = time.strftime('%Y/%m/%d/%H/') + '%s.csv.gz' % h
f = cStringIO.StringIO(out_buf)
self.log_processor.internal_proxy.upload_file(f,
self.log_processor_account,
self.log_processor_container,
upload_name)
# cleanup
s = cPickle.dumps(processed_files, cPickle.HIGHEST_PROTOCOL)
f = cStringIO.StringIO(s)
self.log_processor.internal_proxy.upload_file(f,
self.log_processor_account,
self.log_processor_container,
'processed_files.pickle.gz')
self.logger.info("Log processing done (%0.2f minutes)" %
((time.time() - start) / 60))
def multiprocess_collate(processor_args, logs_to_process, worker_count):
'''yield hourly data from logs_to_process'''
results = []
in_queue = multiprocessing.Queue()
out_queue = multiprocessing.Queue()
for _ in range(worker_count):
p = multiprocessing.Process(target=collate_worker,
args=(processor_args,
in_queue,
out_queue))
p.start()
results.append(p)
for x in logs_to_process:
in_queue.put(x)
for _ in range(worker_count):
in_queue.put(None)
count = 0
while True:
try:
item, data = out_queue.get_nowait()
count += 1
if data:
yield item, data
if count >= len(logs_to_process):
# this implies that one result will come from every request
break
except Queue.Empty:
time.sleep(.1)
for r in results:
r.join()
def collate_worker(processor_args, in_queue, out_queue):
'''worker process for multiprocess_collate'''
p = LogProcessor(*processor_args)
while True:
try:
item = in_queue.get_nowait()
if item is None:
break
except Queue.Empty:
time.sleep(.1)
else:
ret = p.process_one_file(*item)
out_queue.put((item, ret))
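For reference, a hedged sketch of the configuration shape LogProcessor and LogProcessorDaemon expect (the same nested-dict form readconf produces and the unit tests below build by hand). Account names, container names, and paths are placeholders, not values defined by this commit.

# Illustrative configuration only; every value below is a placeholder.
conf = {
    'log-processor': {
        'swift_account': 'AUTH_stats',             # required by the daemon
        'container_name': 'log_processing_data',   # default used in __init__
        'proxy_server_conf': '/etc/swift/proxy-server.conf',
        'worker_count': '4',
        'lookback_hours': '120',
    },
    'log-processor-stats': {
        'class_path': 'swift.stats.stats_processor.StatsLogProcessor',
        'swift_account': 'AUTH_stats',
        'container_name': 'account_stats',         # hypothetical container
    },
}
# LogProcessorDaemon(conf).run_once() would collate any unprocessed objects
# listed in the plugin's account/container and upload one CSV of the totals.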

170
swift/stats/log_uploader.py Normal file
View File

@@ -0,0 +1,170 @@
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import with_statement
import os
import hashlib
import time
import gzip
import glob
from paste.deploy import appconfig
from swift.common.internal_proxy import InternalProxy
from swift.common.daemon import Daemon
from swift.common import utils
class LogUploader(Daemon):
'''
Given a local directory, a swift account, and a container name, LogUploader
will upload all files in the local directory to the given account/
container. All but the newest files will be uploaded, and the files' md5
sum will be computed. The hash is used to prevent duplicate data from
being uploaded multiple times in different files (ex: log lines). Since
the hash is computed, it is also used as the uploaded object's etag to
ensure data integrity.
Note that after the file is successfully uploaded, it will be unlinked.
The given proxy server config is used to instantiate a proxy server for
the object uploads.
'''
def __init__(self, uploader_conf, plugin_name):
super(LogUploader, self).__init__(uploader_conf)
log_dir = uploader_conf.get('log_dir', '/var/log/swift/')
swift_account = uploader_conf['swift_account']
container_name = uploader_conf['container_name']
source_filename_format = uploader_conf['source_filename_format']
proxy_server_conf_loc = uploader_conf.get('proxy_server_conf',
'/etc/swift/proxy-server.conf')
proxy_server_conf = appconfig('config:%s' % proxy_server_conf_loc,
name='proxy-server')
new_log_cutoff = int(uploader_conf.get('new_log_cutoff', '7200'))
unlink_log = uploader_conf.get('unlink_log', 'True').lower() in \
('true', 'on', '1', 'yes')
self.unlink_log = unlink_log
self.new_log_cutoff = new_log_cutoff
if not log_dir.endswith('/'):
log_dir = log_dir + '/'
self.log_dir = log_dir
self.swift_account = swift_account
self.container_name = container_name
self.filename_format = source_filename_format
self.internal_proxy = InternalProxy(proxy_server_conf)
log_name = 'swift-log-uploader-%s' % plugin_name
self.logger = utils.get_logger(uploader_conf, log_name)
def run_once(self):
self.logger.info("Uploading logs")
start = time.time()
self.upload_all_logs()
self.logger.info("Uploading logs complete (%0.2f minutes)" %
((time.time() - start) / 60))
def upload_all_logs(self):
i = [(self.filename_format.index(c), c) for c in '%Y %m %d %H'.split()]
i.sort()
year_offset = month_offset = day_offset = hour_offset = None
base_offset = len(self.log_dir)
for start, c in i:
offset = base_offset + start
if c == '%Y':
year_offset = offset, offset + 4
# Add in the difference between len(%Y) and the expanded
# version of %Y (????). This makes sure the codes after this
# one will align properly in the final filename.
base_offset += 2
elif c == '%m':
month_offset = offset, offset + 2
elif c == '%d':
day_offset = offset, offset + 2
elif c == '%H':
hour_offset = offset, offset + 2
if not (year_offset and month_offset and day_offset and hour_offset):
# don't have all the parts, can't upload anything
return
glob_pattern = self.filename_format
glob_pattern = glob_pattern.replace('%Y', '????', 1)
glob_pattern = glob_pattern.replace('%m', '??', 1)
glob_pattern = glob_pattern.replace('%d', '??', 1)
glob_pattern = glob_pattern.replace('%H', '??', 1)
filelist = glob.iglob(os.path.join(self.log_dir, glob_pattern))
current_hour = int(time.strftime('%H'))
today = int(time.strftime('%Y%m%d'))
self.internal_proxy.create_container(self.swift_account,
self.container_name)
for filename in filelist:
try:
# From the filename, we need to derive the year, month, day,
# and hour for the file. These values are used in the uploaded
# object's name, so they should be a reasonably accurate
# representation of the time for which the data in the file was
# collected. The file's last modified time is not a reliable
# representation of the data in the file. For example, an old
# log file (from hour A) may be uploaded or moved into the
# log_dir in hour Z. The file's modified time will be for hour
# Z, and therefore the object's name in the system will not
# represent the data in it.
# If the filename doesn't match the format, it shouldn't be
# uploaded.
year = filename[slice(*year_offset)]
month = filename[slice(*month_offset)]
day = filename[slice(*day_offset)]
hour = filename[slice(*hour_offset)]
except IndexError:
# unexpected filename format, move on
self.logger.error("Unexpected log: %s" % filename)
continue
if ((time.time() - os.stat(filename).st_mtime) <
self.new_log_cutoff):
# don't process very new logs
self.logger.debug(
"Skipping log: %s (< %d seconds old)" % (filename,
self.new_log_cutoff))
continue
self.upload_one_log(filename, year, month, day, hour)
def upload_one_log(self, filename, year, month, day, hour):
if os.path.getsize(filename) == 0:
self.logger.debug("Log %s is 0 length, skipping" % filename)
return
self.logger.debug("Processing log: %s" % filename)
filehash = hashlib.md5()
already_compressed = filename.endswith('.gz')
opener = gzip.open if already_compressed else open
f = opener(filename, 'rb')
try:
for line in f:
# filter out bad lines here?
filehash.update(line)
finally:
f.close()
filehash = filehash.hexdigest()
# By adding a hash to the filename, we ensure that uploaded files
# have unique filenames and protect against uploading one file
# more than one time. By using md5, we get an etag for free.
target_filename = '/'.join([year, month, day, hour, filehash + '.gz'])
if self.internal_proxy.upload_file(filename,
self.swift_account,
self.container_name,
target_filename,
compress=(not already_compressed)):
self.logger.debug("Uploaded log %s to %s" %
(filename, target_filename))
if self.unlink_log:
os.unlink(filename)
else:
self.logger.error("ERROR: Upload of log %s failed!" % filename)

View File

@@ -0,0 +1,68 @@
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from swift.common.utils import get_logger
class StatsLogProcessor(object):
"""Transform account storage stat logs"""
def __init__(self, conf):
self.logger = get_logger(conf)
def process(self, obj_stream, account, container, object_name):
'''generate hourly groupings of data from one stats log file'''
account_totals = {}
year, month, day, hour, _ = object_name.split('/')
for line in obj_stream:
if not line:
continue
try:
(account,
container_count,
object_count,
bytes_used) = line.split(',')
except (IndexError, ValueError):
# bad line data
self.logger.debug('Bad line data: %s' % repr(line))
continue
account = account.strip('"')
container_count = int(container_count.strip('"'))
object_count = int(object_count.strip('"'))
bytes_used = int(bytes_used.strip('"'))
aggr_key = (account, year, month, day, hour)
d = account_totals.get(aggr_key, {})
d['replica_count'] = d.setdefault('replica_count', 0) + 1
d['container_count'] = d.setdefault('container_count', 0) + \
container_count
d['object_count'] = d.setdefault('object_count', 0) + \
object_count
d['bytes_used'] = d.setdefault('bytes_used', 0) + \
bytes_used
account_totals[aggr_key] = d
return account_totals
def keylist_mapping(self):
'''
returns a dictionary of final keys mapped to source keys
'''
keylist_mapping = {
# <db key> : <row key> or <set of row keys>
'bytes_used': 'bytes_used',
'container_count': 'container_count',
'object_count': 'object_count',
'replica_count': 'replica_count',
}
return keylist_mapping
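To make the hourly grouping concrete, a small hedged sketch of running one replica's stats line through process(); the account and container names are placeholders, and the expected result mirrors the unit test further down.

# Illustrative only: one CSV line grouped under the hour in the object name.
from swift.stats.stats_processor import StatsLogProcessor

processor = StatsLogProcessor({})
totals = processor.process(['"AUTH_test",1,2,3'],
                           'AUTH_stats', 'account_stats',
                           '2010/10/13/12/stats.csv')
# totals == {('AUTH_test', '2010', '10', '13', '12'):
#            {'replica_count': 1, 'container_count': 1,
#             'object_count': 2, 'bytes_used': 3}}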

View File

@@ -0,0 +1,32 @@
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from swift.account import replicator
class TestReplicator(unittest.TestCase):
"""
swift.account.replicator is currently just a subclass with some class
variables overridden, but at least this test stub will ensure proper Python
syntax.
"""
def test_placeholder(self):
pass
if __name__ == '__main__':
unittest.main()

View File

@@ -67,25 +67,33 @@ def mock_http_connect(response, headers=None, with_exc=False):
self.headers = headers
if self.headers is None:
self.headers = {}
def getresponse(self):
if self.with_exc:
raise Exception('test')
return self
def getheader(self, header):
return self.headers[header]
def read(self, amt=None):
return ''
def close(self):
return
return lambda *args, **kwargs: FakeConn(response, headers, with_exc)
class Logger(object):
def __init__(self):
self.error_value = None
self.exception_value = None
def error(self, msg, *args, **kwargs):
self.error_value = (msg, args, kwargs)
def exception(self, msg, *args, **kwargs):
_, exc, _ = sys.exc_info()
self.exception_value = (msg,
@@ -99,7 +107,7 @@ class FakeApp(object):
def __call__(self, env, start_response):
self.i_was_called = True
req = Request(env)
req = Request.blank('', environ=env)
if 'swift.authorize' in env:
resp = env['swift.authorize'](req)
if resp:
@@ -110,6 +118,7 @@ class FakeApp(object):
def start_response(*args):
pass
class TestAuth(unittest.TestCase):
def setUp(self):
@@ -418,6 +427,5 @@ class TestAuth(unittest.TestCase):
self.assert_(resp.startswith('403'), resp)
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# TODO: Tests
import unittest
from swift.common import bench
class TestBench(unittest.TestCase):
def test_placeholder(self):
pass
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,34 @@
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Tests for swift.common.compressing_file_reader """
import unittest
import cStringIO
from swift.common.compressing_file_reader import CompressingFileReader
class TestCompressingFileReader(unittest.TestCase):
def test_read(self):
plain = 'obj\ndata'
s = cStringIO.StringIO(plain)
expected = '\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\xff\xcaO\xca\xe2JI,'\
'I\x04\x00\x00\x00\xff\xff\x03\x00P(\xa8\x1f\x08\x00\x00'\
'\x00'
x = CompressingFileReader(s)
compressed = ''.join(iter(lambda: x.read(), ''))
self.assertEquals(compressed, expected)
self.assertEquals(x.read(), '')

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# TODO: Tests
import unittest
from swift.common import daemon
class TestDaemon(unittest.TestCase):
def test_placeholder(self):
pass
if __name__ == '__main__':
unittest.main()

View File

@@ -17,7 +17,10 @@
import unittest
class TestAuditor(unittest.TestCase):
from swift.common import direct_client
class TestDirectClient(unittest.TestCase):
def test_placeholder(self):
pass

View File

@@ -18,7 +18,8 @@
import unittest
from swift.common import exceptions
class TestAuditor(unittest.TestCase):
class TestExceptions(unittest.TestCase):
def test_placeholder(self):
pass

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# TODO: Tests
import unittest
from swift.common import internal_proxy
class TestInternalProxy(unittest.TestCase):
def test_placeholder(self):
pass
if __name__ == '__main__':
unittest.main()

View File

@@ -247,5 +247,33 @@ class TestUtils(unittest.TestCase):
self.assert_(callable(
utils.load_libc_function('some_not_real_function')))
def test_readconf(self):
conf = '''[section1]
foo = bar
[section2]
log_name = yarr'''
f = open('/tmp/test', 'wb')
f.write(conf)
f.close()
result = utils.readconf('/tmp/test')
expected = {'log_name': None,
'section1': {'foo': 'bar'},
'section2': {'log_name': 'yarr'}}
self.assertEquals(result, expected)
result = utils.readconf('/tmp/test', 'section1')
expected = {'log_name': 'section1', 'foo': 'bar'}
self.assertEquals(result, expected)
result = utils.readconf('/tmp/test', 'section2').get('log_name')
expected = 'yarr'
self.assertEquals(result, expected)
result = utils.readconf('/tmp/test', 'section1', log_name='foo').get('log_name')
expected = 'foo'
self.assertEquals(result, expected)
result = utils.readconf('/tmp/test', 'section1', defaults={'bar': 'baz'})
expected = {'log_name': 'section1', 'foo': 'bar', 'bar': 'baz'}
self.assertEquals(result, expected)
os.unlink('/tmp/test')
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,32 @@
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from swift.container import replicator
class TestReplicator(unittest.TestCase):
"""
swift.container.replicator is currently just a subclass with some class
variables overridden, but at least this test stub will ensure proper Python
syntax.
"""
def test_placeholder(self):
pass
if __name__ == '__main__':
unittest.main()

View File

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# TODO: Tests
import unittest
from swift.stats import access_processor
class TestAccessProcessor(unittest.TestCase):
def test_placeholder(self):
pass
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# TODO: Tests
import unittest
from swift.stats import account_stats
class TestAccountStats(unittest.TestCase):
def test_placeholder(self):
pass
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,227 @@
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from swift.stats import log_processor
class DumbLogger(object):
def __getattr__(self, n):
return self.foo
def foo(self, *a, **kw):
pass
class DumbInternalProxy(object):
def get_container_list(self, account, container, marker=None):
n = '2010/03/14/13/obj1'
if marker is None or n > marker:
return [{'name': n}]
else:
return []
def get_object(self, account, container, object_name):
code = 200
if object_name.endswith('.gz'):
# same data as below, compressed with gzip -9
def data():
yield '\x1f\x8b\x08'
yield '\x08"\xd79L'
yield '\x02\x03te'
yield 'st\x00\xcbO'
yield '\xca\xe2JI,I'
yield '\xe4\x02\x00O\xff'
yield '\xa3Y\t\x00\x00\x00'
else:
def data():
yield 'obj\n'
yield 'data'
return code, data()
class TestLogProcessor(unittest.TestCase):
access_test_line = 'Jul 9 04:14:30 saio proxy 1.2.3.4 4.5.6.7 '\
'09/Jul/2010/04/14/30 GET '\
'/v1/acct/foo/bar?format=json&foo HTTP/1.0 200 - '\
'curl tk4e350daf-9338-4cc6-aabb-090e49babfbd '\
'6 95 - txfa431231-7f07-42fd-8fc7-7da9d8cc1f90 - 0.0262'
stats_test_line = 'account,1,2,3'
proxy_config = {'log-processor': {
}
}
def test_access_log_line_parser(self):
access_proxy_config = self.proxy_config.copy()
access_proxy_config.update({
'log-processor-access': {
'source_filename_format':'%Y%m%d%H*',
'class_path':
'swift.stats.access_processor.AccessLogProcessor'
}})
p = log_processor.LogProcessor(access_proxy_config, DumbLogger())
result = p.plugins['access']['instance'].log_line_parser(self.access_test_line)
self.assertEquals(result, {'code': 200,
'processing_time': '0.0262',
'auth_token': 'tk4e350daf-9338-4cc6-aabb-090e49babfbd',
'month': '07',
'second': '30',
'year': '2010',
'query': 'format=json&foo',
'tz': '+0000',
'http_version': 'HTTP/1.0',
'object_name': 'bar',
'etag': '-',
'foo': 1,
'method': 'GET',
'trans_id': 'txfa431231-7f07-42fd-8fc7-7da9d8cc1f90',
'client_ip': '1.2.3.4',
'format': 1,
'bytes_out': 95,
'container_name': 'foo',
'day': '09',
'minute': '14',
'account': 'acct',
'hour': '04',
'referrer': '-',
'request': '/v1/acct/foo/bar',
'user_agent': 'curl',
'bytes_in': 6,
'lb_ip': '4.5.6.7'})
def test_process_one_access_file(self):
access_proxy_config = self.proxy_config.copy()
access_proxy_config.update({
'log-processor-access': {
'source_filename_format':'%Y%m%d%H*',
'class_path':
'swift.stats.access_processor.AccessLogProcessor'
}})
p = log_processor.LogProcessor(access_proxy_config, DumbLogger())
def get_object_data(*a, **kw):
return [self.access_test_line]
p.get_object_data = get_object_data
result = p.process_one_file('access', 'a', 'c', 'o')
expected = {('acct', '2010', '07', '09', '04'):
{('public', 'object', 'GET', '2xx'): 1,
('public', 'bytes_out'): 95,
'marker_query': 0,
'format_query': 1,
'delimiter_query': 0,
'path_query': 0,
('public', 'bytes_in'): 6,
'prefix_query': 0}}
self.assertEquals(result, expected)
def test_get_container_listing(self):
p = log_processor.LogProcessor(self.proxy_config, DumbLogger())
p._internal_proxy = DumbInternalProxy()
result = p.get_container_listing('a', 'foo')
expected = ['2010/03/14/13/obj1']
self.assertEquals(result, expected)
result = p.get_container_listing('a', 'foo', listing_filter=expected)
expected = []
self.assertEquals(result, expected)
result = p.get_container_listing('a', 'foo', start_date='2010031412',
end_date='2010031414')
expected = ['2010/03/14/13/obj1']
self.assertEquals(result, expected)
result = p.get_container_listing('a', 'foo', start_date='2010031414')
expected = []
self.assertEquals(result, expected)
result = p.get_container_listing('a', 'foo', start_date='2010031410',
end_date='2010031412')
expected = []
self.assertEquals(result, expected)
def test_get_object_data(self):
p = log_processor.LogProcessor(self.proxy_config, DumbLogger())
p._internal_proxy = DumbInternalProxy()
result = list(p.get_object_data('a', 'c', 'o', False))
expected = ['obj','data']
self.assertEquals(result, expected)
result = list(p.get_object_data('a', 'c', 'o.gz', True))
self.assertEquals(result, expected)
def test_get_stat_totals(self):
stats_proxy_config = self.proxy_config.copy()
stats_proxy_config.update({
'log-processor-stats': {
'class_path':
'swift.stats.stats_processor.StatsLogProcessor'
}})
p = log_processor.LogProcessor(stats_proxy_config, DumbLogger())
p._internal_proxy = DumbInternalProxy()
def get_object_data(*a,**kw):
return [self.stats_test_line]
p.get_object_data = get_object_data
result = p.process_one_file('stats', 'a', 'c', 'y/m/d/h/o')
expected = {('account', 'y', 'm', 'd', 'h'):
{'replica_count': 1,
'object_count': 2,
'container_count': 1,
'bytes_used': 3}}
self.assertEquals(result, expected)
def test_generate_keylist_mapping(self):
p = log_processor.LogProcessor(self.proxy_config, DumbLogger())
result = p.generate_keylist_mapping()
expected = {}
self.assertEquals(result, expected)
def test_generate_keylist_mapping_with_dummy_plugins(self):
class Plugin1(object):
def keylist_mapping(self):
return {'a': 'b', 'c': 'd', 'e': ['f', 'g']}
class Plugin2(object):
def keylist_mapping(self):
return {'a': '1', 'e': '2', 'h': '3'}
p = log_processor.LogProcessor(self.proxy_config, DumbLogger())
p.plugins['plugin1'] = {'instance': Plugin1()}
p.plugins['plugin2'] = {'instance': Plugin2()}
result = p.generate_keylist_mapping()
expected = {'a': set(['b', '1']), 'c': 'd', 'e': set(['2', 'f', 'g']),
'h': '3'}
self.assertEquals(result, expected)
def test_access_keylist_mapping_format(self):
proxy_config = self.proxy_config.copy()
proxy_config.update({
'log-processor-access': {
'source_filename_format':'%Y%m%d%H*',
'class_path':
'swift.stats.access_processor.AccessLogProcessor'
}})
p = log_processor.LogProcessor(proxy_config, DumbLogger())
mapping = p.generate_keylist_mapping()
for k, v in mapping.items():
# these only work for Py2.7+
#self.assertIsInstance(k, str)
self.assertTrue(isinstance(k, str), type(k))
def test_stats_keylist_mapping_format(self):
proxy_config = self.proxy_config.copy()
proxy_config.update({
'log-processor-stats': {
'class_path':
'swift.stats.stats_processor.StatsLogProcessor'
}})
p = log_processor.LogProcessor(proxy_config, DumbLogger())
mapping = p.generate_keylist_mapping()
for k, v in mapping.items():
# these only work for Py2.7+
#self.assertIsInstance(k, str)
self.assertTrue(isinstance(k, str), type(k))

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# TODO: Tests
import unittest
from swift.stats import log_uploader
class TestLogUploader(unittest.TestCase):
def test_placeholder(self):
pass
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,29 @@
# Copyright (c) 2010 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# TODO: Tests
import unittest
from swift.stats import stats_processor
class TestStatsProcessor(unittest.TestCase):
def test_placeholder(self):
pass
if __name__ == '__main__':
unittest.main()