Revert "Generate list of 404s for docs.o.o"
This reverts commit c25e91f496.
This script parses the Apache logs, writes a count of the 404s to a
local directory on files.openstack.org, and then exports it via
files.openstack.org.
As part of the spec [1] we're trying to remove publishing from local
volumes, in general.
This is not widely used: there is only one link to it; it's not
discoverable from the landing page of files.openstack.org (which just
shows the AFS directory listing); it has a very long latency, making it
not that useful for debugging; and, grepping the logs, there have been no
accesses in the past 2 weeks (as far back as logs go). I therefore
propose we remove it.
If we want to retain this, we should publish the output alongside the
docs AFS volume. That could certainly be done by distributing the
docs keytab to the host and having it write out in a similar cron job.
Another option could be to set up a keypair for remote login and keep
that as a secret in Zuul, and do the same from a periodic job
(complicated by Apache logs being root only, so needs some sudo magic
or similar). Or, we could figure out an altogether better,
privacy-respecting client analytics solution.
[1] https://docs.opendev.org/opendev/infra-specs/latest/specs/retire-static.html
Depends-On: https://review.opendev.org/709036
Change-Id: Iccf24a72cf82592bae8c699f9f857aa54fc74f10
This commit is contained in:
parent
a2c21661df
commit
55da1e3d06
@ -1,21 +0,0 @@
|
||||
#!/bin/bash
#
# Summarise the 404s recorded in an Apache access log.
#
# Usage: 404s.sh SOURCE_FILE OUTPUT_DIR
#
#   SOURCE_FILE  Apache access log to scan. SOURCE_FILE.1 is scanned as
#                well when it exists.
#   OUTPUT_DIR   Directory that receives the report, named YYYY-MM-DD.txt.
#
# The report lists each GET path that returned 404, prefixed by its hit
# count, most frequent first. Only entries containing "html" or ending in
# "/" are kept. Reports in OUTPUT_DIR older than 30 days are deleted.

if [ $# -lt 2 ] || [ -z "$1" ] || [ -z "$2" ] ; then
    # An empty OUTPUT_DIR would put the report in / and would make the
    # pruning "find" below walk the current directory, so refuse to run.
    echo "Usage: $0 SOURCE_FILE OUTPUT_DIR" >&2
    exit 1
fi

SOURCE_FILE=$1
OUTPUT_DIR=$2
INTERMEDIATE_FILE=$(mktemp) || exit 1

# Remove the scratch file however the script ends (including on signals
# that terminate the shell), not only on the straight-line path.
trap 'rm -f "$INTERMEDIATE_FILE"' EXIT

# Print the request path of every GET in log file $1 that returned 404.
extract_404_paths() {
    grep ' 404 ' "$1" | sed -n -e 's/.*"GET \(\/.*\) HTTP\/1\.." 404 .*/\1/p'
}

# Get just the lines with 404s in them
extract_404_paths "$SOURCE_FILE" > "$INTERMEDIATE_FILE"

if [ -f "$SOURCE_FILE.1" ] ; then
    # We get roughly the last day's worth of logs by looking at the last two
    # log files.
    extract_404_paths "$SOURCE_FILE.1" >> "$INTERMEDIATE_FILE"
fi

# Process those 404s to count them and return sorted by count
sort "$INTERMEDIATE_FILE" | uniq -c | sort -rn | grep '\(html\|\/$\)' > "$OUTPUT_DIR/$(date +%F).txt"

# Prune old reports. The scratch file itself is removed by the EXIT trap.
find "$OUTPUT_DIR" -type f -name '*.txt' -mtime +30 -delete
|
@ -124,28 +124,6 @@ class openstack_project::files (
|
||||
notify => Service['httpd'],
|
||||
}
|
||||
|
||||
# Install the 404 report script from the openstack_project module as a
# root-owned executable.
file { '/usr/local/bin/404s.sh':
  ensure => present,
  source => 'puppet:///modules/openstack_project/files/404s.sh',
  mode   => '0755',
  owner  => 'root',
  group  => 'root',
}
|
||||
# Root-owned directory (mode 0755); the generate_docs_404s cron job passes
# it to 404s.sh as the output directory.
file { '/var/www/docs-404s':
  ensure => directory,
  mode   => '0755',
  owner  => 'root',
  group  => 'root',
}
|
||||
# Run 404s.sh at 07:00 against the docs.openstack.org access log, writing
# its report into /var/www/docs-404s/.
cron { 'generate_docs_404s':
  command     => '404s.sh /var/log/apache2/docs.openstack.org_access.log /var/www/docs-404s/',
  # PATH includes /usr/local/bin, where 404s.sh is installed, so the bare
  # command name above resolves.
  environment => 'PATH=/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin',
  # This seems to be about half an hour after apache rotates logs.
  hour        => '7',
  minute      => '0',
  require     => File['/usr/local/bin/404s.sh'],
}
|
||||
|
||||
###########################################################
|
||||
# docs.openstack.org
|
||||
|
||||
@ -181,7 +159,6 @@ class openstack_project::files (
|
||||
before => File['/etc/ssl/certs/docs.openstack.org.pem'],
|
||||
}
|
||||
|
||||
|
||||
###########################################################
|
||||
# developer.openstack.org
|
||||
|
||||
|
@ -23,12 +23,6 @@ NameVirtualHost <%= @vhost_name %>:<%= @port %>
|
||||
Require all granted
|
||||
</Directory>
|
||||
|
||||
# Expose the /var/www/docs-404s directory at the /docs-404s URL path.
Alias /docs-404s /var/www/docs-404s
<Directory "/var/www/docs-404s">
  # Add auto-generated directory listings to the options already in effect.
  Options +Indexes
  # Allow every client to read the directory.
  Require all granted
</Directory>
|
||||
|
||||
ErrorLog /var/log/<%= scope.lookupvar("httpd::params::apache_name") %>/<%= @name %>_error.log
|
||||
LogLevel warn
|
||||
CustomLog /var/log/<%= scope.lookupvar("httpd::params::apache_name") %>/<%= @name %>_access.log combined
|
||||
|
Loading…
Reference in New Issue
Block a user