diff --git a/etc/stackalytics.conf b/etc/stackalytics.conf index 370ad532d..63b2d5dd1 100644 --- a/etc/stackalytics.conf +++ b/etc/stackalytics.conf @@ -18,7 +18,7 @@ # listen_port = 8080 # Number of days to update members -# days_to_update_members = 7 +# days_to_update_members = 30 # The address of file with corrections data # corrections_uri = https://git.openstack.org/cgit/stackforge/stackalytics/plain/etc/corrections.json diff --git a/stackalytics/processor/config.py b/stackalytics/processor/config.py index 3218f7ecb..d98bb68fc 100644 --- a/stackalytics/processor/config.py +++ b/stackalytics/processor/config.py @@ -29,7 +29,7 @@ OPTS = [ help='The address dashboard listens on'), cfg.IntOpt('listen-port', default=8080, help='The port dashboard listens on'), - cfg.IntOpt('days_to_update_members', default=7, + cfg.IntOpt('days_to_update_members', default=30, help='Number of days to update members'), cfg.StrOpt('corrections-uri', default=('https://git.openstack.org/cgit/' diff --git a/stackalytics/processor/mps.py b/stackalytics/processor/mps.py index b4be2a9e5..83d9e9ff7 100644 --- a/stackalytics/processor/mps.py +++ b/stackalytics/processor/mps.py @@ -12,6 +12,7 @@ # implied. # See the License for the specific language governing permissions and # limitations under the License. +import random import re import time @@ -106,5 +107,7 @@ def log(uri, runtime_storage_inst, days_to_update_members, members_look_ahead): LOG.debug('New member: %s', member['member_id']) yield member + time.sleep(random.random() * 5) + LOG.debug('Last_member_index: %s', last_member_index) runtime_storage_inst.set_by_key('last_member_index', last_member_index) diff --git a/stackalytics/processor/utils.py b/stackalytics/processor/utils.py index 59909db93..a81050ef0 100644 --- a/stackalytics/processor/utils.py +++ b/stackalytics/processor/utils.py @@ -17,6 +17,7 @@ import cgi import datetime import gzip import json +import random import re import time @@ -86,9 +87,20 @@ def check_email_validity(email): return False +user_agents = [ + 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64) Gecko/20100101 Firefox/32.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_6) AppleWebKit/537.78.2', + 'Mozilla/5.0 (Windows NT 6.3; WOW64) Gecko/20100101 Firefox/32.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X) Chrome/37.0.2062.120', + 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko' +] + + def read_uri(uri): try: - fd = six.moves.urllib.request.urlopen(uri) + req = six.moves.urllib.request.Request( + url=uri, headers={'User-Agent': random.choice(user_agents)}) + fd = six.moves.urllib.request.urlopen(req) raw = fd.read() fd.close() return raw