From 19712edc2d7009a86482ce28767aca710a126b83 Mon Sep 17 00:00:00 2001 From: Ramy Asselin Date: Wed, 16 Dec 2015 15:49:36 -0800 Subject: [PATCH] Fix elastic-recheck query command This code doesn't work at all. Bring it back to life. Also accept inputs from a config file. Closes-Bug: #1526921 Change-Id: I8f45dc9d42f7547f9d849686739b9a641c176814 --- elastic_recheck/cmd/query.py | 76 +++++++++--------------- elastic_recheck/results.py | 16 +++-- elastic_recheck/tests/unit/test_query.py | 17 ++---- 3 files changed, 43 insertions(+), 66 deletions(-) diff --git a/elastic_recheck/cmd/query.py b/elastic_recheck/cmd/query.py index f310a054..ffaa0e98 100755 --- a/elastic_recheck/cmd/query.py +++ b/elastic_recheck/cmd/query.py @@ -15,20 +15,17 @@ # under the License. import argparse -import base64 +import ConfigParser import itertools import json -import time - -import requests import yaml -from elastic_recheck import log as logging - +import elastic_recheck.elasticRecheck as er +import elastic_recheck.log as logging +import elastic_recheck.results as er_results LOG = logging.getLogger('erquery') -ENDPOINT = 'http://logstash.openstack.org/api' DEFAULT_NUMBER_OF_DAYS = 10 DEFAULT_MAX_QUANTITY = 5 IGNORED_ATTRIBUTES = [ @@ -44,42 +41,6 @@ IGNORED_ATTRIBUTES = [ ] -def _GET(path): - r = requests.get(ENDPOINT + path) - - if r.status_code != requests.codes.ok: - LOG.info('Got HTTP %s, retrying...' % r.status_code) - # retry once - r = requests.get(ENDPOINT + path) - - try: - return r.json() - except Exception: - raise SystemExit(r.text) - - -def _encode(q): - """Encode a JSON dict for inclusion in a URL.""" - return base64.b64encode(json.dumps(q)) - - -def _unix_time_in_microseconds(): - return int(time.time() * 1000) - - -def search(q, days): - search = { - 'search': q, - 'fields': [], - 'offset': 0, - 'timeframe': str(days * 86400), - 'graphmode': 'count', - 'time': { - 'user_interval': 0}, - 'stamp': _unix_time_in_microseconds()} - return _GET('/search/%s' % _encode(search)) - - def analyze_attributes(attributes): analysis = {} for attribute, values in attributes.iteritems(): @@ -102,17 +63,21 @@ def analyze_attributes(attributes): return analysis -def query(query_file_name, days=DEFAULT_NUMBER_OF_DAYS, +def query(query_file_name, days=DEFAULT_NUMBER_OF_DAYS, es_url=er.ES_URL, quantity=DEFAULT_MAX_QUANTITY, verbose=False): + + es = er_results.SearchEngine(es_url) + with open(query_file_name) as f: query_file = yaml.load(f.read()) query = query_file['query'] - r = search(q=query, days=days) - print('total hits: %s' % r['hits']['total']) + r = es.search(query, days=days) + print('total hits: %s' % r.hits['total']) attributes = {} - for hit in r['hits']['hits']: + + for hit in r.hits['hits']: for key, value in hit['_source'].iteritems(): value_hash = json.dumps(value) attributes.setdefault(key, {}).setdefault(value_hash, 0) @@ -125,7 +90,7 @@ def query(query_file_name, days=DEFAULT_NUMBER_OF_DAYS, continue print(attribute) - for percentage, value in itertools.islice(results, None, quantity): + for percentage, value in itertools.islice(results, quantity): if isinstance(value, list): value = ' '.join(unicode(x) for x in value) print(' %d%% %s' % (percentage, value)) @@ -147,9 +112,22 @@ def main(): parser.add_argument( '--verbose', '-v', action='store_true', default=False, help='Report on additional query metadata.') + parser.add_argument('-c', '--conf', help="Elastic Recheck Configuration " + "file to use for data_source options such as " + "elastic search url, logstash url, and database uri.") args = parser.parse_args() - query(args.query_file.name, args.days, args.quantity, args.verbose) + # Start with defaults + es_url = er.ES_URL + + if args.conf: + config = ConfigParser.ConfigParser({'es_url': er.ES_URL}) + config.read(args.conf) + if config.has_section('data_source'): + es_url = config.get('data_source', 'es_url') + + query(args.query_file.name, days=args.days, quantity=args.quantity, + verbose=args.verbose, es_url=es_url) if __name__ == "__main__": diff --git a/elastic_recheck/results.py b/elastic_recheck/results.py index cf79999f..bf5fc1c9 100644 --- a/elastic_recheck/results.py +++ b/elastic_recheck/results.py @@ -32,7 +32,7 @@ class SearchEngine(object): def __init__(self, url): self._url = url - def search(self, query, size=1000, recent=False): + def search(self, query, size=1000, recent=False, days=0): """Search an elasticsearch server. `query` parameter is the complicated query structure that @@ -47,19 +47,25 @@ class SearchEngine(object): a real time query that you only care about the last hour of time. Using recent dramatically reduces the load on the ES cluster. + `days` search only the last number of days. + The returned result is a ResultSet query. """ es = pyelasticsearch.ElasticSearch(self._url) args = {'size': size} - if recent: + if recent or days: # today's index datefmt = 'logstash-%Y.%m.%d' now = datetime.datetime.utcnow() - lasthr = now - datetime.timedelta(hours=1) indexes = [now.strftime(datefmt)] - if (lasthr.strftime(datefmt) != now.strftime(datefmt)): - indexes.append(lasthr.strftime(datefmt)) + if recent: + lasthr = now - datetime.timedelta(hours=1) + if lasthr.strftime(datefmt) != now.strftime(datefmt): + indexes.append(lasthr.strftime(datefmt)) + for day in range(1, days): + lastday = now - datetime.timedelta(days=day) + indexes.append(lastday.strftime(datefmt)) args['index'] = indexes results = es.search(query, **args) diff --git a/elastic_recheck/tests/unit/test_query.py b/elastic_recheck/tests/unit/test_query.py index 040188b2..3a8eb543 100644 --- a/elastic_recheck/tests/unit/test_query.py +++ b/elastic_recheck/tests/unit/test_query.py @@ -17,19 +17,10 @@ import sys import mock from elastic_recheck.cmd import query +from elastic_recheck.results import ResultSet from elastic_recheck.tests import unit -class FakeResponse(object): - def __init__(self, response_text): - super(FakeResponse, self).__init__() - self.text = response_text - self.status_code = 200 - - def json(self): - return json.loads(self.text) - - class TestQueryCmd(unit.UnitTestCase): def setUp(self): super(TestQueryCmd, self).setUp() @@ -43,9 +34,11 @@ class TestQueryCmd(unit.UnitTestCase): def test_query(self): with open('elastic_recheck/tests/unit/logstash/1284371.analysis') as f: expected_stdout = f.read() - with mock.patch('requests.get') as mock_get: + with mock.patch('elastic_recheck.results.SearchEngine.search') as \ + mock_search: with open('elastic_recheck/tests/unit/logstash/1284371.json') as f: - mock_get.return_value = FakeResponse(f.read()) + jsonResponse = json.loads(f.read()) + mock_search.return_value = ResultSet(jsonResponse) query.query('elastic_recheck/tests/unit/queries/1284371.yaml') sys.stdout.seek(0) self.assertEqual(expected_stdout, sys.stdout.read())