From 8d319e7d7e6aa26b6a43c153b8309b73820d2730 Mon Sep 17 00:00:00 2001
From: Dilraj Singh
Date: Wed, 17 Jan 2018 10:40:23 +0530
Subject: [PATCH] Added unit tests for better code coverage (#368) (#431)

* Added unit tests for server.py (#368)

  - Added a dependency on mock in requirements.txt
  - Updated server.py to use absolute imports
  - Changed the invocation of server.py to 'python -m app.server'
    in .travis.yml

* Re-added the --showlocals option for pytest in .travis.yml

* Added unit tests for ask.py (#368)

* Added unit tests for baidu.py, bing.py, duckduckgo.py, google.py,
  mojeek.py, parsijoo.py, quora.py, yahoo.py and youtube.py (#368)

* Disabled unit tests for yandex

* Replaced raise AssertionError calls with assert statements

* Added unit tests for generalized.py and added bandit.yml (#368)
---
 .travis.yml              |   4 +-
 app/server.py            |  12 +--
 app/test_server.py       |  34 --------
 bandit.yml               |   1 +
 requirements.txt         |   1 +
 test/__init__.py         |   0
 test/test_ask.py         |  58 ++++++++++++
 test/test_baidu.py       |  16 ++++
 test/test_bing.py        |  18 +++++
 test/test_duckduckgo.py  |  16 ++++
 test/test_generalized.py |  88 +++++++++++++++++++
 test/test_google.py      |  16 ++++
 test/test_mojeek.py      |  14 ++++
 test/test_parsijoo.py    |  22 ++++++
 test/test_quora.py       |  16 ++++
 test/test_server.py      | 167 +++++++++++++++++++++++++++++++++++++++
 test/test_yahoo.py       |  16 ++++
 test/test_youtube.py     |  18 +++++
 18 files changed, 475 insertions(+), 42 deletions(-)
 delete mode 100644 app/test_server.py
 create mode 100644 bandit.yml
 create mode 100644 test/__init__.py
 create mode 100644 test/test_ask.py
 create mode 100644 test/test_baidu.py
 create mode 100644 test/test_bing.py
 create mode 100644 test/test_duckduckgo.py
 create mode 100644 test/test_generalized.py
 create mode 100644 test/test_google.py
 create mode 100644 test/test_mojeek.py
 create mode 100644 test/test_parsijoo.py
 create mode 100644 test/test_quora.py
 create mode 100644 test/test_server.py
 create mode 100644 test/test_yahoo.py
 create mode 100644 test/test_youtube.py

diff --git a/.travis.yml b/.travis.yml
index a8e86dd3..a2c45cc4 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -11,8 +11,8 @@ install:
 before_script:
   - flake8 . --count --max-complexity=15 --show-source --statistics
 script:
-  - python app/server.py > /dev/null &
-  - pytest --cov=./
+  - python -m app.server > /dev/null &
+  - pytest --cov=./
   - kill $(lsof -t -i:7001)
 after_success:
   - bash <(curl -s https://codecov.io/bash)
diff --git a/app/server.py b/app/server.py
index 12100a6a..104fc655 100644
--- a/app/server.py
+++ b/app/server.py
@@ -1,12 +1,13 @@
 import json
 import os
 from argparse import ArgumentParser
+
 from defusedxml.minidom import parseString
 from dicttoxml import dicttoxml
 from flask import (Flask, Response, abort, jsonify, make_response,
                    render_template, request)
 
-from scrapers import feed_gen, scrapers
+from app.scrapers import feed_gen, scrapers
 
 DISABLE_CACHE = True  # Temporarily disable the MongoDB cache
 if DISABLE_CACHE:
@@ -27,11 +28,6 @@ def store(url, links):
         'error': 'Could not parse the page due to Internal Server Error'
     }
 
-parser = ArgumentParser()
-help_msg = "Start the server in development mode with debug=True"
-parser.add_argument("--dev", help=help_msg, action="store_true")
-args = parser.parse_args()
-
 
 @app.route('/')
 def index():
@@ -113,4 +109,8 @@ def set_header(r):
 
 if __name__ == '__main__':
     port = int(os.environ.get('PORT', 7001))
+    parser = ArgumentParser()
+    help_msg = "Start the server in development mode with debug=True"
+    parser.add_argument("--dev", help=help_msg, action="store_true")
+    args = parser.parse_args()
     app.run(host='0.0.0.0', port=port, debug=args.dev)
diff --git a/app/test_server.py b/app/test_server.py
deleted file mode 100644
index b3968d01..00000000
--- a/app/test_server.py
+++ /dev/null
@@ -1,34 +0,0 @@
-import os
-
-import pytest
-import requests
-
-from .scrapers import scrapers, small_test
-
-REASON = 'Do you have query-server running on http://127.0.0.1:7001 ?'
-TRAVIS_CI = os.getenv('TRAVIS', False)  # Running in Travis CI?
-
-
-def test_true():
-    assert True, "We have a problem!"
-
-
-def test_small_test():
-    small_test()
-
-
-@pytest.mark.xfail(not TRAVIS_CI, reason=REASON)
-def test_invalid_url_api_call():
-    response = requests.get('http://localhost:7001/api/v1/search/invalid_url')
-    assert response.json()['Status Code'] == 404
-
-
-def make_engine_api_call(engine_name):
-    url = 'http://localhost:7001/api/v1/search/' + engine_name
-    assert requests.get(url).json()['Status Code'] == 400, engine_name
-
-
-@pytest.mark.xfail(not TRAVIS_CI, reason=REASON)
-def test_engine_api_calls(engine_names=None):
-    for engine_name in (engine_names or scrapers):
-        make_engine_api_call(engine_name)
diff --git a/bandit.yml b/bandit.yml
new file mode 100644
index 00000000..75d550c3
--- /dev/null
+++ b/bandit.yml
@@ -0,0 +1 @@
+skips: ['B101']
diff --git a/requirements.txt b/requirements.txt
index c07af326..c71eeddd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,3 +14,4 @@ pytest-cov>=2.4.0
 requests>=2.13.0
 webencodings>=0.5
 defusedxml>=0.5.0
+mock>=2.0.0
diff --git a/test/__init__.py b/test/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/test/test_ask.py b/test/test_ask.py
new file mode 100644
index 00000000..ea466318
--- /dev/null
+++ b/test/test_ask.py
@@ -0,0 +1,58 @@
+from bs4 import BeautifulSoup
+
+from app.scrapers import Ask
+
+
+def test_next_start():
+    assert 3 == Ask().next_start(2, None)
+
+
+def test_parse_response_for_none():
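+    # a minimal stand-in for Ask's "no results" page; only the text
+    # content matters for this test, and the tag names are assumed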
+    html_text = """<div class="PartialSearchResults-noresults">
+        <p>
+            No results for:
+        </p>
+        <p>
+            44754546546545545465465f4654f654654
+        </p>
+        <p>
+            Please try again.
+        </p>
+    </div>"""
+    stub_soup = BeautifulSoup(html_text, 'html.parser')
+    resp = Ask().parse_response(stub_soup)
+    assert resp is None
+
+
+def test_parse_response_with_desc():
+    # assumed Ask result markup: the link and title live in the result
+    # anchor's attributes, the description in the abstract paragraph
+    html_div = """<div class="PartialSearchResults-item">
+        <a class="PartialSearchResults-item-title-link result-link"
+           href="mock_url" title="mock_title"></a>
+        <p class="PartialSearchResults-item-abstract">
+            mock_desc
+        </p>
+    </div>"""
+    stub_soup_div = BeautifulSoup(html_div, 'html.parser')
+    resp = Ask().parse_response(stub_soup_div)
+    expected_resp = [
+        {
+            'link': u'mock_url',
+            'title': u'mock_title',
+            'desc': u'mock_desc'
+        }
+    ]
+    assert resp == expected_resp
+
+
+def test_parse_response_without_desc():
+    # same assumed markup as above, minus the description paragraph
+    html_div = """<div class="PartialSearchResults-item">
+        <a class="PartialSearchResults-item-title-link result-link"
+           href="mock_url" title="mock_title"></a>
+    </div>"""
+    stub_soup_div = BeautifulSoup(html_div, 'html.parser')
+    resp = Ask().parse_response(stub_soup_div)
+    expected_resp = [
+        {
+            'link': u'mock_url',
+            'title': u'mock_title'
+        }
+    ]
+    assert resp == expected_resp
diff --git a/test/test_baidu.py b/test/test_baidu.py
new file mode 100644
index 00000000..6a91ad5a
--- /dev/null
+++ b/test/test_baidu.py
@@ -0,0 +1,16 @@
+from bs4 import BeautifulSoup
+
+from app.scrapers import Baidu
+
+
+def test_parse_response():
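+    # assumed Baidu result markup: the result anchor sits inside an
+    # <h3 class="t"> heading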
+    html_text = """<h3 class="t">
+        <a href="mock_url">mock_title</a>
+    </h3>"""
+    dummy_soup = BeautifulSoup(html_text, 'html.parser')
+    resp = Baidu().parse_response(dummy_soup)
+    expected_resp = [{
+        'title': u'mock_title',
+        'link': u'mock_url'
+    }]
+    assert resp == expected_resp
diff --git a/test/test_bing.py b/test/test_bing.py
new file mode 100644
index 00000000..99f3b948
--- /dev/null
+++ b/test/test_bing.py
@@ -0,0 +1,18 @@
+from bs4 import BeautifulSoup
+
+from app.scrapers import Bing
+
+
+def test_parse_response():
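+    # assumed Bing result markup: a b_algo list item wrapping the title
+    # heading and the snippet paragraph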
+    html_text = """<li class="b_algo">
+        <h2>
+            <a href="mock_url">mock_title</a>
+        </h2>
+        <p>
+            mock_desc
+        </p>
+    </li>"""
+    dummy_soup = BeautifulSoup(html_text, 'html.parser')
+    resp = Bing().parse_response(dummy_soup)
+    expected_resp = [{
+        'title': u'mock_title',
+        'link': u'mock_url',
+        'desc': u'mock_desc'
+    }]
+    assert resp == expected_resp
diff --git a/test/test_duckduckgo.py b/test/test_duckduckgo.py
new file mode 100644
index 00000000..c319e2af
--- /dev/null
+++ b/test/test_duckduckgo.py
@@ -0,0 +1,16 @@
+from bs4 import BeautifulSoup
+
+from app.scrapers import DuckDuckGo
+
+
+def test_parse_response():
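+    # assumed DuckDuckGo (HTML endpoint) result markup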
+    html_text = """<div class="links_main">
+        <a href="mock_url">mock_title</a>
+    </div>"""
+    dummy_soup = BeautifulSoup(html_text, 'html.parser')
+    resp = DuckDuckGo().parse_response(dummy_soup)
+    expected_resp = [{
+        'title': u'mock_title',
+        'link': u'mock_url'
+    }]
+    assert resp == expected_resp
diff --git a/test/test_generalized.py b/test/test_generalized.py
new file mode 100644
index 00000000..d0775d6d
--- /dev/null
+++ b/test/test_generalized.py
@@ -0,0 +1,88 @@
+from mock import patch
+import pytest
+
+from app.scrapers.generalized import Scraper
+
+
+@patch('requests.models.Response')
+@patch('app.scrapers.generalized.requests.get')
+def test_get_page(mock_request_get, mock_response):
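+    # the base Scraper is expected to call requests.get with an empty
+    # URL (subclasses set self.url), the stock desktop User-Agent header
+    # and the query payload asserted below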
+    mock_request_get.return_value = mock_response
+    mock_response.url = "Mock Url"
+    response = Scraper().get_page("dummy_query")
+    assert response == mock_response
+    expected_payload = {'q': 'dummy_query', '': ''}
+    expected_headers = {
+        'User-Agent': (
+            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) '
+            'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 '
+            'Safari/537.36'
+        )
+    }
+    mock_request_get.assert_called_with(
+        '', headers=expected_headers, params=expected_payload)
+
+
+def test_parse_response():
+    with pytest.raises(NotImplementedError):
+        Scraper().parse_response(None)
+
+
+def test_next_start():
+    dummy_prev_results = ['dummy_value']
+    assert Scraper().next_start(3, dummy_prev_results) == 4
+
+
+@patch('app.scrapers.generalized.Scraper.parse_response')
+@patch('app.scrapers.generalized.Scraper.get_page')
+@patch('requests.models.Response')
+def test_search(mock_resp, mock_get_page, mock_parse_resp):
+    mock_get_page.return_value = mock_resp
+    mock_resp.text = "Mock response"
+    expected_resp = [{
+        'title': 'mock_title',
+        'link': 'mock_url'
+    }]
+    # assuming parse_response is implemented by the classes inheriting
+    # Scraper, return a dummy response instead of raising
+    # NotImplementedError
+    mock_parse_resp.return_value = expected_resp
+    resp = Scraper().search('dummy_query', 1)
+    assert resp == expected_resp
+
+
+@patch('app.scrapers.generalized.Scraper.get_page')
+@patch('requests.models.Response')
+def test_search_parsed_response_none(mock_resp, mock_get):
+    mock_get.return_value = mock_resp
+    mock_resp.text = "Mock Response"
+    with patch('app.scrapers.generalized.Scraper.parse_response',
+               return_value=None):
+        resp = Scraper().search('dummy_query', 1)
+    assert resp == []
+
+
+@patch('app.scrapers.generalized.requests.get')
+@patch('app.scrapers.generalized.Scraper.parse_response')
+@patch('requests.models.Response')
+def test_search_without_count(mock_resp, mock_parse_resp, mock_get):
+    mock_get.return_value = mock_resp
+    mock_resp.text = 'mock response'
+    expected_resp = [{
+        'title': 'mock_title',
+        'link': 'mock_url'
+    }]
+    expected_payload = {'q': 'dummy_query'}
+    expected_headers = {
+        'User-Agent': (
+            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) '
+            'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 '
+            'Safari/537.36'
+        )
+    }
+    mock_parse_resp.return_value = expected_resp
+    resp = Scraper().search_without_count('dummy_query')
+    assert resp == expected_resp
+    mock_get.assert_called_with(
+        '', headers=expected_headers, params=expected_payload)
diff --git a/test/test_google.py b/test/test_google.py
new file mode 100644
index 00000000..6c720a25
--- /dev/null
+++ b/test/test_google.py
@@ -0,0 +1,16 @@
+from bs4 import BeautifulSoup
+
+from app.scrapers import Google
+
+
+def test_parse_response():
+    # assumed Google result markup: an anchor inside an <h3 class="r">
+    # result heading
+    html_text = """<h3 class="r">
+        <a href="mock_url">mock_title</a>
+    </h3>"""
+    dummy_soup = BeautifulSoup(html_text, 'html.parser')
+    expected_resp = [{
+        'title': u'mock_title',
+        'link': u'mock_url'
+    }]
+    resp = Google().parse_response(dummy_soup)
+    assert resp == expected_resp
diff --git a/test/test_mojeek.py b/test/test_mojeek.py
new file mode 100644
index 00000000..3c3178b2
--- /dev/null
+++ b/test/test_mojeek.py
@@ -0,0 +1,14 @@
+from bs4 import BeautifulSoup
+
+from app.scrapers import Mojeek
+
+
+def test_parse_response():
+    # assumed Mojeek result anchor; the class name is a placeholder
+    html_text = '<a class="ob" href="mock_url">mock_title</a>'
+    dummy_soup = BeautifulSoup(html_text, 'html.parser')
+    expected_resp = [{
+        'title': u'mock_title',
+        'link': u'mock_url'
+    }]
+    resp = Mojeek().parse_response(dummy_soup)
+    assert resp == expected_resp
diff --git a/test/test_parsijoo.py b/test/test_parsijoo.py
new file mode 100644
index 00000000..e89b5387
--- /dev/null
+++ b/test/test_parsijoo.py
@@ -0,0 +1,22 @@
+from bs4 import BeautifulSoup
+
+from app.scrapers import Parsijoo
+
+
+def test_parse_response():
+    # assumed Parsijoo result markup; the spliced-in runs of spaces
+    # exercise the scraper's whitespace stripping
+    html_text = """<div class="result">
+        <span class="title">
+    """ + " " * 22 + """mock_title</span>
+        <span class="url">mock_url</span>
+    """ + " " * 34 + """ <span class="description">mock_desc</span>
+        <span class="similar">mock_similar</span>
+    </div>"""
+    dummy_soup = BeautifulSoup(html_text, 'html.parser')
+    expected_resp = [{
+        'title': u'mock_title',
+        'link': u'mock_url',
+        'desc': u'mock_desc'
+    }]
+    resp = Parsijoo().parse_response(dummy_soup)
+    assert resp == expected_resp
diff --git a/test/test_quora.py b/test/test_quora.py
new file mode 100644
index 00000000..e3a0e962
--- /dev/null
+++ b/test/test_quora.py
@@ -0,0 +1,16 @@
+from bs4 import BeautifulSoup
+
+from app.scrapers import Quora
+
+
+def test_parse_response():
+    # assumed Quora result markup; the scraper is expected to prefix
+    # the relative href with the Quora domain
+    html_text = "<div class='result'>" \
+                "<a class='question_link' href='/mock_url'>" \
+                "mock_title</a></div>"
+    dummy_soup = BeautifulSoup(html_text, 'html.parser')
+    expected_resp = [{
+        'title': u'mock_title',
+        'link': u'https://www.quora.com/mock_url'
+    }]
+    resp = Quora().parse_response(dummy_soup)
+    assert resp == expected_resp
diff --git a/test/test_server.py b/test/test_server.py
new file mode 100644
index 00000000..65541644
--- /dev/null
+++ b/test/test_server.py
@@ -0,0 +1,167 @@
+import json
+import os
+
+import pytest
+import requests
+from defusedxml import ElementTree
+from mock import patch
+
+from app.scrapers import small_test
+from app.server import app
+
+REASON = 'Do you have query-server running on http://127.0.0.1:7001 ?'
+TRAVIS_CI = os.getenv('TRAVIS', False)  # Running in Travis CI?
+
+
+@pytest.mark.xfail(not TRAVIS_CI, reason=REASON)
+def test_small_test():
+    small_test()
+
+
+@pytest.mark.xfail(not TRAVIS_CI, reason=REASON)
+def test_invalid_url_api_call():
+    response = requests.get('http://localhost:7001/api/v1/search/invalid_url')
+    assert response.json()['Status Code'] == 404
+
+
+def make_engine_api_call(engine_name):
+    url = 'http://localhost:7001/api/v1/search/' + engine_name
+    assert requests.get(url).json()['Status Code'] == 400
+
+
+@pytest.mark.xfail(not TRAVIS_CI, reason=REASON)
+def test_engine_api_calls(engine_names=None):
+    engines = """ask baidu bing dailymotion duckduckgo exalead google
+                 mojeek parsijoo quora yahoo youtube""".split()
+    for engine_name in (engine_names or engines):
+        make_engine_api_call(engine_name)
+
+
+def test_api_index():
+    assert app.test_client().get('/').status_code == 200
+
+
+@patch('app.server.abort')
+def test_api_search_invalid_qformat(mock_abort):
+    url = '/api/v1/search/google?query=fossasia&format=invalid'
+    app.test_client().get(url)
+    mock_abort.assert_called_with(400, 'Not Found - undefined format')
+
+
+@patch('app.server.bad_request', return_value="Mock Response")
+def test_api_search_invalid_engine(mock_bad_request):
+    url = '/api/v1/search/invalid?query=fossasia'
+    resp = app.test_client().get(url).get_data().decode('utf-8')
+    mock_bad_request.assert_called_with(
+        [404, 'Incorrect search engine', 'invalid'])
+    assert resp == "Mock Response"
+
+
+@patch('app.server.bad_request', return_value="Mock Response")
+def test_api_search_missing_query(mock_bad_request):
+    # invalid url with the query parameter missing
+    url = '/api/v1/search/google'
+    resp = app.test_client().get(url).get_data().decode('utf-8')
+    mock_bad_request.assert_called_with(
+        [400, 'Not Found - missing query', 'json'])
+    assert resp == "Mock Response"
+
+
+@patch('app.server.bad_request', return_value="Mock Response")
+def test_api_search_for_no_response(mock_bad_request):
+    url = '/api/v1/search/google?query=fossasia'
+    with patch('app.server.lookup', return_value=None):
+        with patch('app.server.feed_gen', return_value=None):
+            resp = app.test_client().get(url).get_data().decode('utf-8')
+    mock_bad_request.assert_called_with([404, 'No response',
+                                         'google:fossasia'])
+    assert resp == "Mock Response"
+
+
+def test_api_search_for_cache_hit():
+    url = '/api/v1/search/google?query=fossasia'
+    mock_result = [{'title': 'mock_title', 'link': 'mock_link'}]
+    with patch('app.server.lookup', return_value=mock_result):
+        resp = app.test_client().get(url).get_data().decode('utf-8')
+    assert json.loads(resp) == mock_result
+
+
+@patch('app.server.feed_gen')
+@patch('app.server.lookup')
+def test_api_search_for_format(mock_lookup, mock_feed_gen):
+    # exercise every serializer branch: JSON is compared directly,
+    # XML and CSV are converted back to dicts by the helpers below
+    for qformat in ['json', 'csv', 'xml']:
+        url = '/api/v1/search/google?query=fossasia&format=' + qformat
+        mock_result = [
+            {
+                'title': 'mock_title',
+                'link': 'mock_link',
+                'desc': 'mock_desc'
+            }
+        ]
+        mock_lookup.return_value = None
+        mock_feed_gen.return_value = mock_result
+        resp = app.test_client().get(url).get_data().decode('utf-8')
+        expected_resp = expected_response_for_format(qformat)
+        if qformat == 'json':
+            resp = json.loads(resp)
+        elif qformat == 'xml':
+            resp = resp.replace('\t', '').replace('\n', '')
+            resp = get_json_equivalent_from_xml_feed(resp)
+            expected_resp = get_json_equivalent_from_xml_feed(expected_resp)
+        elif qformat == 'csv':
+            resp = get_json_equivalent_from_csv_feed(resp)
+            expected_resp = get_json_equivalent_from_csv_feed(expected_resp)
+        assert expected_resp == resp
+
+
+def expected_response_for_format(qformat):
+    if qformat == 'json':
+        return [
+            {'title': 'mock_title',
+             'link': 'mock_link',
+             'desc': 'mock_desc'}
+        ]
+    elif qformat == 'csv':
+        return '"link","title","desc"\n"mock_link","mock_title","mock_desc"'
+    elif qformat == 'xml':
+        # the exact XML layout below is assumed to match the server's
+        # dicttoxml output for the mock result
+        return '<?xml version="1.0" encoding="UTF-8" ?>' \
+               '<root><item><desc>mock_desc</desc><link>mock_link</link>' \
+               '<title>mock_title</title></item></root>'
+
+
+def get_json_equivalent_from_csv_feed(feed):
+    keys_feed1 = feed.split('\n')[0].split(',')
+    json_result = []
+    for row_index, row in enumerate(feed.split('\n')):
+        if row_index == 0:
+            continue
+        entry = {}
+        for index, value in enumerate(row.split(',')):
+            entry[keys_feed1[index].replace('"', '')] = value.replace('"', '')
+        json_result.append(entry)
+    return json_result
+
+
+def get_json_equivalent_from_xml_feed(feed):
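+    # recursively folds the ElementTree into nested dicts, collecting
+    # repeated sibling tags into lists, so an XML feed can be compared
+    # with its JSON equivalent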
+    def internal_iter(tree, accum):
+        if tree is None:
+            return accum
+
+        if tree.getchildren():
+            accum[tree.tag] = {}
+            for each in tree.getchildren():
+                result = internal_iter(each, {})
+                if each.tag in accum[tree.tag]:
+                    if not isinstance(accum[tree.tag][each.tag], list):
+                        accum[tree.tag][each.tag] = [
+                            accum[tree.tag][each.tag]
+                        ]
+                    accum[tree.tag][each.tag].append(result[each.tag])
+                else:
+                    accum[tree.tag].update(result)
+        else:
+            accum[tree.tag] = tree.text
+
+        return accum
+
+    return internal_iter(ElementTree.fromstring(feed), {})
diff --git a/test/test_yahoo.py b/test/test_yahoo.py
new file mode 100644
index 00000000..3ef7ff85
--- /dev/null
+++ b/test/test_yahoo.py
@@ -0,0 +1,16 @@
+from bs4 import BeautifulSoup
+
+from app.scrapers import Yahoo
+
+
+def test_parse_response():
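+    # assumed Yahoo result markup: a title heading wrapping the result
+    # anchor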
+    html_text = '<h3 class="title">' \
+                '<a href="mock_url">mock_title</a>' \
+                '</h3>'
+    dummy_soup = BeautifulSoup(html_text, 'html.parser')
+    expected_resp = [{
+        'title': u'mock_title',
+        'link': u'mock_url'
+    }]
+    resp = Yahoo().parse_response(dummy_soup)
+    assert resp == expected_resp
diff --git a/test/test_youtube.py b/test/test_youtube.py
new file mode 100644
index 00000000..c6a65609
--- /dev/null
+++ b/test/test_youtube.py
@@ -0,0 +1,18 @@
+from bs4 import BeautifulSoup
+
+from app.scrapers import Youtube
+
+
+def test_parse_response():
+    # assumed YouTube result markup; the scraper is expected to expand
+    # the relative /watch link into a full youtube.com URL
+    html_text = '<div class="yt-lockup-content">' \
+                '<a class="yt-uix-sessionlink" href="/user/mock">' \
+                'mock_channel</a>' \
+                '<h3 class="yt-lockup-title">' \
+                '<a href="/watch?v=mock">mock_title</a></h3></div>'
+    dummy_soup = BeautifulSoup(html_text, 'html.parser')
+    expected_resp = [{
+        'title': u'mock_title',
+        'link': u'https://www.youtube.com/watch?v=mock'
+    }]
+    resp = Youtube().parse_response(dummy_soup)
+    assert resp == expected_resp