diff --git a/README.md b/README.md index 62a44094..23f69d3c 100644 --- a/README.md +++ b/README.md @@ -19,14 +19,14 @@ Instal the latest stable release: pip install panoptes_aggregation ``` -Or for development or testing, you can install the development version directly from GitHub: +Upgrade an existing installation: ```bash -pip install -U git+git://github.com/zooniverse/aggregation-for-caesar.git +pip install -U panoptes_aggregation ``` -Upgrade and existing installation: +Or for development or testing, you can install the latest development version directly from GitHub: ```bash -pip install -U panoptes_aggregation +pip install -U git+https://github.com/zooniverse/aggregation-for-caesar.git ``` #### Install the Graphical User Interface (GUI) @@ -35,6 +35,11 @@ If you would like to use the GUI instead of the command line install the package pip install "panoptes_aggregation[gui]" ``` +Or for the latest development build from GitHub: +```bash +pip install -U git+https://github.com/zooniverse/aggregation-for-caesar.git#egg=panoptes-aggregation[gui] +``` + #### Anaconda build of python If your are using the anaconda version of python some of the dependencies should be installed using the `conda` package manager before installing `panoptes_aggregation`: ```bash diff --git a/Scripts.md b/Scripts.md index 659ab73c..a6802671 100644 --- a/Scripts.md +++ b/Scripts.md @@ -41,7 +41,9 @@ Use the command line tool to make configuration `yaml` files that are used to se ```bash usage: panoptes_aggregation config [-h] [-d DIR] [-v VERSION] - [-m MINOR_VERSION] [-k KEYWORDS] [-vv] + [--min_version MIN_VERSION] + [--max_version MAX_VERSION] [-k KEYWORDS] + [-vv] workflow_csv workflow_id Make configuration files for panoptes data extraction and reduction based on a @@ -60,10 +62,23 @@ Workflow ID and version numbers: workflow_id the workflow ID you would like to extract -v VERSION, --version VERSION - The major workflow version to extract - -m 
MINOR_VERSION, --minor_version MINOR_VERSION - The minor workflow version used to create the lookup - table for the workflow content + The workflow version to extract. If only a major + version is given (e.g. -v 3) all minor versions will + be extracted at once. If a minor version is provided + (e.g. -v 3.14) only that specific version will be + extracted. + --min_version MIN_VERSION + The minimum workflow version to extract (inclusive). + This can be provided as either a major version (e.g. + --min_version 3) or a major version with a minor + version (e.g. --min_version 3.14). If this flag is + provided the --version flag will be ignored. + --max_version MAX_VERSION + The maximum workflow version to extract (inclusive). + This can be provided as either a major version (e.g. + --max_version 3) or a major version with a minor + version (e.g. --max_version 3.14). If this flag is + provided the --version flag will be ignored. Other keywords: Additional keywords to be passed into the configuration files @@ -85,7 +100,7 @@ Other options: ### Example: Penguin Watch ```bash -panoptes_aggregation config penguin-watch-workflows.csv 6465 -v 52 -m 76 +panoptes_aggregation config penguin-watch-workflows.csv 6465 -v 52.76 ``` This creates four files: @@ -103,6 +118,7 @@ Use the command line tool to extract your data into one flat `csv` file for each ```bash usage: panoptes_aggregation extract [-h] [-d DIR] [-o OUTPUT] [-O] + [-c CPU_COUNT] [-vv] classification_csv extractor_config Extract data from panoptes classifications based on the workflow @@ -125,6 +141,10 @@ What directory and base name should be used for the extractions: Other options: -O, --order Arrange the data columns in alphabetical order before saving + -c CPU_COUNT, --cpu_count CPU_COUNT + How many cpu cores to use during extraction + -vv, --verbose increase output verbosity + ``` ### Example: Penguin Watch @@ -165,8 +185,8 @@ This creates two `csv` files (one for each extractor listed in the config file): Note: 
this only works for some task types, see the [documentation](https://aggregation-caesar.zooniverse.org/docs) for a full list of supported task types. ```bash -usage: panoptes_aggregation reduce [-h] [-F {first,last,all}] [-O] [-d DIR] - [-o OUTPUT] [-s] +usage: panoptes_aggregation reduce [-h] [-F {first,last,all}] [-O] + [-c CPU_COUNT] [-d DIR] [-o OUTPUT] [-s] extracted_csv reducer_config reduce data from panoptes classifications based on the extracted data @@ -192,6 +212,8 @@ Reducer options: for one subject -O, --order Arrange the data columns in alphabetical order before saving + -c CPU_COUNT, --cpu_count CPU_COUNT + How many cpu cores to use during reduction ``` ### Example: Penguin Watch diff --git a/_static/gui_config.png b/_static/gui_config.png index 493ef85a..6ca0da43 100644 Binary files a/_static/gui_config.png and b/_static/gui_config.png differ diff --git a/_static/gui_extract.png b/_static/gui_extract.png index a2e56302..211350b7 100644 Binary files a/_static/gui_extract.png and b/_static/gui_extract.png differ diff --git a/_static/gui_reducer.png b/_static/gui_reducer.png index c7501264..e19ec4a5 100644 Binary files a/_static/gui_reducer.png and b/_static/gui_reducer.png differ diff --git a/panoptes_aggregation/scripts/aggregation_parser.py b/panoptes_aggregation/scripts/aggregation_parser.py index a920a11c..cb814d91 100755 --- a/panoptes_aggregation/scripts/aggregation_parser.py +++ b/panoptes_aggregation/scripts/aggregation_parser.py @@ -33,7 +33,7 @@ def main(args=None): ) config_numbers = config_parser.add_argument_group( 'Workflow ID and version numbers', - 'Enter the workflow ID, major version number, and minor version number', + 'Enter the workflow ID with a version number or version range', gooey_options={ 'show_border': False, 'columns': 1 @@ -84,14 +84,18 @@ def main(args=None): config_numbers.add_argument( "-v", "--version", - help="The major workflow version to extract", - type=int + help="The workflow version to extract. 
If only a major version is given (e.g. -v 3) all minor versions will be extracted at once. If a minor version is provided (e.g. -v 3.14) only that specific version will be extracted.", + type=str ) config_numbers.add_argument( - "-m", - "--minor_version", - help="The minor workflow version used to create the lookup table for the workflow content", - type=int + "--min_version", + help="The minimum workflow version to extract (inclusive). This can be provided as either a major version (e.g. --min_version 3) or a major version with a minor version (e.g. --min_version 3.14). If this flag is provided the --version flag will be ignored.", + type=str + ) + config_numbers.add_argument( + "--max_version", + help="The maximum workflow version to extract (inclusive). This can be provided as either a major version (e.g. --max_version 3) or a major version with a minor version (e.g. --max_version 3.14). If this flag is provided the --version flag will be ignored.", + type=str ) config_keywords.add_argument( "-k", @@ -268,7 +272,8 @@ def main(args=None): args.workflow_csv, args.workflow_id, version=args.version, - minor_version=args.minor_version, + min_version=args.min_version, + max_version=args.max_version, keywords=args.keywords, output_dir=args.dir, verbose=args.verbose diff --git a/panoptes_aggregation/scripts/config_workflow_panoptes.py b/panoptes_aggregation/scripts/config_workflow_panoptes.py index d79f47da..79724961 100755 --- a/panoptes_aggregation/scripts/config_workflow_panoptes.py +++ b/panoptes_aggregation/scripts/config_workflow_panoptes.py @@ -3,6 +3,8 @@ import yaml import json import warnings +import packaging.version +import numpy as np warnings.filterwarnings("ignore", message="numpy.dtype size changed") warnings.filterwarnings("ignore", message="numpy.ufunc size changed") @@ -21,7 +23,8 @@ def config_workflow( workflow_csv, workflow_id, version=None, - minor_version=None, + min_version=None, + max_version=None, keywords={}, output_dir=None, verbose=False @@ 
-30,32 +33,57 @@ def config_workflow( with workflow_csv as workflow_csv_in: workflows = pandas.read_csv(workflow_csv_in, encoding='utf-8') - wdx = (workflows.workflow_id == workflow_id) - if version is None: - version = workflows[wdx].version.max() - if verbose: - warnings.warn('No major workflow version was specified, defaulting to version {0}'.format(version)) + workflows['version_parse'] = np.array([ + packaging.version.parse('{0}.{1}'.format(v, m)) + for v, m in zip(workflows.version, workflows.minor_version) + ]) - wdx &= (workflows.version == version) - if minor_version is None: - minor_version = workflows[wdx].minor_version.max() + wdx = (workflows.workflow_id == workflow_id) + if (version is None) and (min_version is None) and (max_version is None): + # no version specified, take the latest version of the workflow + version = workflows[wdx].version_parse.max() + workflow_version = str(version) if verbose: - warnings.warn('No minor workflow version was specified, defaulting to version {0}'.format(minor_version)) + warnings.warn('No workflow version was specified, defaulting to version {0}'.format(version)) + wdx &= (workflows.version_parse == version) + elif (version is None): + # either min or max version is given + workflow_version = {} + if min_version is not None: + workflow_version['min'] = min_version + min_version = packaging.version.parse(min_version) + wdx &= (workflows.version_parse >= min_version) + if max_version is not None: + workflow_version['max'] = max_version + max_version = packaging.version.parse(max_version) + wdx &= (workflows.version_parse <= max_version) + else: + # version is given + workflow_version = version + version = packaging.version.parse(version) + if version.minor == 0: + next_version = packaging.version.parse(str(version.major + 1)) + wdx &= (workflows.version_parse >= version) + wdx &= (workflows.version_parse < next_version) + else: + wdx &= (workflows.version_parse == version) - wdx &= (workflows.minor_version == 
minor_version) - assert (wdx.sum() > 0), 'workflow ID and workflow version combination does not exist' - assert (wdx.sum() == 1), 'workflow ID and workflow version combination is not unique' - workflow = workflows[wdx].iloc[0] + assert (wdx.sum() > 0), 'workflow ID and workflow version(s) combination does not exist' + # configure off of the latest workflow when given a range + configure_version = workflows[wdx].version_parse.max() + configure_version_loc = np.argmax(workflows[wdx].version_parse) + if (wdx.sum() > 1) and verbose: + warnings.warn('A workflow range was specified, configuration is based on {0}'.format(configure_version)) + workflow = workflows[wdx].iloc[configure_version_loc] workflow_tasks = json.loads(workflow.tasks) extractor_config = workflow_extractor_config(workflow_tasks, keywords=keywords) - workflow_version = '{0}.{1}'.format(version, minor_version) config = { 'workflow_id': workflow_id, 'workflow_version': workflow_version, 'extractor_config': extractor_config } # configure the extractors - filename = 'Extractor_config_workflow_{0}_V{1}.yaml'.format(workflow_id, workflow_version) + filename = 'Extractor_config_workflow_{0}_V{1}.yaml'.format(workflow_id, configure_version) if output_dir is not None: filename = os.path.join(output_dir, filename) with open(filename, 'w', encoding='utf-8') as stream: @@ -69,7 +97,7 @@ def config_workflow( reducer_config = { 'reducer_config': reducer } - filename = 'Reducer_config_workflow_{0}_V{1}_{2}.yaml'.format(workflow_id, workflow_version, extractor) + filename = 'Reducer_config_workflow_{0}_V{1}_{2}.yaml'.format(workflow_id, configure_version, extractor) if output_dir is not None: filename = os.path.join(output_dir, filename) with open(filename, 'w', encoding='utf-8') as stream: @@ -90,7 +118,7 @@ def config_workflow( dropdown_label_hash = workflow_tasks[task_id][selects][int(selects_idx)][options][star][int(star_idx)]['value'] dropdown_label = strings_extract[dropdown_string_key] 
strings_extract[dropdown_string_key] = {dropdown_label_hash: dropdown_label} - filename = 'Task_labels_workflow_{0}_V{1}.yaml'.format(workflow_id, workflow_version) + filename = 'Task_labels_workflow_{0}_V{1}.yaml'.format(workflow_id, configure_version) if output_dir is not None: filename = os.path.join(output_dir, filename) with open(filename, 'w', encoding='utf-8') as stream: diff --git a/panoptes_aggregation/scripts/extract_panoptes_csv.py b/panoptes_aggregation/scripts/extract_panoptes_csv.py index e8cdeace..99f01a22 100755 --- a/panoptes_aggregation/scripts/extract_panoptes_csv.py +++ b/panoptes_aggregation/scripts/extract_panoptes_csv.py @@ -1,5 +1,7 @@ from collections import OrderedDict, defaultdict from multiprocessing import Pool +import numpy as np +import packaging.version import copy import json import io @@ -24,10 +26,6 @@ def get_file_instance(file): return file -def get_major_version(s): - return s.split('.')[0] - - def extract_classification( classification_by_task, classification_info, @@ -93,8 +91,26 @@ def extract_csv( extractor_config = config_yaml['extractor_config'] workflow_id = config_yaml['workflow_id'] - version = config_yaml['workflow_version'] - number_of_extractors = sum([len(value) for key, value in extractor_config.items()]) + if isinstance(config_yaml['workflow_version'], dict): + # a version range was given + version_range = config_yaml['workflow_version'] + for key, value in version_range.items(): + version_range[key] = packaging.version.parse(value) + else: + # a single version is given + version = packaging.version.parse(config_yaml['workflow_version']) + if version.minor == 0: + # only a major version given, take all rows with the same major version + # note, the max is inclusive, but there are no workflows with a minor + # version of 0, so that is OK here + next_version = packaging.version.parse(str(version.major + 1)) + else: + next_version = version + version_range = { + 'min': version, + 'max': next_version + } + 
number_of_extractors = sum([len(value) for _, value in extractor_config.items()]) extracted_data = defaultdict(list) @@ -104,13 +120,16 @@ def extract_csv( wdx = classifications.workflow_id == workflow_id assert (wdx.sum() > 0), 'There are no classifications matching the configured workflow ID' - if '.' in version: - vdx = classifications.workflow_version == version - else: - vdx = classifications.workflow_version.apply(get_major_version) == version - assert (vdx.sum() > 0), 'There are no classificaitons matching the configured version number' - assert ((vdx & wdx).sum() > 0), 'There are no classifications matching the combined workflow ID and version number' + classifications.workflow_version = classifications.workflow_version.apply(packaging.version.parse) + vdx = np.ones_like(classifications.workflow_version, dtype=bool) + if 'min' in version_range: + vdx &= classifications.workflow_version >= version_range['min'] + if 'max' in version_range: + vdx &= classifications.workflow_version <= version_range['max'] + + assert (vdx.sum() > 0), 'There are no classifications matching the configured version number(s)' + assert ((vdx & wdx).sum() > 0), 'There are no classifications matching the combined workflow ID and version number(s)' widgets = [ 'Extracting: ', diff --git a/panoptes_aggregation/tests/scripts_tests/test_aggregation_parser.py b/panoptes_aggregation/tests/scripts_tests/test_aggregation_parser.py index ad4229bd..860dcf54 100644 --- a/panoptes_aggregation/tests/scripts_tests/test_aggregation_parser.py +++ b/panoptes_aggregation/tests/scripts_tests/test_aggregation_parser.py @@ -15,8 +15,9 @@ def test_config_called(self, mock_config_workflow, mock_FileType): mock_FileType.return_value.return_value, 123, keywords={}, - minor_version=None, version=None, + min_version=None, + max_version=None, output_dir=os.getcwd(), verbose=False ) diff --git a/panoptes_aggregation/tests/scripts_tests/test_config_workflow.py 
b/panoptes_aggregation/tests/scripts_tests/test_config_workflow.py index 081df5ea..8149781e 100644 --- a/panoptes_aggregation/tests/scripts_tests/test_config_workflow.py +++ b/panoptes_aggregation/tests/scripts_tests/test_config_workflow.py @@ -8,29 +8,31 @@ class TestConfigWorkflowCL(unittest.TestCase): def setUp(self): self.workflow_data_dump = StringIO('''workflow_id,version,tasks,strings,minor_version + 4249,13,"{""T0"":{""help"":""T0.help"",""type"":""single"",""answers"":[{""label"":""T0.answers.0.label""},{""label"":""T0.answers.1.label""},{""label"":""T0.answers.2.label""}],""question"":""T0.question"",""required"":true}}","{""T0.help"":"""",""T0.question"":""A single question"",""T0.answers.0.label"":""Yes"",""T0.answers.1.label"":""No""}",1 + 4249,14,"{""T0"":{""help"":""T0.help"",""type"":""single"",""answers"":[{""label"":""T0.answers.0.label""},{""label"":""T0.answers.1.label""},{""label"":""T0.answers.2.label""}],""question"":""T0.question"",""required"":true}}","{""T0.help"":"""",""T0.question"":""A single question"",""T0.answers.0.label"":""Yes"",""T0.answers.1.label"":""No"",""T0.answers.2.label"":""All of the above""}",1 4249,14,"{""T0"":{""help"":""T0.help"",""type"":""single"",""answers"":[{""next"":""T1"",""label"":""T0.answers.0.label""},{""next"":""T1"",""label"":""T0.answers.1.label""},{""label"":""T0.answers.2.label""}],""question"":""T0.question"",""required"":true},""T1"":{""help"":""T1.help"",""type"":""shortcut"",""answers"":[{""label"":""T1.answers.0.label""},{""label"":""T1.answers.1.label""},{""label"":""T1.answers.2.label""}],""question"":""T1.question""}}","{""T0.help"":"""",""T1.help"":"""",""T0.question"":""A single question"",""T1.question"":""A multi question"",""T0.answers.0.label"":""Yes"",""T0.answers.1.label"":""No"",""T0.answers.2.label"":""All of the above"",""T1.answers.0.label"":""Red"",""T1.answers.1.label"":""Blue"",""T1.answers.2.label"":""Green""}",18''') - 
@patch('panoptes_aggregation.scripts.config_workflow_panoptes.workflow_extractor_config') - @patch('panoptes_aggregation.scripts.config_workflow_panoptes.workflow_reducer_config') - @patch('panoptes_aggregation.scripts.config_workflow_panoptes.open') - @patch('panoptes_aggregation.scripts.config_workflow_panoptes.yaml.dump') - def test_config_workflow_cl(self, mock_yaml_dump, mock_open, mock_reducer_config, mock_extractor_config): - '''Test command line config workflow creates the correct number of yaml files with the correct names''' - mock_extractor_config.return_value = {'question_extractor': [{'task': 'T0'}, {'task': 'T1'}]} - mock_reducer_config.return_value = [{'question_reducer': {}}] - panoptes_aggregation.scripts.config_workflow_panoptes.config_workflow( - self.workflow_data_dump, - 4249 - ) - expected_extractor_config = { + self.mock_extractor_config_14_18 = {'question_extractor': [{'task': 'T0'}, {'task': 'T1'}]} + self.mock_extractor_config_14_1 = {'question_extractor': [{'task': 'T0'}]} + + self.mock_reducer_config_14_18 = [{'question_reducer': {}}] + self.mock_reducer_config_14_1 = [{'question_reducer': {}}] + + self.expected_extractor_config_14_18 = { 'workflow_id': 4249, 'workflow_version': '14.18', - 'extractor_config': mock_extractor_config.return_value + 'extractor_config': self.mock_extractor_config_14_18 } - expected_reducer_config = { - 'reducer_config': mock_reducer_config.return_value[0] + self.expected_extractor_config_14_1 = { + 'workflow_id': 4249, + 'workflow_version': '14.1', + 'extractor_config': self.mock_extractor_config_14_1 } - expected_strings = { + + self.expected_reducer_config_14_18 = {'reducer_config': self.mock_reducer_config_14_18[0]} + self.expected_reducer_config_14_1 = {'reducer_config': self.mock_reducer_config_14_1[0]} + + self.expected_strings_14_18 = { 'T0.question': 'A single question', 'T0.answers.0.label': 'Yes', 'T0.answers.1.label': 'No', @@ -40,42 +42,249 @@ def test_config_workflow_cl(self, mock_yaml_dump, 
mock_open, mock_reducer_config 'T1.answers.1.label': 'Blue', 'T1.answers.2.label': 'Green' } + self.expected_strings_14_1 = { + 'T0.question': 'A single question', + 'T0.answers.0.label': 'Yes', + 'T0.answers.1.label': 'No', + 'T0.answers.2.label': 'All of the above' + } + + self.open_calls_14_18 = [ + call('Extractor_config_workflow_4249_V14.18.yaml', 'w', encoding='utf-8'), + call('Reducer_config_workflow_4249_V14.18_question_extractor.yaml', 'w', encoding='utf-8'), + call('Task_labels_workflow_4249_V14.18.yaml', 'w', encoding='utf-8') + ] + self.open_calls_14_1 = [ + call('Extractor_config_workflow_4249_V14.1.yaml', 'w', encoding='utf-8'), + call('Reducer_config_workflow_4249_V14.1_question_extractor.yaml', 'w', encoding='utf-8'), + call('Task_labels_workflow_4249_V14.1.yaml', 'w', encoding='utf-8') + ] + + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.workflow_extractor_config') + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.workflow_reducer_config') + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.open') + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.yaml.dump') + def test_config_workflow_cl(self, mock_yaml_dump, mock_open, mock_reducer_config, mock_extractor_config): + '''Test command line config workflow creates the correct number of yaml files with the correct names''' + mock_extractor_config.return_value = self.mock_extractor_config_14_18 + mock_reducer_config.return_value = self.mock_reducer_config_14_18 + + panoptes_aggregation.scripts.config_workflow_panoptes.config_workflow( + self.workflow_data_dump, + 4249 + ) dump_calls = [ call( - expected_extractor_config, + self.expected_extractor_config_14_18, stream=mock_open.return_value.__enter__(), default_flow_style=False, indent=4 ), call( - expected_reducer_config, + self.expected_reducer_config_14_18, stream=mock_open.return_value.__enter__(), default_flow_style=False, indent=4 ), call( - expected_strings, + self.expected_strings_14_18, 
stream=mock_open.return_value.__enter__(), default_flow_style=False, indent=4 ) ] mock_yaml_dump.assert_has_calls(dump_calls, any_order=False) - open_calls = [ - call('Extractor_config_workflow_4249_V14.18.yaml', 'w', encoding='utf-8'), - call('Reducer_config_workflow_4249_V14.18_question_extractor.yaml', 'w', encoding='utf-8'), - call('Task_labels_workflow_4249_V14.18.yaml', 'w', encoding='utf-8') + mock_open.assert_has_calls(self.open_calls_14_18, any_order=True) + + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.workflow_extractor_config') + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.workflow_reducer_config') + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.open') + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.yaml.dump') + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.warnings.warn') + def test_config_workflow_cl_with_major_version(self, mock_warn, mock_yaml_dump, mock_open, mock_reducer_config, mock_extractor_config): + '''Test command line config workflow creates the correct number of yaml files with the correct names with major version specified''' + mock_extractor_config.return_value = self.mock_extractor_config_14_18 + mock_reducer_config.return_value = self.mock_reducer_config_14_18 + panoptes_aggregation.scripts.config_workflow_panoptes.config_workflow( + self.workflow_data_dump, + 4249, + version='14', + verbose=True + ) + expected_extractor_config_14 = { + 'workflow_id': 4249, + 'workflow_version': '14', + 'extractor_config': self.mock_extractor_config_14_18 + } + dump_calls = [ + call( + expected_extractor_config_14, + stream=mock_open.return_value.__enter__(), + default_flow_style=False, + indent=4 + ), + call( + self.expected_reducer_config_14_18, + stream=mock_open.return_value.__enter__(), + default_flow_style=False, + indent=4 + ), + call( + self.expected_strings_14_18, + stream=mock_open.return_value.__enter__(), + default_flow_style=False, + indent=4 + ) ] - 
mock_open.assert_has_calls(open_calls, any_order=True) + mock_yaml_dump.assert_has_calls(dump_calls, any_order=False) + mock_open.assert_has_calls(self.open_calls_14_18, any_order=True) + warning_calls = [ + call('A workflow range was specified, configuration is based on 14.18'), + ] + mock_warn.assert_has_calls(warning_calls, any_order=False) @patch('panoptes_aggregation.scripts.config_workflow_panoptes.workflow_extractor_config') @patch('panoptes_aggregation.scripts.config_workflow_panoptes.workflow_reducer_config') @patch('panoptes_aggregation.scripts.config_workflow_panoptes.open') @patch('panoptes_aggregation.scripts.config_workflow_panoptes.yaml.dump') - def test_config_workflow_cl_with_dir(self, mock_yaml_dump, mock_open, mock_reducer_config, mock_extractor_config): + def test_config_workflow_cl_with_version(self, mock_yaml_dump, mock_open, mock_reducer_config, mock_extractor_config): + '''Test command line config workflow creates the correct number of yaml files with the correct names when a version is given''' + mock_extractor_config.return_value = self.mock_extractor_config_14_1 + mock_reducer_config.return_value = self.mock_reducer_config_14_1 + panoptes_aggregation.scripts.config_workflow_panoptes.config_workflow( + self.workflow_data_dump, + 4249, + version='14.1' + ) + dump_calls = [ + call( + self.expected_extractor_config_14_1, + stream=mock_open.return_value.__enter__(), + default_flow_style=False, + indent=4 + ), + call( + self.expected_reducer_config_14_1, + stream=mock_open.return_value.__enter__(), + default_flow_style=False, + indent=4 + ), + call( + self.expected_strings_14_1, + stream=mock_open.return_value.__enter__(), + default_flow_style=False, + indent=4 + ) + ] + mock_yaml_dump.assert_has_calls(dump_calls, any_order=False) + mock_open.assert_has_calls(self.open_calls_14_1, any_order=True) + + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.workflow_extractor_config') + 
@patch('panoptes_aggregation.scripts.config_workflow_panoptes.workflow_reducer_config') + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.open') + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.yaml.dump') + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.warnings.warn') + def test_config_workflow_cl_with_min_version(self, mock_warn, mock_yaml_dump, mock_open, mock_reducer_config, mock_extractor_config): + '''Test command line config workflow creates the correct number of yaml files with the correct names when a min_version is given''' + mock_extractor_config.return_value = self.mock_extractor_config_14_18 + mock_reducer_config.return_value = self.mock_reducer_config_14_18 + panoptes_aggregation.scripts.config_workflow_panoptes.config_workflow( + self.workflow_data_dump, + 4249, + min_version='14.1', + verbose=True + ) + expected_extractor_config_14_min = { + 'workflow_id': 4249, + 'workflow_version': { + 'min': '14.1' + }, + 'extractor_config': self.mock_extractor_config_14_18 + } + dump_calls = [ + call( + expected_extractor_config_14_min, + stream=mock_open.return_value.__enter__(), + default_flow_style=False, + indent=4 + ), + call( + self.expected_reducer_config_14_18, + stream=mock_open.return_value.__enter__(), + default_flow_style=False, + indent=4 + ), + call( + self.expected_strings_14_18, + stream=mock_open.return_value.__enter__(), + default_flow_style=False, + indent=4 + ) + ] + mock_yaml_dump.assert_has_calls(dump_calls, any_order=False) + mock_open.assert_has_calls(self.open_calls_14_18, any_order=True) + warning_calls = [ + call('A workflow range was specified, configuration is based on 14.18'), + ] + mock_warn.assert_has_calls(warning_calls, any_order=False) + + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.workflow_extractor_config') + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.workflow_reducer_config') + 
@patch('panoptes_aggregation.scripts.config_workflow_panoptes.open') + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.yaml.dump') + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.warnings.warn') + def test_config_workflow_cl_with_max_version(self, mock_warn, mock_yaml_dump, mock_open, mock_reducer_config, mock_extractor_config): + '''Test command line config workflow creates the correct number of yaml files with the correct names when a max_version is given''' + mock_extractor_config.return_value = self.mock_extractor_config_14_1 + mock_reducer_config.return_value = self.mock_reducer_config_14_1 + panoptes_aggregation.scripts.config_workflow_panoptes.config_workflow( + self.workflow_data_dump, + 4249, + max_version='14.1', + verbose=True + ) + expected_extractor_config_14_max = { + 'workflow_id': 4249, + 'workflow_version': { + 'max': '14.1' + }, + 'extractor_config': self.mock_extractor_config_14_1 + } + dump_calls = [ + call( + expected_extractor_config_14_max, + stream=mock_open.return_value.__enter__(), + default_flow_style=False, + indent=4 + ), + call( + self.expected_reducer_config_14_1, + stream=mock_open.return_value.__enter__(), + default_flow_style=False, + indent=4 + ), + call( + self.expected_strings_14_1, + stream=mock_open.return_value.__enter__(), + default_flow_style=False, + indent=4 + ) + ] + mock_yaml_dump.assert_has_calls(dump_calls, any_order=False) + mock_open.assert_has_calls(self.open_calls_14_1, any_order=True) + warning_calls = [ + call('A workflow range was specified, configuration is based on 14.1'), + ] + mock_warn.assert_has_calls(warning_calls, any_order=False) + + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.yaml.dump') + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.workflow_extractor_config') + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.workflow_reducer_config') + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.open') + def
test_config_workflow_cl_with_dir(self, mock_open, mock_reducer_config, mock_extractor_config, *_): '''Test command line config workflow saves to specified directory''' - mock_extractor_config.return_value = {'question_extractor': [{'task': 'T0'}, {'task': 'T1'}]} - mock_reducer_config.return_value = [{'question_reducer': {}}] + mock_extractor_config.return_value = self.mock_extractor_config_14_18 + mock_reducer_config.return_value = self.mock_reducer_config_14_18 panoptes_aggregation.scripts.config_workflow_panoptes.config_workflow( self.workflow_data_dump, 4249, @@ -88,12 +297,12 @@ def test_config_workflow_cl_with_dir(self, mock_yaml_dump, mock_open, mock_reduc ] mock_open.assert_has_calls(open_calls, any_order=True) + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.yaml.dump') + @patch('panoptes_aggregation.scripts.config_workflow_panoptes.open') @patch('panoptes_aggregation.scripts.config_workflow_panoptes.workflow_extractor_config') @patch('panoptes_aggregation.scripts.config_workflow_panoptes.workflow_reducer_config') - @patch('panoptes_aggregation.scripts.config_workflow_panoptes.open') - @patch('panoptes_aggregation.scripts.config_workflow_panoptes.yaml.dump') @patch('panoptes_aggregation.scripts.config_workflow_panoptes.warnings.warn') - def test_config_workflow_cl_with_verbose(self, mock_warn, mock_yaml_dump, mock_open, mock_reducer_config, mock_extractor_config): + def test_config_workflow_cl_with_verbose(self, mock_warn, mock_reducer_config, mock_extractor_config, *_): '''Test command line config workflow verbose mode creates warnings''' mock_extractor_config.return_value = {'question_extractor': [{'task': 'T0'}, {'task': 'T1'}]} mock_reducer_config.return_value = [{'question_reducer': {}}] @@ -103,8 +312,7 @@ def test_config_workflow_cl_with_verbose(self, mock_warn, mock_yaml_dump, mock_o verbose=True ) warning_calls = [ - call('No major workflow version was specified, defaulting to version 14'), - call('No minor workflow version was 
specified, defaulting to version 18') + call('No workflow version was specified, defaulting to version 14.18'), ] mock_warn.assert_has_calls(warning_calls, any_order=False) @@ -137,16 +345,6 @@ def test_config_workflow_cl_multiple_reducers(self, mock_yaml_dump, mock_open, m expected_reducer_config_1 = { 'reducer_config': mock_reducer_config.return_value[1] } - expected_strings = { - 'T0.question': 'A single question', - 'T0.answers.0.label': 'Yes', - 'T0.answers.1.label': 'No', - 'T0.answers.2.label': 'All of the above', - 'T1.question': 'A multi question', - 'T1.answers.0.label': 'Red', - 'T1.answers.1.label': 'Blue', - 'T1.answers.2.label': 'Green' - } dump_calls = [ call( expected_extractor_config, @@ -167,7 +365,7 @@ def test_config_workflow_cl_multiple_reducers(self, mock_yaml_dump, mock_open, m indent=4 ), call( - expected_strings, + self.expected_strings_14_18, stream=mock_open.return_value.__enter__(), default_flow_style=False, indent=4 diff --git a/panoptes_aggregation/tests/scripts_tests/test_extract_csv.py b/panoptes_aggregation/tests/scripts_tests/test_extract_csv.py index 9fff6722..4622e962 100644 --- a/panoptes_aggregation/tests/scripts_tests/test_extract_csv.py +++ b/panoptes_aggregation/tests/scripts_tests/test_extract_csv.py @@ -13,8 +13,10 @@ WINDOWS = False classification_data_dump_two_tasks = '''classification_id,user_name,user_id,workflow_id,workflow_version,created_at,annotations,subject_ids,metadata -1,1,1,4249,14.18,2017-05-31 12:33:46 UTC,"[{""task"":""T0""},{""task"":""T1""}]",1,"{}" -2,2,2,4249,14.18,2017-05-31 12:33:51 UTC,"[{""task"":""T0""},{""task"":""T1""}]",1,"{}" +1,1,1,4249,13.1,2017-05-20 1:33:46 UTC,"[{""task"":""T0""},{""task"":""T1""}]",1,"{}" +2,1,1,4249,14.1,2017-05-25 2:33:46 UTC,"[{""task"":""T0""},{""task"":""T1""}]",1,"{}" +3,1,1,4249,14.18,2017-05-31 12:33:46 UTC,"[{""task"":""T0""},{""task"":""T1""}]",1,"{}" +4,2,2,4249,14.18,2017-05-31 12:33:51 UTC,"[{""task"":""T0""},{""task"":""T1""}]",1,"{}" ''' 
extractor_config_yaml_question = '''{ @@ -24,14 +26,46 @@ }''' extracted_csv_question = '''classification_id,user_name,user_id,workflow_id,task,created_at,subject_id,extractor,data.blue,data.green,data.no,data.yes -1,1,1,4249,T0,2017-05-31 12:33:46 UTC,1,question_extractor,,,,1.0 -1,1,1,4249,T1,2017-05-31 12:33:46 UTC,1,question_extractor,1.0,1.0,, -2,2,2,4249,T0,2017-05-31 12:33:51 UTC,1,question_extractor,,,1.0, -2,2,2,4249,T1,2017-05-31 12:33:51 UTC,1,question_extractor,,,, +3,1,1,4249,T0,2017-05-31 12:33:46 UTC,1,question_extractor,,,,1.0 +3,1,1,4249,T1,2017-05-31 12:33:46 UTC,1,question_extractor,1.0,1.0,, +4,2,2,4249,T0,2017-05-31 12:33:51 UTC,1,question_extractor,,,1.0, +4,2,2,4249,T1,2017-05-31 12:33:51 UTC,1,question_extractor,,,, +''' + +extractor_config_yaml_question_min = '''{ + 'workflow_id': 4249, + 'workflow_version': { + min: '14.1' + }, + 'extractor_config': {'question_extractor': [{'task': 'T0'}, {'task': 'T1'}]} +}''' + +extracted_csv_question_min = '''classification_id,user_name,user_id,workflow_id,task,created_at,subject_id,extractor,data.blue,data.green,data.no,data.yes +2,1,1,4249,T0,2017-05-25 2:33:46 UTC,1,question_extractor,,,,1.0 +2,1,1,4249,T1,2017-05-25 2:33:46 UTC,1,question_extractor,1.0,1.0,, +3,1,1,4249,T0,2017-05-31 12:33:46 UTC,1,question_extractor,,,,1.0 +3,1,1,4249,T1,2017-05-31 12:33:46 UTC,1,question_extractor,1.0,1.0,, +4,2,2,4249,T0,2017-05-31 12:33:51 UTC,1,question_extractor,,,1.0, +4,2,2,4249,T1,2017-05-31 12:33:51 UTC,1,question_extractor,,,, +''' + +extractor_config_yaml_question_max = '''{ + 'workflow_id': 4249, + 'workflow_version': { + max: '14.1' + }, + 'extractor_config': {'question_extractor': [{'task': 'T0'}, {'task': 'T1'}]} +}''' + +extracted_csv_question_max = '''classification_id,user_name,user_id,workflow_id,task,created_at,subject_id,extractor,data.blue,data.green,data.yes +1,1,1,4249,T0,2017-05-20 1:33:46 UTC,1,question_extractor,,,1.0 +1,1,1,4249,T1,2017-05-20 1:33:46 UTC,1,question_extractor,1.0,1.0, 
+2,1,1,4249,T0,2017-05-25 2:33:46 UTC,1,question_extractor,,,1.0 +2,1,1,4249,T1,2017-05-25 2:33:46 UTC,1,question_extractor,1.0,1.0, ''' classification_data_dump_one_task = '''classification_id,user_name,user_id,workflow_id,workflow_version,created_at,annotations,subject_ids,metadata -1,1,1,4249,14.18,2017-05-31 12:33:46 UTC,"[{""task"":""T0""}]",1,"{}" +1,1,1,4249,14.1,2017-05-31 12:33:46 UTC,"[{""task"":""T0""}]",1,"{}" 2,2,2,4249,14.18,2017-05-31 12:33:51 UTC,"[{""task"":""T0""}]",1,"{}" ''' @@ -57,13 +91,13 @@ }''' extracted_csv_two_T1 = '''classification_id,user_name,user_id,workflow_id,task,created_at,subject_id,extractor,data.no,data.yes -1,1,1,4249,T1,2017-05-31 12:33:46 UTC,1,shape_extractor_point,,1.0 -2,2,2,4249,T1,2017-05-31 12:33:51 UTC,1,shape_extractor_point,1.0, +3,1,1,4249,T1,2017-05-31 12:33:46 UTC,1,shape_extractor_point,,1.0 +4,2,2,4249,T1,2017-05-31 12:33:51 UTC,1,shape_extractor_point,1.0, ''' extracted_csv_two_T0 = '''classification_id,user_name,user_id,workflow_id,task,created_at,subject_id,extractor,data.no,data.yes -1,1,1,4249,T0,2017-05-31 12:33:46 UTC,1,shape_extractor_rectangle,,1.0 -2,2,2,4249,T0,2017-05-31 12:33:51 UTC,1,shape_extractor_rectangle,1.0, +3,1,1,4249,T0,2017-05-31 12:33:46 UTC,1,shape_extractor_rectangle,,1.0 +4,2,2,4249,T0,2017-05-31 12:33:51 UTC,1,shape_extractor_rectangle,1.0, ''' extractor_config_yaml_fail = '''{ @@ -87,53 +121,9 @@ def __call__(self, *args, **kwargs): mock_question_extractor = MagicMock() -mock_question_extractor.side_effect = [ - {'yes': 1}, - {'blue': 1, 'green': 1}, - {'no': 1}, - {}, - {'yes': 1}, - {'blue': 1, 'green': 1}, - {'no': 1}, - {}, -] - mock_shape_extractor = MagicMock() -mock_shape_extractor.side_effect = [ - {'yes': 1}, - {'yes': 1}, - {'no': 1}, - {'no': 1}, -] - - mock_survey_extractor = MagicMock() -mock_survey_extractor.side_effect = [ - [ - { - 'choice': 'dog', - 'answers_howmany': {'1': 1} - }, - { - 'choice': 'cat', - 'answers_howmany': {'3': 1} - }, - ], - [ - { - 'choice': 
'cat', - 'answers_howmany': {'4': 1} - }, - ] -] - mock_bad_extractor = MagicMock() -mock_bad_extractor.side_effect = [ - Exception(), - Exception(), - Exception(), - Exception() -] mock_extractors_dict = { 'question_extractor': mock_question_extractor, @@ -148,20 +138,36 @@ def setUp(self): self.classification_data_dump_two_tasks = StringIO(classification_data_dump_two_tasks) self.config_yaml_question = StringIO(extractor_config_yaml_question) self.extracts_dataframe_question = pandas.read_csv(StringIO(extracted_csv_question)) + self.classification_data_dump_one_task = StringIO(classification_data_dump_one_task) + + self.config_yaml_question_min = StringIO(extractor_config_yaml_question_min) + self.extracts_dataframe_question_min = pandas.read_csv(StringIO(extracted_csv_question_min)) + + self.config_yaml_question_max = StringIO(extractor_config_yaml_question_max) + self.extracts_dataframe_question_max = pandas.read_csv(StringIO(extracted_csv_question_max)) + self.config_yaml_survey = StringIO(extractor_config_yaml_survey) self.extracts_dataframe_survey = pandas.read_csv(StringIO(extracted_csv_survey)) + self.config_yaml_two = StringIO(extractor_config_yaml_two) self.extracts_dataframe_two_T0 = pandas.read_csv(StringIO(extracted_csv_two_T0)) self.extracts_dataframe_two_T1 = pandas.read_csv(StringIO(extracted_csv_two_T1)) + self.config_yaml_fail = StringIO(extractor_config_yaml_fail) @patch('panoptes_aggregation.scripts.extract_panoptes_csv.progressbar.ProgressBar') @patch('panoptes_aggregation.scripts.extract_panoptes_csv.pandas.DataFrame.to_csv') @patch.dict('panoptes_aggregation.scripts.extract_panoptes_csv.extractors.extractors', mock_extractors_dict) @patch('panoptes_aggregation.scripts.extract_panoptes_csv.flatten_data', CaptureValues(extract_panoptes_csv.flatten_data)) - def test_extract_csv_object(self, mock_to_csv, mock_pbar): + def test_extract_csv_object(self, mock_to_csv, *_): '''Test one (object) extractor makes one csv file''' + 
mock_question_extractor.side_effect = [ + {'yes': 1}, + {'blue': 1, 'green': 1}, + {'no': 1}, + {} + ] output_file_names = extract_panoptes_csv.extract_csv( self.classification_data_dump_two_tasks, self.config_yaml_question, @@ -178,8 +184,14 @@ def test_extract_csv_object(self, mock_to_csv, mock_pbar): @patch('panoptes_aggregation.scripts.extract_panoptes_csv.pandas.DataFrame.to_csv') @patch.dict('panoptes_aggregation.scripts.extract_panoptes_csv.extractors.extractors', mock_extractors_dict) @patch('panoptes_aggregation.scripts.extract_panoptes_csv.flatten_data', CaptureValues(extract_panoptes_csv.flatten_data)) - def test_extract_csv_object_n2(self, mock_to_csv, mock_pbar): + def test_extract_csv_object_n2(self, mock_to_csv, *_): '''Test one (object) extractor makes one csv file with cpu_count==2''' + mock_question_extractor.side_effect = [ + {'yes': 1}, + {'blue': 1, 'green': 1}, + {'no': 1}, + {} + ] output_file_names = extract_panoptes_csv.extract_csv( self.classification_data_dump_two_tasks, self.config_yaml_question, @@ -189,12 +201,78 @@ def test_extract_csv_object_n2(self, mock_to_csv, mock_pbar): self.assertEqual(output_file_names, [output_path]) mock_to_csv.assert_called_once_with(output_path, index=False, encoding='utf-8') + @patch('panoptes_aggregation.scripts.extract_panoptes_csv.progressbar.ProgressBar') + @patch('panoptes_aggregation.scripts.extract_panoptes_csv.pandas.DataFrame.to_csv') + @patch.dict('panoptes_aggregation.scripts.extract_panoptes_csv.extractors.extractors', mock_extractors_dict) + @patch('panoptes_aggregation.scripts.extract_panoptes_csv.flatten_data', CaptureValues(extract_panoptes_csv.flatten_data)) + def test_extract_csv_object_min_version(self, mock_to_csv, *_): + '''Test one (object) extractor makes one csv file with min_version''' + mock_question_extractor.side_effect = [ + {'yes': 1}, + {'blue': 1, 'green': 1}, + {'yes': 1}, + {'blue': 1, 'green': 1}, + {'no': 1}, + {} + ] + output_file_names = 
extract_panoptes_csv.extract_csv( + self.classification_data_dump_two_tasks, + self.config_yaml_question_min, + cpu_count=1 + ) + output_path = os.path.join(os.getcwd(), 'question_extractor_extractions.csv') + self.assertEqual(output_file_names, [output_path]) + result_dataframe = extract_panoptes_csv.flatten_data.return_values[0] + assert_frame_equal(result_dataframe, self.extracts_dataframe_question_min, check_like=True) + mock_to_csv.assert_called_once_with(output_path, index=False, encoding='utf-8') + + @patch('panoptes_aggregation.scripts.extract_panoptes_csv.progressbar.ProgressBar') + @patch('panoptes_aggregation.scripts.extract_panoptes_csv.pandas.DataFrame.to_csv') + @patch.dict('panoptes_aggregation.scripts.extract_panoptes_csv.extractors.extractors', mock_extractors_dict) + @patch('panoptes_aggregation.scripts.extract_panoptes_csv.flatten_data', CaptureValues(extract_panoptes_csv.flatten_data)) + def test_extract_csv_object_max_version(self, mock_to_csv, *_): + '''Test one (object) extractor makes one csv file with max_version''' + mock_question_extractor.side_effect = [ + {'yes': 1}, + {'blue': 1, 'green': 1}, + {'yes': 1}, + {'blue': 1, 'green': 1} + ] + output_file_names = extract_panoptes_csv.extract_csv( + self.classification_data_dump_two_tasks, + self.config_yaml_question_max, + cpu_count=1 + ) + output_path = os.path.join(os.getcwd(), 'question_extractor_extractions.csv') + self.assertEqual(output_file_names, [output_path]) + result_dataframe = extract_panoptes_csv.flatten_data.return_values[0] + assert_frame_equal(result_dataframe, self.extracts_dataframe_question_max, check_like=True) + mock_to_csv.assert_called_once_with(output_path, index=False, encoding='utf-8') + @patch('panoptes_aggregation.scripts.extract_panoptes_csv.progressbar.ProgressBar') @patch('panoptes_aggregation.scripts.extract_panoptes_csv.pandas.DataFrame.to_csv') @patch.dict('panoptes_aggregation.scripts.extract_panoptes_csv.extractors.extractors', mock_extractors_dict) 
@patch('panoptes_aggregation.scripts.extract_panoptes_csv.order_columns', CaptureValues(extract_panoptes_csv.order_columns)) - def test_extract_csv_list(self, mock_to_csv, mock_pbar): + def test_extract_csv_list(self, mock_to_csv, *_): '''Test one (list) extractor makes one csv file''' + mock_survey_extractor.side_effect = [ + [ + { + 'choice': 'dog', + 'answers_howmany': {'1': 1} + }, + { + 'choice': 'cat', + 'answers_howmany': {'3': 1} + }, + ], + [ + { + 'choice': 'cat', + 'answers_howmany': {'4': 1} + }, + ] + ] output_file_names = extract_panoptes_csv.extract_csv( self.classification_data_dump_one_task, self.config_yaml_survey, @@ -211,8 +289,14 @@ def test_extract_csv_list(self, mock_to_csv, mock_pbar): @patch('panoptes_aggregation.scripts.extract_panoptes_csv.pandas.DataFrame.to_csv') @patch.dict('panoptes_aggregation.scripts.extract_panoptes_csv.extractors.extractors', mock_extractors_dict) @patch('panoptes_aggregation.scripts.extract_panoptes_csv.flatten_data', CaptureValues(extract_panoptes_csv.flatten_data)) - def test_extract_csv_object_shape(self, mock_to_csv, mock_pbar): + def test_extract_csv_object_shape(self, mock_to_csv, *_): '''Test two (object) extractors makes two csv files''' + mock_shape_extractor.side_effect = [ + {'yes': 1}, + {'yes': 1}, + {'no': 1}, + {'no': 1}, + ] output_file_names = extract_panoptes_csv.extract_csv( self.classification_data_dump_two_tasks, self.config_yaml_two, @@ -239,8 +323,12 @@ def test_extract_csv_object_shape(self, mock_to_csv, mock_pbar): @patch('panoptes_aggregation.scripts.extract_panoptes_csv.pandas.DataFrame.to_csv') @patch.dict('panoptes_aggregation.scripts.extract_panoptes_csv.extractors.extractors', mock_extractors_dict) @patch('panoptes_aggregation.scripts.extract_panoptes_csv.print') - def test_extract_csv_bad_classification_verbose(self, mock_print, mock_to_csv, mock_pbar): + def test_extract_csv_bad_classification_verbose(self, mock_print, *_): '''Test bad classification with verbose on''' + 
mock_bad_extractor.side_effect = [ + Exception(), + Exception() + ] output_file_names = extract_panoptes_csv.extract_csv( self.classification_data_dump_one_task, self.config_yaml_fail, @@ -254,8 +342,12 @@ def test_extract_csv_bad_classification_verbose(self, mock_print, mock_to_csv, m @patch('panoptes_aggregation.scripts.extract_panoptes_csv.pandas.DataFrame.to_csv') @patch.dict('panoptes_aggregation.scripts.extract_panoptes_csv.extractors.extractors', mock_extractors_dict) @patch('panoptes_aggregation.scripts.extract_panoptes_csv.print') - def test_extract_csv_bad_classification_no_verbose(self, mock_print, mock_to_csv, mock_pbar): + def test_extract_csv_bad_classification_no_verbose(self, mock_print, *_): '''Test bad classification with verbose off''' + mock_bad_extractor.side_effect = [ + Exception(), + Exception() + ] output_file_names = extract_panoptes_csv.extract_csv( self.classification_data_dump_one_task, self.config_yaml_fail, diff --git a/panoptes_aggregation/version/__init__.py b/panoptes_aggregation/version/__init__.py index 8c3336cc..d6497a81 100644 --- a/panoptes_aggregation/version/__init__.py +++ b/panoptes_aggregation/version/__init__.py @@ -1 +1 @@ -__version__ = '3.7.0' +__version__ = '4.0.0'