Skip to content

Commit

Permalink
REST API: Modify assumptions for process_type
Browse files Browse the repository at this point in the history
The `process_type` attribute has changed over the years; currently it
must have some descriptor for processes and be None for data types.
Apparently this has not always been the case, and thus old databases may
have both data and process nodes with either empty strings ('') and/or
None entries in their `process_type` attributes.

Additionally, there were some problems with how the unregistered entry
points were considered that made it impossible to query for them.

In order to consider all of this when filtering and doing statistics,
it has been decided to:

1) Group all instances of a given node_type that have either '' or None
as their process_type in the same `full_type` (`node_type|` ) and hence
always query for both when the `process_type` is missing.

2) Remove the `aiida.descriptor:` and the `no-entry-point` from the
`process_type` part of unregistered processes. This was interfering
when the `full_type` was given to return the filtering options to query
for these processes.

Tests were adapted to test these new compatibility aspects.
  • Loading branch information
ramirezfranciscof authored and sphuber committed Nov 13, 2020
1 parent 810dc56 commit 9683716
Show file tree
Hide file tree
Showing 2 changed files with 234 additions and 52 deletions.
45 changes: 35 additions & 10 deletions aiida/restapi/common/identifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,16 +103,26 @@ def get_full_type_filters(full_type):
node_type = escape_for_sql_like(node_type) + LIKE_OPERATOR_CHARACTER
filters['node_type'] = {'like': node_type}
else:
filters['node_type'] = escape_for_sql_like(node_type)
filters['node_type'] = {'==': node_type}

if LIKE_OPERATOR_CHARACTER in process_type:
# Remove the trailing `LIKE_OPERATOR_CHARACTER`, escape the string and reattach the character
# Remove the trailing `LIKE_OPERATOR_CHARACTER` ()
# If that was the only specification, just ignore this filter (looking for any process_type)
# If there was more: escape the string and reattach the character
process_type = process_type[:-1]
process_type = escape_for_sql_like(process_type) + LIKE_OPERATOR_CHARACTER
filters['process_type'] = {'like': process_type}
if process_type:
process_type = escape_for_sql_like(process_type) + LIKE_OPERATOR_CHARACTER
filters['process_type'] = {'like': process_type}
else:
if process_type:
filters['process_type'] = escape_for_sql_like(process_type)
filters['process_type'] = {'==': process_type}
else:
# A `process_type=''` is used to represents both `process_type='' and `process_type=None`.
# This is because there is no simple way to single out null `process_types`, and therefore
# we consider them together with empty-string process_types.
# Moreover, the existence of both is most likely a bug of migrations and thus both share
# this same "erroneous" origin.
filters['process_type'] = {'or': [{'==': ''}, {'==': None}]}

return filters

Expand Down Expand Up @@ -188,6 +198,13 @@ class Namespace(MutableMapping):
'process.workflow.workchain.': 'process.workflow.workchain.WorkChainNode.|aiida.workflows:{plugin_name}.%',
}

process_full_type_mapping_unplugged = {
'process.calculation.calcjob.': 'process.calculation.calcjob.CalcJobNode.|{plugin_name}.%',
'process.calculation.calcfunction.': 'process.calculation.calcfunction.CalcFunctionNode.|{plugin_name}.%',
'process.workflow.workfunction.': 'process.workflow.workfunction.WorkFunctionNode.|{plugin_name}.%',
'process.workflow.workchain.': 'process.workflow.workchain.WorkChainNode.|{plugin_name}.%',
}

def __str__(self):
import json
return json.dumps(self.get_description(), sort_keys=True, indent=4)
Expand Down Expand Up @@ -226,7 +243,12 @@ def _infer_full_type(self, full_type):
for basepath, full_type_template in self.process_full_type_mapping.items():
if full_type.startswith(basepath):
plugin_name = strip_prefix(full_type, basepath)
full_type = full_type_template.format(plugin_name=plugin_name)
if plugin_name.startswith(DEFAULT_NAMESPACE_LABEL):
temp_type_template = self.process_full_type_mapping_unplugged[basepath]
plugin_name = strip_prefix(plugin_name, DEFAULT_NAMESPACE_LABEL + '.')
full_type = temp_type_template.format(plugin_name=plugin_name)
else:
full_type = full_type_template.format(plugin_name=plugin_name)
return full_type

full_type += f'.{LIKE_OPERATOR_CHARACTER}{FULL_TYPE_CONCATENATOR}'
Expand Down Expand Up @@ -267,8 +289,9 @@ def get_description(self):
'full_type': self._full_type,
'label': self._label,
'path': self._path,
'subspaces': []
'subspaces': [],
}

for _, port in self._subspaces.items():
result['subspaces'].append(port.get_description())

Expand Down Expand Up @@ -338,6 +361,8 @@ def get_node_namespace():
from aiida.plugins.entry_point import is_valid_entry_point_string, parse_entry_point_string

builder = orm.QueryBuilder().append(orm.Node, project=['node_type', 'process_type']).distinct()

# All None instances of process_type are turned into ''
unique_types = {(node_type, process_type if process_type else '') for node_type, process_type in builder.all()}

# First we create a flat list of all "leaf" node types.
Expand All @@ -349,18 +374,18 @@ def get_node_namespace():
namespace = None

if process_type:
# Process nodes
# Only process nodes
parts = node_type.rsplit('.', 2)
if is_valid_entry_point_string(process_type):
_, entry_point_name = parse_entry_point_string(process_type)
label = entry_point_name.rpartition('.')[-1]
namespace = '.'.join(parts[:-2] + [entry_point_name])
else:
label = process_type.rsplit('.', 1)[-1]
namespace = '.'.join(parts[:-2] + [DEFAULT_NAMESPACE_LABEL, label])
namespace = '.'.join(parts[:-2] + [DEFAULT_NAMESPACE_LABEL, process_type])

else:
# Data nodes
# Data nodes and process nodes without process type (='' or =None)
parts = node_type.rsplit('.', 2)
try:
label = parts[-2]
Expand Down
241 changes: 199 additions & 42 deletions tests/restapi/test_identifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,62 +8,219 @@
# For further information please visit http://www.aiida.net #
###########################################################################
"""Tests for the `aiida.restapi.common.identifiers` module."""
from aiida.backends.testbase import AiidaTestCase
from threading import Thread

import pytest
import requests

from aiida import orm
from aiida.restapi.common.identifiers import get_full_type_filters, FULL_TYPE_CONCATENATOR, LIKE_OPERATOR_CHARACTER


class TestIdentifiers(AiidaTestCase):
"""Tests for the :py:mod:`~aiida.restapi.common.identifiers` module."""
def test_get_full_type_filters():
"""Coverage test for the `get_full_type_filters` function."""

def test_get_full_type_filters(self):
"""Test the `get_full_type_filters` function."""
# Equals on both
filters = get_full_type_filters(f'node_type{FULL_TYPE_CONCATENATOR}process_type')
assert filters['node_type'] == {'==': 'node_type'}
assert filters['process_type'] == {'==': 'process_type'}

with self.assertRaises(TypeError):
get_full_type_filters(10)
# Like on `node_type`
filters = get_full_type_filters(f'node_type{LIKE_OPERATOR_CHARACTER}{FULL_TYPE_CONCATENATOR}process_type')
assert filters['node_type'] == {'like': 'node\\_type%'}
assert filters['process_type'] == {'==': 'process_type'}

with self.assertRaises(ValueError):
get_full_type_filters('string_without_full_type_concatenator')
# Like on `process_type`
filters = get_full_type_filters(f'node_type{FULL_TYPE_CONCATENATOR}process_type{LIKE_OPERATOR_CHARACTER}')
assert filters['node_type'] == {'==': 'node_type'}
assert filters['process_type'] == {'like': 'process\\_type%'}

with self.assertRaises(ValueError):
get_full_type_filters(
'too_many_{like}{like}{concat}process_type'.format(
like=LIKE_OPERATOR_CHARACTER, concat=FULL_TYPE_CONCATENATOR
)
)
# Like on both
filters = get_full_type_filters(
'node_type{like}{concat}process_type{like}'.format(like=LIKE_OPERATOR_CHARACTER, concat=FULL_TYPE_CONCATENATOR)
)
assert filters['node_type'] == {'like': 'node\\_type%'}
assert filters['process_type'] == {'like': 'process\\_type%'}

with self.assertRaises(ValueError):
get_full_type_filters(
'node_type{concat}too_many_{like}{like}'.format(
like=LIKE_OPERATOR_CHARACTER, concat=FULL_TYPE_CONCATENATOR
)
)
# Test patologic case of process_type = '' / None
filters = get_full_type_filters(f'node_type{FULL_TYPE_CONCATENATOR}')
assert filters['node_type'] == {'==': 'node_type'}
assert filters['process_type'] == {'or': [{'==': ''}, {'==': None}]}

filters = get_full_type_filters(f'node_type{LIKE_OPERATOR_CHARACTER}{FULL_TYPE_CONCATENATOR}')
assert filters['node_type'] == {'like': 'node\\_type%'}
assert filters['process_type'] == {'or': [{'==': ''}, {'==': None}]}

with self.assertRaises(ValueError):
get_full_type_filters(f'not_at_{LIKE_OPERATOR_CHARACTER}_the_end{FULL_TYPE_CONCATENATOR}process_type')

with self.assertRaises(ValueError):
get_full_type_filters(f'node_type{FULL_TYPE_CONCATENATOR}not_at_{LIKE_OPERATOR_CHARACTER}_the_end')
def test_get_filters_errors():
"""Test the `get_full_type_filters` function."""

# Equals on both
filters = get_full_type_filters(f'node_type{FULL_TYPE_CONCATENATOR}process_type')
self.assertEqual(filters['node_type'], 'node\\_type')
self.assertEqual(filters['process_type'], 'process\\_type')
with pytest.raises(TypeError):
get_full_type_filters(10)

# Like on `node_type`
filters = get_full_type_filters(f'node_type{LIKE_OPERATOR_CHARACTER}{FULL_TYPE_CONCATENATOR}process_type')
self.assertEqual(filters['node_type'], {'like': 'node\\_type%'})
self.assertEqual(filters['process_type'], 'process\\_type')
with pytest.raises(ValueError):
get_full_type_filters('string_without_full_type_concatenator')

# Like on `process_type`
filters = get_full_type_filters(f'node_type{FULL_TYPE_CONCATENATOR}process_type{LIKE_OPERATOR_CHARACTER}')
self.assertEqual(filters['node_type'], 'node\\_type')
self.assertEqual(filters['process_type'], {'like': 'process\\_type%'})
with pytest.raises(ValueError):
get_full_type_filters(
'too_many_{like}{like}{concat}process_type'.format(
like=LIKE_OPERATOR_CHARACTER, concat=FULL_TYPE_CONCATENATOR
)
)

# Like on both
filters = get_full_type_filters(
'node_type{like}{concat}process_type{like}'.format(
with pytest.raises(ValueError):
get_full_type_filters(
'node_type{concat}too_many_{like}{like}'.format(
like=LIKE_OPERATOR_CHARACTER, concat=FULL_TYPE_CONCATENATOR
)
)
self.assertEqual(filters['node_type'], {'like': 'node\\_type%'})
self.assertEqual(filters['process_type'], {'like': 'process\\_type%'})

with pytest.raises(ValueError):
get_full_type_filters(f'not_at_{LIKE_OPERATOR_CHARACTER}_the_end{FULL_TYPE_CONCATENATOR}process_type')

with pytest.raises(ValueError):
get_full_type_filters(f'node_type{FULL_TYPE_CONCATENATOR}not_at_{LIKE_OPERATOR_CHARACTER}_the_end')


@pytest.mark.parametrize(
    'process_class', [orm.CalcFunctionNode, orm.CalcJobNode, orm.WorkFunctionNode, orm.WorkChainNode]
)
@pytest.mark.usefixtures('clear_database_before_test')
def test_full_type_unregistered(process_class, restapi_server, server_url):
    """Functionality test for the compatibility with old process_type entries.

    This only checks the integrity of the shape of the namespace tree; there is no checking of how
    the data is represented internally in terms of full types, labels, etc. The only thing that
    must work correctly is the internal consistency of `full_type` as it is interpreted by
    `get_full_type_filters` and the querybuilder.
    """
    # Store three process nodes whose process types do not correspond to registered entry points:
    # one under `unregistered_type1` and two under `unregistered_type2`.
    for unregistered_process_type in (
        'unregistered_type1.process1',
        'unregistered_type2.process1',
        'unregistered_type2.process2',
    ):
        node = process_class()
        node.set_process_type(unregistered_process_type)
        node.store()

    server = restapi_server()
    server_thread = Thread(target=server.serve_forever)

    try:
        server_thread.start()
        type_count_response = requests.get(f'{server_url}/nodes/full_types', timeout=10)
    finally:
        server.shutdown()

    def count_matches(full_type):
        """Count the stored nodes matched by the filters derived from the given full type."""
        return orm.QueryBuilder().append(orm.Node, filters=get_full_type_filters(full_type)).count()

    # All nodes = only processes
    # The main branch for all nodes does not currently return a queryable full_type
    namespace = type_count_response.json()['data']
    assert len(namespace['subspaces']) == 1

    # Walk down three single-child levels:
    #  1. all processes = only one kind of process (Calculation/Workflow)
    #  2. all subprocesses = only one kind of subprocess (calcfunc/workfunc or calcjob/workchain)
    #  3. one branch per registered plugin plus one for all unregistered (only unregistered here)
    # At each of these levels the full_type filters must match all three stored nodes.
    for _ in range(3):
        namespace = namespace['subspaces'][0]
        assert len(namespace['subspaces']) == 1
        assert count_matches(namespace['full_type']) == 3

    # There are only two process types: unregistered_type1 (1 node) and unregistered_type2 (2 nodes)
    # The unregistered branch does not currently return a queryable full_type
    namespace = namespace['subspaces'][0]
    assert len(namespace['subspaces']) == 2

    # Each unregistered type branch groups the expected number of sub-branches and nodes.
    for index, (expected_subspaces, expected_nodes) in enumerate([(1, 1), (2, 2)]):
        type_namespace = namespace['subspaces'][index]
        assert len(type_namespace['subspaces']) == expected_subspaces
        assert count_matches(type_namespace['full_type']) == expected_nodes

    # Finally check each specific subtype (1 for unregistered_type1 and 2 for unregistered_type2).
    # These are leaf nodes without any further subspace, each matching exactly one process.
    for outer_index, inner_index in ((0, 0), (1, 0), (1, 1)):
        leaf_namespace = namespace['subspaces'][outer_index]['subspaces'][inner_index]
        assert not leaf_namespace['subspaces']
        assert count_matches(leaf_namespace['full_type']) == 1


@pytest.mark.parametrize('node_class', [orm.CalcFunctionNode, orm.Dict])
@pytest.mark.usefixtures('clear_database_before_test')
def test_full_type_backwards_compatibility(node_class, restapi_server, server_url):
    """Functionality test for the compatibility with old process_type entries.

    This only checks the integrity of the shape of the namespace tree; there is no checking of how
    the data is represented internally in terms of full types, labels, etc. The only thing that
    must work correctly is the internal consistency of `full_type` as it is interpreted by
    `get_full_type_filters` and the querybuilder.
    """
    # Old databases may contain nodes whose process_type is either the empty string or None:
    # store one node of each flavor so both must be grouped together in the same branch.
    for legacy_process_type in ('', None):
        node = node_class()
        node.process_type = legacy_process_type
        node.store()

    server = restapi_server()
    server_thread = Thread(target=server.serve_forever)

    try:
        server_thread.start()
        type_count_response = requests.get(f'{server_url}/nodes/full_types', timeout=10)
    finally:
        server.shutdown()

    def count_matches(full_type):
        """Count the stored nodes matched by the filters derived from the given full type."""
        return orm.QueryBuilder().append(orm.Node, filters=get_full_type_filters(full_type)).count()

    # All nodes (contains either a process branch or a data branch)
    # The main branch for all nodes does not currently return a queryable full_type
    namespace = type_count_response.json()['data']
    assert len(namespace['subspaces']) == 1

    # Process nodes have one extra intermediate level (calcfunction, workfunction, calcjob or
    # workchain) before the leaf; data nodes go straight from the type branch to the leaf.
    intermediate_levels = 2 if issubclass(node_class, orm.ProcessNode) else 1
    for _ in range(intermediate_levels):
        namespace = namespace['subspaces'][0]
        assert len(namespace['subspaces']) == 1
        assert count_matches(namespace['full_type']) == 2

    # The final leaf: the specific data type or the empty process_type grouping. Both stored
    # nodes (empty-string and None process_type) must be matched by its full_type filters.
    namespace = namespace['subspaces'][0]
    assert not namespace['subspaces']
    assert count_matches(namespace['full_type']) == 2

0 comments on commit 9683716

Please sign in to comment.