Skip to content

Commit

Permalink
REST API: Modify assumptions for process_type
Browse files Browse the repository at this point in the history
The `process_type` attribute has changed over the years; currently it
must have some descriptor for processes and be None for data types.
Apparently this has not always been the case, and thus old databases may
have both data and process nodes with either empty strings ('') and/or
None entries in their `process_type` attributes.

Additionally, there were some problems with how the unregistered entry
points were considered that made it impossible to query for them.

In order to consider all of this when filtering and doing statistics,
it has been decided to:

1) Group all instances of a given node_type that have either '' or None
as their process_type in the same `full_type` (`node_type|` ) and hence
always query for both when the `process_type` is missing.

2) Remove the `aiida.descriptor:` and the `no-entry-point` from the
`process_type` part of unregistered processes. This was interfering
when the `full_type` was given to return the filtering options to query
for these processes.

Tests were adapted to test these new compatibility aspects.
  • Loading branch information
ramirezfranciscof authored and sphuber committed Nov 13, 2020
1 parent 810dc56 commit 9683716
Show file tree
Hide file tree
Showing 2 changed files with 234 additions and 52 deletions.
45 changes: 35 additions & 10 deletions aiida/restapi/common/identifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,16 +103,26 @@ def get_full_type_filters(full_type):
node_type = escape_for_sql_like(node_type) + LIKE_OPERATOR_CHARACTER
filters['node_type'] = {'like': node_type}
else:
filters['node_type'] = escape_for_sql_like(node_type)
filters['node_type'] = {'==': node_type}

if LIKE_OPERATOR_CHARACTER in process_type:
# Remove the trailing `LIKE_OPERATOR_CHARACTER`, escape the string and reattach the character
# Remove the trailing `LIKE_OPERATOR_CHARACTER` ()
# If that was the only specification, just ignore this filter (looking for any process_type)
# If there was more: escape the string and reattach the character
process_type = process_type[:-1]
process_type = escape_for_sql_like(process_type) + LIKE_OPERATOR_CHARACTER
filters['process_type'] = {'like': process_type}
if process_type:
process_type = escape_for_sql_like(process_type) + LIKE_OPERATOR_CHARACTER
filters['process_type'] = {'like': process_type}
else:
if process_type:
filters['process_type'] = escape_for_sql_like(process_type)
filters['process_type'] = {'==': process_type}
else:
# A `process_type=''` is used to represents both `process_type='' and `process_type=None`.
# This is because there is no simple way to single out null `process_types`, and therefore
# we consider them together with empty-string process_types.
# Moreover, the existence of both is most likely a bug of migrations and thus both share
# this same "erroneous" origin.
filters['process_type'] = {'or': [{'==': ''}, {'==': None}]}

return filters

Expand Down Expand Up @@ -188,6 +198,13 @@ class Namespace(MutableMapping):
'process.workflow.workchain.': 'process.workflow.workchain.WorkChainNode.|aiida.workflows:{plugin_name}.%',
}

process_full_type_mapping_unplugged = {
'process.calculation.calcjob.': 'process.calculation.calcjob.CalcJobNode.|{plugin_name}.%',
'process.calculation.calcfunction.': 'process.calculation.calcfunction.CalcFunctionNode.|{plugin_name}.%',
'process.workflow.workfunction.': 'process.workflow.workfunction.WorkFunctionNode.|{plugin_name}.%',
'process.workflow.workchain.': 'process.workflow.workchain.WorkChainNode.|{plugin_name}.%',
}

def __str__(self):
import json
return json.dumps(self.get_description(), sort_keys=True, indent=4)
Expand Down Expand Up @@ -226,7 +243,12 @@ def _infer_full_type(self, full_type):
for basepath, full_type_template in self.process_full_type_mapping.items():
if full_type.startswith(basepath):
plugin_name = strip_prefix(full_type, basepath)
full_type = full_type_template.format(plugin_name=plugin_name)
if plugin_name.startswith(DEFAULT_NAMESPACE_LABEL):
temp_type_template = self.process_full_type_mapping_unplugged[basepath]
plugin_name = strip_prefix(plugin_name, DEFAULT_NAMESPACE_LABEL + '.')
full_type = temp_type_template.format(plugin_name=plugin_name)
else:
full_type = full_type_template.format(plugin_name=plugin_name)
return full_type

full_type += f'.{LIKE_OPERATOR_CHARACTER}{FULL_TYPE_CONCATENATOR}'
Expand Down Expand Up @@ -267,8 +289,9 @@ def get_description(self):
'full_type': self._full_type,
'label': self._label,
'path': self._path,
'subspaces': []
'subspaces': [],
}

for _, port in self._subspaces.items():
result['subspaces'].append(port.get_description())

Expand Down Expand Up @@ -338,6 +361,8 @@ def get_node_namespace():
from aiida.plugins.entry_point import is_valid_entry_point_string, parse_entry_point_string

builder = orm.QueryBuilder().append(orm.Node, project=['node_type', 'process_type']).distinct()

# All None instances of process_type are turned into ''
unique_types = {(node_type, process_type if process_type else '') for node_type, process_type in builder.all()}

# First we create a flat list of all "leaf" node types.
Expand All @@ -349,18 +374,18 @@ def get_node_namespace():
namespace = None

if process_type:
# Process nodes
# Only process nodes
parts = node_type.rsplit('.', 2)
if is_valid_entry_point_string(process_type):
_, entry_point_name = parse_entry_point_string(process_type)
label = entry_point_name.rpartition('.')[-1]
namespace = '.'.join(parts[:-2] + [entry_point_name])
else:
label = process_type.rsplit('.', 1)[-1]
namespace = '.'.join(parts[:-2] + [DEFAULT_NAMESPACE_LABEL, label])
namespace = '.'.join(parts[:-2] + [DEFAULT_NAMESPACE_LABEL, process_type])

else:
# Data nodes
# Data nodes and process nodes without process type (='' or =None)
parts = node_type.rsplit('.', 2)
try:
label = parts[-2]
Expand Down
241 changes: 199 additions & 42 deletions tests/restapi/test_identifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,62 +8,219 @@
# For further information please visit http://www.aiida.net #
###########################################################################
"""Tests for the `aiida.restapi.common.identifiers` module."""
from aiida.backends.testbase import AiidaTestCase
from threading import Thread

import pytest
import requests

from aiida import orm
from aiida.restapi.common.identifiers import get_full_type_filters, FULL_TYPE_CONCATENATOR, LIKE_OPERATOR_CHARACTER


class TestIdentifiers(AiidaTestCase):
"""Tests for the :py:mod:`~aiida.restapi.common.identifiers` module."""
def test_get_full_type_filters():
"""Coverage test for the `get_full_type_filters` function."""

def test_get_full_type_filters(self):
"""Test the `get_full_type_filters` function."""
# Equals on both
filters = get_full_type_filters(f'node_type{FULL_TYPE_CONCATENATOR}process_type')
assert filters['node_type'] == {'==': 'node_type'}
assert filters['process_type'] == {'==': 'process_type'}

with self.assertRaises(TypeError):
get_full_type_filters(10)
# Like on `node_type`
filters = get_full_type_filters(f'node_type{LIKE_OPERATOR_CHARACTER}{FULL_TYPE_CONCATENATOR}process_type')
assert filters['node_type'] == {'like': 'node\\_type%'}
assert filters['process_type'] == {'==': 'process_type'}

with self.assertRaises(ValueError):
get_full_type_filters('string_without_full_type_concatenator')
# Like on `process_type`
filters = get_full_type_filters(f'node_type{FULL_TYPE_CONCATENATOR}process_type{LIKE_OPERATOR_CHARACTER}')
assert filters['node_type'] == {'==': 'node_type'}
assert filters['process_type'] == {'like': 'process\\_type%'}

with self.assertRaises(ValueError):
get_full_type_filters(
'too_many_{like}{like}{concat}process_type'.format(
like=LIKE_OPERATOR_CHARACTER, concat=FULL_TYPE_CONCATENATOR
)
)
# Like on both
filters = get_full_type_filters(
'node_type{like}{concat}process_type{like}'.format(like=LIKE_OPERATOR_CHARACTER, concat=FULL_TYPE_CONCATENATOR)
)
assert filters['node_type'] == {'like': 'node\\_type%'}
assert filters['process_type'] == {'like': 'process\\_type%'}

with self.assertRaises(ValueError):
get_full_type_filters(
'node_type{concat}too_many_{like}{like}'.format(
like=LIKE_OPERATOR_CHARACTER, concat=FULL_TYPE_CONCATENATOR
)
)
# Test patologic case of process_type = '' / None
filters = get_full_type_filters(f'node_type{FULL_TYPE_CONCATENATOR}')
assert filters['node_type'] == {'==': 'node_type'}
assert filters['process_type'] == {'or': [{'==': ''}, {'==': None}]}

filters = get_full_type_filters(f'node_type{LIKE_OPERATOR_CHARACTER}{FULL_TYPE_CONCATENATOR}')
assert filters['node_type'] == {'like': 'node\\_type%'}
assert filters['process_type'] == {'or': [{'==': ''}, {'==': None}]}

with self.assertRaises(ValueError):
get_full_type_filters(f'not_at_{LIKE_OPERATOR_CHARACTER}_the_end{FULL_TYPE_CONCATENATOR}process_type')

with self.assertRaises(ValueError):
get_full_type_filters(f'node_type{FULL_TYPE_CONCATENATOR}not_at_{LIKE_OPERATOR_CHARACTER}_the_end')
def test_get_filters_errors():
"""Test the `get_full_type_filters` function."""

# Equals on both
filters = get_full_type_filters(f'node_type{FULL_TYPE_CONCATENATOR}process_type')
self.assertEqual(filters['node_type'], 'node\\_type')
self.assertEqual(filters['process_type'], 'process\\_type')
with pytest.raises(TypeError):
get_full_type_filters(10)

# Like on `node_type`
filters = get_full_type_filters(f'node_type{LIKE_OPERATOR_CHARACTER}{FULL_TYPE_CONCATENATOR}process_type')
self.assertEqual(filters['node_type'], {'like': 'node\\_type%'})
self.assertEqual(filters['process_type'], 'process\\_type')
with pytest.raises(ValueError):
get_full_type_filters('string_without_full_type_concatenator')

# Like on `process_type`
filters = get_full_type_filters(f'node_type{FULL_TYPE_CONCATENATOR}process_type{LIKE_OPERATOR_CHARACTER}')
self.assertEqual(filters['node_type'], 'node\\_type')
self.assertEqual(filters['process_type'], {'like': 'process\\_type%'})
with pytest.raises(ValueError):
get_full_type_filters(
'too_many_{like}{like}{concat}process_type'.format(
like=LIKE_OPERATOR_CHARACTER, concat=FULL_TYPE_CONCATENATOR
)
)

# Like on both
filters = get_full_type_filters(
'node_type{like}{concat}process_type{like}'.format(
with pytest.raises(ValueError):
get_full_type_filters(
'node_type{concat}too_many_{like}{like}'.format(
like=LIKE_OPERATOR_CHARACTER, concat=FULL_TYPE_CONCATENATOR
)
)
self.assertEqual(filters['node_type'], {'like': 'node\\_type%'})
self.assertEqual(filters['process_type'], {'like': 'process\\_type%'})

with pytest.raises(ValueError):
get_full_type_filters(f'not_at_{LIKE_OPERATOR_CHARACTER}_the_end{FULL_TYPE_CONCATENATOR}process_type')

with pytest.raises(ValueError):
get_full_type_filters(f'node_type{FULL_TYPE_CONCATENATOR}not_at_{LIKE_OPERATOR_CHARACTER}_the_end')


@pytest.mark.parametrize(
    'process_class', [orm.CalcFunctionNode, orm.CalcJobNode, orm.WorkFunctionNode, orm.WorkChainNode]
)
@pytest.mark.usefixtures('clear_database_before_test')
def test_full_type_unregistered(process_class, restapi_server, server_url):
    """Functionality test for the compatibility with old process_type entries.

    This only checks the integrity of the shape of the namespace tree; there is no checking of how
    the data is represented internally in terms of full types, labels, etc. The only thing that
    must work correctly is the internal consistency of `full_type` as it is interpreted by
    `get_full_type_filters` and the querybuilder.
    """
    # Store three process nodes whose process types do not correspond to registered entry points:
    # one under `unregistered_type1` and two under `unregistered_type2`.
    for unregistered_process_type in (
        'unregistered_type1.process1',
        'unregistered_type2.process1',
        'unregistered_type2.process2',
    ):
        node = process_class()
        node.set_process_type(unregistered_process_type)
        node.store()

    server = restapi_server()
    server_thread = Thread(target=server.serve_forever)

    try:
        server_thread.start()
        type_count_response = requests.get(f'{server_url}/nodes/full_types', timeout=10)
    finally:
        server.shutdown()

    def count_matches(full_type):
        """Count the stored nodes matched by the filters derived from the given full type."""
        return orm.QueryBuilder().append(orm.Node, filters=get_full_type_filters(full_type)).count()

    # All nodes = only processes
    # The main branch for all nodes does not currently return a queryable full_type
    namespace = type_count_response.json()['data']
    assert len(namespace['subspaces']) == 1

    # Walk down three single-child levels:
    #  1. all processes = only one kind of process (Calculation/Workflow)
    #  2. all subprocesses = only one kind of subprocess (calcfunc/workfunc or calcjob/workchain)
    #  3. one branch per registered plugin plus one for all unregistered (only unregistered here)
    # At each of these levels the full_type filters must match all three stored nodes.
    for _ in range(3):
        namespace = namespace['subspaces'][0]
        assert len(namespace['subspaces']) == 1
        assert count_matches(namespace['full_type']) == 3

    # There are only two process types: unregistered_type1 (1 node) and unregistered_type2 (2 nodes)
    # The unregistered branch does not currently return a queryable full_type
    namespace = namespace['subspaces'][0]
    assert len(namespace['subspaces']) == 2

    # Each unregistered type branch groups the expected number of sub-branches and nodes.
    for index, (expected_subspaces, expected_nodes) in enumerate([(1, 1), (2, 2)]):
        type_namespace = namespace['subspaces'][index]
        assert len(type_namespace['subspaces']) == expected_subspaces
        assert count_matches(type_namespace['full_type']) == expected_nodes

    # Finally check each specific subtype (1 for unregistered_type1 and 2 for unregistered_type2).
    # These are leaf nodes without any further subspace, each matching exactly one process.
    for outer_index, inner_index in ((0, 0), (1, 0), (1, 1)):
        leaf_namespace = namespace['subspaces'][outer_index]['subspaces'][inner_index]
        assert not leaf_namespace['subspaces']
        assert count_matches(leaf_namespace['full_type']) == 1


@pytest.mark.parametrize('node_class', [orm.CalcFunctionNode, orm.Dict])
@pytest.mark.usefixtures('clear_database_before_test')
def test_full_type_backwards_compatibility(node_class, restapi_server, server_url):
    """Functionality test for the compatibility with old process_type entries.

    This only checks the integrity of the shape of the namespace tree; there is no checking of how
    the data is represented internally in terms of full types, labels, etc. The only thing that
    must work correctly is the internal consistency of `full_type` as it is interpreted by
    `get_full_type_filters` and the querybuilder.
    """
    # Old databases may contain nodes whose process_type is either the empty string or None:
    # store one node of each flavor so both must be grouped together in the same branch.
    for legacy_process_type in ('', None):
        node = node_class()
        node.process_type = legacy_process_type
        node.store()

    server = restapi_server()
    server_thread = Thread(target=server.serve_forever)

    try:
        server_thread.start()
        type_count_response = requests.get(f'{server_url}/nodes/full_types', timeout=10)
    finally:
        server.shutdown()

    def count_matches(full_type):
        """Count the stored nodes matched by the filters derived from the given full type."""
        return orm.QueryBuilder().append(orm.Node, filters=get_full_type_filters(full_type)).count()

    # All nodes (contains either a process branch or a data branch)
    # The main branch for all nodes does not currently return a queryable full_type
    namespace = type_count_response.json()['data']
    assert len(namespace['subspaces']) == 1

    # Process nodes have one extra intermediate level (calcfunction, workfunction, calcjob or
    # workchain) before the leaf; data nodes go straight from the type branch to the leaf.
    intermediate_levels = 2 if issubclass(node_class, orm.ProcessNode) else 1
    for _ in range(intermediate_levels):
        namespace = namespace['subspaces'][0]
        assert len(namespace['subspaces']) == 1
        assert count_matches(namespace['full_type']) == 2

    # The final leaf: the specific data type or the empty process_type grouping. Both stored
    # nodes (empty-string and None process_type) must be matched by its full_type filters.
    namespace = namespace['subspaces'][0]
    assert not namespace['subspaces']
    assert count_matches(namespace['full_type']) == 2

0 comments on commit 9683716

Please sign in to comment.