diff --git a/geonode/harvesting/api/serializers.py b/geonode/harvesting/api/serializers.py index 7d918db190c..340c9fd487e 100644 --- a/geonode/harvesting/api/serializers.py +++ b/geonode/harvesting/api/serializers.py @@ -83,7 +83,7 @@ def get_links(self, obj): }, request=self.context["request"], ), - "harvestable-resources": reverse( + "harvestable_resources": reverse( "harvestable-resources-list", kwargs={ "harvester_id": obj.id, @@ -118,10 +118,6 @@ class Meta: "links", ) - def validate_harvester_type_specific_configuration(self, value): - logger.debug(f"inside validate_harvester_type_specific_configuration instance: {self.instance}") - return value - def validate(self, data): """Perform object-level validation @@ -138,7 +134,7 @@ def validate(self, data): """ worker_config_field = "harvester_type_specific_configuration" - worker_type_field = "worker_type" + worker_type_field = "harvester_type" worker_type = data.get( worker_type_field, getattr(self.instance, worker_type_field, None)) worker_config = data.get( @@ -157,7 +153,6 @@ def validate(self, data): ) return data - # FIXME: ensure supplied worker-specific config validates our json-schema def create(self, validated_data): desired_status = validated_data.get("status", models.Harvester.STATUS_READY) if desired_status != models.Harvester.STATUS_READY: @@ -263,13 +258,24 @@ class Meta: "unique_identifier", "title", "should_be_harvested", - "available", "last_updated", "status", + "remote_resource_type", + ] + read_only_fields = [ + "title", + "last_updated", + "status", + "remote_resource_type", ] def create(self, validated_data): - # TODO: check if there is no other property being set other than `should_be_harvested` + # NOTE: We are implementing `create()` rather than `update` intentionally, even if the + # user is not allowed to create new records (check the `views.py` module) - the rationale + # being that since we keep a harvestable_resource's `id` private it would be more involved + # to deal with its update than with its creation. We are providing a custom `UpdateListModelMixin` class + # that allows for bulk update of multiple instances simultaneously. This mixin class is instantiating + # this serializer class without providing an instance and then calling its `save()` method harvestable_resource = models.HarvestableResource.objects.get( harvester=self.context["harvester"], unique_identifier=validated_data["unique_identifier"] diff --git a/geonode/harvesting/config.py b/geonode/harvesting/config.py index 7eb5e76322d..fa2631477d0 100644 --- a/geonode/harvesting/config.py +++ b/geonode/harvesting/config.py @@ -23,22 +23,32 @@ """ +import typing + from django.conf import settings -_default_harvesters = [ + +_DEFAULT_HARVESTERS: typing.Final = [ "geonode.harvesting.harvesters.geonode.GeonodeLegacyHarvester", - # "geonode.harvesting.harvesters.geonode.GeonodeCswHarvester", "geonode.harvesting.harvesters.wms.OgcWmsHarvester", + # "geonode.harvesting.harvesters.geonode.GeonodeCswHarvester", ] -try: - _configured_harvester_classes = getattr(settings, "HARVESTER_CLASSES") - HARVESTER_CLASSES = ( - _default_harvesters + - [i for i in _configured_harvester_classes if i not in _default_harvesters] - ) -except AttributeError: - HARVESTER_CLASSES = _default_harvesters - -HARVESTED_RESOURCE_FILE_MAX_MEMORY_SIZE = getattr( - settings, "HARVESTED_RESOURCE_FILE_MAX_MEMORY_SIZE", settings.FILE_UPLOAD_MAX_MEMORY_SIZE) + +def _get_harvester_class_paths(custom_class_paths: typing.List[str]) -> typing.List[str]: + result = _DEFAULT_HARVESTERS[:] + for i in custom_class_paths: + if i not in result: + result.append(i) + return result + + +def get_setting(setting_key: str) -> typing.Any: + result = { + "HARVESTER_CLASSES": _get_harvester_class_paths( + getattr(settings, "HARVESTER_CLASSES", []) + ), + "HARVESTED_RESOURCE_FILE_MAX_MEMORY_SIZE": getattr( + settings, "HARVESTED_RESOURCE_MAX_MEMORY_SIZE", settings.FILE_UPLOAD_MAX_MEMORY_SIZE) + }.get(setting_key, getattr(settings, setting_key, None)) + return result diff --git a/geonode/harvesting/harvesters/base.py b/geonode/harvesting/harvesters/base.py index d0d6b4ad300..cef76da7cc8 100644 --- a/geonode/harvesting/harvesters/base.py +++ b/geonode/harvesting/harvesters/base.py @@ -359,7 +359,8 @@ def download_resource_file(url: str, target_name: str) -> str: file_size = response.headers.get("Content-Length") content_type = response.headers.get("Content-Type") charset = response.apparent_encoding - if file_size is not None and int(file_size) < config.HARVESTED_RESOURCE_FILE_MAX_MEMORY_SIZE: + size_threshold = config.get_setting("HARVESTED_RESOURCE_FILE_MAX_MEMORY_SIZE") + if file_size is not None and int(file_size) < size_threshold: logger.debug("Downloading to an in-memory buffer...") file_ = uploadedfile.InMemoryUploadedFile( None, None, target_name, content_type, file_size, charset) diff --git a/geonode/harvesting/migrations/0029_dynamic_harvester_type_choices.py b/geonode/harvesting/migrations/0029_dynamic_harvester_type_choices.py new file mode 100644 index 00000000000..7fb877a112a --- /dev/null +++ b/geonode/harvesting/migrations/0029_dynamic_harvester_type_choices.py @@ -0,0 +1,31 @@ +# Generated by Django 3.2.4 on 2021-06-28 18:26 +# Hand edited in order to set choices for `modelsHarvester.harvester_type` to come from settings in a dynamic fashion +# This shall prevent Django autogenerating new migration files for `geonode.harvesting` +# whenever new custom harvester classes are added to the settings + +from django.db import migrations, models + +from .. import config + + +class Migration(migrations.Migration): + + dependencies = [ + ('harvesting', '0028_harvester_num_harvestable_resources'), + ] + + operations = [ + migrations.AlterField( + model_name='harvester', + name='harvester_type', + field=models.CharField( + choices=[(value, value) for value in config.get_setting("HARVESTER_CLASSES")], + default='geonode.harvesting.harvesters.geonode.GeonodeLegacyHarvester', + help_text=( + 'Harvester class used to perform harvesting sessions. New harvester types can be added by an admin by changing the ' + 'main GeoNode `settings.py` file' + ), + max_length=255 + ), + ), + ] diff --git a/geonode/harvesting/models.py b/geonode/harvesting/models.py index 3ac033cc861..edbcb5e3942 100644 --- a/geonode/harvesting/models.py +++ b/geonode/harvesting/models.py @@ -33,7 +33,7 @@ ) from . import utils -from .config import HARVESTER_CLASSES +from .config import get_setting logger = logging.getLogger(__name__) @@ -127,8 +127,8 @@ class Harvester(models.Model): "Harvester class used to perform harvesting sessions. New harvester types " "can be added by an admin by changing the main GeoNode `settings.py` file" ), - choices=(((i, i) for i in HARVESTER_CLASSES)), - default=HARVESTER_CLASSES[0] + choices=(((i, i) for i in get_setting("HARVESTER_CLASSES"))), + default=get_setting("HARVESTER_CLASSES")[0] ) harvester_type_specific_configuration = models.JSONField( default=dict, @@ -157,7 +157,9 @@ class Harvester(models.Model): editable=False, ) num_harvestable_resources = models.IntegerField( - default=0) + blank=True, + default=0 + ) def __str__(self): return f"{self.name}({self.id})" diff --git a/geonode/harvesting/harvesters/tests.py b/geonode/harvesting/tests/__init__.py similarity index 100% rename from geonode/harvesting/harvesters/tests.py rename to geonode/harvesting/tests/__init__.py diff --git a/geonode/harvesting/tests/factories.py b/geonode/harvesting/tests/factories.py new file mode 100644 index 00000000000..f6134f5fe69 --- /dev/null +++ b/geonode/harvesting/tests/factories.py @@ -0,0 +1,60 @@ +############################################## +# +# Copyright (C) 2021 OSGeo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +######################################################################### + + +import uuid +import datetime +from geonode.harvesting import resourcedescriptor +from geonode.harvesting.harvesters.base import HarvestedResourceInfo, BriefRemoteResource + +contact_example = resourcedescriptor.RecordDescriptionContact( + role='role', + name="Test" +) +identification_example = resourcedescriptor.RecordIdentification( + name='Test', + title='Test', + date=datetime.datetime.now(), + date_type='type', + originator=contact_example, + graphic_overview_uri='', + place_keywords=['keyword'], + other_keywords=('test',), + license=['test'] +) +distribution_example = resourcedescriptor.RecordDistribution() +resource_description_example = resourcedescriptor.RecordDescription( + uuid=uuid.uuid4(), + point_of_contact=contact_example, + author=contact_example, + date_stamp=datetime.datetime.now(), + identification=identification_example, + distribution=distribution_example +) + +resource_info_example = HarvestedResourceInfo( + resource_descriptor=resource_description_example, + additional_information={} +) + +brief_remote_resource_example = BriefRemoteResource( + unique_identifier='id', + title='Test', + resource_type='Layer' +) diff --git a/geonode/harvesting/tests.py b/geonode/harvesting/tests/harvesters/__init__.py similarity index 90% rename from geonode/harvesting/tests.py rename to geonode/harvesting/tests/harvesters/__init__.py index 6b4db6084e8..5d1d824fbb7 100644 --- a/geonode/harvesting/tests.py +++ b/geonode/harvesting/tests/harvesters/__init__.py @@ -1,4 +1,4 @@ -######################################################################### +############################################## # # Copyright (C) 2021 OSGeo # diff --git a/geonode/harvesting/tests/harvesters/base.py b/geonode/harvesting/tests/harvesters/base.py new file mode 100644 index 00000000000..531c809cc53 --- /dev/null +++ b/geonode/harvesting/tests/harvesters/base.py @@ -0,0 +1,98 @@ +############################################## +# +# Copyright (C) 2021 OSGeo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +######################################################################### + +import datetime +from django.contrib.auth import get_user_model +from geonode.tests.base import GeoNodeBaseTestSupport +from geonode.harvesting.models import Harvester, HarvestableResource +from geonode.harvesting.tests.harvesters.test_harvester import TestHarvester +from geonode.layers.models import Dataset + + +class TestBaseHarvester(GeoNodeBaseTestSupport): + """ + Test Base harvester + """ + remote_url = 'test.com' + name = 'This is geonode harvester' + user = get_user_model().objects.get(username='AnonymousUser') + harvester_type = 'geonode.harvesting.tests.harvesters.test_harvester.TestHarvester' + + def setUp(self): + super().setUp() + self.worker = TestHarvester( + remote_url=self.remote_url, + harvester_id=1 + ) + + def test_worker_from_harvester(self): + """ + Test worker that generated from harvester + """ + harvester = Harvester.objects.create( + remote_url=self.remote_url, + name=self.name, + default_owner=self.user, + harvester_type=self.harvester_type + ) + worker = harvester.get_harvester_worker() + self.assertEqual(worker.__class__, TestHarvester) + self.assertEqual(worker.remote_url, self.remote_url) + self.assertEqual(harvester.default_owner, self.user) + + def test_worker_from_django_record(self): + """ + Test worker that generated from worker using harvester record + """ + harvester = Harvester.objects.create( + remote_url=self.remote_url, + name=self.name, + default_owner=self.user, + harvester_type=self.harvester_type + ) + worker = TestHarvester.from_django_record(harvester) + self.assertEqual(worker.__class__, TestHarvester) + self.assertEqual(worker.remote_url, self.remote_url) + self.assertEqual(harvester.default_owner, self.user) + + def test_worker_methods(self): + """ + Test functions in worker + """ + self.assertEqual(self.worker.remote_url, self.remote_url) + self.assertEqual(self.worker.harvester_id, 1) + self.assertTrue(self.worker.allows_copying_resources) + self.assertTrue(self.worker.check_availability()) + self.assertEqual(self.worker.get_num_available_resources(), 1) + self.assertEqual(len(self.worker.list_resources()), 1) + self.assertEqual(self.worker.get_geonode_resource_type('type'), Dataset) + + harvestable_resource = HarvestableResource( + unique_identifier='1', + title='Test Resource', + harvester=Harvester( + remote_url=self.remote_url, + name=self.name, + default_owner=self.user, + harvester_type=self.harvester_type + ), + last_refreshed=datetime.datetime.now() + ) + self.assertIsNone(self.worker.get_resource( + harvestable_resource, 1)) diff --git a/geonode/harvesting/tests/harvesters/geonode.py b/geonode/harvesting/tests/harvesters/geonode.py new file mode 100644 index 00000000000..981dcc89e2c --- /dev/null +++ b/geonode/harvesting/tests/harvesters/geonode.py @@ -0,0 +1,203 @@ +######################################################################### +# +# Copyright (C) 2021 OSGeo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +######################################################################### + +from mock import patch +from django.contrib.auth import get_user_model +from geonode.tests.base import GeoNodeBaseTestSupport +from geonode.harvesting.models import Harvester +from geonode.harvesting.harvesters.geonode import ( + GeonodeLegacyHarvester, GeoNodeResourceType) +from geonode.harvesting.harvesters.base import BriefRemoteResource + +test_resources = { + GeoNodeResourceType.DATASET: 1, + GeoNodeResourceType.DOCUMENT: 2, + GeoNodeResourceType.MAP: 3, +} + + +def geonode_get_total_records(cls, resource_type: GeoNodeResourceType): + """ + Fake _get_total_records function on GeonodeLegacyHarvester + """ + return test_resources[resource_type] + + +def geonode_list_resources_by_type( + cls, + resource_type: GeoNodeResourceType, + offset: int): + """ + Fake _list_resources_by_type function on GeonodeLegacyHarvester + """ + return [ + BriefRemoteResource( + unique_identifier='ID', + title='Title', + resource_type=resource_type.value, + ) + ] + + +class TestGeonodeHarvester(GeoNodeBaseTestSupport): + """ + Test GeonodeLegacyHarvester + """ + remote_url = 'test.com' + name = 'This is geonode harvester' + user = get_user_model().objects.get(username='AnonymousUser') + harvester_type = 'geonode.harvesting.harvesters.geonode.GeonodeLegacyHarvester' + + def setUp(self): + super().setUp() + self.worker = GeonodeLegacyHarvester( + remote_url=self.remote_url, + harvester_id=1 + ) + + def test_base_api_url(self): + """ + Test the return of base_api_url + """ + self.assertEqual(self.worker.base_api_url, f"{self.remote_url}/api") + + def test_allows_copying_resources(self): + """ + Test the return of allows_copying_resources + """ + self.assertTrue(self.worker.allows_copying_resources) + + @patch( + "geonode.harvesting.harvesters.geonode.GeonodeLegacyHarvester._get_total_records", + geonode_get_total_records) + def test_get_num_available_resources_by_type(self): + """ + Test function of _get_num_available_resources_by_type + """ + worker = GeonodeLegacyHarvester( + remote_url=self.remote_url, + harvester_id=1 + ) + self.assertEqual(worker._get_num_available_resources_by_type(), test_resources) + self.assertEqual( + worker._get_total_records(GeoNodeResourceType.DATASET), + test_resources[GeoNodeResourceType.DATASET]) + self.assertEqual( + worker._get_total_records(GeoNodeResourceType.DOCUMENT), + test_resources[GeoNodeResourceType.DOCUMENT]) + self.assertEqual( + worker._get_total_records(GeoNodeResourceType.MAP), + test_resources[GeoNodeResourceType.MAP]) + + @patch( + "geonode.harvesting.harvesters.geonode.GeonodeLegacyHarvester._get_total_records", + geonode_get_total_records) + def test_get_num_available_resources(self): + """ + Test function of get_num_available_resources for each of type in GeonodeLegacyHarvester + """ + params = { + 'remote_url': self.remote_url, + 'harvester_id': 1 + } + # test worker that harvest all type + worker = GeonodeLegacyHarvester(**params) + self.assertEqual(worker.get_num_available_resources(), 6) + + # test worker with skip document + worker = GeonodeLegacyHarvester( + **params, + harvest_documents=False + ) + self.assertEqual(worker.get_num_available_resources(), 4) + + # test worker with skip layer + worker = GeonodeLegacyHarvester( + **params, + harvest_datasets=False + ) + self.assertEqual(worker.get_num_available_resources(), 5) + + # test worker with skip maps + worker = GeonodeLegacyHarvester( + **params, + harvest_maps=False + ) + self.assertEqual(worker.get_num_available_resources(), 3) + + @patch( + "geonode.harvesting.harvesters.geonode.GeonodeLegacyHarvester._list_resources_by_type", + geonode_list_resources_by_type) + def test_list_resources_by_type(self): + """ + Test _list_resources_by_type function for every type in GeonodeLegacyHarvester + """ + self.assertEqual( + self.worker._list_resources_by_type(GeoNodeResourceType.DATASET, 0)[0].resource_type, + GeoNodeResourceType.DATASET.value) + self.assertEqual( + self.worker._list_dataset_resources(1)[0].resource_type, + GeoNodeResourceType.DATASET.value) + + self.assertEqual( + self.worker._list_resources_by_type(GeoNodeResourceType.DOCUMENT, 0)[0].resource_type, + GeoNodeResourceType.DOCUMENT.value) + self.assertEqual( + self.worker._list_document_resources(1)[0].resource_type, + GeoNodeResourceType.DOCUMENT.value) + + self.assertEqual( + self.worker._list_resources_by_type(GeoNodeResourceType.MAP, 0)[0].resource_type, + GeoNodeResourceType.MAP.value) + self.assertEqual( + self.worker._list_map_resources(1)[0].resource_type, + GeoNodeResourceType.MAP.value) + + def test_extract_unique_identifier(self): + """ + Test _extract_unique_identifier function + """ + self.assertEqual(self.worker._extract_unique_identifier({ + 'id': 1 + }), 1) + + def test_worker_from_django_record(self): + """ + Test worker that genearted by harvester + """ + harvester = Harvester.objects.create( + remote_url=self.remote_url, + name=self.name, + default_owner=self.user, + harvester_type=self.harvester_type, + harvester_type_specific_configuration={ + 'harvest_documents': False, + 'harvest_datasets': True, + 'resource_title_filter': '' + } + ) + worker = GeonodeLegacyHarvester.from_django_record(harvester) + self.assertEqual(worker.__class__, GeonodeLegacyHarvester) + self.assertEqual(worker.remote_url, self.remote_url) + self.assertEqual(harvester.default_owner, self.user) + + self.assertFalse(worker.harvest_documents) + self.assertTrue(worker.harvest_datasets) + self.assertTrue(worker.harvest_maps) + self.assertEqual(worker.resource_title_filter, '') diff --git a/geonode/harvesting/tests/harvesters/test_harvester.py b/geonode/harvesting/tests/harvesters/test_harvester.py new file mode 100644 index 00000000000..22a45d67eb9 --- /dev/null +++ b/geonode/harvesting/tests/harvesters/test_harvester.py @@ -0,0 +1,67 @@ +############################################## +# +# Copyright (C) 2021 OSGeo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +######################################################################### + +import typing +from geonode.harvesting.harvesters.base import ( + BaseHarvesterWorker, + BriefRemoteResource, + HarvestedResourceInfo +) +from geonode.harvesting.models import Harvester, HarvestableResource +from geonode.layers.models import Dataset +from geonode.harvesting.tests.factories import brief_remote_resource_example + + +class TestHarvester(BaseHarvesterWorker): + """ + Override base harvester as test harvester + """ + + @property + def allows_copying_resources(self) -> bool: + return True + + @classmethod + def from_django_record(cls, record: Harvester): + return cls( + record.remote_url, + record.id) + + def get_num_available_resources(self) -> int: + return 1 + + def list_resources( + self, + offset: typing.Optional[int] = 0 + ) -> typing.List[BriefRemoteResource]: + return [brief_remote_resource_example] + + def check_availability(self, timeout_seconds: typing.Optional[int] = 5) -> bool: + return True + + def get_resource( + self, + harvestable_resource: "HarvestableResource", # noqa + harvesting_session_id: int + ) -> typing.Optional[HarvestedResourceInfo]: + return None + + def get_geonode_resource_type(self, remote_resource_type: str): + """Return resource type class from resource type string.""" + return Dataset diff --git a/geonode/harvesting/tests/test_admin.py b/geonode/harvesting/tests/test_admin.py new file mode 100644 index 00000000000..a1e716ef283 --- /dev/null +++ b/geonode/harvesting/tests/test_admin.py @@ -0,0 +1,73 @@ +from unittest import mock + +from django.urls import reverse +from django.contrib.auth import get_user_model +from rest_framework import status + +from geonode.tests.base import GeoNodeBaseTestSupport + +from .. import models + + +class HarvesterAdminTestCase(GeoNodeBaseTestSupport): + harvester_type = 'geonode.harvesting.harvesters.geonode.GeonodeLegacyHarvester' + + def setUp(self): + self.user = get_user_model().objects.get(username='admin') + self.client.login(username="admin", password="admin") + + self.harvester = models.Harvester.objects.create( + remote_url="http://fake1.com", + name="harvester1", + default_owner=self.user, + harvester_type=self.harvester_type + ) + + @mock.patch( + "geonode.harvesting.harvesters.geonode.GeonodeLegacyHarvester.check_availability") + def test_add_harvester(self, mock_check_availability): + mock_check_availability.return_value = True + data = { + 'remote_url': "http://fake.com", + 'name': 'harvester', + 'harvester_type_specific_configuration': '{}', + 'harvester_type': self.harvester_type, + 'status': models.Harvester.STATUS_READY, + 'update_frequency': 60, + 'check_availability_frequency': 30, + 'default_owner': self.user.pk + + } + self.assertFalse(models.Harvester.objects.filter(name=data["name"]).exists()) + response = self.client.post(reverse('admin:harvesting_harvester_add'), data) + self.assertEqual(response.status_code, status.HTTP_302_FOUND) # response from admin + harvester = models.Harvester.objects.get(name=data["name"]) + self.assertEqual(harvester.name, data['name']) + self.assertEqual(harvester.remote_url, data['remote_url']) + self.assertEqual(harvester.status, models.Harvester.STATUS_READY) + self.assertEqual(harvester.remote_available, True) + + @mock.patch( + "geonode.harvesting.harvesters.geonode.GeonodeLegacyHarvester.check_availability") + def test_update_harvester_availability(self, mock_check_availability): + mock_check_availability.return_value = True + data = {'action': 'update_harvester_availability', + '_selected_action': [self.harvester.pk]} + response = self.client.post(reverse('admin:harvesting_harvester_changelist'), data) + self.assertEqual(response.status_code, status.HTTP_302_FOUND) # response from admin + self.harvester.refresh_from_db() + self.assertEqual(self.harvester.remote_available, True) + + @mock.patch( + "geonode.harvesting.harvesters.geonode.GeonodeLegacyHarvester.check_availability") + def test_perform_harvesting(self, mock_check_availability): + mock_check_availability.return_value = True + data = {'action': 'perform_harvesting', + '_selected_action': [self.harvester.pk]} + self.harvester.status = models.Harvester.STATUS_READY + self.harvester.save() + + response = self.client.post(reverse('admin:harvesting_harvester_changelist'), data) + self.assertEqual(response.status_code, status.HTTP_302_FOUND) # response from admin + self.harvester.refresh_from_db() + self.assertEqual(self.harvester.status, models.Harvester.STATUS_PERFORMING_HARVESTING) diff --git a/geonode/harvesting/tests/test_api_serializers.py b/geonode/harvesting/tests/test_api_serializers.py new file mode 100644 index 00000000000..ad82e5d55ce --- /dev/null +++ b/geonode/harvesting/tests/test_api_serializers.py @@ -0,0 +1,312 @@ +from unittest import mock +from urllib.parse import urlparse + +from django.contrib.auth import get_user_model +from django.utils.timezone import now +from rest_framework.exceptions import ValidationError +from rest_framework.test import ( + APIRequestFactory, +) + +from geonode.tests.base import GeoNodeBaseTestSupport + +from .. import models +from ..api import serializers + + +_REQUEST_FACTORY = APIRequestFactory() + + +class BriefHarvesterSerializerTestCase(GeoNodeBaseTestSupport): + remote_url = 'test.com' + name = 'This is geonode harvester' + user = get_user_model().objects.get(username='AnonymousUser') + harvester_type = "geonode.harvesting.harvesters.geonode.GeonodeLegacyHarvester" + + @classmethod + def setUpTestData(cls): + cls.harvester = models.Harvester.objects.create( + remote_url=cls.remote_url, + name=cls.name, + default_owner=cls.user, + harvester_type=cls.harvester_type + ) + + def test_serializer_is_able_to_serialize_model_instance(self): + api_endpoint = "/api/v2/harvesters/" + request = _REQUEST_FACTORY.get(api_endpoint) + serializer = serializers.BriefHarvesterSerializer( + self.harvester, context={"request": request}) + serialized = serializer.data + self.assertEqual(serialized["remote_url"], self.harvester.remote_url) + self.assertEqual(serialized["name"], self.harvester.name) + self.assertEqual(urlparse(serialized["links"]["self"]).path, f"{api_endpoint}{self.harvester.pk}/") + self.assertIsNotNone(serialized["links"]["harvestable_resources"]) + + +class HarvesterSerializerTestCase(GeoNodeBaseTestSupport): + remote_url = 'test.com' + name = 'This is geonode harvester' + user = get_user_model().objects.get(username='AnonymousUser') + harvester_type = "geonode.harvesting.harvesters.geonode.GeonodeLegacyHarvester" + + @classmethod + def setUpTestData(cls): + cls.harvester = models.Harvester.objects.create( + remote_url=cls.remote_url, + name=cls.name, + default_owner=cls.user, + harvester_type=cls.harvester_type + ) + + def test_serializer_is_able_to_serialize_model_instance(self): + api_endpoint = "/api/v2/harvesters/" + request = _REQUEST_FACTORY.get(api_endpoint) + serializer = serializers.BriefHarvesterSerializer( + self.harvester, context={"request": request}) + serialized = serializer.data + self.assertEqual(serialized["remote_url"], self.harvester.remote_url) + self.assertEqual(serialized["name"], self.harvester.name) + self.assertEqual(urlparse(serialized["links"]["self"]).path, f"{api_endpoint}{self.harvester.pk}/") + self.assertIsNotNone(serialized["links"]["harvestable_resources"]) + + @mock.patch("geonode.harvesting.api.serializers.utils") + def test_validate_also_validates_worker_specific_config(self, mock_utils): + input_data = { + "name": "phony", + "remote_url": "http://fake.com", + "user": 1, + "harvester_type": "geonode.harvesting.harvesters.geonode.GeonodeLegacyHarvester", + "harvester_type_specific_configuration": {"something": "fake config"}, + } + + request = _REQUEST_FACTORY.post("/api/v2/harvesters/") + request.user = self.user + + serializer = serializers.HarvesterSerializer(data=input_data, context={"request": request}) + serializer.is_valid(raise_exception=True) + mock_utils.validate_worker_configuration.assert_called() + + @mock.patch("geonode.harvesting.api.serializers.utils") + def test_validate_does_not_allow_changing_status_and_worker_specific_config(self, mock_utils): + input_data = { + "name": "phony", + "remote_url": "http://fake.com", + "user": 1, + "harvester_type_specific_configuration": {"something": "fake config"}, + "status": "updating-harvestable-resources", + } + + request = _REQUEST_FACTORY.post("/api/v2/harvesters/") + request.user = self.user + + serializer = serializers.HarvesterSerializer(data=input_data, context={"request": request}) + with self.assertRaises(ValidationError): + serializer.is_valid(raise_exception=True) + + def test_create_does_not_allow_setting_custom_status(self): + input_data = { + "name": "phony", + "remote_url": "http://fake.com", + "user": 1, + "status": "updating-harvestable-resources", + } + + request = _REQUEST_FACTORY.post("/api/v2/harvesters/") + request.user = self.user + + serializer = serializers.HarvesterSerializer(data=input_data, context={"request": request}) + serializer.is_valid(raise_exception=True) + with self.assertRaises(ValidationError): + serializer.save() + + @mock.patch("geonode.harvesting.api.serializers.tasks") + @mock.patch("geonode.harvesting.api.serializers.utils") + def test_create_checks_availability_of_remote_and_updates_harvestable_resources(self, mock_utils, mock_tasks): + input_data = { + "name": "phony", + "remote_url": "http://fake.com", + "user": 1, + } + + request = _REQUEST_FACTORY.post("/api/v2/harvesters/") + request.user = self.user + + serializer = serializers.HarvesterSerializer(data=input_data, context={"request": request}) + serializer.is_valid(raise_exception=True) + serializer.save() + mock_utils.update_harvester_availability.assert_called() + mock_tasks.update_harvestable_resources.apply_async.assert_called() + + def test_update_errors_out_if_current_status_is_not_ready(self): + request = _REQUEST_FACTORY.patch(f"/api/v2/harvesters/{self.harvester.pk}") + self.assertEqual(self.harvester.status, models.Harvester.STATUS_READY) + serializer = serializers.HarvesterSerializer( + self.harvester, + data={"status": "updating-harvestable-resources"}, + context={"request": request}, + partial=True, + ) + serializer.is_valid(raise_exception=True) + self.harvester.status = models.Harvester.STATUS_UPDATING_HARVESTABLE_RESOURCES + self.harvester.save() + with self.assertRaises(ValidationError): + serializer.save() + + def test_update_errors_out_when_client_tries_to_set_status_ready(self): + request = _REQUEST_FACTORY.patch(f"/api/v2/harvesters/{self.harvester.pk}") + self.assertEqual(self.harvester.status, models.Harvester.STATUS_READY) + serializer = serializers.HarvesterSerializer( + self.harvester, + data={"status": models.Harvester.STATUS_READY}, + context={"request": request}, + partial=True, + ) + serializer.is_valid(raise_exception=True) + with self.assertRaises(ValidationError): + serializer.save() + + @mock.patch("geonode.harvesting.api.serializers.tasks") + def test_update_calls_update_harvestable_resources_task(self, mock_tasks): + request = _REQUEST_FACTORY.patch(f"/api/v2/harvesters/{self.harvester.pk}") + self.assertEqual(self.harvester.status, models.Harvester.STATUS_READY) + serializer = serializers.HarvesterSerializer( + self.harvester, + data={"status": models.Harvester.STATUS_UPDATING_HARVESTABLE_RESOURCES}, + context={"request": request}, + partial=True, + ) + serializer.is_valid(raise_exception=True) + serializer.save() + mock_tasks.update_harvestable_resources.signature.assert_called_with(args=(self.harvester.pk,)) + mock_tasks.update_harvestable_resources.signature.return_value.apply_async.assert_called() + + @mock.patch("geonode.harvesting.api.serializers.tasks") + def test_update_calls_harvesting_dispatcher_task(self, mock_tasks): + request = _REQUEST_FACTORY.patch(f"/api/v2/harvesters/{self.harvester.pk}") + self.assertEqual(self.harvester.status, models.Harvester.STATUS_READY) + serializer = serializers.HarvesterSerializer( + self.harvester, + data={"status": models.Harvester.STATUS_PERFORMING_HARVESTING}, + context={"request": request}, + partial=True, + ) + serializer.is_valid(raise_exception=True) + serializer.save() + mock_tasks.harvesting_dispatcher.signature.assert_called_with(args=(self.harvester.pk,)) + mock_tasks.harvesting_dispatcher.signature.return_value.apply_async.assert_called() + + @mock.patch("geonode.harvesting.api.serializers.tasks") + def test_update_calls_update_harvester_availability_task(self, mock_tasks): + request = _REQUEST_FACTORY.patch(f"/api/v2/harvesters/{self.harvester.pk}") + self.assertEqual(self.harvester.status, models.Harvester.STATUS_READY) + serializer = serializers.HarvesterSerializer( + self.harvester, + data={"status": models.Harvester.STATUS_CHECKING_AVAILABILITY}, + context={"request": request}, + partial=True, + ) + serializer.is_valid(raise_exception=True) + serializer.save() + mock_tasks.check_harvester_available.signature.assert_called_with(args=(self.harvester.pk,)) + mock_tasks.check_harvester_available.signature.return_value.apply_async.assert_called() + + @mock.patch("geonode.harvesting.api.serializers.tasks") + def test_update_updates_harvestable_resources_whenever_worker_config_changes(self, mock_tasks): + request = _REQUEST_FACTORY.patch(f"/api/v2/harvesters/{self.harvester.pk}") + self.assertEqual(self.harvester.status, models.Harvester.STATUS_READY) + self.assertEqual(len(self.harvester.harvester_type_specific_configuration), 0) + serializer = serializers.HarvesterSerializer( + self.harvester, + data={ + "harvester_type_specific_configuration": {"harvest_datasets": False} + }, + context={"request": request}, + partial=True, + ) + serializer.is_valid(raise_exception=True) + serializer.save() + mock_tasks.update_harvestable_resources.signature.assert_called_with(args=(self.harvester.pk,)) + mock_tasks.update_harvestable_resources.signature.return_value.apply_async.assert_called() + + +class BriefHarvestingSessionSerializerTestCase(GeoNodeBaseTestSupport): + + @classmethod + def setUpTestData(cls): + remote_url = 'test.com' + name = 'This is geonode harvester' + user = get_user_model().objects.get(username='AnonymousUser') + harvester_type = "geonode.harvesting.harvesters.geonode.GeonodeLegacyHarvester" + cls.harvester = models.Harvester.objects.create( + remote_url=remote_url, + name=name, + default_owner=user, + harvester_type=harvester_type + ) + cls.harvesting_session = models.HarvestingSession.objects.create( + harvester=cls.harvester + ) + + def test_serializer_is_able_to_serialize_model_instance(self): + api_endpoint = "/api/v2/harvesting-sessions/" + request = _REQUEST_FACTORY.get(api_endpoint) + serializer = serializers.BriefHarvestingSessionSerializer( + self.harvesting_session, context={"request": request}) + serialized = serializer.data + self.assertIsNotNone(serialized["started"]) + + +class HarvestableResourceSerializerTestCase(GeoNodeBaseTestSupport): + unique_identifier = "some-identifier" + title = "something" + remote_resource_type = "documents" + default_should_be_harvested = False + + @classmethod + def setUpTestData(cls): + cls.harvester = models.Harvester.objects.create( + remote_url='test.com', + name='This is geonode harvester', + default_owner=get_user_model().objects.get(username='AnonymousUser'), + harvester_type="geonode.harvesting.harvesters.geonode.GeonodeLegacyHarvester" + ) + cls.harvestable_resource = models.HarvestableResource.objects.create( + unique_identifier=cls.unique_identifier, + title=cls.title, + harvester=cls.harvester, + should_be_harvested=cls.default_should_be_harvested, + remote_resource_type=cls.remote_resource_type, + last_refreshed=now() + ) + + def test_serializer_is_able_to_serialize_model_instance(self): + api_endpoint = f"/api/v2/harvesters/{self.harvester.id}/harvestable-resources/" + request = _REQUEST_FACTORY.get(api_endpoint) + serializer = serializers.HarvestableResourceSerializer( + self.harvestable_resource, context={"request": request}) + serialized = serializer.data + self.assertIsNotNone(serialized["unique_identifier"], self.unique_identifier) + self.assertIsNotNone(serialized["title"], self.title) + self.assertIsNotNone(serialized["remote_resource_type"], self.remote_resource_type) + self.assertIsNotNone(serialized["should_be_harvested"], self.default_should_be_harvested) + + def test_serializer_is_allowed_to_change_instance_should_be_harvested_attribute(self): + self.assertEqual(self.harvestable_resource.should_be_harvested, self.default_should_be_harvested) + api_endpoint = f"/api/v2/harvesters/{self.harvester.id}/harvestable-resources/" + request = _REQUEST_FACTORY.patch(api_endpoint) + serializer = serializers.HarvestableResourceSerializer( + data={ + "unique_identifier": self.unique_identifier, + "should_be_harvested": not self.default_should_be_harvested + }, + context={ + "request": request, + "harvester": self.harvester, + } + ) + serializer.is_valid(raise_exception=True) + print(f"validated_data: {serializer.validated_data}") + serializer.save() + self.harvestable_resource.refresh_from_db() + self.assertEqual(self.harvestable_resource.should_be_harvested, not self.default_should_be_harvested) diff --git a/geonode/harvesting/tests/test_api_views.py b/geonode/harvesting/tests/test_api_views.py new file mode 100644 index 00000000000..98c6ea11790 --- /dev/null +++ b/geonode/harvesting/tests/test_api_views.py @@ -0,0 +1,100 @@ +import datetime +from django.contrib.auth import get_user_model +from rest_framework import status +from geonode.tests.base import GeoNodeBaseTestSupport + +from .. import models + + +class HarvesterViewSetTestCase(GeoNodeBaseTestSupport): + user = get_user_model().objects.get(username='AnonymousUser') + harvester_type = "geonode.harvesting.harvesters.geonode.GeonodeLegacyHarvester" + + @classmethod + def setUpTestData(cls): + harvester1 = models.Harvester.objects.create( + remote_url="http://fake1.com", + name="harvester1", + default_owner=cls.user, + harvester_type=cls.harvester_type + ) + harvester2 = models.Harvester.objects.create( + remote_url="http://fake2.com", + name="harvester2", + default_owner=cls.user, + harvester_type=cls.harvester_type + ) + cls.harvesters = [ + harvester1, + harvester2 + ] + + harvester_resource1 = models.HarvestableResource.objects.create( + unique_identifier="resource_1", + title="resource 1", + harvester=harvester1, + last_refreshed=datetime.datetime.now() + ) + harvester_resource2 = models.HarvestableResource.objects.create( + unique_identifier="resource_2", + title="resource 2", + harvester=harvester2, + last_refreshed=datetime.datetime.now() + ) + cls.resources = { + harvester1.id: harvester_resource1, + harvester2.id: harvester_resource2, + } + + session1 = models.HarvestingSession.objects.create( + harvester=harvester1, + total_records_found=10, + records_harvested=10 + ) + session2 = models.HarvestingSession.objects.create( + harvester=harvester2, + total_records_found=5, + records_harvested=5 + ) + cls.sessions = [session1, session2] + + def test_get_harvester_list(self): + response = self.client.get("/api/v2/harvesters/") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.data["total"], len(self.harvesters)) + for index, harvester in enumerate(self.harvesters): + self.assertEqual(response.data["harvesters"][index]["id"], self.harvesters[index].pk) + self.assertEqual(response.data["harvesters"][index]["name"], self.harvesters[index].name) + + def test_post_harvester_list_non_admin(self): + response = self.client.post('/api/v2/harvesters/', {}) + self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED) + + def test_get_harvester_detail_for_non_admin(self): + self.client.logout() + for index, harvester in enumerate(self.harvesters): + response = self.client.get("/api/v2/harvesters/{}/".format(harvester.id)) + self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED) + + def test_get_harvester_detail_for_admin(self): + self.client.login(username="admin", password="admin") + for index, harvester in enumerate(self.harvesters): + response = self.client.get("/api/v2/harvesters/{}/".format(harvester.id)) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.data["harvester"]["id"], harvester.pk) + self.assertEqual(response.data["harvester"]["name"], harvester.name) + + def test_get_harvester_resources(self): + for index, harvester in enumerate(self.harvesters): + response = self.client.get("/api/v2/harvesters/{}/harvestable-resources/".format(harvester.id)) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.data["harvestable_resources"][0]["unique_identifier"], self.resources[harvester.id].unique_identifier) + self.assertEqual(response.data["harvestable_resources"][0]["title"], self.resources[harvester.id].title) + + def test_get_harvester_sessions(self): + response = self.client.get("/api/v2/harvesting-sessions/") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.data["total"], len(self.sessions)) + for index, harvester in enumerate(self.sessions): + self.assertEqual(response.data["harvesting_sessions"][index]["id"], self.sessions[index].pk) + self.assertEqual(response.data["harvesting_sessions"][index]["records_harvested"], self.sessions[index].records_harvested) diff --git a/geonode/harvesting/tests/test_config.py b/geonode/harvesting/tests/test_config.py new file mode 100644 index 00000000000..ae355bbec81 --- /dev/null +++ b/geonode/harvesting/tests/test_config.py @@ -0,0 +1,51 @@ +######################################################################### +# +# Copyright (C) 2021 OSGeo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +######################################################################### + +from django.test import override_settings + +from geonode.harvesting import config +from geonode.tests.base import GeoNodeBaseSimpleTestSupport + + +class ConfigTestCase(GeoNodeBaseSimpleTestSupport): + + @override_settings(HARVESTER_CLASSES=[]) + def test_default_config_harvester_classes(self): + self.assertEqual(config.get_setting("HARVESTER_CLASSES"), config._DEFAULT_HARVESTERS) + + def test_custom_harvester_classes(self): + phony_class_paths = [ + "fake_harvester1", + "fake_harvester2", + ] + with self.settings(HARVESTER_CLASSES=phony_class_paths): + self.assertEqual( + config.get_setting("HARVESTER_CLASSES"), config._DEFAULT_HARVESTERS + phony_class_paths) + + def test_harvester_classes_dont_repeat(self): + phony_class_paths = [ + "fake_harvester1", + "fake_harvester2", + ] + repeated_paths = [ + "fake_harvester1", + ] + with self.settings(HARVESTER_CLASSES=phony_class_paths + repeated_paths): + self.assertEqual( + config.get_setting("HARVESTER_CLASSES"), config._DEFAULT_HARVESTERS + phony_class_paths) diff --git a/geonode/harvesting/tests/test_models.py b/geonode/harvesting/tests/test_models.py new file mode 100644 index 00000000000..2e3594d6340 --- /dev/null +++ b/geonode/harvesting/tests/test_models.py @@ -0,0 +1,113 @@ +############################################## +# +# Copyright (C) 2021 OSGeo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +######################################################################### + +import datetime + +from django.contrib.auth import get_user_model +from geonode.tests.base import GeoNodeBaseTestSupport + +from .. import models + + +class HarvesterTestCase(GeoNodeBaseTestSupport): + remote_url = 'test.com' + name = 'This is geonode harvester' + user = get_user_model().objects.get(username='AnonymousUser') + harvester_type = "geonode.harvesting.harvesters.geonode.GeonodeLegacyHarvester" + + def setUp(self): + super().setUp() + self.harvester = models.Harvester.objects.create( + remote_url=self.remote_url, + name=self.name, + default_owner=self.user, + harvester_type=self.harvester_type + ) + + def test_get_worker_works(self): + worker = self.harvester.get_harvester_worker() + self.assertEqual(worker.remote_url, self.remote_url) + + def test_setup_periodic_tasks(self): + self.assertIsNotNone(self.harvester.periodic_task) + self.assertIsNotNone(self.harvester.availability_check_task) + self.assertEqual(self.harvester.periodic_task.name, self.harvester.name) + self.assertEqual(self.harvester.periodic_task.interval.every, self.harvester.update_frequency) + self.assertEqual(self.harvester.availability_check_task.name, f"Check availability of {self.name}") + self.assertEqual(self.harvester.availability_check_task.interval.every, self.harvester.check_availability_frequency) + + +class HarvesterSessionTestCase(GeoNodeBaseTestSupport): + remote_url = 'test.com' + name = 'This is geonode harvester' + user = get_user_model().objects.get(username='AnonymousUser') + harvester_type = "geonode.harvesting.harvesters.geonode.GeonodeLegacyHarvester" + + def setUp(self): + super().setUp() + self.harvester = models.Harvester.objects.create( + remote_url=self.remote_url, + name=self.name, + default_owner=self.user, + harvester_type=self.harvester_type + ) + self.harvesting_session = models.HarvestingSession.objects.create( + harvester=self.harvester + ) + + def test_check_attributes(self): + """ + Test attributes of harvester_session after created. + """ + self.assertIsNotNone(self.harvesting_session.pk) + self.assertEqual(self.harvesting_session.harvester, self.harvester) + self.assertEqual(self.harvesting_session.total_records_found, 0) + self.assertEqual(self.harvesting_session.records_harvested, 0) + + +class HarvestableResourceTestCase(GeoNodeBaseTestSupport): + unique_identifier = 'id' + title = 'Test' + remote_url = 'test.com' + name = 'This is geonode harvester' + user = get_user_model().objects.get(username='AnonymousUser') + harvester_type = "geonode.harvesting.harvesters.geonode.GeonodeLegacyHarvester" + + def setUp(self): + super().setUp() + self.harvester = models.Harvester.objects.create( + remote_url=self.remote_url, + name=self.name, + default_owner=self.user, + harvester_type=self.harvester_type + ) + self.harvestable_resource = models.HarvestableResource.objects.create( + unique_identifier=self.unique_identifier, + title=self.title, + harvester=self.harvester, + last_refreshed=datetime.datetime.now() + ) + + def test_check_attributes(self): + self.assertIsNotNone(self.harvestable_resource.pk) + self.assertEqual(self.harvestable_resource.harvester, self.harvester) + self.assertEqual(self.harvestable_resource.title, self.title) + self.assertEqual(self.harvestable_resource.unique_identifier, self.unique_identifier) + self.assertFalse(self.harvestable_resource.should_be_harvested) + self.assertEqual(self.harvestable_resource.status, models.HarvestableResource.STATUS_READY) diff --git a/geonode/harvesting/tests/test_tasks.py b/geonode/harvesting/tests/test_tasks.py new file mode 100644 index 00000000000..8733305a4c2 --- /dev/null +++ b/geonode/harvesting/tests/test_tasks.py @@ -0,0 +1,293 @@ +############################################## +# +# Copyright (C) 2021 OSGeo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +######################################################################### +import mock +from django.contrib.auth import get_user_model +from geonode.tests.base import ( + GeoNodeBaseTestSupport +) + +from .. import ( + models, + tasks, +) + + +class TasksTestCase(GeoNodeBaseTestSupport): + + @classmethod + def setUpTestData(cls): + cls.harvester_remote_url = "fake url" + cls.harvester_name = "harvester1" + cls.harvester_owner = get_user_model().objects.get(username="AnonymousUser") + cls.harvester_type = "geonode.harvesting.harvesters.geonode.GeonodeLegacyHarvester" + cls.harvester = models.Harvester.objects.create( + remote_url=cls.harvester_remote_url, + name=cls.harvester_name, + status=models.Harvester.STATUS_UPDATING_HARVESTABLE_RESOURCES, + default_owner=cls.harvester_owner, + harvester_type=cls.harvester_type, + ) + cls.harvesting_session = models.HarvestingSession.objects.create(harvester=cls.harvester) + + @mock.patch("geonode.harvesting.tasks.models") + @mock.patch("geonode.harvesting.tasks.utils") + @mock.patch("geonode.harvesting.tasks.chord") + def test_harvesting_dispatcher_creates_harvesting_session_when_harvester_available( + self, + mock_harvesting_chord, + mock_harvesting_utils, + mock_harvesting_models + ): + """Test that, when the remote server is available, the dispatcher proceeds to create a harvesting session + and to call the celery chord that set harvesting in motion + + """ + + mock_harvesting_models.HarvestingSession.objects.create.return_value.id.return_value = "fake_session_id" + mock_harvesting_chord.return_value.apply_async.return_value = None + mock_harvesting_utils.update_harvester_availability.return_value = True + + tasks.harvesting_dispatcher(self.harvester.id) + + mock_harvesting_models.HarvestingSession.objects.create.assert_called() + mock_harvesting_chord.assert_called() + + @mock.patch("geonode.harvesting.tasks.models") + @mock.patch("geonode.harvesting.tasks.utils") + @mock.patch("geonode.harvesting.tasks.chord") + def test_harvesting_dispatcher_does_not_create_harvesting_session_when_harvester_not_available( + self, + mock_harvesting_chord, + mock_harvesting_utils, + mock_harvesting_models + ): + """Test that, when the remote server is not available, no harvesting session is created + and no celery chord is called . + + """ + + mock_harvesting_models.HarvestingSession.objects.create.return_value.id.return_value = "fake_session_id" + mock_harvesting_chord.return_value.apply_async.return_value = None + mock_harvesting_utils.update_harvester_availability.return_value = False + + tasks.harvesting_dispatcher(self.harvester.id) + + mock_harvesting_models.HarvestingSession.objects.create.assert_not_called() + mock_harvesting_chord.assert_not_called() + + def test_harvest_resource_updates_geonode_when_remote_resource_exists(self): + """Test that `worker.get_resource()` is called by the `_harvest_resource()` task and that the related workflow is called too. + + Verify that `worker.get_resource()` is always called. Then verify that if the result of `worker.get_resource()` is + not `None`, the `worker.update_geonode_resource()` is called and `worker.update_harvesting_session()` is called too. + + """ + + harvestable_resource_id = "fake id" + with mock.patch("geonode.harvesting.tasks.models") as mock_models: + mock_worker = mock.MagicMock() + mock_worker.get_resource.return_value = "fake_gotten_resource" + mock_worker.should_copy_resource.return_value = False + mock_harvestable_resource = mock.MagicMock(models.HarvestableResource) + mock_harvestable_resource.harvester.get_harvester_worker.return_value = mock_worker + mock_models.HarvestableResource.objects.get.return_value = mock_harvestable_resource + + tasks._harvest_resource(harvestable_resource_id, self.harvesting_session.id) + + mock_models.HarvestableResource.objects.get.assert_called_with(pk=harvestable_resource_id) + mock_worker.get_resource.assert_called() + mock_worker.update_geonode_resource.assert_called() + mock_worker.update_harvesting_session.assert_called() + + def test_harvest_resource_does_not_update_geonode_when_remote_resource_does_not_exist(self): + """Test that the worker does not try to update existing GeoNode resources when the remote resource cannot be harvested.""" + + harvestable_resource_id = "fake id" + with mock.patch("geonode.harvesting.tasks.models") as mock_models: + mock_worker = mock.MagicMock() + mock_worker.get_resource.return_value = None # this means the remote resource was not harvested + mock_worker.should_copy_resource.return_value = False + mock_harvestable_resource = mock.MagicMock(models.HarvestableResource) + mock_harvestable_resource.harvester.get_harvester_worker.return_value = mock_worker + mock_models.HarvestableResource.objects.get.return_value = mock_harvestable_resource + + tasks._harvest_resource(harvestable_resource_id, self.harvesting_session.id) + + mock_models.HarvestableResource.objects.get.assert_called_with(pk=harvestable_resource_id) + mock_worker.get_resource.assert_called() + mock_worker.update_geonode_resource.assert_not_called() + + def test_finish_harvesting_updates_harvester_status(self): + tasks._finish_harvesting(self.harvester.id, self.harvesting_session.id) + self.harvester.refresh_from_db() + self.harvesting_session.refresh_from_db() + self.assertEqual(self.harvester.status, models.Harvester.STATUS_READY) + self.assertIsNotNone(self.harvesting_session.ended) + + def test_handle_harvesting_error_cleans_up_harvest_execution(self): + tasks._handle_harvesting_error(None, harvester_id=self.harvester.id, harvesting_session_id=self.harvesting_session.id) + self.harvester.refresh_from_db() + self.harvesting_session.refresh_from_db() + self.assertEqual(self.harvester.status, models.Harvester.STATUS_READY) + self.assertIsNotNone(self.harvesting_session.ended) + + @mock.patch("geonode.harvesting.tasks.utils") + def test_check_harvester_available(self, mock_harvesting_utils): + tasks.check_harvester_available(self.harvester.id) + mock_harvesting_utils.update_harvester_availability.assert_called_with(self.harvester) + + @mock.patch("geonode.harvesting.tasks._handle_harvestable_resources_update_error") + @mock.patch("geonode.harvesting.tasks._finish_harvestable_resources_update") + @mock.patch("geonode.harvesting.tasks._update_harvestable_resources_batch") + @mock.patch("geonode.harvesting.tasks.chord") + @mock.patch("geonode.harvesting.tasks.models") + def test_update_harvestable_resources_sends_batched_requests(self, mock_models, mock_chord, mock_batch, mock_finalizer, mock_error_handler): + """Verify that the `update_harvestable_resources` task creates a celery chord with the batched task, a finalizer and an error handler.""" + mock_worker = mock.MagicMock() + mock_worker.get_num_available_resources.return_value = 1 + mock_harvester = mock.MagicMock(models.Harvester) + mock_models.Harvester.objects.get.return_value = mock_harvester + mock_harvester.get_harvester_worker.return_value = mock_worker + + tasks.update_harvestable_resources("fake harvester id") + + mock_batch.signature.assert_called() + mock_finalizer.signature.assert_called() + mock_error_handler.signature.assert_called() + mock_chord.assert_called() + mock_chord.return_value.apply_async.assert_called() + + def test_update_harvestable_resources_batch(self): + pass + + +# class TestTaskHarvester(GeoNodeBaseTestSupport): +# """ +# Tests for the harvester model. +# """ +# remote_url = 'test.com' +# name = 'This is geonode harvester' +# user = get_user_model().objects.get(username='AnonymousUser') +# harvester_type = 'geonode.harvesting.tests.harvesters.test_harvester.TestHarvester' +# +# def setUp(self): +# super().setUp() +# self.harvester = models.Harvester.objects.create( +# remote_url=self.remote_url, +# name=self.name, +# default_owner=self.user, +# harvester_type=self.harvester_type +# ) +# +# def test_harvesting_dispatcher(self): +# """ +# Call harvesting_dispatcher create sessions +# """ +# tasks.harvesting_dispatcher(self.harvester.id) +# self.assertIsNotNone(self.harvester.harvesting_sessions.first()) +# +# def test_harvest_resource_failed(self): +# """ +# Call _harvest_resource when the resource is not found +# """ +# harvestable_resource = models.HarvestableResource.objects.create( +# unique_identifier='id', +# title='Test', +# harvester=self.harvester, +# last_refreshed=datetime.datetime.now() +# ) +# harvesting_session = models.HarvestingSession.objects.create( +# harvester=self.harvester +# ) +# tasks._harvest_resource(harvestable_resource.id, harvesting_session.id) +# harvestable_resource.refresh_from_db() +# self.assertFalse(harvestable_resource.last_harvesting_succeeded) +# self.assertTrue('Harvesting failed' in harvestable_resource.last_harvesting_message) +# +# def test_harvest_resource_success(self): +# """ +# Call _harvest_resource when the resource is found +# """ +# with mock.patch.object(TestHarvester, 'get_resource', return_value=resource_info_example): +# harvestable_resource = models.HarvestableResource.objects.create( +# unique_identifier='id', +# title='Test', +# harvester=self.harvester, +# last_refreshed=datetime.datetime.now() +# ) +# harvesting_session = models.HarvestingSession.objects.create( +# harvester=self.harvester +# ) +# tasks._harvest_resource(harvestable_resource.id, harvesting_session.id) +# harvestable_resource.refresh_from_db() +# self.assertTrue(harvestable_resource.last_harvesting_succeeded) +# self.assertIsNotNone(harvestable_resource.geonode_resource) +# +# def test_finish_harvesting(self): +# """ +# Call _finish_harvesting make status ready +# """ +# self.harvester.status = models.Harvester.STATUS_CHECKING_AVAILABILITY +# self.harvester.save() +# self.assertEqual(self.harvester.status, models.Harvester.STATUS_CHECKING_AVAILABILITY) +# +# harvesting_session = models.HarvestingSession.objects.create( +# harvester=self.harvester +# ) +# tasks._finish_harvesting(self.harvester.id, harvesting_session.id) +# self.harvester.refresh_from_db() +# self.assertEqual(self.harvester.status, models.Harvester.STATUS_READY) +# +# def test_check_harvester_available(self): +# """ +# Call check_harvester_available +# """ +# tasks.check_harvester_available(self.harvester.id) +# self.harvester.refresh_from_db() +# self.assertEqual(self.harvester.status, models.Harvester.STATUS_READY) +# self.assertIsNotNone(self.harvester.last_checked_availability) +# self.assertTrue(self.harvester.remote_available) +# +# def test_update_harvestable_resources(self): +# """ +# Call update_harvestable_resources +# """ +# tasks.update_harvestable_resources(self.harvester.id) +# self.harvester.refresh_from_db() +# self.assertEqual(self.harvester.status, models.Harvester.STATUS_UPDATING_HARVESTABLE_RESOURCES) +# +# def test_update_harvestable_resources_batch(self): +# """ +# Call _update_harvestable_resources_batch +# """ +# tasks._update_harvestable_resources_batch(self.harvester.id, 0, 1) +# self.harvester.refresh_from_db() +# self.assertEqual( +# self.harvester.harvestable_resources.count(), 1) +# +# def test_finish_harvestable_resources_update(self): +# """ +# Call _finish_harvestable_resources_update +# """ +# tasks._finish_harvestable_resources_update(self.harvester.id) +# self.harvester.refresh_from_db() +# self.assertIsNotNone(self.harvester.last_checked_harvestable_resources) +# self.assertTrue( +# 'Harvestable resources successfully checked' in self.harvester.last_check_harvestable_resources_message +# ) diff --git a/geonode/harvesting/tests/test_utils.py b/geonode/harvesting/tests/test_utils.py new file mode 100644 index 00000000000..51d578adcc1 --- /dev/null +++ b/geonode/harvesting/tests/test_utils.py @@ -0,0 +1,24 @@ +from unittest import mock + +from django.test import SimpleTestCase + +from .. import utils + + +class UtilsTestCase(SimpleTestCase): + + @mock.patch("geonode.harvesting.utils.jsonschema") + @mock.patch("geonode.harvesting.utils.import_string") + def test_validate_worker_configuration(self, mock_import_string, mock_jsonschema): + extra_config_schema = "fake_config_schema" + mock_worker_class = mock.MagicMock() + mock_worker_class.get_extra_config_schema.return_value = extra_config_schema + mock_import_string.return_value = mock_worker_class + + harvester_type = "fake_harvester_type" + configuration = "fake_configuration" + utils.validate_worker_configuration(harvester_type, configuration) + + mock_import_string.assert_called_with(harvester_type) + mock_worker_class.get_extra_config_schema.assert_called() + mock_jsonschema.validate.assert_called_with(configuration, extra_config_schema) diff --git a/geonode/harvesting/utils.py b/geonode/harvesting/utils.py index 3028be0ffed..da1890e2947 100644 --- a/geonode/harvesting/utils.py +++ b/geonode/harvesting/utils.py @@ -18,12 +18,11 @@ ######################################################################### import typing -import jsonschema - -from lxml import etree from django.utils.timezone import now from django.utils.module_loading import import_string +import jsonschema +from lxml import etree # explicitly disable resolving XML entities in order to prevent malicious attacks diff --git a/geonode/templates/admin/base_site.html b/geonode/templates/admin/base_site.html index fbe03040986..f443a2b390e 100644 --- a/geonode/templates/admin/base_site.html +++ b/geonode/templates/admin/base_site.html @@ -15,7 +15,7 @@

{{ site_header|default:_('G position: relative; float: left; clear: both; - padding: 100px 20px 120px; + padding: 120px 20px 120px; width: 100%; -moz-box-sizing: border-box; -webkit-box-sizing: border-box;