Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provider list fallback and list of providers in both servers' /links-endpoints #455

Merged
merged 7 commits into from
Aug 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/update_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ echo "\n-o- Commit update - API Reference -o-"
git add docs/api_reference
git commit -m "Release ${GITHUB_REF#refs/tags/} - API Reference"

echo "-o- Update version -o-"
echo "\n-o- Update version -o-"
invoke setver -v ${GITHUB_REF#refs/tags/}

echo "\n-o- Generate changelog -o-"
Expand Down
22 changes: 14 additions & 8 deletions optimade/server/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
structures,
versions,
)
from optimade.server.routers.utils import get_providers, BASE_URL_PREFIXES
from optimade.server.routers.utils import BASE_URL_PREFIXES


if CONFIG.config_file is None:
Expand Down Expand Up @@ -68,21 +68,27 @@

if not CONFIG.use_real_mongo:
import bson.json_util
from bson.objectid import ObjectId
import optimade.server.data as data
from .routers import ENTRY_COLLECTIONS
from optimade.server.routers import ENTRY_COLLECTIONS
from optimade.server.routers.utils import get_providers

def load_entries(endpoint_name: str, endpoint_collection: MongoCollection):
LOGGER.debug(f"Loading test {endpoint_name}...")
LOGGER.debug("Loading test %s...", endpoint_name)

endpoint_collection.collection.insert_many(getattr(data, endpoint_name, []))
if endpoint_name == "links":
LOGGER.debug(
"Adding Materials-Consortia providers to links from optimade.org"
" Adding Materials-Consortia providers to links from optimade.org"
)
endpoint_collection.collection.insert_many(
bson.json_util.loads(bson.json_util.dumps(get_providers()))
)
LOGGER.debug(f"Done inserting test {endpoint_name}...")
providers = get_providers()
for doc in providers:
endpoint_collection.collection.replace_one(
filter={"_id": ObjectId(doc["_id"]["$oid"])},
replacement=bson.json_util.loads(bson.json_util.dumps(doc)),
upsert=True,
)
LOGGER.debug("Done inserting test %s!", endpoint_name)

for name, collection in ENTRY_COLLECTIONS.items():
load_entries(name, collection)
Expand Down
32 changes: 22 additions & 10 deletions optimade/server/main_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,24 +61,36 @@

if not CONFIG.use_real_mongo and CONFIG.index_links_path.exists():
import bson.json_util
from .routers.links import links_coll
from .routers.utils import mongo_id_for_database
from bson.objectid import ObjectId
from optimade.server.routers.links import links_coll
from optimade.server.routers.utils import mongo_id_for_database, get_providers

LOGGER.debug("Loading index links...")
with open(CONFIG.index_links_path) as f:
data = json.load(f)

processed = []
processed = []
for db in data:
db["_id"] = {"$oid": mongo_id_for_database(db["id"], db["type"])}
processed.append(db)

for db in data:
db["_id"] = {"$oid": mongo_id_for_database(db["id"], db["type"])}
processed.append(db)
LOGGER.debug(
" Inserting index links into collection from %s...", CONFIG.index_links_path
)
links_coll.collection.insert_many(
bson.json_util.loads(bson.json_util.dumps(processed))
)

LOGGER.debug("Inserting index links into collection...")
links_coll.collection.insert_many(
bson.json_util.loads(bson.json_util.dumps(processed))
LOGGER.debug(" Adding Materials-Consortia providers to links from optimade.org...")
providers = get_providers()
for doc in providers:
links_coll.collection.replace_one(
filter={"_id": ObjectId(doc["_id"]["$oid"])},
replacement=bson.json_util.loads(bson.json_util.dumps(doc)),
upsert=True,
)
LOGGER.debug("Done inserting index links...")

LOGGER.debug("Done inserting index links!")


# Add various middleware
Expand Down
58 changes: 44 additions & 14 deletions optimade/server/routers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,40 +312,70 @@ def mongo_id_for_database(database_id: str, database_type: str) -> str:
return str(ObjectId(oid.encode("UTF-8")))


def get_providers():
"""Retrieve Materials-Consortia providers (from https://providers.optimade.org/v1/links)"""
def get_providers() -> list:
"""Retrieve Materials-Consortia providers (from https://providers.optimade.org/v1/links).

Fallback order if providers.optimade.org is not available:

1. Try Materials-Consortia/providers on GitHub.
2. Try submodule `providers`' list of providers.
3. Log warning that providers list from Materials-Consortia is not included in the
`/links`-endpoint.

Returns:
List of raw JSON-decoded providers including MongoDB object IDs.

"""
import requests

try:
from optimade.server.data import providers
import simplejson as json
except ImportError:
providers = None
import json

if providers is None:
try:
import simplejson as json
except ImportError:
import json
provider_list_urls = [
"https://providers.optimade.org/v1/links",
"https://raw.githubusercontent.com/Materials-Consortia/providers"
"/master/src/links/v1/providers.json",
]

for provider_list_url in provider_list_urls:
try:
providers = requests.get("https://providers.optimade.org/v1/links").json()
providers = requests.get(provider_list_url).json()
except (
requests.exceptions.ConnectionError,
requests.exceptions.ConnectTimeout,
json.JSONDecodeError,
):
raise BadRequest(
status_code=500,
detail="Could not retrieve providers list from https://providers.optimade.org",
pass
else:
break
else:
try:
from optimade.server.data import providers
except ImportError:
from optimade.server.logger import LOGGER

LOGGER.warning(
"""Could not retrieve a list of providers!

Tried the following resources:

{}
The list of providers will not be included in the `/links`-endpoint.
""".format(
"".join([f" * {_}\n" for _ in provider_list_urls])
)
)
return []

providers_list = []
for provider in providers.get("data", []):
# Remove/skip "exmpl"
if provider["id"] == "exmpl":
continue

provider.update(provider.pop("attributes"))
provider.update(provider.pop("attributes", {}))

# Add MongoDB ObjectId
provider["_id"] = {
Expand Down
119 changes: 119 additions & 0 deletions tests/server/routers/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
"""Tests specifically for optimade.server.routers.utils."""
from typing import Mapping, Optional, Tuple, Union
from unittest import mock

from requests.exceptions import ConnectionError

import pytest


def mocked_providers_list_response(
    url: Union[str, bytes] = "",
    param: Optional[Union[Mapping[str, str], Tuple[str, str]]] = None,
    **kwargs,
):
    """Stand-in for `requests.get` that always answers successfully.

    Whatever arguments it is called with are ignored; the returned object
    carries status code 200 and its `json()` method hands back the
    `providers` data imported from `optimade.server.data`.

    If that import is not possible the test is failed with a hint to
    initialize the `providers` submodule.

    NOTE: This function is loosely inspired by the stackoverflow response here:
    https://stackoverflow.com/questions/15753390/how-can-i-mock-requests-and-the-response
    """

    class MockResponse:
        """Bare-bones response object exposing `status_code` and `json()`."""

        def __init__(self, data: Union[list, dict], status_code: int):
            self.status_code = status_code
            self.data = data

        def json(self) -> Union[list, dict]:
            """Return the payload this response was created with."""
            return self.data

    try:
        from optimade.server.data import providers as payload
    except ImportError:
        pytest.fail(
            "Cannot import providers from optimade.server.data, "
            "please initialize the `providers` submodule!"
        )

    return MockResponse(payload, 200)


def test_get_providers():
    """Check `get_providers()` against the locally available providers data.

    `requests.get` is patched twice: first with a mock that always returns
    the local providers data, then with a `ConnectionError`. In both
    situations the result must equal the local providers (minus the "exmpl"
    entry), each extended with its `_id` entry.
    """
    try:
        from optimade.server.data import providers
    except ImportError:
        pytest.fail(
            "Cannot import providers from optimade.server.data, "
            "please initialize the `providers` submodule!"
        )

    from optimade.server.routers.utils import get_providers, mongo_id_for_database

    for requests_get_behaviour in (mocked_providers_list_response, ConnectionError):
        with mock.patch("requests.get", side_effect=requests_get_behaviour):
            expected = []
            for entry in providers.get("data", []):
                # The example provider is not expected in the result.
                if entry["id"] == "exmpl":
                    continue
                expected.append(entry)

            # Attach the ObjectId each provider is expected to carry.
            for entry in expected:
                object_id = mongo_id_for_database(entry["id"], entry["type"])
                entry["_id"] = {"$oid": object_id}

            assert get_providers() == expected


def test_get_providers_warning(caplog, top_dir):
    """Make sure a warning is logged as a last resort.

    With `requests.get` patched to raise `ConnectionError` and the
    `providers` attribute removed from `optimade.server.data`,
    `get_providers()` must return an empty list and the captured log
    messages must contain the "Could not retrieve a list of providers!"
    warning listing every URL that was tried.

    The `providers` attribute is put back afterwards, whatever its original
    value was, so other tests see an unchanged `optimade.server.data` module.

    Parameters:
        caplog: pytest's log-capturing fixture.
        top_dir: Requested fixture; it is not used inside this test body.

    """
    import copy
    from optimade.server.routers.utils import get_providers

    provider_list_urls = [
        "https://providers.optimade.org/v1/links",
        "https://raw.githubusercontent.com/Materials-Consortia/providers"
        "/master/src/links/v1/providers.json",
    ]

    # Unique sentinel: tells "attribute was absent" apart from any real value,
    # including falsy ones such as `None` or an empty container.
    not_cached = object()
    providers_cache = not_cached
    try:
        from optimade.server import data

        if hasattr(data, "providers"):
            providers_cache = copy.deepcopy(data.providers)

        caplog.clear()
        with mock.patch("requests.get", side_effect=ConnectionError):
            # Only delete what exists; an unconditional `del` would raise
            # AttributeError when the providers data was never loaded.
            if hasattr(data, "providers"):
                del data.providers  # pylint: disable=no-member
            assert get_providers() == []

            # The continuation lines of the literal start at column 0 on
            # purpose: whitespace inside the literal is part of the expected
            # text, which has to match the logged message exactly.
            warning_message = """Could not retrieve a list of providers!

Tried the following resources:

{}
The list of providers will not be included in the `/links`-endpoint.
""".format(
                "".join([f" * {_}\n" for _ in provider_list_urls])
            )
            assert warning_message in caplog.messages

    finally:
        if providers_cache is not not_cached:
            from optimade.server import data

            data.providers = providers_cache

            # Trying to import providers to make sure it's there again now
            from optimade.server.data import providers

            assert providers == providers_cache