Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Displaying a way to break the chain_db when mongo-only collections are present #799

Draft
wants to merge 12 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ v0.3.0

* ``zip`` and ``state`` only apply to ``USA`` institutions
* added group item in people schema
* ``KeyError`` for ``ChainDB`` now prints the offending key
* ``KeyError`` for ``ChainDocument`` now prints the offending key
None

* preslist now includes end-dates when meeting is longer than one day
Expand Down
4 changes: 2 additions & 2 deletions docs/tutorials/broker.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ directory. Do not paste them into the _build folder, but rather the base directo

plt.plot(range(10), range(10))
plt.savefig('hello_world.png')
doc = db['projects']['regro']
doc = db['test_db']['projects']['regro']
db.add_file(doc, 'hw_file', 'hello_world.png')

This will:
Expand All @@ -68,7 +68,7 @@ Importantly, this will only retrieve the path to the file.
from regolith.broker import Broker

db = Broker.from_rc()
doc = db['projects']['regro']
doc = db['test_db']['projects']['regro']
path = db.get_file_path(doc, 'hw_file')

This can be used inside tex documents via the ``FigureBuilder`` class/CLI. In order to do so,
Expand Down
9 changes: 7 additions & 2 deletions regolith/broker.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from regolith.database import dump_database, open_dbs
from regolith.runcontrol import DEFAULT_RC, load_rcfile, filter_databases
from regolith.storage import store_client, push
from regolith.mongoclient import load_mongo_col


def load_db(rc_file="regolithrc.json"):
Expand All @@ -23,7 +24,7 @@ class Broker:
>>> # Load the db
>>> db = Broker.from_rc()
>>> # Get a document from the broker
>>> ergs =db['group']['ergs']
>>> ergs =db['test_db']['group']['ergs']
>>> # Store a file
>>> db.add_file(ergs, 'myfile', '/path/to/file/hello.txt')
>>> # Get a file from the store
Expand All @@ -36,6 +37,10 @@ def __init__(self, rc=DEFAULT_RC):
with store_client(rc) as sclient:
self.store = sclient
rc.client = open_dbs(rc)
for name, dbs in rc.client.dbs.items():
for coll in dbs:
if not isinstance(dbs[coll], dict):
dbs[coll] = load_mongo_col(dbs[coll])
self._dbs = rc.client.dbs
self.md = rc.client.chained_db
self.db_client = rc.client
Expand Down Expand Up @@ -88,4 +93,4 @@ def get_file_path(self, document, name):
return None

def __getitem__(self, item):
return self.md[item]
return self._dbs[item]
3 changes: 1 addition & 2 deletions regolith/builders/activitylogbuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from regolith.builders.basebuilder import LatexBuilderBase
from regolith.builders.cpbuilder import is_pending
from regolith.fsclient import _id_key
from regolith.dates import month_to_int, is_current, get_dates
from regolith.sorters import position_key, doc_date_key
from regolith.stylers import sentencecase, month_fullnames
Expand All @@ -25,7 +24,7 @@
awards,
filter_patents,
filter_licenses,
get_id_from_name, merge_collections_all, filter_committees)
get_id_from_name, merge_collections_all, filter_committees, _id_key)


class ActivitylogBuilder(LatexBuilderBase):
Expand Down
6 changes: 4 additions & 2 deletions regolith/builders/appraisalbuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from regolith.builders.basebuilder import LatexBuilderBase
from regolith.builders.cpbuilder import is_pending, CPBuilder
from regolith.fsclient import _id_key
from regolith.dates import month_to_int, is_current, get_dates
from regolith.sorters import position_key, doc_date_key
from regolith.stylers import sentencecase, month_fullnames
Expand All @@ -24,7 +23,10 @@
filter_patents,
filter_licenses,
merge_collections_superior,
get_id_from_name, merge_collections_all)
get_id_from_name,
merge_collections_all,
_id_key
)


class AppraisalBuilder(LatexBuilderBase):
Expand Down
2 changes: 1 addition & 1 deletion regolith/builders/cpbuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@

from regolith.builders.basebuilder import LatexBuilderBase
from regolith.dates import is_current, get_dates
from regolith.fsclient import _id_key
from regolith.sorters import position_key
from regolith.tools import (
all_docs_from_collection,
filter_grants,
fuzzy_retrieval,
merge_collections_all,
_id_key
)


Expand Down
2 changes: 1 addition & 1 deletion regolith/builders/cvbuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from datetime import date

from regolith.builders.basebuilder import LatexBuilderBase
from regolith.fsclient import _id_key
from regolith.sorters import ene_date_key, position_key
from regolith.stylers import sentencecase, month_fullnames
from regolith.tools import (
Expand All @@ -17,6 +16,7 @@
dereference_institution,
merge_collections_superior,
filter_presentations, remove_duplicate_docs,
_id_key
)


Expand Down
2 changes: 1 addition & 1 deletion regolith/builders/htmlbuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from regolith.builders.basebuilder import BuilderBase
from regolith.dates import get_dates
from regolith.fsclient import _id_key
from regolith.sorters import ene_date_key, position_key
from regolith.tools import (
all_docs_from_collection,
Expand All @@ -13,6 +12,7 @@
make_bibtex_file,
document_by_value,
dereference_institution,
_id_key
)


Expand Down
4 changes: 2 additions & 2 deletions regolith/builders/internalhtmlbuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

from regolith.builders.basebuilder import BuilderBase
from regolith.dates import get_dates
from regolith.fsclient import _id_key
from regolith.sorters import position_key, ene_date_key
from regolith.tools import (
all_docs_from_collection,
Expand All @@ -15,7 +14,8 @@
make_bibtex_file,
document_by_value,
dereference_institution,
fuzzy_retrieval
fuzzy_retrieval,
_id_key
)


Expand Down
2 changes: 1 addition & 1 deletion regolith/builders/manuscriptreviewbuilder.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""Builder for Current and Pending Reports."""

from regolith.builders.basebuilder import LatexBuilderBase
from regolith.fsclient import _id_key
from regolith.tools import (
all_docs_from_collection,
_id_key
)


Expand Down
2 changes: 1 addition & 1 deletion regolith/builders/postdocadbuilder.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""Builder for Current and Pending Reports."""

from regolith.builders.basebuilder import LatexBuilderBase
from regolith.fsclient import _id_key
from regolith.tools import (
all_docs_from_collection,
_id_key
)


Expand Down
6 changes: 4 additions & 2 deletions regolith/builders/preslistbuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,16 @@
from copy import deepcopy

from regolith.builders.basebuilder import LatexBuilderBase
from regolith.fsclient import _id_key
from regolith.sorters import position_key
from regolith.tools import (
all_docs_from_collection,
fuzzy_retrieval,
get_person_contact,
number_suffix,
group_member_ids, latex_safe, filter_presentations
group_member_ids,
latex_safe,
filter_presentations,
_id_key
)
from regolith.stylers import sentencecase, month_fullnames
from regolith.dates import get_dates
Expand Down
5 changes: 3 additions & 2 deletions regolith/builders/proposalreviewbuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
from nameparser import HumanName

from regolith.builders.basebuilder import LatexBuilderBase
from regolith.fsclient import _id_key
from regolith.tools import (
all_docs_from_collection,
filter_grants,
fuzzy_retrieval, dereference_institution,
fuzzy_retrieval,
dereference_institution,
_id_key
)


Expand Down
2 changes: 1 addition & 1 deletion regolith/builders/readinglistsbuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
from habanero import Crossref

from regolith.builders.basebuilder import LatexBuilderBase
from regolith.fsclient import _id_key
from regolith.sorters import position_key
from regolith.tools import (
all_docs_from_collection,
get_formatted_crossref_reference,
_id_key
)

class ReadingListsBuilder(LatexBuilderBase):
Expand Down
101 changes: 94 additions & 7 deletions regolith/chained_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@

from collections import ChainMap
from collections.abc import MutableMapping
from copy import deepcopy

from pymongo.collection import Collection as MongoCollection

from regolith.mongoclient import load_mongo_col


class ChainDBSingleton(object):
Expand All @@ -24,8 +29,91 @@ def __new__(cls):
Singleton = ChainDBSingleton()


class ChainDB(ChainMap):
""" A ChainMap who's ``_getitem__`` returns either a ChainDB or
class ChainCollection:
    """Chain same-named collections from several databases into one view.

    A ``ChainCollection`` is backed either by a list of pymongo collections
    (``mongo_maps``) or by a single filesystem collection dict (``fs_map``).
    Up until the transition to remote mongo the chained database was used as
    a read-only reference; when only mongo collections are chained, pymongo
    ``Collection`` methods are forwarded to every chained collection, so the
    object can also be used to edit the remote data.

    Checking whether ``fs_map`` is empty is a good indicator of whether the
    mongo methods can/should be used directly, e.g.::

        if rc.client.chained_db[collection_name].fs_map == {}:
            rc.client.chained_db[collection_name].find_one_and_update(
                {"keyName": "Value"}, {"$set": {UpdateDict}})
    """

    def __init__(self, *maps):
        """Initialize a ChainCollection from the given mappings.

        Parameters
        ----------
        *maps : pymongo collections, or a single dict
            If every argument is a pymongo ``Collection`` they become
            ``mongo_maps``.  If exactly one ``dict`` is given it is deep-copied
            into ``fs_map``.  Any other combination leaves both empty.
        """
        self.mongo_maps = []
        self.fs_map = {}

        map_list = list(maps)

        if map_list and all(
            isinstance(mapping, MongoCollection) for mapping in map_list
        ):
            self.mongo_maps = map_list
        elif len(map_list) == 1 and isinstance(map_list[0], dict):
            # There is only ever one collection for the filesystem,
            # as it is chained at the document level, not collection
            self.fs_map = deepcopy(map_list[0])

    def _chain(self):
        """Return one ChainMap over every backing collection.

        Mongo collections are loaded with ``load_mongo_col`` first because a
        pymongo ``Collection`` is not a mapping and cannot be chained as-is.
        """
        # presumably load_mongo_col returns a dict of documents keyed by
        # ``_id`` -- confirm against regolith.mongoclient
        layers = [load_mongo_col(collection) for collection in self.mongo_maps]
        if self.fs_map:
            layers.append(self.fs_map)
        # Reversed so that later maps take priority in ChainMap lookups.
        return ChainMap(*reversed(layers))

    def __iter__(self):
        """Iterate over the document ids of all chained collections."""
        # load all docs from each mongo map, chain them together with the
        # filesystem map and iterate over the combined keys
        mongo_chain = ChainMap(
            *reversed(
                [load_mongo_col(collection) for collection in self.mongo_maps]
            )
        )
        return iter(ChainMap(mongo_chain, self.fs_map))

    def __getitem__(self, doc_id):
        """Return the chained document stored under ``doc_id``.

        The result is a ``ChainMap`` of the matching mongo documents followed
        by the filesystem document.  An unknown ``doc_id`` yields an empty
        chain rather than raising ``KeyError``.
        """
        found = [
            collection.find_one({"_id": doc_id})
            for collection in self.mongo_maps
        ]
        # find_one returns None when a collection has no such document; a
        # None entry would make every lookup on the resulting ChainMap fail.
        found = [document for document in found if document is not None]
        chained_mongo_docs = ChainMap(*reversed(found))
        chained_fs_docs = self.fs_map.get(doc_id, {})
        return ChainMap(chained_mongo_docs, chained_fs_docs)

    def __setitem__(self, doc_id, document):
        """Store ``document`` under ``doc_id``.

        A ``ChainDocument`` is deep-copied into ``fs_map``.  Any other
        document is applied with ``$set`` to the matching document of every
        mongo collection.
        """
        if isinstance(document, ChainDocument):
            self.fs_map[doc_id] = deepcopy(document)
        elif self.mongo_maps:
            # reached if mongo maps is not empty and the document is not chained
            for db_coll in self.mongo_maps:
                db_coll.find_one_and_update({"_id": doc_id}, {"$set": document})
        # NOTE(review): a non-chained document with no mongo maps is silently
        # dropped, as in the original -- confirm whether that is intended.

    def __getattr__(self, method):
        """Forward pymongo ``Collection`` methods to every mongo collection.

        Returns a callable that invokes ``method`` on each collection in
        ``mongo_maps`` with the given arguments and gathers the results in a
        list: iterable results (e.g. cursors) are flattened into the list,
        single results (documents, result objects) are appended, and ``None``
        results are skipped.

        Raises
        ------
        AttributeError
            If ``method`` is not an attribute of pymongo's ``Collection``.
        """
        if not hasattr(MongoCollection, method):
            raise AttributeError(
                "{!r} object has no attribute {!r}".format(
                    type(self).__name__, method
                )
            )

        bound_methods = [getattr(db_coll, method) for db_coll in self.mongo_maps]

        # This is a closure that forces the evaluation of the mongo method on
        # every collection w/ the same name.
        def multi_call(*args, **kwargs):
            mongo_results = []
            for bound in bound_methods:
                outcome = bound(*args, **kwargs)
                if outcome is None:
                    continue
                is_single = isinstance(outcome, (MutableMapping, str, bytes))
                if is_single or not hasattr(outcome, "__iter__"):
                    mongo_results.append(outcome)
                else:
                    mongo_results.extend(outcome)
            return mongo_results

        return multi_call

    def keys(self):
        """Return a view of the document ids of all chained collections."""
        return self._chain().keys()

    def values(self):
        """Return a view of the documents of all chained collections."""
        return self._chain().values()

    def items(self):
        """Return an iterator of ``(doc_id, document)`` pairs."""
        # Built from a single chain so ids and documents always pair up and
        # each mongo collection is loaded only once.
        return iter(self._chain().items())


class ChainDocument(ChainMap):
""" A ChainMap whose ``__getitem__`` returns either a ChainDocument or
the result"""

def __getitem__(self, key):
Expand All @@ -34,11 +122,11 @@ def __getitem__(self, key):
# Try to get all the data from all the mappings
for mapping in self.maps:
results.append(mapping.get(key, Singleton))
# if all the results are mapping create a ChainDB
# if all the results are mapping create a ChainDocument
if all([isinstance(result, MutableMapping) for result in results]):
for result in results:
if res is None:
res = ChainDB(result)
res = ChainDocument(result)
else:
res.maps.append(result)
elif all([isinstance(result, list) for result in results]):
Expand All @@ -62,16 +150,15 @@ def __setitem__(self, key, value):
if key not in self:
super().__setitem__(key, value)
else:
res = None
results = []
# Try to get all the data from all the mappings
for mapping in reversed(self.maps):
if key in mapping:
mapping[key] = value



def _convert_to_dict(cm):
if isinstance(cm, (ChainMap, ChainDB)):
if isinstance(cm, (ChainMap, ChainDocument)):
r = {}
for k, v in cm.items():
r[k] = _convert_to_dict(v)
Expand Down
13 changes: 9 additions & 4 deletions regolith/client_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from collections import defaultdict

from regolith.fsclient import FileSystemClient
from regolith.mongoclient import MongoClient
from regolith.mongoclient import MongoClient, load_mongo_col


CLIENTS = {
Expand Down Expand Up @@ -99,9 +99,14 @@ def collection_names(self, dbname, include_system_collections=True):

def all_documents(self, collname, copy=True):
"""Returns an iterable over all documents in a collection."""
if copy:
return deepcopy(self.chained_db.get(collname, {})).values()
return self.chained_db.get(collname, {}).values()
if isinstance(self.chained_db.get(collname, {}), dict):
if copy:
return deepcopy(self.chained_db.get(collname, {})).values()
return self.chained_db.get(collname, {}).values()
else:
# assume we've got a mongo collection
mongo_col = self.chained_db.get(collname, {}).values()
return mongo_col

def insert_one(self, dbname, collname, doc):
"""Inserts one document to a database/collection."""
Expand Down
Loading