Skip to content

Commit

Permalink
minor changes
Browse files Browse the repository at this point in the history
  • Loading branch information
Binh Vu committed Jul 24, 2024
1 parent 5c88b52 commit 69904e2
Show file tree
Hide file tree
Showing 7 changed files with 169 additions and 51 deletions.
11 changes: 5 additions & 6 deletions kgdata/config.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import os
from pathlib import Path

from loguru import logger

from kgdata.dbpedia.config import DBpediaDirCfg
from kgdata.wikidata.config import WikidataDirCfg
from kgdata.wikipedia.config import WikipediaDirCfg
from loguru import logger

DEFAULT_DATA_DIR = Path(os.path.abspath(__file__)).parent.parent.parent / "data"

Expand All @@ -24,14 +23,14 @@ def init_dbdir_from_env():

if log_config == "1":
logger.info("Wikidata directory: {}", os.environ[WD_DIR_NAME])
WikidataDirCfg.init(os.environ[WD_DIR_NAME])
WikidataDirCfg.init(os.environ[WD_DIR_NAME], verbose=False)

if DBP_DIR_NAME not in os.environ:
raise KeyError(f"Need the env variable {DBP_DIR_NAME} to set DBpedia directory")

if log_config == "1":
logger.info("DBpedia directory: {}", os.environ[DBP_DIR_NAME])
DBpediaDirCfg.init(os.environ[DBP_DIR_NAME])
DBpediaDirCfg.init(os.environ[DBP_DIR_NAME], verbose=False)

if WP_DIR_NAME not in os.environ:
raise KeyError(
Expand All @@ -40,8 +39,8 @@ def init_dbdir_from_env():

if log_config == "1":
logger.info("Wikipedia directory: {}", os.environ[WP_DIR_NAME])
WikipediaDirCfg.init(os.environ[WP_DIR_NAME])
WikipediaDirCfg.init(os.environ[WP_DIR_NAME], verbose=False)


if __name__ == "__main__":
init_dbdir_from_env()
init_dbdir_from_env()
17 changes: 11 additions & 6 deletions kgdata/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
Mapping,
Optional,
TypeVar,
Union,
)

import orjson
Expand All @@ -30,15 +31,15 @@
rocksdb_load,
)
from hugedict.types import HugeMutableMapping
from loguru import logger
from sm.namespaces.namespace import KnowledgeGraphNamespace

from kgdata.models.entity import Entity, EntityMetadata
from kgdata.models.ont_class import OntologyClass
from kgdata.models.ont_property import OntologyProperty, get_default_props
from loguru import logger
from sm.namespaces.namespace import KnowledgeGraphNamespace

if TYPE_CHECKING:
from hugedict.core.rocksdb import FileFormat
from kgdata.dataset import Dataset

T = TypeVar("T")

Expand Down Expand Up @@ -292,7 +293,7 @@ def __iter__(self) -> Iterator[str]:


def build_database(
dataset: str,
dataset: Union[str, Dataset],
get_db: Callable[[], Any],
compact: bool,
format: Optional[FileFormat] = None,
Expand Down Expand Up @@ -329,8 +330,12 @@ def db_options():
if lang is not None:
ds_kwargs["lang"] = lang

module, func = dataset.rsplit(".", 1)
ds = getattr(import_module(module), func)(**ds_kwargs)
if isinstance(dataset, str):
module, func = dataset.rsplit(".", 1)
ds = getattr(import_module(module), func)(**ds_kwargs)
else:
ds = dataset

assert isinstance(ds, Dataset)
db_sig_file = Path(dbpath) / "_SIGNATURE"
if db_sig_file.exists():
Expand Down
6 changes: 5 additions & 1 deletion kgdata/dbpedia/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from pathlib import Path
from typing import Union

from loguru import logger


class DBpediaDirCfg:
"""Locations of DBpedia dumps and datasets on disk."""
Expand All @@ -19,8 +21,10 @@ def get_instance():
return DBpediaDirCfg.instance

@staticmethod
def init(datadir: Union[str, Path]):
def init(datadir: Union[str, Path], verbose: bool = True):
"""Initialize or update the config object to use the given directory"""
if verbose:
logger.info("DBpedia directory: {}", datadir)
DBpediaDirCfg.instance = DBpediaDirCfg(Path(datadir))
return DBpediaDirCfg.instance

Expand Down
6 changes: 5 additions & 1 deletion kgdata/wikidata/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from pathlib import Path
from typing import Union

from loguru import logger


class WikidataDirCfg:
"""Locations of Wikidata dumps and datasets on disk"""
Expand Down Expand Up @@ -127,7 +129,9 @@ def get_instance():
return WikidataDirCfg.instance

@staticmethod
def init(datadir: Union[str, Path]):
def init(datadir: Union[str, Path], verbose: bool = True):
"""Initialize or update the config object to use the given directory"""
if verbose:
logger.info("Wikidata directory: {}", datadir)
WikidataDirCfg.instance = WikidataDirCfg(Path(datadir))
return WikidataDirCfg.instance
1 change: 0 additions & 1 deletion kgdata/wikidata/datasets/entity_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from typing import Union

import orjson

from kgdata.dataset import Dataset
from kgdata.wikidata.config import WikidataDirCfg
from kgdata.wikidata.datasets.entities import entities
Expand Down
6 changes: 5 additions & 1 deletion kgdata/wikipedia/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from pathlib import Path
from typing import Union

from loguru import logger


class WikipediaConfig:
# URL of the Wikipedia server, with no trailing slash
Expand Down Expand Up @@ -61,7 +63,9 @@ def get_instance():
return WikipediaDirCfg.instance

@staticmethod
def init(datadir: Union[str, Path]):
def init(datadir: Union[str, Path], verbose: bool = True):
"""Initialize or update the config object to use the given directory"""
if verbose:
logger.info("Wikipedia directory: {}", datadir)
WikipediaDirCfg.instance = WikipediaDirCfg(Path(datadir))
return WikipediaDirCfg.instance
Loading

0 comments on commit 69904e2

Please sign in to comment.