From 5306b6d7a98733a090b5bcf3ec98df083b00e30c Mon Sep 17 00:00:00 2001 From: Michael Wayne Goodman Date: Wed, 10 Feb 2021 14:30:18 +0800 Subject: [PATCH] Fix #23: add support for adding ILIs to the db --- CHANGELOG.md | 1 + wn/_add.py | 48 ++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 43 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5031def..d98bc9e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ * `wn.project.Package.type` property ([#23]) * Support for detecting and loading ILI tab-separated-value exports; not directly accessible through the public API at this time ([#23]) +* Support for adding ILI resources to the database ([#23]) ### Fixed diff --git a/wn/_add.py b/wn/_add.py index 9008899..0df058e 100644 --- a/wn/_add.py +++ b/wn/_add.py @@ -3,15 +3,18 @@ """ from typing import Optional, Type +from itertools import islice import logging import wn from wn._types import AnyPath +from wn.constants import _WORDNET, _ILI from wn._db import connect, relmap, ilistatmap from wn._queries import find_lexicons from wn.util import ProgressHandler, ProgressBar from wn.project import iterpackages from wn import lmf +from wn import _ili logger = logging.getLogger('wn') @@ -65,7 +68,12 @@ def add( try: for package in iterpackages(source): - _add_lmf(package.resource_file(), progress) + if package.type == _WORDNET: + _add_lmf(package.resource_file(), progress) + elif package.type == _ILI: + _add_ili(package.resource_file(), progress) + else: + raise wn.Error(f'unknown package type: {package.type}') finally: progress.close() @@ -162,11 +170,11 @@ def _precheck(source, cur): def _split(sequence): - i = 0 - for j in range(0, len(sequence), BATCH_SIZE): - yield sequence[i:j] - i = j - yield sequence[i:] + it = iter(sequence) + batch = list(islice(it, 0, BATCH_SIZE)) + while len(batch): + yield batch + batch = list(islice(it, 0, BATCH_SIZE)) def _insert_synsets(synsets, lex_id, cur, progress): @@ -462,6 +470,34 @@ def _insert_examples(objs, lexid, table, cur, progress): progress.update(len(data)) +def _add_ili( + source, + progress: ProgressHandler, +) -> None: + query = ''' + INSERT INTO ilis + VALUES (null,?,?,?,null) + ON CONFLICT(id) DO + UPDATE SET status=excluded.status, + definition=excluded.definition + ''' + with connect() as conn: + cur = conn.cursor() + + progress.flash(f'Reading ILI file: {source!s}') + ili = list(_ili.load(source)) + progress.set(count=0, total=len(ili), status='ILI') + for batch in _split(ili): + data = [ + (info['ili'], + ilistatmap[info.get('status', 'active')], + info.get('definition')) + for info in batch + ] + cur.executemany(query, data) + progress.update(len(data)) + + def remove( lexicon: str, progress_handler: Optional[Type[ProgressHandler]] = ProgressBar