From 6c92848913fec9198384859a1986fe3de72b6f41 Mon Sep 17 00:00:00 2001 From: Binh Vu Date: Sat, 9 Sep 2023 22:58:45 -0700 Subject: [PATCH] fix rust project --- Cargo.lock | 37 ++++++++++++++++++------------- Cargo.toml | 6 +++--- pyproject.toml | 2 +- scripts/build.sh | 5 ++++- scripts/download-data.ipynb | 43 ++++++++++++++++++++++++++++++------- 5 files changed, 65 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 63ed126..5318900 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -37,6 +37,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" + [[package]] name = "anes" version = "0.1.6" @@ -704,11 +710,12 @@ checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" [[package]] name = "hashbrown" -version = "0.13.2" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" dependencies = [ "ahash", + "allocator-api2", "rayon", "serde", ] @@ -807,7 +814,7 @@ dependencies = [ [[package]] name = "kgdata" -version = "3.2.1" +version = "3.2.2" dependencies = [ "anyhow", "bincode", @@ -816,7 +823,7 @@ dependencies = [ "env_logger", "flate2", "glob", - "hashbrown 0.13.2", + "hashbrown 0.14.0", "log", "ord_subset", "petgraph", @@ -1151,13 +1158,13 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.19.1" +version = "0.19.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffb88ae05f306b4bfcde40ac4a51dc0b05936a9207a4b75b798c7729c4258a59" +checksum = "e681a6cfdc4adcc93b4d3cf993749a4552018ee0a9b65fc0ccfad74352c72a38" dependencies = [ "anyhow", "cfg-if", - "hashbrown 0.13.2", + "hashbrown 0.14.0", "indoc", "inventory", "libc", @@ -1171,9 +1178,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.19.1" +version = "0.19.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "554db24f0b3c180a9c0b1268f91287ab3f17c162e15b54caaae5a6b3773396b0" +checksum = "076c73d0bc438f7a4ef6fdd0c3bb4732149136abd952b110ac93e4edb13a6ba5" dependencies = [ "once_cell", "target-lexicon", @@ -1181,9 +1188,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.19.1" +version = "0.19.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "922ede8759e8600ad4da3195ae41259654b9c55da4f7eec84a0ccc7d067a70a4" +checksum = "e53cee42e77ebe256066ba8aa77eff722b3bb91f3419177cf4cd0f304d3284d9" dependencies = [ "libc", "pyo3-build-config", @@ -1191,9 +1198,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.19.1" +version = "0.19.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a5caec6a1dd355964a841fcbeeb1b89fe4146c87295573f94228911af3cc5a2" +checksum = "dfeb4c99597e136528c6dd7d5e3de5434d1ceaf487436a3f03b2d56b6fc9efd1" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -1203,9 +1210,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.19.1" +version = "0.19.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0b78ccbb160db1556cdb6fd96c50334c5d4ec44dc5e0a968d0a1208fa0efa8b" +checksum = "947dc12175c254889edc0c02e399476c2f652b4b9ebd123aa655c224de259536" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 044c20e..b64021d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "kgdata" -version = "3.2.1" +version = "3.2.2" edition = "2021" readme = "README.md" homepage = "https://github.com/binh-vu/kgdata" @@ -24,11 +24,11 @@ flate2 = { version = "1.0.24", features = [ "zlib-ng", ], default-features = false } glob = "0.3.1" -hashbrown = { version = "0.13.2", features = ["serde", "rayon"] } +hashbrown = { version = "0.14.0", features = ["serde", "rayon"] } log = "0.4.17" ord_subset = "3.1.1" petgraph = "0.6.3" -pyo3 = { version = "0.19.1", features = [ +pyo3 = { version = "0.19.2", features = [ "anyhow", "multiple-pymethods", "hashbrown", diff --git a/pyproject.toml b/pyproject.toml index df6d100..fcf9b45 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "kgdata" -version = "5.0.0a8" +version = "5.0.0a9" description = "Library to process dumps of knowledge graphs (Wikipedia, DBpedia, Wikidata)" readme = "README.md" authors = [{ name = "Binh Vu", email = "binh@toan2.com" }] diff --git a/scripts/build.sh b/scripts/build.sh index 4a4363d..9272a27 100644 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -70,8 +70,9 @@ wikidata_dataset entity_metadata wikidata_dataset entity_all_types wikidata_dataset entity_degrees wikidata_dataset entity_labels -wikidata_dataset entity_pagerank wikidata_dataset entity_types_and_degrees +wikidata_dataset entity_outlinks +wikidata_dataset entity_pagerank # ====================================================================== # WIKIPEDIA Datasets @@ -94,6 +95,8 @@ wikidata_db classes wikidata_db properties wikidata_db entities wikidata_db entity_labels +wikidata_db entity_metadata +wikidata_db entity_outlinks wikidata_db entity_redirections wikidata_db wp2wd wikidata_db entity_pagerank diff --git a/scripts/download-data.ipynb b/scripts/download-data.ipynb index 2766231..f9939ff 100644 --- a/scripts/download-data.ipynb +++ b/scripts/download-data.ipynb @@ -48,7 +48,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3964f81555994ad5ae2c05e8cdfb6695", + "model_id": "3f953b8eee6746389eab7561355e3422", "version_major": 2, "version_minor": 0 }, @@ -82,12 +82,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "0ff6421164ee4b888e0c48a7838ab6fc", + "model_id": "77e8475eac214e21a9f850ac58249d9b", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "HBox(children=(Button(description='2023-06-19', style=ButtonStyle()), Button(description='2023-07-03', style=B…" + "HBox(children=(Button(description='2023-08-21', style=ButtonStyle()), Button(description='2023-07-31', style=B…" ] }, "metadata": {}, @@ -96,7 +96,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "cd99b0b3d8bd4928b06043f856b1c49b", + "model_id": "e0d7e19bc0b849be8146cec96885ef1f", "version_major": 2, "version_minor": 0 }, @@ -208,6 +208,18 @@ "wd_files" ] }, + { + "cell_type": "code", + "execution_count": 9, + "id": "cb09fe48-4c4f-4b32-ae59-04a18b9ae0c4", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "wdjobs = []" + ] + }, { "cell_type": "code", "execution_count": 8, @@ -228,7 +240,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "7cd579aa", "metadata": {}, "outputs": [ @@ -238,7 +250,7 @@ "[DumpFile(date=datetime.date(2023, 6, 20), url='https://dumps.wikimedia.org/other/enterprise_html/runs/20230620/enwiki-NS0-20230620-ENTERPRISE-HTML.json.tar.gz')]" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -250,7 +262,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "63c65e42", "metadata": {}, "outputs": [], @@ -271,7 +283,22 @@ "execution_count": null, "id": "39cdeb3b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "17d1fee843d6488ab7f3ed75b7f6e070", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Download enwiki-NS0-20230620-ENTERPRISE-HTML.json.tar.gz: 0.00B [00:00, ?B/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "jobs = wdjobs + wpjobs\n", "with WGet.start() as wget:\n",