From 96b3ab95ce7ac6cf5e4e08478855a1de2b0f1f62 Mon Sep 17 00:00:00 2001 From: Harshad Date: Thu, 8 Feb 2024 16:25:05 -0600 Subject: [PATCH] Updated to use `importlib_resources` instead of `pkg_resources` and prepare for later versions of `pandas` (#492) Fixes #491 - [x] Updated to use `importlib_resources` instead of `pkg_resources` - Reason: `pkg_resources` is going to be deprecated. - [x] Refactor `pandas` related code to smoothly transition to future versions and handle deprecation warnings. - [Pandas PR](https://github.com/pandas-dev/pandas/pull/54710/files#diff-55001624a0932c1b6cee2e6ddb65dea85c1faf0dee84812c0ca0c32916a71438): ``` "Downcasting behavior in `replace` is deprecated and " "will be removed in a future version. To retain the old " "behavior, explicitly call " "`result.infer_objects(copy=False)`. " "To opt-in to the future " "behavior, set " "`pd.set_option('future.no_silent_downcasting', True)`", ``` - `A value is trying to be set on a copy of a slice from a DataFrame` - `.apply(max)` => `.apply(np.maximum.reduce)` - `UserWarning: Boolean Series key will be reindexed to match DataFrame index.` --- poetry.lock | 27 ++++++++++++++++++++++++--- pyproject.toml | 1 + src/sssom/constants.py | 4 ++-- src/sssom/context.py | 6 +++--- src/sssom/util.py | 37 +++++++++++++++++++++++++++++++++---- 5 files changed, 63 insertions(+), 12 deletions(-) diff --git a/poetry.lock b/poetry.lock index a0e56b45..4dd5a6f1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.0 and should not be changed by hand. 
[[package]] name = "alabaster" @@ -732,6 +732,16 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = 
"sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -1036,8 +1046,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -1460,6 +1470,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -1467,8 +1478,16 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = 
"sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -1485,6 +1504,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -1492,6 +1512,7 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = 
"PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -2444,4 +2465,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "19f44e63c0da2447d484d7932ea7ee788b6b7e590a987437e1ddd652f86ea90a" +content-hash = "90ed12266cf27d163dbda15280bd4a50e4c4a36961579c4770dacf466a26e12d" diff --git a/pyproject.toml b/pyproject.toml index f4c3c239..d4864faf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ deprecation = "^2.1.0" pyyaml = "^6.0.1" rdflib = ">=6.0.0" scipy = {version = "*", extras = ["scipy"]} +importlib-resources = "^6.1.1" [tool.poetry.group.dev.dependencies] pytest = {version = ">=7.1.2"} diff --git a/src/sssom/constants.py b/src/sssom/constants.py index 9f8a09c0..19003ac6 100644 --- a/src/sssom/constants.py +++ b/src/sssom/constants.py @@ -6,14 +6,14 @@ from functools import cached_property, lru_cache from typing import Any, Dict, List, Literal, Set -import pkg_resources +import importlib_resources import yaml from linkml_runtime.utils.schema_as_dict import schema_as_dict from linkml_runtime.utils.schemaview import SchemaView HERE = pathlib.Path(__file__).parent.resolve() -SCHEMA_YAML = pkg_resources.resource_filename("sssom_schema", "schema/sssom_schema.yaml") +SCHEMA_YAML = 
importlib_resources.files("sssom_schema").joinpath("schema/sssom_schema.yaml") EXTENDED_PREFIX_MAP = HERE / "obo.epm.json" OWL_EQUIV_CLASS_URI = "http://www.w3.org/2002/07/owl#equivalentClass" diff --git a/src/sssom/context.py b/src/sssom/context.py index 6cff7db0..b4e76155 100644 --- a/src/sssom/context.py +++ b/src/sssom/context.py @@ -5,7 +5,7 @@ from typing import Mapping, Union import curies -import pkg_resources +import importlib_resources from curies import Converter from rdflib.namespace import is_ncname @@ -19,8 +19,8 @@ ] SSSOM_BUILT_IN_PREFIXES = ("sssom", "owl", "rdf", "rdfs", "skos", "semapv") -SSSOM_CONTEXT = pkg_resources.resource_filename( - "sssom_schema", "context/sssom_schema.context.jsonld" +SSSOM_CONTEXT = importlib_resources.files("sssom_schema").joinpath( + "context/sssom_schema.context.jsonld" ) diff --git a/src/sssom/util.py b/src/sssom/util.py index 1d9110f2..1ea7291e 100644 --- a/src/sssom/util.py +++ b/src/sssom/util.py @@ -80,6 +80,13 @@ KEY_FEATURES = [SUBJECT_ID, PREDICATE_ID, OBJECT_ID, PREDICATE_MODIFIER] TRIPLES_IDS = [SUBJECT_ID, PREDICATE_ID, OBJECT_ID] +# ! This will be unnecessary when pandas >= 3.0.0 is released +# ! 
https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.infer_objects.html# +# A value is trying to be set on a copy of a slice from a DataFrame +pd.options.mode.copy_on_write = True +# Get the version of pandas as a tuple of integers +pandas_version = tuple(map(int, pd.__version__.split("."))) + @dataclass class MappingSetDataFrame: @@ -151,6 +158,12 @@ def from_mapping_set_document(cls, doc: MappingSetDocument) -> "MappingSetDataFr df = pd.DataFrame(get_dict_from_mapping(mapping) for mapping in doc.mapping_set.mappings) meta = _extract_global_metadata(doc) + if pandas_version >= (2, 0, 0): + # For pandas >= 2.0.0, use the 'copy' parameter + df = df.infer_objects(copy=False) + else: + # For pandas < 2.0.0, call 'infer_objects()' without any parameters + df = df.infer_objects() # remove columns where all values are blank. df.replace("", np.nan, inplace=True) df.dropna(axis=1, how="all", inplace=True) # remove columns with all row = 'None'-s. @@ -160,6 +173,14 @@ def from_mapping_set_document(cls, doc: MappingSetDocument) -> "MappingSetDataFr slot for slot, slot_metadata in slots.items() if slot_metadata["range"] == "double" } non_double_cols = df.loc[:, ~df.columns.isin(slots_with_double_as_range)] + + if pandas_version >= (2, 0, 0): + # For pandas >= 2.0.0, use the 'copy' parameter + non_double_cols = non_double_cols.infer_objects(copy=False) + else: + # For pandas < 2.0.0, call 'infer_objects()' without any parameters + non_double_cols = non_double_cols.infer_objects() + non_double_cols.replace(np.nan, "", inplace=True) df.update(non_double_cols) @@ -1397,18 +1418,26 @@ def invert_mappings( non_predicate_modified_df = df if subject_prefix: - subject_starts_with_prefix_condition = df[SUBJECT_ID].str.startswith(subject_prefix + ":") - object_starts_with_prefix_condition = df[OBJECT_ID].str.startswith(subject_prefix + ":") + # Filter rows where 'SUBJECT_ID' starts with the prefix but 'OBJECT_ID' does not prefixed_subjects_df = pd.DataFrame( 
non_predicate_modified_df[ - (subject_starts_with_prefix_condition & ~object_starts_with_prefix_condition) + ( + non_predicate_modified_df[SUBJECT_ID].str.startswith(subject_prefix + ":") + & ~non_predicate_modified_df[OBJECT_ID].str.startswith(subject_prefix + ":") + ) ] ) + + # Filter rows where 'SUBJECT_ID' does not start with the prefix but 'OBJECT_ID' does non_prefix_subjects_df = pd.DataFrame( non_predicate_modified_df[ - (~subject_starts_with_prefix_condition & object_starts_with_prefix_condition) + ( + ~non_predicate_modified_df[SUBJECT_ID].str.startswith(subject_prefix + ":") + & non_predicate_modified_df[OBJECT_ID].str.startswith(subject_prefix + ":") + ) ] ) + df_to_invert = non_prefix_subjects_df.loc[ non_prefix_subjects_df[PREDICATE_ID].isin(list(predicate_invert_map.keys())) ]