Skip to content

Commit

Permalink
Allow list of files in Dependencies methods (#370)
Browse files Browse the repository at this point in the history
* Allow list of files in Dependencies methods

* Add first tests

* Extend tests

* Update docstrings
  • Loading branch information
hagenw committed May 3, 2024
1 parent ce623f6 commit 799eb2d
Show file tree
Hide file tree
Showing 4 changed files with 269 additions and 175 deletions.
129 changes: 85 additions & 44 deletions audb/core/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,77 +200,95 @@ def tables(self) -> typing.List[str]:
"""
return self._df[self._df["type"] == define.DependType.META].index.tolist()

def archive(self, file: str) -> str:
r"""Name of archive the file belongs to.
def archive(
self,
files: typing.Union[str, typing.Sequence[str]],
) -> typing.Union[str, typing.List[str]]:
r"""Name of archive a file belongs to.
Args:
file: relative file path
files: relative file path(s)
Returns:
archive name
archive name(s)
"""
return self._df.archive[file]
return self._column_loc("archive", files)

def bit_depth(self, file: str) -> int:
def bit_depth(
self,
files: typing.Union[str, typing.Sequence[str]],
) -> typing.Union[int, typing.List[int]]:
r"""Bit depth of media file.
Args:
file: relative file path
files: relative file path(s)
Returns:
bit depth
bit depth(s)
"""
return self._column_loc("bit_depth", file, int)
return self._column_loc("bit_depth", files, int)

def channels(self, file: str) -> int:
def channels(
self,
files: typing.Union[str, typing.Sequence[str]],
) -> typing.Union[int, typing.List[int]]:
r"""Number of channels of media file.
Args:
file: relative file path
files: relative file path(s)
Returns:
number of channels
number(s) of channels
"""
return self._column_loc("channels", file, int)
return self._column_loc("channels", files, int)

def checksum(self, file: str) -> str:
def checksum(
self,
files: typing.Union[str, typing.Sequence[str]],
) -> typing.Union[str, typing.List[str]]:
r"""Checksum of file.
Args:
file: relative file path
files: relative file path(s)
Returns:
checksum of file
checksum of file(s)
"""
return self._column_loc("checksum", file)
return self._column_loc("checksum", files)

def duration(self, file: str) -> float:
def duration(
self,
files: typing.Union[str, typing.Sequence[str]],
) -> typing.Union[float, typing.List[float]]:
r"""Duration of file.
Args:
file: relative file path
files: relative file path(s)
Returns:
duration in seconds
duration(s) in seconds
"""
return self._column_loc("duration", file, float)
return self._column_loc("duration", files, float)

def format(self, file: str) -> str:
def format(
self,
files: typing.Union[str, typing.Sequence[str]],
) -> typing.Union[str, typing.List[str]]:
r"""Format of file.
Args:
file: relative file path
files: relative file path(s)
Returns:
file format (always lower case)
file format(s) (always lower case)
"""
return self._column_loc("format", file)
return self._column_loc("format", files)

def load(self, path: str):
r"""Read dependencies from file.
Expand Down Expand Up @@ -321,29 +339,35 @@ def load(self, path: str):
)
self._df.index = self._df.index.astype("string")

def removed(self, file: str) -> bool:
def removed(
self,
files: typing.Union[str, typing.Sequence[str]],
) -> typing.Union[bool, typing.List[bool]]:
r"""Check if file is marked as removed.
Args:
file: relative file path
files: relative file path(s)
Returns:
``True`` if file was removed (one value per file if a list of files is given)
"""
return self._column_loc("removed", file, bool)
return self._column_loc("removed", files, bool)

def sampling_rate(self, file: str) -> int:
def sampling_rate(
self,
files: typing.Union[str, typing.Sequence[str]],
) -> typing.Union[int, typing.List[int]]:
r"""Sampling rate of media file.
Args:
file: relative file path
files: relative file path(s)
Returns:
sampling rate in Hz
sampling rate(s) in Hz
"""
return self._column_loc("sampling_rate", file, int)
return self._column_loc("sampling_rate", files, int)

def save(self, path: str):
r"""Write dependencies to file.
Expand All @@ -362,29 +386,35 @@ def save(self, path: str):
protocol=4, # supported by Python >= 3.4
)

def type(self, file: str) -> int:
def type(
self,
files: typing.Union[str, typing.Sequence[str]],
) -> typing.Union[int, typing.List[int]]:
r"""Type of file.
Args:
file: relative file path
files: relative file path(s)
Returns:
type
type(s)
"""
return self._column_loc("type", file, int)
return self._column_loc("type", files, int)

def version(self, file: str) -> str:
def version(
self,
files: typing.Union[str, typing.Sequence[str]],
) -> typing.Union[str, typing.List[str]]:
r"""Version of file.
Args:
file: relative file path
files: relative file path(s)
Returns:
version string
version string(s)
"""
return self._column_loc("version", file)
return self._column_loc("version", files)

def _add_attachment(
self,
Expand Down Expand Up @@ -487,10 +517,21 @@ def _column_loc(
dtype: typing.Callable = None,
) -> typing.Union[typing.Any, typing.List[typing.Any]]:
r"""Column content for selected files."""
value = self._df.at[files, column]
if dtype is not None:
value = dtype(value)
return value
# Single file
if isinstance(files, str):
value = self._df.at[files, column]
if dtype is not None:
value = dtype(value)
return value

# Multiple files
else:
values = self._df.loc[files, column]
if dtype is not None:
values = [dtype(value) for value in values]
else:
values = values.tolist()
return values

def _drop(self, files: typing.Sequence[str]):
r"""Drop files from table.
Expand Down
64 changes: 37 additions & 27 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,34 +51,44 @@ using `pyarrow` dtypes).
|-------------------------------------------------|----------|----------|-----------|
| Dependencies.\_\_call__() | 0.000 | 0.000 | 0.000 |
| Dependencies.\_\_contains__(10000 files) | 0.005 | 0.004 | 0.004 |
| Dependencies.\_\_getitem__(10000 files) | 0.322 | 0.224 | 0.900 |
| Dependencies.\_\_getitem__(10000 files) | 0.316 | 0.219 | 0.903 |
| Dependencies.\_\_len__() | 0.000 | 0.000 | 0.000 |
| Dependencies.\_\_str__() | 0.006 | 0.005 | 0.006 |
| Dependencies.archives | 0.144 | 0.116 | 0.152 |
| Dependencies.attachments | 0.030 | 0.018 | 0.018 |
| Dependencies.attachment_ids | 0.029 | 0.018 | 0.018 |
| Dependencies.files | 0.030 | 0.011 | 0.046 |
| Dependencies.media | 0.129 | 0.073 | 0.095 |
| Dependencies.removed_media | 0.117 | 0.070 | 0.087 |
| Dependencies.table_ids | 0.037 | 0.026 | 0.023 |
| Dependencies.tables | 0.029 | 0.017 | 0.017 |
| Dependencies.archive(10000 files) | 0.045 | 0.042 | 0.065 |
| Dependencies.bit_depth(10000 files) | 0.024 | 0.024 | 0.045 |
| Dependencies.channels(10000 files) | 0.023 | 0.023 | 0.045 |
| Dependencies.checksum(10000 files) | 0.026 | 0.023 | 0.047 |
| Dependencies.duration(10000 files) | 0.023 | 0.023 | 0.043 |
| Dependencies.format(10000 files) | 0.026 | 0.023 | 0.047 |
| Dependencies.removed(10000 files) | 0.023 | 0.023 | 0.043 |
| Dependencies.sampling_rate(10000 files) | 0.023 | 0.023 | 0.043 |
| Dependencies.type(10000 files) | 0.023 | 0.023 | 0.043 |
| Dependencies.version(10000 files) | 0.026 | 0.023 | 0.047 |
| Dependencies._add_attachment() | 0.055 | 0.062 | 0.220 |
| Dependencies._add_media(10000 files) | 0.057 | 0.057 | 0.066 |
| Dependencies._add_meta() | 0.117 | 0.129 | 0.145 |
| Dependencies._drop() | 0.075 | 0.078 | 0.121 |
| Dependencies._remove() | 0.061 | 0.069 | 0.064 |
| Dependencies._update_media() | 0.087 | 0.086 | 0.145 |
| Dependencies._update_media_version(10000 files) | 0.011 | 0.011 | 0.020 |
| Dependencies.\_\_str__() | 0.005 | 0.005 | 0.006 |
| Dependencies.archives | 0.143 | 0.118 | 0.144 |
| Dependencies.attachments | 0.029 | 0.018 | 0.018 |
| Dependencies.attachment_ids | 0.028 | 0.017 | 0.017 |
| Dependencies.files | 0.030 | 0.011 | 0.043 |
| Dependencies.media | 0.132 | 0.071 | 0.086 |
| Dependencies.removed_media | 0.123 | 0.068 | 0.081 |
| Dependencies.table_ids | 0.035 | 0.025 | 0.023 |
| Dependencies.tables | 0.028 | 0.017 | 0.017 |
| Dependencies.archive(10000 files) | 0.028 | 0.025 | 0.047 |
| Dependencies.archive([10000 files]) | 0.134 | 0.008 | 0.224 |
| Dependencies.bit_depth(10000 files) | 0.026 | 0.024 | 0.045 |
| Dependencies.bit_depth([10000 files]) | 0.140 | 0.002 | 0.224 |
| Dependencies.channels(10000 files) | 0.025 | 0.024 | 0.044 |
| Dependencies.channels([10000 files]) | 0.140 | 0.002 | 0.224 |
| Dependencies.checksum(10000 files) | 0.027 | 0.025 | 0.047 |
| Dependencies.checksum([10000 files]) | 0.142 | 0.002 | 0.220 |
| Dependencies.duration(10000 files) | 0.025 | 0.025 | 0.044 |
| Dependencies.duration([10000 files]) | 0.139 | 0.002 | 0.223 |
| Dependencies.format(10000 files) | 0.027 | 0.023 | 0.047 |
| Dependencies.format([10000 files]) | 0.142 | 0.002 | 0.221 |
| Dependencies.removed(10000 files) | 0.025 | 0.024 | 0.044 |
| Dependencies.removed([10000 files]) | 0.140 | 0.002 | 0.224 |
| Dependencies.sampling_rate(10000 files) | 0.026 | 0.024 | 0.045 |
| Dependencies.sampling_rate([10000 files]) | 0.140 | 0.002 | 0.223 |
| Dependencies.type(10000 files) | 0.026 | 0.024 | 0.045 |
| Dependencies.type([10000 files]) | 0.141 | 0.002 | 0.225 |
| Dependencies.version(10000 files) | 0.027 | 0.023 | 0.047 |
| Dependencies.version([10000 files]) | 0.142 | 0.002 | 0.226 |
| Dependencies._add_attachment() | 0.059 | 0.062 | 0.210 |
| Dependencies._add_media(10000 files) | 0.058 | 0.057 | 0.066 |
| Dependencies._add_meta() | 0.115 | 0.133 | 0.145 |
| Dependencies._drop() | 0.075 | 0.076 | 0.118 |
| Dependencies._remove() | 0.062 | 0.069 | 0.065 |
| Dependencies._update_media() | 0.087 | 0.090 | 0.144 |
| Dependencies._update_media_version(10000 files) | 0.011 | 0.011 | 0.021 |


## audb.Dependencies loading/writing to file
Expand Down
Loading

0 comments on commit 799eb2d

Please sign in to comment.