Skip to content

Commit

Permalink
Establish iter_collections module, import iterdir() from gooey
Browse files Browse the repository at this point in the history
This is a start for consolidating common functionality scattered
around various extensions into a single implementation (pattern).

This changeset imports `iterdir()` from `datalad-gooey`. In contrast to
the original implementation, this new one is using a stricter approach
to types, and overfits less to a dataset-aware use case.

However, it is not meant to be the exclusive implementation, but merely
a start and a place to migrate directory iterators into.

Ping datalad#323
  • Loading branch information
mih committed May 9, 2023
1 parent 9c65b96 commit ae185e3
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 0 deletions.
8 changes: 8 additions & 0 deletions datalad_next/iter_collections/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Iterators for particular types of collections
.. currentmodule:: datalad_next.iter_collections
.. autosummary::
:toctree: generated
directory
"""
78 changes: 78 additions & 0 deletions datalad_next/iter_collections/directory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""Report on the content of directories"""

from __future__ import annotations

from dataclasses import dataclass
from enum import Enum
import os
from pathlib import Path
import stat
from typing import Generator

from datalad_next.exceptions import CapturedException


class PathType(Enum):
    """Closed set of path-type labels used when reporting directory content.

    Each member's value is the lower-case label string of the same name.
    """
    # label for anything that is neither a symlink nor a directory
    file = 'file'
    # label for a directory
    directory = 'directory'
    # label for a symbolic link (the link itself, not what it points to)
    symlink = 'symlink'


@dataclass
class IterdirItem:
    """Record describing a single entry found in a directory.

    Attributes
    ----------
    path: Path
      Path of the reported entry.
    type: PathType
      Path-type label of the entry.
    symlink_target: Path or None
      Target path of a symbolic link; ``None`` (the default) when no
      target is reported.
    """
    path: Path
    type: PathType
    symlink_target: Path | None = None


def iterdir(
    path: Path,
    symlink_targets: bool = True,
) -> Generator[IterdirItem, None, None]:
    """Use ``Path.iterdir()`` to iterate over a directory and report content

    In addition to a plain ``Path.iterdir()`` the report includes a path-type
    label (distinguished are ``file``, ``directory``, ``symlink``), and
    (optionally) information on the target path of a symlink.

    Entries that disappear while the iteration is running (e.g., temporary
    files managed by other processes) are skipped rather than reported.

    Parameters
    ----------
    path: Path
      Path of the directory to report content for (iterate over).
    symlink_targets: bool, optional
      Flag whether to read and report the target path of a symbolic link.
      If disabled, ``symlink_target`` stays ``None`` for every item.

    Yields
    ------
    :class:`IterdirItem`
    """
    # figure out the type, as far as we need it.
    # no distinction is made between plain directories and any special
    # kind of directory; all of them are reported as `directory`
    for c in path.iterdir():
        # c could disappear while this is running. Example: temp files managed
        # by other processes.
        try:
            # lstat() so that a symlink is inspected itself, not its target
            cmode = c.lstat().st_mode
        except FileNotFoundError as e:
            # wrap the exception (presumably for logging -- see
            # CapturedException) and move on to the next entry
            CapturedException(e)
            continue
        if stat.S_ISLNK(cmode):
            ctype = PathType.symlink
        elif stat.S_ISDIR(cmode):
            ctype = PathType.directory
        else:
            # the rest is a file
            # there could be fifos and sockets, etc.
            # but we do not recognize them here
            ctype = PathType.file
        item = IterdirItem(
            path=c,
            type=ctype,
        )
        # only resolve link targets when the caller asked for them
        if symlink_targets and ctype == PathType.symlink:
            try:
                # could be p.readlink() from PY3.9+
                item.symlink_target = Path(os.readlink(c))
            except FileNotFoundError as e:
                # the link vanished between lstat() and readlink();
                # treat it like any other entry that disappeared
                CapturedException(e)
                continue
        yield item
Empty file.
66 changes: 66 additions & 0 deletions datalad_next/iter_collections/tests/test_iterdir.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import pytest

from datalad_next.tests.utils import (
create_tree,
rmtree,
)
from datalad_next.utils import check_symlink_capability

from ..directory import (
IterdirItem,
PathType,
iterdir,
)


@pytest.fixture(scope="function")
def dir_tree(tmp_path_factory):
    """Yield a temporary directory populated with a small test tree.

    The tree holds one file, one subdirectory containing a file, and a
    symlink named ``symlink`` pointing to the file in the subdirectory.
    The symlink is only created when ``check_symlink_capability()``
    returns a true value. The directory is removed after the test.
    """
    root = tmp_path_factory.mktemp("dir_tree")
    layout = {
        "random_file1.txt": "some content",
        "some_dir": {
            "file_in_dir.txt": "some content in file in dir",
        },
    }
    create_tree(root, layout)

    link = root / 'symlink'
    link_target = root / 'some_dir' / "file_in_dir.txt"
    # presumably probes whether symlinks can be created here -- the helper
    # is defined elsewhere
    if check_symlink_capability(link, link_target):
        link.symlink_to(link_target)

    yield root
    # teardown: remove everything that is left of the tree
    rmtree(root)


def test_iterdir(dir_tree):
    """Check ``iterdir()`` reports and its robustness to concurrent removal.

    First, all items reported for the fixture directory are compared
    against the expected records. Second, the directory content is deleted
    after the iteration has started; the generator must then finish
    without yielding anything further and without raising.
    """
    expected = [
        IterdirItem(path=dir_tree / 'random_file1.txt', type=PathType.file),
        IterdirItem(path=dir_tree / 'some_dir', type=PathType.directory),
    ]
    # a symlink record is only expected when the capability check passes
    can_symlink = check_symlink_capability(
        dir_tree / '__dummy1__',
        dir_tree / '__dummy2__',
    )
    if can_symlink:
        link_record = IterdirItem(
            path=dir_tree / 'symlink',
            type=PathType.symlink,
            symlink_target=dir_tree / 'some_dir' / "file_in_dir.txt",
        )
        expected.append(link_record)

    reported = list(iterdir(dir_tree))
    assert len(reported) == len(expected)
    # order of the report is not checked, only membership
    for record in iterdir(dir_tree):
        assert record in expected

    # check iterdir() to be robust to concurrent removal
    running = iterdir(dir_tree)
    # start iteration
    next(running)
    # wipe out content
    for entry in dir_tree.glob('*'):
        rmtree(entry)
    # consume the rest of the generator, nothing more, but also no crashing
    leftover = list(running)
    assert [] == leftover
1 change: 1 addition & 0 deletions docs/source/pyutils.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Python utilities
constraints
credman.manager
exceptions
iter_collections
url_operations
url_operations.any
url_operations.file
Expand Down

0 comments on commit ae185e3

Please sign in to comment.