From b690ca7076bdf8188a4768131399caeafb6d97b2 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Sun, 4 Jun 2023 18:13:35 +0200 Subject: [PATCH] Establish `FileSystemItem.from_path()` Can be reused by other derived classes. --- datalad_next/iter_collections/directory.py | 39 ++++-------------- datalad_next/iter_collections/utils.py | 47 +++++++++++++++++++++- 2 files changed, 53 insertions(+), 33 deletions(-) diff --git a/datalad_next/iter_collections/directory.py b/datalad_next/iter_collections/directory.py index c0f006ea..027a4785 100644 --- a/datalad_next/iter_collections/directory.py +++ b/datalad_next/iter_collections/directory.py @@ -6,12 +6,7 @@ from __future__ import annotations from dataclasses import dataclass -import os -from pathlib import ( - Path, - PurePath, -) -import stat +from pathlib import Path from typing import Generator from datalad_next.exceptions import CapturedException @@ -38,8 +33,7 @@ def iter_dir( information on file system elements, such as ``size``, or ``mtime``. In addition to a plain ``Path.iterdir()`` the report includes a path-type - label (distinguished are ``file``, ``directory``, ``symlink``). Moreover, - any number of checksums for file content can be computed and reported. + label (distinguished are ``file``, ``directory``, ``symlink``). Parameters ---------- @@ -58,33 +52,14 @@ def iter_dir( # c could disappear while this is running. Example: temp files managed # by other processes. try: - cstat = c.lstat() + item = DirectoryItem.from_path( + c, + link_target=True, + ) except FileNotFoundError as e: CapturedException(e) continue - cmode = cstat.st_mode - if stat.S_ISLNK(cmode): - ctype = FileSystemItemType.symlink - elif stat.S_ISDIR(cmode): - ctype = FileSystemItemType.directory - else: - # the rest is a file - # there could be fifos and sockets, etc. - # but we do not recognize them here - ctype = FileSystemItemType.file - item = DirectoryItem( - name=PurePath(c.name), - type=ctype, - size=cstat.st_size, - mode=cmode, - mtime=cstat.st_mtime, - uid=cstat.st_uid, - gid=cstat.st_gid, - ) - if ctype == FileSystemItemType.symlink: - # could be p.readlink() from PY3.9+ - item.link_target = PurePath(os.readlink(c)) - if fp and ctype == FileSystemItemType.file: + if fp and item.type == FileSystemItemType.file: with c.open('rb') as fp: item.fp = fp yield item diff --git a/datalad_next/iter_collections/utils.py b/datalad_next/iter_collections/utils.py index 8cf05d95..d9196ded 100644 --- a/datalad_next/iter_collections/utils.py +++ b/datalad_next/iter_collections/utils.py @@ -4,7 +4,12 @@ from dataclasses import dataclass from enum import Enum -from pathlib import PurePath +import os +from pathlib import ( + Path, + PurePath, +) +import stat from typing import ( Any, IO, @@ -58,6 +63,46 @@ class FileSystemItem(PathBasedItem, TypedItem): link_target: PurePath | None = None fp: IO | None = None + @classmethod + def from_path( + cls, + path: Path, + *, + link_target: bool = True, + fp: bool = False, + ): + """Populate item properties from a single `stat` and `readlink` call + + The given ``path`` must exist. The ``link_target`` flag indicates + whether to report the result of ``readlink`` for a symlink-type + path. If `fp` is set, the item includes a file-like object + to access the file's content. + """ + cstat = path.lstat() + cmode = cstat.st_mode + if stat.S_ISLNK(cmode): + ctype = FileSystemItemType.symlink + elif stat.S_ISDIR(cmode): + ctype = FileSystemItemType.directory + else: + # the rest is a file + # there could be fifos and sockets, etc. + # but we do not recognize them here + ctype = FileSystemItemType.file + item = cls( + name=path, + type=ctype, + size=cstat.st_size, + mode=cmode, + mtime=cstat.st_mtime, + uid=cstat.st_uid, + gid=cstat.st_gid, + ) + if ctype == FileSystemItemType.symlink: + # could be p.readlink() from PY3.9+ + item.link_target = PurePath(os.readlink(path)) + return item + def compute_multihash_from_fp(fp, hash: List[str], bufsize=COPY_BUFSIZE): """Compute multiple hashes from a file-like