From 942080e061e6c372c68043f52f8f37029ef06695 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Fri, 16 Jun 2023 09:30:51 +0200 Subject: [PATCH] Make `TarfileItem.name` be of type `PurePosixPath` Rationale from https://github.com/datalad/datalad-next/pull/409: ```py >>> PureWindowsPath(*PurePosixPath('d/a\\b\\c.txt').parts) PureWindowsPath('d/a/b/c.txt') ``` This means that we must relay the POSIX nature of the archive member path to the users, because there is no way to express this as a platform (Windows) path -- and also no way to extract this file under an equivalent name on an FS that uses Windows semantics. So a type mismatch can be used to trigger mitigation strategies. I feel like a clean(er) solution would be to change `TarfileItem` to declare a `name` of type `PurePosixPath`. For the same reason and rationale, a symlink target must also be communicated in POSIX form. --- datalad_next/iter_collections/tarfile.py | 14 ++++++++++---- .../iter_collections/tests/test_itertar.py | 18 +++++++++--------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/datalad_next/iter_collections/tarfile.py b/datalad_next/iter_collections/tarfile.py index 6adf44a5..704b883c 100644 --- a/datalad_next/iter_collections/tarfile.py +++ b/datalad_next/iter_collections/tarfile.py @@ -8,7 +8,6 @@ from dataclasses import dataclass from pathlib import ( Path, - PurePath, PurePosixPath, ) import tarfile @@ -22,7 +21,14 @@ @dataclass # sadly PY3.10+ only (kw_only=True) class TarfileItem(FileSystemItem): - pass + name: PurePosixPath + """TAR uses POSIX paths as item identifiers.
Not all POSIX paths can + be represented on all (non-POSIX) file systems, therefore the item + name is represented in POSIX form, instead of a platform-dependent + ``PurePath``.""" + link_target: PurePosixPath | None = None + """Just as for ``name``, a link target is also reported in POSIX + format.""" def iter_tar( @@ -63,14 +69,14 @@ def iter_tar( else FileSystemItemType.hardlink if member.islnk() \ else FileSystemItemType.specialfile item = TarfileItem( - name=PurePath(PurePosixPath(member.name)), + name=PurePosixPath(member.name), type=mtype, size=member.size, mode=member.mode, mtime=member.mtime, uid=member.uid, gid=member.gid, - link_target=PurePath(PurePosixPath(member.linkname)) + link_target=PurePosixPath(member.linkname) if member.linkname else None, ) if fp and mtype in ( diff --git a/datalad_next/iter_collections/tests/test_itertar.py b/datalad_next/iter_collections/tests/test_itertar.py index 8c11f856..7f76b298 100644 --- a/datalad_next/iter_collections/tests/test_itertar.py +++ b/datalad_next/iter_collections/tests/test_itertar.py @@ -1,4 +1,4 @@ -from pathlib import PurePath +from pathlib import PurePosixPath import pytest from datalad.api import download @@ -47,7 +47,7 @@ def test_iter_tar(sample_tar_xz): 'md5': 'ba1f2511fc30423bdbb183fe33f3dd0f'} targets = [ TarfileItem( - name=PurePath('test-archive'), + name=PurePosixPath('test-archive'), type=FileSystemItemType.directory, size=0, mtime=1683657433, @@ -55,16 +55,16 @@ def test_iter_tar(sample_tar_xz): uid=1000, gid=1000), TarfileItem( - name=PurePath('test-archive') / '123.txt', + name=PurePosixPath('test-archive') / '123.txt', type=FileSystemItemType.symlink, size=0, mtime=1683657414, mode=511, uid=1000, gid=1000, - link_target=PurePath('subdir') / 'onetwothree_again.txt'), + link_target=PurePosixPath('subdir') / 'onetwothree_again.txt'), TarfileItem( - name=PurePath('test-archive') / '123_hard.txt', + name=PurePosixPath('test-archive') / '123_hard.txt', type=FileSystemItemType.file, size=4, 
mtime=1683657364, @@ -73,7 +73,7 @@ def test_iter_tar(sample_tar_xz): gid=1000, link_target=None), TarfileItem( - name=PurePath('test-archive') / 'subdir', + name=PurePosixPath('test-archive') / 'subdir', type=FileSystemItemType.directory, size=0, mtime=1683657400, @@ -81,7 +81,7 @@ def test_iter_tar(sample_tar_xz): uid=1000, gid=1000), TarfileItem( - name=PurePath('test-archive') / 'subdir' / 'onetwothree_again.txt', + name=PurePosixPath('test-archive') / 'subdir' / 'onetwothree_again.txt', type=FileSystemItemType.file, size=4, mtime=1683657400, @@ -90,14 +90,14 @@ def test_iter_tar(sample_tar_xz): gid=1000, link_target=None), TarfileItem( - name=PurePath('test-archive') / 'onetwothree.txt', + name=PurePosixPath('test-archive') / 'onetwothree.txt', type=FileSystemItemType.hardlink, size=0, mtime=1683657364, mode=436, uid=1000, gid=1000, - link_target=PurePath('test-archive') / '123_hard.txt'), + link_target=PurePosixPath('test-archive') / '123_hard.txt'), ] ires = [] for i in iter_tar(sample_tar_xz, fp=True):