Skip to content

Commit

Permalink
Make TarfileItem.name be of type PurePosixPath
Browse files Browse the repository at this point in the history
Rationale from #409:

```py
>>> PureWindowsPath(*PurePosixPath('d/a\\b\\c.txt').parts)
PureWindowsPath('d/a/b/c.txt')
```

This means that we must relay the POSIX nature of the archive member
path to the users, because there is no way to express this as a platform
(Windows) path -- and also no way to extract this file under an
equivalent name on an FS that uses Windows semantics. So a type
mismatch can be used to trigger mitigation strategies.

I feel like a clean(er) solution would be to change `TarfileItem` to
declare a `name` of type `PurePosixPath`.

For the same reason and rationale, a symlink target must also be
communicated in POSIX form.
  • Loading branch information
mih committed Jun 19, 2023
1 parent 59a6317 commit 942080e
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 13 deletions.
14 changes: 10 additions & 4 deletions datalad_next/iter_collections/tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from dataclasses import dataclass
from pathlib import (
Path,
PurePath,
PurePosixPath,
)
import tarfile
Expand All @@ -22,7 +21,14 @@

@dataclass # sadly PY3.10+ only (kw_only=True)
class TarfileItem(FileSystemItem):
pass
name: PurePosixPath
"""TAR uses POSIX paths as item identifiers. Not all POSIX paths can
be represented on all (non-POSIX) file systems, therefore the item
name is represented in POSIX form, instead of a platform-dependent
``PurePath``."""
link_target: PurePosixPath | None = None
"""Just as for ``name``, a link target is also reported in POSIX
format."""


def iter_tar(
Expand Down Expand Up @@ -63,14 +69,14 @@ def iter_tar(
else FileSystemItemType.hardlink if member.islnk() \
else FileSystemItemType.specialfile
item = TarfileItem(
name=PurePath(PurePosixPath(member.name)),
name=PurePosixPath(member.name),
type=mtype,
size=member.size,
mode=member.mode,
mtime=member.mtime,
uid=member.uid,
gid=member.gid,
link_target=PurePath(PurePosixPath(member.linkname))
link_target=PurePosixPath(member.linkname)
if member.linkname else None,
)
if fp and mtype in (
Expand Down
18 changes: 9 additions & 9 deletions datalad_next/iter_collections/tests/test_itertar.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pathlib import PurePath
from pathlib import PurePosixPath
import pytest

from datalad.api import download
Expand Down Expand Up @@ -47,24 +47,24 @@ def test_iter_tar(sample_tar_xz):
'md5': 'ba1f2511fc30423bdbb183fe33f3dd0f'}
targets = [
TarfileItem(
name=PurePath('test-archive'),
name=PurePosixPath('test-archive'),
type=FileSystemItemType.directory,
size=0,
mtime=1683657433,
mode=509,
uid=1000,
gid=1000),
TarfileItem(
name=PurePath('test-archive') / '123.txt',
name=PurePosixPath('test-archive') / '123.txt',
type=FileSystemItemType.symlink,
size=0,
mtime=1683657414,
mode=511,
uid=1000,
gid=1000,
link_target=PurePath('subdir') / 'onetwothree_again.txt'),
link_target=PurePosixPath('subdir') / 'onetwothree_again.txt'),
TarfileItem(
name=PurePath('test-archive') / '123_hard.txt',
name=PurePosixPath('test-archive') / '123_hard.txt',
type=FileSystemItemType.file,
size=4,
mtime=1683657364,
Expand All @@ -73,15 +73,15 @@ def test_iter_tar(sample_tar_xz):
gid=1000,
link_target=None),
TarfileItem(
name=PurePath('test-archive') / 'subdir',
name=PurePosixPath('test-archive') / 'subdir',
type=FileSystemItemType.directory,
size=0,
mtime=1683657400,
mode=509,
uid=1000,
gid=1000),
TarfileItem(
name=PurePath('test-archive') / 'subdir' / 'onetwothree_again.txt',
name=PurePosixPath('test-archive') / 'subdir' / 'onetwothree_again.txt',
type=FileSystemItemType.file,
size=4,
mtime=1683657400,
Expand All @@ -90,14 +90,14 @@ def test_iter_tar(sample_tar_xz):
gid=1000,
link_target=None),
TarfileItem(
name=PurePath('test-archive') / 'onetwothree.txt',
name=PurePosixPath('test-archive') / 'onetwothree.txt',
type=FileSystemItemType.hardlink,
size=0,
mtime=1683657364,
mode=436,
uid=1000,
gid=1000,
link_target=PurePath('test-archive') / '123_hard.txt'),
link_target=PurePosixPath('test-archive') / '123_hard.txt'),
]
ires = []
for i in iter_tar(sample_tar_xz, fp=True):
Expand Down

0 comments on commit 942080e

Please sign in to comment.