From a8615897cb6e80ffd48343c493f2c159fb773e5f Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 6 May 2024 23:50:41 +0100 Subject: [PATCH] Sync from upstream --- cpython | 2 +- pathlib_abc/__init__.py | 267 ++++----------------------- pathlib_abc/_fnmatch.py | 73 ++++++++ pathlib_abc/_glob.py | 374 +++++++++++++++++++++++++++++--------- tests/test_pathlib_abc.py | 93 +++------- 5 files changed, 414 insertions(+), 395 deletions(-) create mode 100644 pathlib_abc/_fnmatch.py diff --git a/cpython b/cpython index 1216b5e..c1ff089 160000 --- a/cpython +++ b/cpython @@ -1 +1 @@ -Subproject commit 1216b5e3043567de4b3b5bc2c3cf4fa3d575683d +Subproject commit c1ff0895ad9ad8c53a308dc28b8775c8c1ef2dcc diff --git a/pathlib_abc/__init__.py b/pathlib_abc/__init__.py index 8a04748..10845d9 100644 --- a/pathlib_abc/__init__.py +++ b/pathlib_abc/__init__.py @@ -17,6 +17,11 @@ from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO +from ._glob import Globber, no_recurse_symlinks + + +__all__ = ["UnsupportedOperation"] + # # Internals # @@ -42,115 +47,6 @@ def _ignore_error(exception): def _is_case_sensitive(parser): return parser.normcase('Aa') == 'Aa' -# -# Globbing helpers -# - -re = glob = None - - -@functools.lru_cache(maxsize=512) -def _compile_pattern(pat, sep, case_sensitive, recursive=True): - """Compile given glob pattern to a re.Pattern object (observing case - sensitivity).""" - global re, glob - if re is None: - import re - from . import _glob as glob - - flags = 0 if case_sensitive else re.IGNORECASE - regex = glob.translate(pat, recursive=recursive, include_hidden=True, seps=sep) - return re.compile(regex, flags=flags).match - - -def _select_special(paths, part): - """Yield special literal children of the given paths.""" - for path in paths: - yield path._make_child_relpath(part) - - -def _select_children(parent_paths, dir_only, follow_symlinks, match): - """Yield direct children of given paths, filtering by name and type.""" - if follow_symlinks is None: - follow_symlinks = True - for parent_path in parent_paths: - try: - # We must close the scandir() object before proceeding to - # avoid exhausting file descriptors when globbing deep trees. - with parent_path._scandir() as scandir_it: - entries = list(scandir_it) - except OSError: - pass - else: - for entry in entries: - if dir_only: - try: - if not entry.is_dir(follow_symlinks=follow_symlinks): - continue - except OSError: - continue - # Avoid cost of making a path object for non-matching paths by - # matching against the os.DirEntry.name string. - if match is None or match(entry.name): - yield parent_path._make_child_direntry(entry) - - -def _select_recursive(parent_paths, dir_only, follow_symlinks, match): - """Yield given paths and all their children, recursively, filtering by - string and type. - """ - if follow_symlinks is None: - follow_symlinks = False - for parent_path in parent_paths: - if match is not None: - # If we're filtering paths through a regex, record the length of - # the parent path. We'll pass it to match(path, pos=...) later. - parent_len = len(str(parent_path._make_child_relpath('_'))) - 1 - paths = [parent_path._make_child_relpath('')] - while paths: - path = paths.pop() - if match is None or match(str(path), parent_len): - # Yield *directory* path that matches pattern (if any). - yield path - try: - # We must close the scandir() object before proceeding to - # avoid exhausting file descriptors when globbing deep trees. - with path._scandir() as scandir_it: - entries = list(scandir_it) - except OSError: - pass - else: - for entry in entries: - # Handle directory entry. - try: - if entry.is_dir(follow_symlinks=follow_symlinks): - # Recurse into this directory. - paths.append(path._make_child_direntry(entry)) - continue - except OSError: - pass - - # Handle file entry. - if not dir_only: - # Avoid cost of making a path object for non-matching - # files by matching against the os.DirEntry object. - if match is None or match(path._direntry_str(entry), parent_len): - # Yield *file* path that matches pattern (if any). - yield path._make_child_direntry(entry) - - -def _select_unique(paths): - """Yields the given paths, filtering out duplicates.""" - yielded = set() - try: - for path in paths: - path_str = str(path) - if path_str not in yielded: - yield path - yielded.add(path_str) - finally: - yielded.clear() - class UnsupportedOperation(NotImplementedError): """An exception that is raised when an unsupported operation is called on @@ -225,6 +121,7 @@ class PurePathBase(abc.ABC): '_resolving', ) parser = ParserBase() + _globber = Globber def __init__(self, path, *paths): self._raw_path = self.parser.join(path, *paths) if paths else path @@ -461,14 +358,6 @@ def is_absolute(self): a drive).""" return self.parser.isabs(self._raw_path) - @property - def _pattern_stack(self): - """Stack of path components, to be used with patterns in glob().""" - anchor, parts = self._stack - if anchor: - raise NotImplementedError("Non-relative patterns are unsupported") - return parts - @property def _pattern_str(self): """The path expressed as a string, for use in pattern-matching.""" @@ -494,8 +383,9 @@ def match(self, path_pattern, *, case_sensitive=None): return False if len(path_parts) > len(pattern_parts) and path_pattern.anchor: return False + globber = self._globber(sep, case_sensitive) for path_part, pattern_part in zip(path_parts, pattern_parts): - match = _compile_pattern(pattern_part, sep, case_sensitive, recursive=False) + match = globber.compile(pattern_part) if match(path_part) is None: return False return True @@ -509,7 +399,8 @@ def full_match(self, pattern, *, case_sensitive=None): pattern = self.with_segments(pattern) if case_sensitive is None: case_sensitive = _is_case_sensitive(self.parser) - match = _compile_pattern(pattern._pattern_str, pattern.parser.sep, case_sensitive) + globber = self._globber(pattern.parser.sep, case_sensitive, recursive=True) + match = globber.compile(pattern._pattern_str) return match(self._pattern_str) is not None @@ -727,7 +618,7 @@ def samefile(self, other_path): def open(self, mode='r', buffering=-1, encoding=None, errors=None, newline=None): """ - Open the file pointed by this path and return a file object, as + Open the file pointed to by this path and return a file object, as the built-in open() function does. """ raise UnsupportedOperation(self._unsupported_msg('open()')) @@ -773,83 +664,32 @@ def iterdir(self): """ raise UnsupportedOperation(self._unsupported_msg('iterdir()')) - def _scandir(self): - # Emulate os.scandir(), which returns an object that can be used as a - # context manager. This method is called by walk() and glob(). - from contextlib import nullcontext - return nullcontext(self.iterdir()) - - def _direntry_str(self, entry): - # Transform an entry yielded from _scandir() into a path string. - # PathBase._scandir() yields PathBase objects, so use str(). - return str(entry) - - def _make_child_direntry(self, entry): - # Transform an entry yielded from _scandir() into a path object. - # PathBase._scandir() yields PathBase objects, so this is a no-op. - return entry - - def _make_child_relpath(self, name): - return self.joinpath(name) - - def glob(self, pattern, *, case_sensitive=None, follow_symlinks=True): + def _glob_selector(self, parts, case_sensitive, recurse_symlinks): + if case_sensitive is None: + case_sensitive = _is_case_sensitive(self.parser) + case_pedantic = False + else: + # The user has expressed a case sensitivity choice, but we don't + # know the case sensitivity of the underlying filesystem, so we + # must use scandir() for everything, including non-wildcard parts. + case_pedantic = True + recursive = True if recurse_symlinks else no_recurse_symlinks + globber = self._globber(self.parser.sep, case_sensitive, case_pedantic, recursive) + return globber.selector(parts) + + def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=True): """Iterate over this subtree and yield all existing files (of any kind, including directories) matching the given relative pattern. """ if not isinstance(pattern, PurePathBase): pattern = self.with_segments(pattern) - if case_sensitive is None: - # TODO: evaluate case-sensitivity of each directory in _select_children(). - case_sensitive = _is_case_sensitive(self.parser) - - stack = pattern._pattern_stack - specials = ('', '.', '..') - deduplicate_paths = False - sep = self.parser.sep - paths = iter([self] if self.is_dir() else []) - while stack: - part = stack.pop() - if part in specials: - # Join special component (e.g. '..') onto paths. - paths = _select_special(paths, part) - - elif part == '**': - # Consume following '**' components, which have no effect. - while stack and stack[-1] == '**': - stack.pop() - - # Consume following non-special components, provided we're - # treating symlinks consistently. Each component is joined - # onto 'part', which is used to generate an re.Pattern object. - if follow_symlinks is not None: - while stack and stack[-1] not in specials: - part += sep + stack.pop() - - # If the previous loop consumed pattern components, compile an - # re.Pattern object based on those components. - match = _compile_pattern(part, sep, case_sensitive) if part != '**' else None - - # Recursively walk directories, filtering by type and regex. - paths = _select_recursive(paths, bool(stack), follow_symlinks, match) - - # De-duplicate if we've already seen a '**' component. - if deduplicate_paths: - paths = _select_unique(paths) - deduplicate_paths = True - - elif '**' in part: - raise ValueError("Invalid pattern: '**' can only be an entire path component") - - else: - # If the pattern component isn't '*', compile an re.Pattern - # object based on the component. - match = _compile_pattern(part, sep, case_sensitive) if part != '*' else None - - # Iterate over directories' children filtering by type and regex. - paths = _select_children(paths, bool(stack), follow_symlinks, match) - return paths + anchor, parts = pattern._stack + if anchor: + raise NotImplementedError("Non-relative patterns are unsupported") + select = self._glob_selector(parts, case_sensitive, recurse_symlinks) + return select(self) - def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=True): + def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=True): """Recursively yield all existing files (of any kind, including directories) matching the given relative pattern, anywhere in this subtree. @@ -857,52 +697,11 @@ def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=True): if not isinstance(pattern, PurePathBase): pattern = self.with_segments(pattern) pattern = '**' / pattern - return self.glob(pattern, case_sensitive=case_sensitive, follow_symlinks=follow_symlinks) + return self.glob(pattern, case_sensitive=case_sensitive, recurse_symlinks=recurse_symlinks) def walk(self, top_down=True, on_error=None, follow_symlinks=False): """Walk the directory tree from this directory, similar to os.walk().""" - paths = [self] - - while paths: - path = paths.pop() - if isinstance(path, tuple): - yield path - continue - - # We may not have read permission for self, in which case we can't - # get a list of the files the directory contains. os.walk() - # always suppressed the exception in that instance, rather than - # blow up for a minor reason when (say) a thousand readable - # directories are still left to visit. That logic is copied here. - try: - scandir_obj = path._scandir() - except OSError as error: - if on_error is not None: - on_error(error) - continue - - with scandir_obj as scandir_it: - dirnames = [] - filenames = [] - if not top_down: - paths.append((path, dirnames, filenames)) - for entry in scandir_it: - try: - is_dir = entry.is_dir(follow_symlinks=follow_symlinks) - except OSError: - # Carried over from os.path.isdir(). - is_dir = False - - if is_dir: - if not top_down: - paths.append(path._make_child_direntry(entry)) - dirnames.append(entry.name) - else: - filenames.append(entry.name) - - if top_down: - yield path, dirnames, filenames - paths += [path._make_child_relpath(d) for d in reversed(dirnames)] + return self._globber.walk(self, top_down, on_error, follow_symlinks) def absolute(self): """Return an absolute version of this path diff --git a/pathlib_abc/_fnmatch.py b/pathlib_abc/_fnmatch.py new file mode 100644 index 0000000..77cb3e7 --- /dev/null +++ b/pathlib_abc/_fnmatch.py @@ -0,0 +1,73 @@ +import re + + +def _translate(pat, STAR, QUESTION_MARK): + res = [] + add = res.append + i, n = 0, len(pat) + while i < n: + c = pat[i] + i = i+1 + if c == '*': + # compress consecutive `*` into one + if (not res) or res[-1] is not STAR: + add(STAR) + elif c == '?': + add(QUESTION_MARK) + elif c == '[': + j = i + if j < n and pat[j] == '!': + j = j+1 + if j < n and pat[j] == ']': + j = j+1 + while j < n and pat[j] != ']': + j = j+1 + if j >= n: + add('\\[') + else: + stuff = pat[i:j] + if '-' not in stuff: + stuff = stuff.replace('\\', r'\\') + else: + chunks = [] + k = i+2 if pat[i] == '!' else i+1 + while True: + k = pat.find('-', k, j) + if k < 0: + break + chunks.append(pat[i:k]) + i = k+1 + k = k+3 + chunk = pat[i:j] + if chunk: + chunks.append(chunk) + else: + chunks[-1] += '-' + # Remove empty ranges -- invalid in RE. + for k in range(len(chunks)-1, 0, -1): + if chunks[k-1][-1] > chunks[k][0]: + chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:] + del chunks[k] + # Escape backslashes and hyphens for set difference (--). + # Hyphens that create ranges shouldn't be escaped. + stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-') + for s in chunks) + # Escape set operations (&&, ~~ and ||). + stuff = re.sub(r'([&~|])', r'\\\1', stuff) + i = j+1 + if not stuff: + # Empty range: never match. + add('(?!)') + elif stuff == '!': + # Negated empty range: match any character. + add('.') + else: + if stuff[0] == '!': + stuff = '^' + stuff[1:] + elif stuff[0] in ('^', '['): + stuff = '\\' + stuff + add(f'[{stuff}]') + else: + add(re.escape(c)) + assert i == n + return res diff --git a/pathlib_abc/_glob.py b/pathlib_abc/_glob.py index ef8e004..0c89c85 100644 --- a/pathlib_abc/_glob.py +++ b/pathlib_abc/_glob.py @@ -1,85 +1,21 @@ import os import re +from . import _fnmatch as fnmatch +import functools +import operator -def _translate(pat, STAR, QUESTION_MARK): - res = [] - add = res.append - i, n = 0, len(pat) - while i < n: - c = pat[i] - i = i+1 - if c == '*': - # compress consecutive `*` into one - if (not res) or res[-1] is not STAR: - add(STAR) - elif c == '?': - add(QUESTION_MARK) - elif c == '[': - j = i - if j < n and pat[j] == '!': - j = j+1 - if j < n and pat[j] == ']': - j = j+1 - while j < n and pat[j] != ']': - j = j+1 - if j >= n: - add('\\[') - else: - stuff = pat[i:j] - if '-' not in stuff: - stuff = stuff.replace('\\', r'\\') - else: - chunks = [] - k = i+2 if pat[i] == '!' else i+1 - while True: - k = pat.find('-', k, j) - if k < 0: - break - chunks.append(pat[i:k]) - i = k+1 - k = k+3 - chunk = pat[i:j] - if chunk: - chunks.append(chunk) - else: - chunks[-1] += '-' - # Remove empty ranges -- invalid in RE. - for k in range(len(chunks)-1, 0, -1): - if chunks[k-1][-1] > chunks[k][0]: - chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:] - del chunks[k] - # Escape backslashes and hyphens for set difference (--). - # Hyphens that create ranges shouldn't be escaped. - stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-') - for s in chunks) - # Escape set operations (&&, ~~ and ||). - stuff = re.sub(r'([&~|])', r'\\\1', stuff) - i = j+1 - if not stuff: - # Empty range: never match. - add('(?!)') - elif stuff == '!': - # Negated empty range: match any character. - add('.') - else: - if stuff[0] == '!': - stuff = '^' + stuff[1:] - elif stuff[0] in ('^', '['): - stuff = '\\' + stuff - add(f'[{stuff}]') - else: - add(re.escape(c)) - assert i == n - return res +special_parts = ('', '.', '..') +magic_check = re.compile('([*?[])') +magic_check_bytes = re.compile(b'([*?[])') +no_recurse_symlinks = object() def translate(pat, *, recursive=False, include_hidden=False, seps=None): """Translate a pathname with shell wildcards to a regular expression. If `recursive` is true, the pattern segment '**' will match any number of - path segments; if '**' appears outside its own segment, ValueError will be - raised. + path segments. If `include_hidden` is true, wildcards can match path segments beginning with a dot ('.'). @@ -113,22 +49,282 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None): for idx, part in enumerate(parts): if part == '*': results.append(one_segment if idx < last_part_idx else one_last_segment) - continue - if recursive: - if part == '**': - if idx < last_part_idx: - if parts[idx + 1] != '**': - results.append(any_segments) - else: - results.append(any_last_segments) - continue - elif '**' in part: - raise ValueError("Invalid pattern: '**' can only be an entire path component") - if part: - if not include_hidden and part[0] in '*?': - results.append(r'(?!\.)') - results.extend(_translate(part, f'{not_sep}*', not_sep)) - if idx < last_part_idx: - results.append(any_sep) + elif recursive and part == '**': + if idx < last_part_idx: + if parts[idx + 1] != '**': + results.append(any_segments) + else: + results.append(any_last_segments) + else: + if part: + if not include_hidden and part[0] in '*?': + results.append(r'(?!\.)') + results.extend(fnmatch._translate(part, f'{not_sep}*', not_sep)) + if idx < last_part_idx: + results.append(any_sep) res = ''.join(results) return fr'(?s:{res})\Z' + + +@functools.lru_cache(maxsize=512) +def compile_pattern(pat, sep, case_sensitive, recursive=True): + """Compile given glob pattern to a re.Pattern object (observing case + sensitivity).""" + flags = 0 if case_sensitive else re.IGNORECASE + regex = translate(pat, recursive=recursive, include_hidden=True, seps=sep) + return re.compile(regex, flags=flags).match + + +class Globber: + """Class providing shell-style pattern matching and globbing. + """ + + def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False): + self.sep = sep + self.case_sensitive = case_sensitive + self.case_pedantic = case_pedantic + self.recursive = recursive + + # Low-level methods + + lstat = operator.methodcaller('lstat') + add_slash = operator.methodcaller('joinpath', '') + + @staticmethod + def scandir(path): + """Emulates os.scandir(), which returns an object that can be used as + a context manager. This method is called by walk() and glob(). + """ + from contextlib import nullcontext + return nullcontext(path.iterdir()) + + @staticmethod + def concat_path(path, text): + """Appends text to the given path. + """ + return path.with_segments(path._raw_path + text) + + @staticmethod + def parse_entry(entry): + """Returns the path of an entry yielded from scandir(). + """ + return entry + + # High-level methods + + def compile(self, pat): + return compile_pattern(pat, self.sep, self.case_sensitive, self.recursive) + + def selector(self, parts): + """Returns a function that selects from a given path, walking and + filtering according to the glob-style pattern parts in *parts*. + """ + if not parts: + return self.select_exists + part = parts.pop() + if self.recursive and part == '**': + selector = self.recursive_selector + elif part in special_parts: + selector = self.special_selector + elif not self.case_pedantic and magic_check.search(part) is None: + selector = self.literal_selector + else: + selector = self.wildcard_selector + return selector(part, parts) + + def special_selector(self, part, parts): + """Returns a function that selects special children of the given path. + """ + select_next = self.selector(parts) + + def select_special(path, exists=False): + path = self.concat_path(self.add_slash(path), part) + return select_next(path, exists) + return select_special + + def literal_selector(self, part, parts): + """Returns a function that selects a literal descendant of a path. + """ + + # Optimization: consume and join any subsequent literal parts here, + # rather than leaving them for the next selector. This reduces the + # number of string concatenation operations and calls to add_slash(). + while parts and magic_check.search(parts[-1]) is None: + part += self.sep + parts.pop() + + select_next = self.selector(parts) + + def select_literal(path, exists=False): + path = self.concat_path(self.add_slash(path), part) + return select_next(path, exists=False) + return select_literal + + def wildcard_selector(self, part, parts): + """Returns a function that selects direct children of a given path, + filtering by pattern. + """ + + match = None if part == '*' else self.compile(part) + dir_only = bool(parts) + if dir_only: + select_next = self.selector(parts) + + def select_wildcard(path, exists=False): + try: + # We must close the scandir() object before proceeding to + # avoid exhausting file descriptors when globbing deep trees. + with self.scandir(path) as scandir_it: + entries = list(scandir_it) + except OSError: + pass + else: + for entry in entries: + if match is None or match(entry.name): + if dir_only: + try: + if not entry.is_dir(): + continue + except OSError: + continue + entry_path = self.parse_entry(entry) + if dir_only: + yield from select_next(entry_path, exists=True) + else: + yield entry_path + return select_wildcard + + def recursive_selector(self, part, parts): + """Returns a function that selects a given path and all its children, + recursively, filtering by pattern. + """ + # Optimization: consume following '**' parts, which have no effect. + while parts and parts[-1] == '**': + parts.pop() + + # Optimization: consume and join any following non-special parts here, + # rather than leaving them for the next selector. They're used to + # build a regular expression, which we use to filter the results of + # the recursive walk. As a result, non-special pattern segments + # following a '**' wildcard don't require additional filesystem access + # to expand. + follow_symlinks = self.recursive is not no_recurse_symlinks + if follow_symlinks: + while parts and parts[-1] not in special_parts: + part += self.sep + parts.pop() + + match = None if part == '**' else self.compile(part) + dir_only = bool(parts) + select_next = self.selector(parts) + + def select_recursive(path, exists=False): + path = self.add_slash(path) + match_pos = len(str(path)) + if match is None or match(str(path), match_pos): + yield from select_next(path, exists) + stack = [path] + while stack: + yield from select_recursive_step(stack, match_pos) + + def select_recursive_step(stack, match_pos): + path = stack.pop() + try: + # We must close the scandir() object before proceeding to + # avoid exhausting file descriptors when globbing deep trees. + with self.scandir(path) as scandir_it: + entries = list(scandir_it) + except OSError: + pass + else: + for entry in entries: + is_dir = False + try: + if entry.is_dir(follow_symlinks=follow_symlinks): + is_dir = True + except OSError: + pass + + if is_dir or not dir_only: + entry_path = self.parse_entry(entry) + if match is None or match(str(entry_path), match_pos): + if dir_only: + yield from select_next(entry_path, exists=True) + else: + # Optimization: directly yield the path if this is + # last pattern part. + yield entry_path + if is_dir: + stack.append(entry_path) + + return select_recursive + + def select_exists(self, path, exists=False): + """Yields the given path, if it exists. + """ + if exists: + # Optimization: this path is already known to exist, e.g. because + # it was returned from os.scandir(), so we skip calling lstat(). + yield path + else: + try: + self.lstat(path) + yield path + except OSError: + pass + + @classmethod + def walk(cls, root, top_down, on_error, follow_symlinks): + """Walk the directory tree from the given root, similar to os.walk(). + """ + paths = [root] + while paths: + path = paths.pop() + if isinstance(path, tuple): + yield path + continue + try: + with cls.scandir(path) as scandir_it: + dirnames = [] + filenames = [] + if not top_down: + paths.append((path, dirnames, filenames)) + for entry in scandir_it: + name = entry.name + try: + if entry.is_dir(follow_symlinks=follow_symlinks): + if not top_down: + paths.append(cls.parse_entry(entry)) + dirnames.append(name) + else: + filenames.append(name) + except OSError: + filenames.append(name) + except OSError as error: + if on_error is not None: + on_error(error) + else: + if top_down: + yield path, dirnames, filenames + if dirnames: + prefix = cls.add_slash(path) + paths += [cls.concat_path(prefix, d) for d in reversed(dirnames)] + + +class StringGlobber(Globber): + lstat = staticmethod(os.lstat) + scandir = staticmethod(os.scandir) + parse_entry = operator.attrgetter('path') + concat_path = operator.add + + if os.name == 'nt': + @staticmethod + def add_slash(pathname): + tail = os.path.splitroot(pathname)[2] + if not tail or tail[-1] in '\\/': + return pathname + return f'{pathname}\\' + else: + @staticmethod + def add_slash(pathname): + if not pathname or pathname[-1] == '/': + return pathname + return f'{pathname}/' diff --git a/tests/test_pathlib_abc.py b/tests/test_pathlib_abc.py index d0232e1..b52a3f0 100644 --- a/tests/test_pathlib_abc.py +++ b/tests/test_pathlib_abc.py @@ -9,6 +9,7 @@ import posixpath TESTFN = "TESTFN" +is_wasi = False _tests_needing_posix = set() @@ -1429,10 +1430,10 @@ def __repr__(self): return "{}({!r})".format(self.__class__.__name__, self.as_posix()) def stat(self, *, follow_symlinks=True): - if follow_symlinks: - path = str(self.resolve()) + if follow_symlinks or self.name in ('', '.', '..'): + path = str(self.resolve(strict=True)) else: - path = str(self.parent.resolve() / self.name) + path = str(self.parent.resolve(strict=True) / self.name) if path in self._files: st_mode = stat.S_IFREG elif path in self._directories: @@ -1741,8 +1742,9 @@ def _check(glob, expected): def test_glob_posix(self): P = self.cls p = P(self.base) + q = p / "FILEa" given = set(p.glob("FILEa")) - expect = set() + expect = {q} if q.exists() else set() self.assertEqual(given, expect) self.assertEqual(set(p.glob("FILEa*")), set()) @@ -1753,8 +1755,6 @@ def test_glob_windows(self): self.assertEqual(set(p.glob("FILEa")), { P(self.base, "fileA") }) self.assertEqual(set(p.glob("*a\\")), { P(self.base, "dirA/") }) self.assertEqual(set(p.glob("F*a")), { P(self.base, "fileA") }) - self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\fileA"}) - self.assertEqual(set(map(str, p.glob("F*a"))), {f"{p}\\fileA"}) def test_glob_empty_pattern(self): P = self.cls @@ -1776,9 +1776,9 @@ def _check(path, pattern, case_sensitive, expected): _check(path, "dirb/file*", False, ["dirB/fileB"]) @needs_symlinks - def test_glob_follow_symlinks_common(self): + def test_glob_recurse_symlinks_common(self): def _check(path, glob, expected): - actual = {path for path in path.glob(glob, follow_symlinks=True) + actual = {path for path in path.glob(glob, recurse_symlinks=True) if path.parts.count("linkD") <= 1} # exclude symlink loop. self.assertEqual(actual, { P(self.base, q) for q in expected }) P = self.cls @@ -1812,39 +1812,9 @@ def _check(path, glob, expected): _check(p, "*/dirD/**", ["dirC/dirD/", "dirC/dirD/fileD"]) _check(p, "*/dirD/**/", ["dirC/dirD/"]) - @needs_symlinks - def test_glob_no_follow_symlinks_common(self): - def _check(path, glob, expected): - actual = {path for path in path.glob(glob, follow_symlinks=False)} - self.assertEqual(actual, { P(self.base, q) for q in expected }) - P = self.cls - p = P(self.base) - _check(p, "fileB", []) - _check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"]) - _check(p, "*A", ["dirA", "fileA", "linkA"]) - _check(p, "*B/*", ["dirB/fileB", "dirB/linkD"]) - _check(p, "*/fileB", ["dirB/fileB"]) - _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirE/"]) - _check(p, "dir*/*/..", ["dirC/dirD/.."]) - _check(p, "dir*/**", [ - "dirA/", "dirA/linkC", - "dirB/", "dirB/fileB", "dirB/linkD", - "dirC/", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt", - "dirE/"]) - _check(p, "dir*/**/", ["dirA/", "dirB/", "dirC/", "dirC/dirD/", "dirE/"]) - _check(p, "dir*/**/..", ["dirA/..", "dirB/..", "dirC/..", "dirC/dirD/..", "dirE/.."]) - _check(p, "dir*/*/**", ["dirC/dirD/", "dirC/dirD/fileD"]) - _check(p, "dir*/*/**/", ["dirC/dirD/"]) - _check(p, "dir*/*/**/..", ["dirC/dirD/.."]) - _check(p, "dir*/**/fileC", ["dirC/fileC"]) - _check(p, "dir*/*/../dirD/**", ["dirC/dirD/../dirD/", "dirC/dirD/../dirD/fileD"]) - _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD/"]) - _check(p, "*/dirD/**", ["dirC/dirD/", "dirC/dirD/fileD"]) - _check(p, "*/dirD/**/", ["dirC/dirD/"]) - - def test_rglob_follow_symlinks_none(self): + def test_rglob_recurse_symlinks_false(self): def _check(path, glob, expected): - actual = set(path.rglob(glob, follow_symlinks=None)) + actual = set(path.rglob(glob, recurse_symlinks=False)) self.assertEqual(actual, { P(self.base, q) for q in expected }) P = self.cls p = P(self.base) @@ -1887,8 +1857,9 @@ def _check(path, glob, expected): def test_rglob_posix(self): P = self.cls p = P(self.base, "dirC") + q = p / "dirD" / "FILEd" given = set(p.rglob("FILEd")) - expect = set() + expect = {q} if q.exists() else set() self.assertEqual(given, expect) self.assertEqual(set(p.rglob("FILEd*")), set()) @@ -1898,12 +1869,11 @@ def test_rglob_windows(self): p = P(self.base, "dirC") self.assertEqual(set(p.rglob("FILEd")), { P(self.base, "dirC/dirD/fileD") }) self.assertEqual(set(p.rglob("*\\")), { P(self.base, "dirC/dirD/") }) - self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\fileD"}) @needs_symlinks - def test_rglob_follow_symlinks_common(self): + def test_rglob_recurse_symlinks_common(self): def _check(path, glob, expected): - actual = {path for path in path.rglob(glob, follow_symlinks=True) + actual = {path for path in path.rglob(glob, recurse_symlinks=True) if path.parts.count("linkD") <= 1} # exclude symlink loop. self.assertEqual(actual, { P(self.base, q) for q in expected }) P = self.cls @@ -1932,37 +1902,12 @@ def _check(path, glob, expected): _check(p, "*.txt", ["dirC/novel.txt"]) _check(p, "*.*", ["dirC/novel.txt"]) - @needs_symlinks - def test_rglob_no_follow_symlinks_common(self): - def _check(path, glob, expected): - actual = {path for path in path.rglob(glob, follow_symlinks=False)} - self.assertEqual(actual, { P(self.base, q) for q in expected }) - P = self.cls - p = P(self.base) - _check(p, "fileB", ["dirB/fileB"]) - _check(p, "*/fileA", []) - _check(p, "*/fileB", ["dirB/fileB"]) - _check(p, "file*", ["fileA", "dirB/fileB", "dirC/fileC", "dirC/dirD/fileD", ]) - _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirC/dirD/", "dirE/"]) - _check(p, "", ["", "dirA/", "dirB/", "dirC/", "dirE/", "dirC/dirD/"]) - - p = P(self.base, "dirC") - _check(p, "*", ["dirC/fileC", "dirC/novel.txt", - "dirC/dirD", "dirC/dirD/fileD"]) - _check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"]) - _check(p, "*/*", ["dirC/dirD/fileD"]) - _check(p, "*/", ["dirC/dirD/"]) - _check(p, "", ["dirC/", "dirC/dirD/"]) - # gh-91616, a re module regression - _check(p, "*.txt", ["dirC/novel.txt"]) - _check(p, "*.*", ["dirC/novel.txt"]) - @needs_symlinks def test_rglob_symlink_loop(self): # Don't get fooled by symlink loops (Issue #26012). P = self.cls p = P(self.base) - given = set(p.rglob('*', follow_symlinks=None)) + given = set(p.rglob('*', recurse_symlinks=False)) expect = {'brokenLink', 'dirA', 'dirA/linkC', 'dirB', 'dirB/fileB', 'dirB/linkD', @@ -1976,6 +1921,8 @@ def test_rglob_symlink_loop(self): } self.assertEqual(given, {p / x for x in expect}) + # See https://github.com/WebAssembly/wasi-filesystem/issues/26 + @unittest.skipIf(is_wasi, "WASI resolution of '..' parts doesn't match POSIX") def test_glob_dotdot(self): # ".." is not special in globs. P = self.cls @@ -1986,7 +1933,11 @@ def test_glob_dotdot(self): self.assertEqual(set(p.glob("dirA/../file*")), { P(self.base, "dirA/../fileA") }) self.assertEqual(set(p.glob("dirA/../file*/..")), set()) self.assertEqual(set(p.glob("../xyzzy")), set()) - self.assertEqual(set(p.glob("xyzzy/..")), set()) + if self.cls.parser is posixpath: + self.assertEqual(set(p.glob("xyzzy/..")), set()) + else: + # ".." segments are normalized first on Windows, so this path is stat()able. + self.assertEqual(set(p.glob("xyzzy/..")), { P(self.base, "xyzzy", "..") }) self.assertEqual(set(p.glob("/".join([".."] * 50))), { P(self.base, *[".."] * 50)}) @needs_symlinks