Skip to content

Commit

Permalink
Sync from upstream.
Browse files Browse the repository at this point in the history
  • Loading branch information
barneygale committed Jan 27, 2024
1 parent 7386535 commit 7748b5b
Show file tree
Hide file tree
Showing 3 changed files with 842 additions and 129 deletions.
2 changes: 1 addition & 1 deletion cpython
Submodule cpython updated 690 files
152 changes: 87 additions & 65 deletions pathlib_abc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ def _is_case_sensitive(pathmod):
re = glob = None


@functools.lru_cache(maxsize=256)
def _compile_pattern(pat, sep, case_sensitive):
@functools.lru_cache(maxsize=512)
def _compile_pattern(pat, sep, case_sensitive, recursive=True):
"""Compile given glob pattern to a re.Pattern object (observing case
sensitivity)."""
global re, glob
Expand All @@ -59,10 +59,16 @@ def _compile_pattern(pat, sep, case_sensitive):
from . import _glob as glob

flags = 0 if case_sensitive else re.IGNORECASE
regex = glob.translate(pat, recursive=True, include_hidden=True, seps=sep)
regex = glob.translate(pat, recursive=recursive, include_hidden=True, seps=sep)
return re.compile(regex, flags=flags).match


def _select_special(paths, part):
"""Yield special literal children of the given paths."""
for path in paths:
yield path._make_child_relpath(part)


def _select_children(parent_paths, dir_only, follow_symlinks, match):
"""Yield direct children of given paths, filtering by name and type."""
if follow_symlinks is None:
Expand All @@ -84,7 +90,7 @@ def _select_children(parent_paths, dir_only, follow_symlinks, match):
except OSError:
continue
if match(entry.name):
yield parent_path._make_child_entry(entry, dir_only)
yield parent_path._make_child_entry(entry)


def _select_recursive(parent_paths, dir_only, follow_symlinks):
Expand All @@ -107,7 +113,7 @@ def _select_recursive(parent_paths, dir_only, follow_symlinks):
for entry in entries:
try:
if entry.is_dir(follow_symlinks=follow_symlinks):
paths.append(path._make_child_entry(entry, dir_only))
paths.append(path._make_child_entry(entry))
continue
except OSError:
pass
Expand Down Expand Up @@ -427,24 +433,56 @@ def is_absolute(self):
a drive)."""
return self.pathmod.isabs(self._raw_path)

@property
def _pattern_stack(self):
"""Stack of path components, to be used with patterns in glob()."""
anchor, parts = self._stack
if anchor:
raise NotImplementedError("Non-relative patterns are unsupported")
return parts

@property
def _pattern_str(self):
"""The path expressed as a string, for use in pattern-matching."""
return str(self)

def match(self, path_pattern, *, case_sensitive=None):
"""
Return True if this path matches the given pattern.
Return True if this path matches the given pattern. If the pattern is
relative, matching is done from the right; otherwise, the entire path
is matched. The recursive wildcard '**' is *not* supported by this
method.
"""
if not isinstance(path_pattern, PurePathBase):
path_pattern = self.with_segments(path_pattern)
if case_sensitive is None:
case_sensitive = _is_case_sensitive(self.pathmod)
sep = path_pattern.pathmod.sep
pattern_str = str(path_pattern)
if path_pattern.anchor:
pass
elif path_pattern.parts:
pattern_str = f'**{sep}{pattern_str}'
else:
path_parts = self.parts[::-1]
pattern_parts = path_pattern.parts[::-1]
if not pattern_parts:
raise ValueError("empty pattern")
match = _compile_pattern(pattern_str, sep, case_sensitive)
return match(str(self)) is not None
if len(path_parts) < len(pattern_parts):
return False
if len(path_parts) > len(pattern_parts) and path_pattern.anchor:
return False
for path_part, pattern_part in zip(path_parts, pattern_parts):
match = _compile_pattern(pattern_part, sep, case_sensitive, recursive=False)
if match(path_part) is None:
return False
return True

def full_match(self, pattern, *, case_sensitive=None):
"""
Return True if this path matches the given glob-style pattern. The
pattern is matched against the entire path.
"""
if not isinstance(pattern, PurePathBase):
pattern = self.with_segments(pattern)
if case_sensitive is None:
case_sensitive = _is_case_sensitive(self.pathmod)
match = _compile_pattern(pattern._pattern_str, pattern.pathmod.sep, case_sensitive)
return match(self._pattern_str) is not None



Expand Down Expand Up @@ -714,10 +752,8 @@ def _scandir(self):
from contextlib import nullcontext
return nullcontext(self.iterdir())

def _make_child_entry(self, entry, is_dir=False):
def _make_child_entry(self, entry):
# Transform an entry yielded from _scandir() into a path object.
if is_dir:
return entry.joinpath('')
return entry

def _make_child_relpath(self, name):
Expand All @@ -727,57 +763,35 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
"""Iterate over this subtree and yield all existing files (of any
kind, including directories) matching the given relative pattern.
"""
path_pattern = self.with_segments(pattern)
if path_pattern.anchor:
raise NotImplementedError("Non-relative patterns are unsupported")
elif not path_pattern.parts:
raise ValueError("Unacceptable pattern: {!r}".format(pattern))

pattern_parts = list(path_pattern.parts)
if not self.pathmod.split(pattern)[1]:
# GH-65238: pathlib doesn't preserve trailing slash. Add it back.
pattern_parts.append('')

if not isinstance(pattern, PurePathBase):
pattern = self.with_segments(pattern)
if case_sensitive is None:
# TODO: evaluate case-sensitivity of each directory in _select_children().
case_sensitive = _is_case_sensitive(self.pathmod)

# If symlinks are handled consistently, and the pattern does not
# contain '..' components, then we can use a 'walk-and-match' strategy
# when expanding '**' wildcards. When a '**' wildcard is encountered,
# all following pattern parts are immediately consumed and used to
# build a `re.Pattern` object. This pattern is used to filter the
# recursive walk. As a result, pattern parts following a '**' wildcard
# do not perform any filesystem access, which can be much faster!
filter_paths = follow_symlinks is not None and '..' not in pattern_parts
stack = pattern._pattern_stack
specials = ('', '.', '..')
filter_paths = False
deduplicate_paths = False
sep = self.pathmod.sep
paths = iter([self.joinpath('')] if self.is_dir() else [])
part_idx = 0
while part_idx < len(pattern_parts):
part = pattern_parts[part_idx]
part_idx += 1
if part == '':
# Trailing slash.
pass
elif part == '..':
paths = (path._make_child_relpath('..') for path in paths)
paths = iter([self] if self.is_dir() else [])
while stack:
part = stack.pop()
if part in specials:
paths = _select_special(paths, part)
elif part == '**':
# Consume adjacent '**' components.
while part_idx < len(pattern_parts) and pattern_parts[part_idx] == '**':
part_idx += 1
while stack and stack[-1] == '**':
stack.pop()

if filter_paths and part_idx < len(pattern_parts) and pattern_parts[part_idx] != '':
dir_only = pattern_parts[-1] == ''
paths = _select_recursive(paths, dir_only, follow_symlinks)
# Consume adjacent non-special components and enable post-walk
# regex filtering, provided we're treating symlinks consistently.
if follow_symlinks is not None:
while stack and stack[-1] not in specials:
filter_paths = True
stack.pop()

# Filter out paths that don't match pattern.
prefix_len = len(str(self._make_child_relpath('_'))) - 1
match = _compile_pattern(str(path_pattern), sep, case_sensitive)
paths = (path for path in paths if match(str(path), prefix_len))
return paths

dir_only = part_idx < len(pattern_parts)
dir_only = bool(stack)
paths = _select_recursive(paths, dir_only, follow_symlinks)
if deduplicate_paths:
# De-duplicate if we've already seen a '**' component.
Expand All @@ -786,18 +800,25 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
elif '**' in part:
raise ValueError("Invalid pattern: '**' can only be an entire path component")
else:
dir_only = part_idx < len(pattern_parts)
dir_only = bool(stack)
match = _compile_pattern(part, sep, case_sensitive)
paths = _select_children(paths, dir_only, follow_symlinks, match)
if filter_paths:
# Filter out paths that don't match pattern.
prefix_len = len(str(self._make_child_relpath('_'))) - 1
match = _compile_pattern(pattern._pattern_str, sep, case_sensitive)
paths = (path for path in paths if match(path._pattern_str, prefix_len))
return paths

def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
"""Recursively yield all existing files (of any kind, including
directories) matching the given relative pattern, anywhere in
this subtree.
"""
return self.glob(
f'**/{pattern}', case_sensitive=case_sensitive, follow_symlinks=follow_symlinks)
if not isinstance(pattern, PurePathBase):
pattern = self.with_segments(pattern)
pattern = '**' / pattern
return self.glob(pattern, case_sensitive=case_sensitive, follow_symlinks=follow_symlinks)

def walk(self, top_down=True, on_error=None, follow_symlinks=False):
"""Walk the directory tree from this directory, similar to os.walk()."""
Expand All @@ -824,6 +845,8 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False):
with scandir_obj as scandir_it:
dirnames = []
filenames = []
if not top_down:
paths.append((path, dirnames, filenames))
for entry in scandir_it:
try:
is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
Expand All @@ -832,16 +855,15 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False):
is_dir = False

if is_dir:
if not top_down:
paths.append(path._make_child_entry(entry))
dirnames.append(entry.name)
else:
filenames.append(entry.name)

if top_down:
yield path, dirnames, filenames
else:
paths.append((path, dirnames, filenames))

paths += [path._make_child_relpath(d) for d in reversed(dirnames)]
paths += [path._make_child_relpath(d) for d in reversed(dirnames)]

def absolute(self):
"""Return an absolute version of this path
Expand Down
Loading

0 comments on commit 7748b5b

Please sign in to comment.