Skip to content

Commit

Permalink
Sync from upstream
Browse files Browse the repository at this point in the history
  • Loading branch information
barneygale committed May 6, 2024
1 parent c56cae6 commit a861589
Show file tree
Hide file tree
Showing 5 changed files with 414 additions and 395 deletions.
2 changes: 1 addition & 1 deletion cpython
Submodule cpython updated 1185 files
267 changes: 33 additions & 234 deletions pathlib_abc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL
from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO

from ._glob import Globber, no_recurse_symlinks


__all__ = ["UnsupportedOperation"]

#
# Internals
#
Expand All @@ -42,115 +47,6 @@ def _ignore_error(exception):
def _is_case_sensitive(parser):
return parser.normcase('Aa') == 'Aa'

#
# Globbing helpers
#

re = glob = None


@functools.lru_cache(maxsize=512)
def _compile_pattern(pat, sep, case_sensitive, recursive=True):
"""Compile given glob pattern to a re.Pattern object (observing case
sensitivity)."""
global re, glob
if re is None:
import re
from . import _glob as glob

flags = 0 if case_sensitive else re.IGNORECASE
regex = glob.translate(pat, recursive=recursive, include_hidden=True, seps=sep)
return re.compile(regex, flags=flags).match


def _select_special(paths, part):
"""Yield special literal children of the given paths."""
for path in paths:
yield path._make_child_relpath(part)


def _select_children(parent_paths, dir_only, follow_symlinks, match):
"""Yield direct children of given paths, filtering by name and type."""
if follow_symlinks is None:
follow_symlinks = True
for parent_path in parent_paths:
try:
# We must close the scandir() object before proceeding to
# avoid exhausting file descriptors when globbing deep trees.
with parent_path._scandir() as scandir_it:
entries = list(scandir_it)
except OSError:
pass
else:
for entry in entries:
if dir_only:
try:
if not entry.is_dir(follow_symlinks=follow_symlinks):
continue
except OSError:
continue
# Avoid cost of making a path object for non-matching paths by
# matching against the os.DirEntry.name string.
if match is None or match(entry.name):
yield parent_path._make_child_direntry(entry)


def _select_recursive(parent_paths, dir_only, follow_symlinks, match):
"""Yield given paths and all their children, recursively, filtering by
string and type.
"""
if follow_symlinks is None:
follow_symlinks = False
for parent_path in parent_paths:
if match is not None:
# If we're filtering paths through a regex, record the length of
# the parent path. We'll pass it to match(path, pos=...) later.
parent_len = len(str(parent_path._make_child_relpath('_'))) - 1
paths = [parent_path._make_child_relpath('')]
while paths:
path = paths.pop()
if match is None or match(str(path), parent_len):
# Yield *directory* path that matches pattern (if any).
yield path
try:
# We must close the scandir() object before proceeding to
# avoid exhausting file descriptors when globbing deep trees.
with path._scandir() as scandir_it:
entries = list(scandir_it)
except OSError:
pass
else:
for entry in entries:
# Handle directory entry.
try:
if entry.is_dir(follow_symlinks=follow_symlinks):
# Recurse into this directory.
paths.append(path._make_child_direntry(entry))
continue
except OSError:
pass

# Handle file entry.
if not dir_only:
# Avoid cost of making a path object for non-matching
# files by matching against the os.DirEntry object.
if match is None or match(path._direntry_str(entry), parent_len):
# Yield *file* path that matches pattern (if any).
yield path._make_child_direntry(entry)


def _select_unique(paths):
"""Yields the given paths, filtering out duplicates."""
yielded = set()
try:
for path in paths:
path_str = str(path)
if path_str not in yielded:
yield path
yielded.add(path_str)
finally:
yielded.clear()


class UnsupportedOperation(NotImplementedError):
"""An exception that is raised when an unsupported operation is called on
Expand Down Expand Up @@ -225,6 +121,7 @@ class PurePathBase(abc.ABC):
'_resolving',
)
parser = ParserBase()
_globber = Globber

def __init__(self, path, *paths):
self._raw_path = self.parser.join(path, *paths) if paths else path
Expand Down Expand Up @@ -461,14 +358,6 @@ def is_absolute(self):
a drive)."""
return self.parser.isabs(self._raw_path)

@property
def _pattern_stack(self):
"""Stack of path components, to be used with patterns in glob()."""
anchor, parts = self._stack
if anchor:
raise NotImplementedError("Non-relative patterns are unsupported")
return parts

@property
def _pattern_str(self):
"""The path expressed as a string, for use in pattern-matching."""
Expand All @@ -494,8 +383,9 @@ def match(self, path_pattern, *, case_sensitive=None):
return False
if len(path_parts) > len(pattern_parts) and path_pattern.anchor:
return False
globber = self._globber(sep, case_sensitive)
for path_part, pattern_part in zip(path_parts, pattern_parts):
match = _compile_pattern(pattern_part, sep, case_sensitive, recursive=False)
match = globber.compile(pattern_part)
if match(path_part) is None:
return False
return True
Expand All @@ -509,7 +399,8 @@ def full_match(self, pattern, *, case_sensitive=None):
pattern = self.with_segments(pattern)
if case_sensitive is None:
case_sensitive = _is_case_sensitive(self.parser)
match = _compile_pattern(pattern._pattern_str, pattern.parser.sep, case_sensitive)
globber = self._globber(pattern.parser.sep, case_sensitive, recursive=True)
match = globber.compile(pattern._pattern_str)
return match(self._pattern_str) is not None


Expand Down Expand Up @@ -727,7 +618,7 @@ def samefile(self, other_path):
def open(self, mode='r', buffering=-1, encoding=None,
errors=None, newline=None):
"""
Open the file pointed by this path and return a file object, as
Open the file pointed to by this path and return a file object, as
the built-in open() function does.
"""
raise UnsupportedOperation(self._unsupported_msg('open()'))
Expand Down Expand Up @@ -773,136 +664,44 @@ def iterdir(self):
"""
raise UnsupportedOperation(self._unsupported_msg('iterdir()'))

def _scandir(self):
# Emulate os.scandir(), which returns an object that can be used as a
# context manager. This method is called by walk() and glob().
from contextlib import nullcontext
return nullcontext(self.iterdir())

def _direntry_str(self, entry):
# Transform an entry yielded from _scandir() into a path string.
# PathBase._scandir() yields PathBase objects, so use str().
return str(entry)

def _make_child_direntry(self, entry):
# Transform an entry yielded from _scandir() into a path object.
# PathBase._scandir() yields PathBase objects, so this is a no-op.
return entry

def _make_child_relpath(self, name):
return self.joinpath(name)

def glob(self, pattern, *, case_sensitive=None, follow_symlinks=True):
def _glob_selector(self, parts, case_sensitive, recurse_symlinks):
if case_sensitive is None:
case_sensitive = _is_case_sensitive(self.parser)
case_pedantic = False
else:
# The user has expressed a case sensitivity choice, but we don't
# know the case sensitivity of the underlying filesystem, so we
# must use scandir() for everything, including non-wildcard parts.
case_pedantic = True
recursive = True if recurse_symlinks else no_recurse_symlinks
globber = self._globber(self.parser.sep, case_sensitive, case_pedantic, recursive)
return globber.selector(parts)

def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=True):
"""Iterate over this subtree and yield all existing files (of any
kind, including directories) matching the given relative pattern.
"""
if not isinstance(pattern, PurePathBase):
pattern = self.with_segments(pattern)
if case_sensitive is None:
# TODO: evaluate case-sensitivity of each directory in _select_children().
case_sensitive = _is_case_sensitive(self.parser)

stack = pattern._pattern_stack
specials = ('', '.', '..')
deduplicate_paths = False
sep = self.parser.sep
paths = iter([self] if self.is_dir() else [])
while stack:
part = stack.pop()
if part in specials:
# Join special component (e.g. '..') onto paths.
paths = _select_special(paths, part)

elif part == '**':
# Consume following '**' components, which have no effect.
while stack and stack[-1] == '**':
stack.pop()

# Consume following non-special components, provided we're
# treating symlinks consistently. Each component is joined
# onto 'part', which is used to generate an re.Pattern object.
if follow_symlinks is not None:
while stack and stack[-1] not in specials:
part += sep + stack.pop()

# If the previous loop consumed pattern components, compile an
# re.Pattern object based on those components.
match = _compile_pattern(part, sep, case_sensitive) if part != '**' else None

# Recursively walk directories, filtering by type and regex.
paths = _select_recursive(paths, bool(stack), follow_symlinks, match)

# De-duplicate if we've already seen a '**' component.
if deduplicate_paths:
paths = _select_unique(paths)
deduplicate_paths = True

elif '**' in part:
raise ValueError("Invalid pattern: '**' can only be an entire path component")

else:
# If the pattern component isn't '*', compile an re.Pattern
# object based on the component.
match = _compile_pattern(part, sep, case_sensitive) if part != '*' else None

# Iterate over directories' children filtering by type and regex.
paths = _select_children(paths, bool(stack), follow_symlinks, match)
return paths
anchor, parts = pattern._stack
if anchor:
raise NotImplementedError("Non-relative patterns are unsupported")
select = self._glob_selector(parts, case_sensitive, recurse_symlinks)
return select(self)

def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=True):
def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=True):
"""Recursively yield all existing files (of any kind, including
directories) matching the given relative pattern, anywhere in
this subtree.
"""
if not isinstance(pattern, PurePathBase):
pattern = self.with_segments(pattern)
pattern = '**' / pattern
return self.glob(pattern, case_sensitive=case_sensitive, follow_symlinks=follow_symlinks)
return self.glob(pattern, case_sensitive=case_sensitive, recurse_symlinks=recurse_symlinks)

def walk(self, top_down=True, on_error=None, follow_symlinks=False):
"""Walk the directory tree from this directory, similar to os.walk()."""
paths = [self]

while paths:
path = paths.pop()
if isinstance(path, tuple):
yield path
continue

# We may not have read permission for self, in which case we can't
# get a list of the files the directory contains. os.walk()
# always suppressed the exception in that instance, rather than
# blow up for a minor reason when (say) a thousand readable
# directories are still left to visit. That logic is copied here.
try:
scandir_obj = path._scandir()
except OSError as error:
if on_error is not None:
on_error(error)
continue

with scandir_obj as scandir_it:
dirnames = []
filenames = []
if not top_down:
paths.append((path, dirnames, filenames))
for entry in scandir_it:
try:
is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
except OSError:
# Carried over from os.path.isdir().
is_dir = False

if is_dir:
if not top_down:
paths.append(path._make_child_direntry(entry))
dirnames.append(entry.name)
else:
filenames.append(entry.name)

if top_down:
yield path, dirnames, filenames
paths += [path._make_child_relpath(d) for d in reversed(dirnames)]
return self._globber.walk(self, top_down, on_error, follow_symlinks)

def absolute(self):
"""Return an absolute version of this path
Expand Down
Loading

0 comments on commit a861589

Please sign in to comment.