Skip to content

Commit

Permalink
GH-101362: Omit path anchor from pathlib.PurePath()._parts
Browse files (browse the repository at this point in the history)
Improve performance of path construction by skipping the addition of the
path anchor (`drive + root`) to the internal `_parts` list. This change
allows us to simplify the implementations of `joinpath()`, `name`,
`parent`, and `parents` a little. The public `parts` tuple is unaffected.
  • Loading branch information
barneygale committed Mar 6, 2023
1 parent 6716254 commit 8bf4600
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 52 deletions.
40 changes: 20 additions & 20 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,10 +221,7 @@ def __init__(self, path):
self._parts = path._parts

def __len__(self):
if self._drv or self._root:
return len(self._parts) - 1
else:
return len(self._parts)
return len(self._parts)

def __getitem__(self, idx):
if isinstance(idx, slice):
Expand Down Expand Up @@ -269,7 +266,7 @@ def __new__(cls, *args):
def __reduce__(self):
# Using the parts tuple helps share interned path parts
# when pickling related paths.
return (self.__class__, tuple(self._parts))
return (self.__class__, self.parts)

@classmethod
def _parse_parts(cls, parts):
Expand All @@ -295,8 +292,7 @@ def _parse_parts(cls, parts):
if drv.startswith(sep):
# pathlib assumes that UNC paths always have a root.
root = sep
unfiltered_parsed = [drv + root] + rel.split(sep)
parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.']
parsed = [sys.intern(x) for x in rel.split(sep) if x and x != '.']
return drv, root, parsed

@classmethod
Expand All @@ -318,10 +314,11 @@ def _from_parsed_parts(cls, drv, root, parts):

@classmethod
def _format_parsed_parts(cls, drv, root, parts):
tail = cls._flavour.sep.join(parts)
if drv or root:
return drv + root + cls._flavour.sep.join(parts[1:])
return f'{drv}{root}{tail}'
else:
return cls._flavour.sep.join(parts)
return tail

def __str__(self):
"""Return the string representation of the path, suitable for
Expand Down Expand Up @@ -376,7 +373,7 @@ def _parts_normcase(self):
try:
return self._parts_normcase_cached
except AttributeError:
self._parts_normcase_cached = [self._flavour.normcase(p) for p in self._parts]
self._parts_normcase_cached = [self._flavour.normcase(p) for p in self.parts]
return self._parts_normcase_cached

def __eq__(self, other):
Expand Down Expand Up @@ -427,7 +424,7 @@ def anchor(self):
def name(self):
"""The final path component, if any."""
parts = self._parts
if len(parts) == (1 if (self._drv or self._root) else 0):
if not parts:
return ''
return parts[-1]

Expand Down Expand Up @@ -551,7 +548,10 @@ def parts(self):
try:
return self._parts_tuple
except AttributeError:
self._parts_tuple = tuple(self._parts)
if self._drv or self._root:
self._parts_tuple = (self._drv + self._root,) + tuple(self._parts)
else:
self._parts_tuple = tuple(self._parts)
return self._parts_tuple

def joinpath(self, *args):
Expand All @@ -564,13 +564,13 @@ def joinpath(self, *args):
drv2, root2, parts2 = self._parse_parts(args)
if root2:
if not drv2 and drv1:
return self._from_parsed_parts(drv1, root2, [drv1 + root2] + parts2[1:])
return self._from_parsed_parts(drv1, root2, parts2)
else:
return self._from_parsed_parts(drv2, root2, parts2)
elif drv2:
if drv2 == drv1 or self._flavour.normcase(drv2) == self._flavour.normcase(drv1):
# Same drive => second path is relative to the first.
return self._from_parsed_parts(drv1, root1, parts1 + parts2[1:])
return self._from_parsed_parts(drv1, root1, parts1 + parts2)
else:
return self._from_parsed_parts(drv2, root2, parts2)
else:
Expand All @@ -595,7 +595,7 @@ def parent(self):
drv = self._drv
root = self._root
parts = self._parts
if len(parts) == 1 and (drv or root):
if not parts:
return self
return self._from_parsed_parts(drv, root, parts[:-1])

Expand All @@ -622,7 +622,7 @@ def is_reserved(self):
# (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
# exist). We err on the side of caution and return True even for some
# paths which are not actually considered reserved by Windows.
if self._parts[0].startswith('\\\\'):
if self._drv.startswith('\\\\'):
# UNC paths are never reserved.
return False
name = self._parts[-1].partition('.')[0].partition(':')[0].rstrip(' ')
Expand All @@ -632,12 +632,12 @@ def match(self, path_pattern):
"""
Return True if this path matches the given pattern.
"""
path_pattern = self._flavour.normcase(path_pattern)
drv, root, pat_parts = self._parse_parts((path_pattern,))
pat = type(self)(path_pattern)
pat_parts = pat._parts_normcase
if not pat_parts:
raise ValueError("empty pattern")
parts = self._parts_normcase
if drv or root:
if pat._drv or pat._root:
if len(pat_parts) != len(parts):
return False
elif len(pat_parts) > len(parts):
Expand Down Expand Up @@ -806,7 +806,7 @@ def absolute(self):
cwd = self._flavour.abspath(self._drv)
else:
cwd = os.getcwd()
return self._from_parts([cwd] + self._parts)
return self._from_parts((cwd,) + self.parts)

def resolve(self, strict=False):
"""
Expand Down
64 changes: 32 additions & 32 deletions Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,12 @@ def test_parse_parts_common(self):
check(['a', '.', 'b'], ('', '', ['a', 'b']))
check(['a', '.', '.'], ('', '', ['a']))
# The first part is anchored.
check(['/a/b'], ('', sep, [sep, 'a', 'b']))
check(['/a', 'b'], ('', sep, [sep, 'a', 'b']))
check(['/a/', 'b'], ('', sep, [sep, 'a', 'b']))
check(['/a/b'], ('', sep, ['a', 'b']))
check(['/a', 'b'], ('', sep, ['a', 'b']))
check(['/a/', 'b'], ('', sep, ['a', 'b']))
# Ignoring parts before an anchored part.
check(['a', '/b', 'c'], ('', sep, [sep, 'b', 'c']))
check(['a', '/b', '/c'], ('', sep, [sep, 'c']))
check(['a', '/b', 'c'], ('', sep, ['b', 'c']))
check(['a', '/b', '/c'], ('', sep, ['c']))


class PosixFlavourTest(_BaseFlavourTest, unittest.TestCase):
Expand All @@ -72,9 +72,9 @@ def test_parse_parts(self):
check = self._check_parse_parts
# Collapsing of excess leading slashes, except for the double-slash
# special case.
check(['//a', 'b'], ('', '//', ['//', 'a', 'b']))
check(['///a', 'b'], ('', '/', ['/', 'a', 'b']))
check(['////a', 'b'], ('', '/', ['/', 'a', 'b']))
check(['//a', 'b'], ('', '//', ['a', 'b']))
check(['///a', 'b'], ('', '/', ['a', 'b']))
check(['////a', 'b'], ('', '/', ['a', 'b']))
# Paths which look like NT paths aren't treated specially.
check(['c:a'], ('', '', ['c:a']))
check(['c:\\a'], ('', '', ['c:\\a']))
Expand All @@ -88,40 +88,40 @@ class NTFlavourTest(_BaseFlavourTest, unittest.TestCase):
def test_parse_parts(self):
check = self._check_parse_parts
# First part is anchored.
check(['c:'], ('c:', '', ['c:']))
check(['c:/'], ('c:', '\\', ['c:\\']))
check(['/'], ('', '\\', ['\\']))
check(['c:a'], ('c:', '', ['c:', 'a']))
check(['c:/a'], ('c:', '\\', ['c:\\', 'a']))
check(['/a'], ('', '\\', ['\\', 'a']))
check(['c:'], ('c:', '', []))
check(['c:/'], ('c:', '\\', []))
check(['/'], ('', '\\', []))
check(['c:a'], ('c:', '', ['a']))
check(['c:/a'], ('c:', '\\', ['a']))
check(['/a'], ('', '\\', ['a']))
# UNC paths.
check(['//a/b'], ('\\\\a\\b', '\\', ['\\\\a\\b\\']))
check(['//a/b/'], ('\\\\a\\b', '\\', ['\\\\a\\b\\']))
check(['//a/b/c'], ('\\\\a\\b', '\\', ['\\\\a\\b\\', 'c']))
check(['//a/b'], ('\\\\a\\b', '\\', []))
check(['//a/b/'], ('\\\\a\\b', '\\', []))
check(['//a/b/c'], ('\\\\a\\b', '\\', ['c']))
# Second part is anchored, so that the first part is ignored.
check(['a', 'Z:b', 'c'], ('Z:', '', ['Z:', 'b', 'c']))
check(['a', 'Z:/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c']))
check(['a', 'Z:b', 'c'], ('Z:', '', ['b', 'c']))
check(['a', 'Z:/b', 'c'], ('Z:', '\\', ['b', 'c']))
# UNC paths.
check(['a', '//b/c', 'd'], ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd']))
check(['a', '//b/c', 'd'], ('\\\\b\\c', '\\', ['d']))
# Collapsing and stripping excess slashes.
check(['a', 'Z://b//c/', 'd/'], ('Z:', '\\', ['Z:\\', 'b', 'c', 'd']))
check(['a', 'Z://b//c/', 'd/'], ('Z:', '\\', ['b', 'c', 'd']))
# UNC paths.
check(['a', '//b/c//', 'd'], ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd']))
check(['a', '//b/c//', 'd'], ('\\\\b\\c', '\\', ['d']))
# Extended paths.
check(['//?/c:/'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\']))
check(['//?/c:/a'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\', 'a']))
check(['//?/c:/a', '/b'], ('\\\\?\\c:', '\\', ['\\\\?\\c:\\', 'b']))
check(['//?/c:/'], ('\\\\?\\c:', '\\', []))
check(['//?/c:/a'], ('\\\\?\\c:', '\\', ['a']))
check(['//?/c:/a', '/b'], ('\\\\?\\c:', '\\', ['b']))
# Extended UNC paths (format is "\\?\UNC\server\share").
check(['//?/UNC/b/c'], ('\\\\?\\UNC\\b\\c', '\\', ['\\\\?\\UNC\\b\\c\\']))
check(['//?/UNC/b/c/d'], ('\\\\?\\UNC\\b\\c', '\\', ['\\\\?\\UNC\\b\\c\\', 'd']))
check(['//?/UNC/b/c'], ('\\\\?\\UNC\\b\\c', '\\', []))
check(['//?/UNC/b/c/d'], ('\\\\?\\UNC\\b\\c', '\\', ['d']))
# Second part has a root but not drive.
check(['a', '/b', 'c'], ('', '\\', ['\\', 'b', 'c']))
check(['Z:/a', '/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c']))
check(['//?/Z:/a', '/b', 'c'], ('\\\\?\\Z:', '\\', ['\\\\?\\Z:\\', 'b', 'c']))
check(['a', '/b', 'c'], ('', '\\', ['b', 'c']))
check(['Z:/a', '/b', 'c'], ('Z:', '\\', ['b', 'c']))
check(['//?/Z:/a', '/b', 'c'], ('\\\\?\\Z:', '\\', ['b', 'c']))
# Joining with the same drive => the first path is appended to if
# the second path is relative.
check(['c:/a/b', 'c:x/y'], ('c:', '\\', ['c:\\', 'a', 'b', 'x', 'y']))
check(['c:/a/b', 'c:/x/y'], ('c:', '\\', ['c:\\', 'x', 'y']))
check(['c:/a/b', 'c:x/y'], ('c:', '\\', ['a', 'b', 'x', 'y']))
check(['c:/a/b', 'c:/x/y'], ('c:', '\\', ['x', 'y']))


#
Expand Down

0 comments on commit 8bf4600

Please sign in to comment.