Skip to content

Commit

Permalink
GH-89727: Fix pathlib.Path.walk RecursionError on deep trees (GH-100282)
Browse files Browse the repository at this point in the history
Use a stack to implement `pathlib.Path.walk()` iteratively instead of recursively to avoid hitting recursion limits on deeply nested trees.

Co-authored-by: Barney Gale <barney.gale@gmail.com>
Co-authored-by: Brett Cannon <brett@python.org>
  • Loading branch information
3 people committed Mar 22, 2023
1 parent af9c34f commit 713df2c
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 38 deletions.
78 changes: 40 additions & 38 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1197,45 +1197,47 @@ def expanduser(self):
# NOTE(review): this span is a diff hunk whose +/- markers and indentation
# were lost in extraction. The "old"/"new" labels below are inferred from the
# commit description (recursive implementation replaced by a stack-based one).
def walk(self, top_down=True, on_error=None, follow_symlinks=False):
"""Walk the directory tree from this directory, similar to os.walk()."""
sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks)
# Old: delegate to the recursive generator helper below.
return self._walk(top_down, on_error, follow_symlinks)

# Old: recursive helper. Each directory level adds a nested generator via
# ``yield from``, which is what hits the recursion limit on deep trees.
def _walk(self, top_down, on_error, follow_symlinks):
# We may not have read permission for self, in which case we can't
# get a list of the files the directory contains. os.walk
# always suppressed the exception then, rather than blow up for a
# minor reason when (say) a thousand readable directories are still
# left to visit. That logic is copied here.
try:
scandir_it = self._scandir()
except OSError as error:
if on_error is not None:
on_error(error)
return

with scandir_it:
dirnames = []
filenames = []
for entry in scandir_it:
try:
is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
except OSError:
# Carried over from os.path.isdir().
is_dir = False

if is_dir:
dirnames.append(entry.name)
else:
filenames.append(entry.name)

if top_down:
yield self, dirnames, filenames

for dirname in dirnames:
dirpath = self._make_child_relpath(dirname)
yield from dirpath._walk(top_down, on_error, follow_symlinks)
# New: body of walk() after the audit call. An explicit stack of pending
# work items replaces the recursion.
paths = [self]

while paths:
path = paths.pop()
# A tuple on the stack is a deferred (path, dirnames, filenames) result
# pushed in bottom-up mode; anything else is a directory still to scan.
if isinstance(path, tuple):
yield path
continue

# We may not have read permission for path, in which case we can't
# get a list of the files the directory contains. os.walk()
# always suppressed the exception in that instance, rather than
# blow up for a minor reason when (say) a thousand readable
# directories are still left to visit. That logic is copied here.
try:
scandir_it = path._scandir()
except OSError as error:
if on_error is not None:
on_error(error)
# Skip this directory but keep processing the rest of the stack.
continue

with scandir_it:
dirnames = []
filenames = []
for entry in scandir_it:
try:
is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
except OSError:
# Carried over from os.path.isdir().
is_dir = False

if is_dir:
dirnames.append(entry.name)
else:
filenames.append(entry.name)

if top_down:
yield path, dirnames, filenames
else:
# Bottom-up: defer this result. The children are pushed on top of it
# below, so they are popped (and yielded) before it.
paths.append((path, dirnames, filenames))

# Old: bottom-up yield at the end of the recursive helper.
if not top_down:
yield self, dirnames, filenames
# New: push children in reverse so that pop() visits them in the order they
# appear in dirnames. In top-down mode dirnames is read here, after the yield
# above, so in-place edits made by the caller are reflected in the traversal.
paths += [path._make_child_relpath(d) for d in reversed(dirnames)]


class PosixPath(Path, PurePosixPath):
Expand Down
13 changes: 13 additions & 0 deletions Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from unittest import mock

from test.support import import_helper
from test.support import set_recursion_limit
from test.support import is_emscripten, is_wasi
from test.support import os_helper
from test.support.os_helper import TESTFN, FakePath
Expand Down Expand Up @@ -2793,6 +2794,18 @@ def test_walk_many_open_files(self):
self.assertEqual(next(it), expected)
path = path / 'd'

def test_walk_above_recursion_limit(self):
    """Walk a tree nested deeper than the recursion limit, in both orders.

    The test passes if neither traversal raises; the results are discarded.
    """
    limit = 40
    # The tree must be nested deeper than the limit: depth > limit.
    depth = limit + 10
    root = pathlib.Path(os_helper.TESTFN, 'deep')
    segments = ['d'] * depth
    leaf = pathlib.Path(root, *segments)
    leaf.mkdir(parents=True)

    with set_recursion_limit(limit):
        # Exhaust the generator top-down first, then bottom-up.
        for top_down in (True, False):
            list(root.walk(top_down=top_down))


class PathTest(_BasePathTest, unittest.TestCase):
cls = pathlib.Path
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix ``pathlib.Path.walk`` raising ``RecursionError`` on deep directory trees by rewriting it to use iteration instead of recursion.

0 comments on commit 713df2c

Please sign in to comment.