From f425a5024df4b971bd97557e558693576e878830 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sat, 13 Jan 2024 01:11:49 -0500 Subject: [PATCH] Purify `CanonPath` The core `CanonPath` constructors were using `absPath`, but `absPath` in some situations does IO which is not appropriate. It turns out that these constructors avoided those situations, and thus were pure, but it was far from obvious this was the case. To remedy the situation, abstract the core algorithm from `canonPath` to use separately in `CanonPath` without any IO. No we know by-construction that those constructors are pure. That leaves `CanonPath::fromCWD` as the only operation which uses IO / is impure. Add docs on it, and `CanonPath` as a whole, explaining the situation. This is also necessary to support Windows paths on windows without messing up `CanonPath`. But, I think it is good even without that. Co-authored-by: Eelco Dolstra --- src/libutil/canon-path.cc | 14 +++++- src/libutil/canon-path.hh | 25 +++++++++-- src/libutil/file-path-impl.hh | 74 ++++++++++++++++++++++++++++++++ src/libutil/file-system.cc | 57 +++++++----------------- tests/unit/libutil/canon-path.cc | 18 ++++++++ 5 files changed, 141 insertions(+), 47 deletions(-) create mode 100644 src/libutil/file-path-impl.hh diff --git a/src/libutil/canon-path.cc b/src/libutil/canon-path.cc index 0a0f96a0530f..9b8cf8a448c9 100644 --- a/src/libutil/canon-path.cc +++ b/src/libutil/canon-path.cc @@ -1,16 +1,26 @@ #include "canon-path.hh" +#include "util.hh" +#include "file-path-impl.hh" #include "file-system.hh" namespace nix { CanonPath CanonPath::root = CanonPath("/"); +static std::string absPathPure(std::string_view path) +{ + return canonPathInner(path, [](auto &, auto &){}); +} + CanonPath::CanonPath(std::string_view raw) - : path(absPath(raw, "/")) + : path(absPathPure(concatStrings("/", raw))) { } CanonPath::CanonPath(std::string_view raw, const CanonPath & root) - : path(absPath(raw, root.abs())) + : path(absPathPure( + raw.size() > 0 && raw[0] == '/' + ? raw + : concatStrings(root.abs(), "/", raw))) { } CanonPath::CanonPath(const std::vector & elems) diff --git a/src/libutil/canon-path.hh b/src/libutil/canon-path.hh index 997c8c731311..1b5ad0f311d9 100644 --- a/src/libutil/canon-path.hh +++ b/src/libutil/canon-path.hh @@ -21,9 +21,21 @@ namespace nix { * * - There are no components equal to '.' or '..'. * - * Note that the path does not need to correspond to an actually - * existing path, and there is no guarantee that symlinks are - * resolved. + * `CanonPath` are "virtual" Nix paths for abstract file system objects; + * they are always Unix-style paths, regardless of what OS Nix is + * running on. The `/` root doesn't denote the ambient host file system + * root, but some virtual FS root. + * + * @note It might be useful to compare `openat(some_fd, "foo/bar")` on + * Unix. `"foo/bar"` on is an relative path because an absolute on would + * "overide" the `some_fd` directory file descriptor and escape to "real + * root". Conversely, Nix's abstract file operations *never* escape the + * designated virtual file system ( i.e. `SourceAccessor` or + * `ParseSink`), so `CanonPath` does not need an absolute/relative + * distinction. + * + * @note the path does not need to correspond to an actually existing + * path, and there is no guarantee that symlinks are resolved. */ class CanonPath { @@ -52,6 +64,13 @@ public: */ CanonPath(const std::vector & elems); + /** + * This is the only IO-using / impure canonical path operation, and + * it doesn't make sense on non-unix because a non-unix style path + * cannot be faithfully turned into a `CanonPath`. + * + * @todo Perhaps this function should be removed. + */ static CanonPath fromCwd(std::string_view path = "."); static CanonPath root; diff --git a/src/libutil/file-path-impl.hh b/src/libutil/file-path-impl.hh new file mode 100644 index 000000000000..cf245313352f --- /dev/null +++ b/src/libutil/file-path-impl.hh @@ -0,0 +1,74 @@ +#pragma once +/** + * @file + * + * Pure (no IO) infrastructure just for defining other path types; + * should not be used directly outside of utilities. + */ +#include +#include + +namespace nix { + +/** + * Core pure path canonicalization algorithm. + * + * @param hookComponent A chance to modify `s` and `path` in an + * arbitrary way, e.g. if it points to a symlink. Does nothing by + * default. + */ +typename std::string canonPathInner( + std::string_view path, + auto && hookComponent) +{ + assert(path != ""); + + typename std::string s; + s.reserve(256); + + while (true) { + + /* Skip slashes. */ + while (!path.empty() && path[0] == '/') + path.remove_prefix(1); + + if (path.empty()) break; + + auto nextComp = ({ + auto nextPathSep = path.find('/'); + nextPathSep == path.npos ? path : path.substr(0, nextPathSep); + }); + + /* Ignore `.'. */ + if (nextComp == ".") + path.remove_prefix(1); + + /* If `..', delete the last component. */ + else if (nextComp == "..") + { + if (!s.empty()) s.erase(s.rfind('/')); + path.remove_prefix(2); + } + + /* Normal component; copy it. */ + else { + s += '/'; + if (const auto slash = path.find('/'); slash == s.npos) { + s += path; + path = {}; + } else { + s += path.substr(0, slash); + path = path.substr(slash); + } + + hookComponent(s, path); + } + } + + if (s.empty()) + s = "/"; + + return s; +} + +} diff --git a/src/libutil/file-system.cc b/src/libutil/file-system.cc index 9fa1f62dfc05..bce50f807149 100644 --- a/src/libutil/file-system.cc +++ b/src/libutil/file-system.cc @@ -1,5 +1,6 @@ #include "environment-variables.hh" #include "file-system.hh" +#include "file-path-impl.hh" #include "signals.hh" #include "finally.hh" #include "serialise.hh" @@ -21,11 +22,18 @@ namespace fs = std::filesystem; namespace nix { + +static bool isAbsolute(PathView path) +{ + return !path.empty() && path[0] == '/'; +} + + Path absPath(PathView path, std::optional dir, bool resolveSymlinks) { std::string scratch; - if (path.empty() || path[0] != '/') { + if (!isAbsolute(path)) { // In this case we need to call `canonPath` on a newly-created // string. We set `scratch` to that string first, and then set // `path` to `scratch`. This ensures the newly-created string @@ -58,10 +66,7 @@ Path canonPath(PathView path, bool resolveSymlinks) { assert(path != ""); - std::string s; - s.reserve(256); - - if (path[0] != '/') + if (!isAbsolute(path)) throw Error("not an absolute path: '%1%'", path); std::string temp; @@ -70,35 +75,10 @@ Path canonPath(PathView path, bool resolveSymlinks) arbitrary (but high) limit to prevent infinite loops. */ unsigned int followCount = 0, maxFollow = 1024; - while (1) { - - /* Skip slashes. */ - while (!path.empty() && path[0] == '/') path.remove_prefix(1); - if (path.empty()) break; - - /* Ignore `.'. */ - if (path == "." || path.substr(0, 2) == "./") - path.remove_prefix(1); - - /* If `..', delete the last component. */ - else if (path == ".." || path.substr(0, 3) == "../") - { - if (!s.empty()) s.erase(s.rfind('/')); - path.remove_prefix(2); - } - - /* Normal component; copy it. */ - else { - s += '/'; - if (const auto slash = path.find('/'); slash == path.npos) { - s += path; - path = {}; - } else { - s += path.substr(0, slash); - path = path.substr(slash); - } - - /* If s points to a symlink, resolve it and continue from there */ + return canonPathInner( + path, + [&followCount, &temp, maxFollow, resolveSymlinks] + (std::string & s, std::string_view & path) { if (resolveSymlinks && isLink(s)) { if (++followCount >= maxFollow) throw Error("infinite symlink recursion in path '%1%'", path); @@ -113,14 +93,7 @@ Path canonPath(PathView path, bool resolveSymlinks) } } } - } - } - - if (s.empty()) { - s = "/"; - } - - return s; + }); } diff --git a/tests/unit/libutil/canon-path.cc b/tests/unit/libutil/canon-path.cc index fc94ccc3d4f7..61bee70b5845 100644 --- a/tests/unit/libutil/canon-path.cc +++ b/tests/unit/libutil/canon-path.cc @@ -41,6 +41,24 @@ namespace nix { } } + TEST(CanonPath, from_existing) { + CanonPath p0("foo//bar/"); + { + CanonPath p("/baz//quux/", p0); + ASSERT_EQ(p.abs(), "/baz/quux"); + ASSERT_EQ(p.rel(), "baz/quux"); + ASSERT_EQ(*p.baseName(), "quux"); + ASSERT_EQ(*p.dirOf(), "/baz"); + } + { + CanonPath p("baz//quux/", p0); + ASSERT_EQ(p.abs(), "/foo/bar/baz/quux"); + ASSERT_EQ(p.rel(), "foo/bar/baz/quux"); + ASSERT_EQ(*p.baseName(), "quux"); + ASSERT_EQ(*p.dirOf(), "/foo/bar/baz"); + } + } + TEST(CanonPath, pop) { CanonPath p("foo/bar/x"); ASSERT_EQ(p.abs(), "/foo/bar/x");