Skip to content

Commit

Permalink
Purify CanonPath
Browse files Browse the repository at this point in the history
The core `CanonPath` constructors were using `absPath`, but `absPath` in
some situations does IO which is not appropriate. It turns out that
these constructors avoided those situations, and thus were pure, but it
was far from obvious this was the case.

To remedy the situation, abstract the core algorithm from `canonPath` to
use separately in `CanonPath` without any IO. No we know by-construction
that those constructors are pure.

That leaves `CanonPath::fromCWD` as the only operation which uses IO /
is impure. Add docs on it, and `CanonPath` as a whole, explaining the
situation.

This is also necessary to support Windows paths on windows without
messing up `CanonPath`. But, I think it is good even without that.

Co-authored-by: Eelco Dolstra <edolstra@gmail.com>
Co-authored-by: Robert Hensing <roberth@users.noreply.github.com>
  • Loading branch information
3 people committed Feb 16, 2024
1 parent 06be819 commit d27d828
Show file tree
Hide file tree
Showing 5 changed files with 135 additions and 49 deletions.
15 changes: 12 additions & 3 deletions src/libutil/canon-path.cc
Original file line number Diff line number Diff line change
@@ -1,16 +1,25 @@
#include "canon-path.hh"
#include "file-system.hh"
#include "util.hh"
#include "file-path-impl.hh"

namespace nix {

CanonPath CanonPath::root = CanonPath("/");

static std::string absPathPure(std::string_view path)
{
return canonPathInner(path, [](auto &, auto &){});
}

CanonPath::CanonPath(std::string_view raw)
: path(absPath(raw, "/"))
: path(absPathPure(concatStrings("/", raw)))
{ }

CanonPath::CanonPath(std::string_view raw, const CanonPath & root)
: path(absPath(raw, root.abs()))
: path(absPathPure(
raw.size() > 0 && raw[0] == '/'
? raw
: concatStrings(root.abs(), "/", raw)))
{ }

CanonPath::CanonPath(const std::vector<std::string> & elems)
Expand Down
18 changes: 15 additions & 3 deletions src/libutil/canon-path.hh
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,21 @@ namespace nix {
*
* - There are no components equal to '.' or '..'.
*
* Note that the path does not need to correspond to an actually
* existing path, and there is no guarantee that symlinks are
* resolved.
* `CanonPath` are "virtual" Nix paths for abstract file system objects;
* they are always Unix-style paths, regardless of what OS Nix is
* running on. The `/` root doesn't denote the ambient host file system
* root, but some virtual FS root.
*
* @note It might be useful to compare `openat(some_fd, "foo/bar")` on
* Unix. `"foo/bar"` is a relative path because an absolute path would
* "override" the `some_fd` directory file descriptor and escape to the
* "system root". Conversely, Nix's abstract file operations *never* escape the
* designated virtual file system (i.e. `SourceAccessor` or
* `ParseSink`), so `CanonPath` does not need an absolute/relative
* distinction.
*
* @note The path does not need to correspond to an actually existing
* path, and the path may or may not have unresolved symlinks.
*/
class CanonPath
{
Expand Down
74 changes: 74 additions & 0 deletions src/libutil/file-path-impl.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#pragma once
/**
* @file
*
* Pure (no IO) infrastructure just for defining other path types;
* should not be used directly outside of utilities.
*/
#include <string>
#include <string_view>

namespace nix {

/**
* Core pure path canonicalization algorithm.
*
* @param hookComponent A chance to modify `s` and `path` in an
* arbitrary way, e.g. if it points to a symlink. Does nothing by
* default.
*/
typename std::string canonPathInner(
std::string_view path,
auto && hookComponent)
{
assert(path != "");

typename std::string s;
s.reserve(256);

while (true) {

/* Skip slashes. */
while (!path.empty() && path[0] == '/')
path.remove_prefix(1);

if (path.empty()) break;

auto nextComp = ({
auto nextPathSep = path.find('/');
nextPathSep == path.npos ? path : path.substr(0, nextPathSep);
});

/* Ignore `.'. */
if (nextComp == ".")
path.remove_prefix(1);

/* If `..', delete the last component. */
else if (nextComp == "..")
{
if (!s.empty()) s.erase(s.rfind('/'));
path.remove_prefix(2);
}

/* Normal component; copy it. */
else {
s += '/';
if (const auto slash = path.find('/'); slash == s.npos) {
s += path;
path = {};
} else {
s += path.substr(0, slash);
path = path.substr(slash);
}

hookComponent(s, path);
}
}

if (s.empty())
s = "/";

return s;
}

}
59 changes: 16 additions & 43 deletions src/libutil/file-system.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "environment-variables.hh"
#include "file-system.hh"
#include "file-path-impl.hh"
#include "signals.hh"
#include "finally.hh"
#include "serialise.hh"
Expand All @@ -21,11 +22,18 @@ namespace fs = std::filesystem;

namespace nix {

/** Treat the string as possibly an absolute path, by inspecting the start of it. Return whether it was probably intended to be absolute. */
static bool isAbsolute(PathView path)
{
return !path.empty() && path[0] == '/';
}


Path absPath(PathView path, std::optional<PathView> dir, bool resolveSymlinks)
{
std::string scratch;

if (path.empty() || path[0] != '/') {
if (!isAbsolute(path)) {
// In this case we need to call `canonPath` on a newly-created
// string. We set `scratch` to that string first, and then set
// `path` to `scratch`. This ensures the newly-created string
Expand Down Expand Up @@ -58,10 +66,7 @@ Path canonPath(PathView path, bool resolveSymlinks)
{
assert(path != "");

std::string s;
s.reserve(256);

if (path[0] != '/')
if (!isAbsolute(path))
throw Error("not an absolute path: '%1%'", path);

std::string temp;
Expand All @@ -70,41 +75,16 @@ Path canonPath(PathView path, bool resolveSymlinks)
arbitrary (but high) limit to prevent infinite loops. */
unsigned int followCount = 0, maxFollow = 1024;

while (1) {

/* Skip slashes. */
while (!path.empty() && path[0] == '/') path.remove_prefix(1);
if (path.empty()) break;

/* Ignore `.'. */
if (path == "." || path.substr(0, 2) == "./")
path.remove_prefix(1);

/* If `..', delete the last component. */
else if (path == ".." || path.substr(0, 3) == "../")
{
if (!s.empty()) s.erase(s.rfind('/'));
path.remove_prefix(2);
}

/* Normal component; copy it. */
else {
s += '/';
if (const auto slash = path.find('/'); slash == path.npos) {
s += path;
path = {};
} else {
s += path.substr(0, slash);
path = path.substr(slash);
}

/* If s points to a symlink, resolve it and continue from there */
return canonPathInner(
path,
[&followCount, &temp, maxFollow, resolveSymlinks]
(std::string & s, std::string_view & path) {
if (resolveSymlinks && isLink(s)) {
if (++followCount >= maxFollow)
throw Error("infinite symlink recursion in path '%1%'", path);
temp = concatStrings(readLink(s), path);
path = temp;
if (!temp.empty() && temp[0] == '/') {
if (isAbsolute(temp)) {
s.clear(); /* restart for symlinks pointing to absolute path */
} else {
s = dirOf(s);
Expand All @@ -113,14 +93,7 @@ Path canonPath(PathView path, bool resolveSymlinks)
}
}
}
}
}

if (s.empty()) {
s = "/";
}

return s;
});
}


Expand Down
18 changes: 18 additions & 0 deletions tests/unit/libutil/canon-path.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,24 @@ namespace nix {
}
}

TEST(CanonPath, from_existing) {
CanonPath p0("foo//bar/");
{
CanonPath p("/baz//quux/", p0);
ASSERT_EQ(p.abs(), "/baz/quux");
ASSERT_EQ(p.rel(), "baz/quux");
ASSERT_EQ(*p.baseName(), "quux");
ASSERT_EQ(*p.dirOf(), "/baz");
}
{
CanonPath p("baz//quux/", p0);
ASSERT_EQ(p.abs(), "/foo/bar/baz/quux");
ASSERT_EQ(p.rel(), "foo/bar/baz/quux");
ASSERT_EQ(*p.baseName(), "quux");
ASSERT_EQ(*p.dirOf(), "/foo/bar/baz");
}
}

TEST(CanonPath, pop) {
CanonPath p("foo/bar/x");
ASSERT_EQ(p.abs(), "/foo/bar/x");
Expand Down

0 comments on commit d27d828

Please sign in to comment.