Skip to content

Commit

Permalink
Fix handling of non-ASCII characters in archive entry file names
Browse files Browse the repository at this point in the history
When creating a `PathFragment` from a ZIP or TAR entry file name, the
raw bytes of the name are now wrapped into a Latin-1 encoded String,
which is how Bazel internally represents file paths.

Previously, ZIP entries as well as TAR entries with PAX headers would
result in ordinary decoded Java strings, resulting in corrupted file
names when passed to Bazel's file system operations.
  • Loading branch information
fmeum committed May 19, 2023
1 parent 0511bb4 commit cef4b5a
Show file tree
Hide file tree
Showing 8 changed files with 149 additions and 59 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/rules:repository/repository_function",
"//src/main/java/com/google/devtools/build/lib/rules:repository/workspace_base_rule",
"//src/main/java/com/google/devtools/build/lib/rules:repository/workspace_configured_target_factory",
"//src/main/java/com/google/devtools/build/lib/unsafe:string",
"//src/main/java/com/google/devtools/build/lib/util",
"//src/main/java/com/google/devtools/build/lib/util:os",
"//src/main/java/com/google/devtools/build/lib/vfs",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
package com.google.devtools.build.lib.bazel.repository;

import static com.google.devtools.build.lib.bazel.repository.StripPrefixedPath.maybeDeprefixSymlink;
import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.base.Optional;
import com.google.common.io.ByteStreams;
import com.google.devtools.build.lib.bazel.repository.DecompressorValue.Decompressor;
import com.google.devtools.build.lib.vfs.FileSystemUtils;
Expand All @@ -29,6 +29,7 @@
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
Expand All @@ -54,20 +55,21 @@ public Path decompress(DecompressorDescriptor descriptor)
Map<Path, PathFragment> symlinks = new HashMap<>();

try (InputStream decompressorStream = getDecompressorStream(descriptor)) {
TarArchiveInputStream tarStream = new TarArchiveInputStream(decompressorStream);
// USTAR tar headers use an unspecified encoding whereas PAX tar headers always use UTF-8.
// Since this would result in ambiguity with Bazel forcing the default JVM encoding to be
// ISO-8859-1, we explicitly specify the encoding as UTF-8.
TarArchiveInputStream tarStream = new TarArchiveInputStream(decompressorStream, UTF_8.name());
TarArchiveEntry entry;
while ((entry = tarStream.getNextTarEntry()) != null) {
String entryName = entry.getName();
entryName = renameFiles.getOrDefault(entryName, entryName);
StripPrefixedPath entryPath = StripPrefixedPath.maybeDeprefix(entryName, prefix);
StripPrefixedPath entryPath =
StripPrefixedPath.maybeDeprefix(entryName.getBytes(UTF_8), prefix);
foundPrefix = foundPrefix || entryPath.foundPrefix();

if (prefix.isPresent() && !foundPrefix) {
Optional<String> suggestion =
CouldNotFindPrefixException.maybeMakePrefixSuggestion(entryPath.getPathFragment());
if (suggestion.isPresent()) {
availablePrefixes.add(suggestion.get());
}
CouldNotFindPrefixException.maybeMakePrefixSuggestion(entryPath.getPathFragment())
.ifPresent(availablePrefixes::add);
}

if (entryPath.skip()) {
Expand All @@ -80,8 +82,9 @@ public Path decompress(DecompressorDescriptor descriptor)
filePath.createDirectoryAndParents();
} else {
if (entry.isSymbolicLink() || entry.isLink()) {
PathFragment targetName = PathFragment.create(entry.getLinkName());
targetName = maybeDeprefixSymlink(targetName, prefix, descriptor.destinationPath());
PathFragment targetName =
maybeDeprefixSymlink(
entry.getLinkName().getBytes(UTF_8), prefix, descriptor.destinationPath());
if (entry.isSymbolicLink()) {
symlinks.put(filePath, targetName);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
package com.google.devtools.build.lib.bazel.repository;

import com.google.auto.value.AutoValue;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableMap;
import com.google.devtools.build.lib.vfs.Path;
import java.util.Map;
import java.util.Optional;

/** Description of an archive to be decompressed. */
@AutoValue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,17 @@

package com.google.devtools.build.lib.bazel.repository;

import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.devtools.build.lib.rules.repository.RepositoryFunction.RepositoryFunctionException;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.devtools.build.skyframe.SkyFunctionException.Transience;
import com.google.devtools.build.skyframe.SkyValue;
import java.io.IOException;
import java.util.Optional;
import java.util.Set;
import net.starlark.java.eval.Starlark;

Expand Down Expand Up @@ -59,9 +62,14 @@ private static String prepareErrorMessage(String prefix, Set<String> availablePr
}

public static Optional<String> maybeMakePrefixSuggestion(PathFragment pathFragment) {
return pathFragment.isMultiSegment()
? Optional.of(pathFragment.getSegment(0))
: Optional.absent();
if (!pathFragment.isMultiSegment()) {
return Optional.empty();
}
String rawFirstSegment = pathFragment.getSegment(0);
// Users can only specify prefixes from Starlark, where strings always use UTF-8, so we
// optimistically decode with it here even though we do not know the original encoding.
return Optional.of(new String(rawFirstSegment.getBytes(ISO_8859_1), UTF_8));
// return Optional.of(rawFirstSegment);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@

package com.google.devtools.build.lib.bazel.repository;

import com.google.common.base.Optional;
import static java.nio.charset.StandardCharsets.ISO_8859_1;

import com.google.common.base.Preconditions;
import com.google.devtools.build.lib.concurrent.ThreadSafety;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
import java.util.Optional;

/**
* Utility class for removing a prefix from an archive's path.
Expand All @@ -36,17 +38,19 @@ public final class StripPrefixedPath {
* could cause collisions, if a zip file had one entry for bin/some-binary and another entry for
* /bin/some-binary.
*
* Note that the prefix is stripped to move the files up one level, so if you have an entry
* <p>Note that the prefix is stripped to move the files up one level, so if you have an entry
* "foo/../bar" and a prefix of "foo", the result will be "bar" not "../bar".
*/
public static StripPrefixedPath maybeDeprefix(String entry, Optional<String> prefix) {
public static StripPrefixedPath maybeDeprefix(byte[] entry, Optional<String> prefix) {
Preconditions.checkNotNull(entry);
PathFragment entryPath = relativize(entry);
if (!prefix.isPresent()) {
if (prefix.isEmpty()) {
return new StripPrefixedPath(entryPath, false, false);
}

PathFragment prefixPath = relativize(prefix.get());
// Bazel parses Starlark files, which are the ultimate source of prefixes, as Latin-1
// (ISO-8859-1).
PathFragment prefixPath = relativize(prefix.get().getBytes(ISO_8859_1));
boolean found = false;
boolean skip = false;
if (entryPath.startsWith(prefixPath)) {
Expand All @@ -64,8 +68,8 @@ public static StripPrefixedPath maybeDeprefix(String entry, Optional<String> pre
/**
* Normalize the path and, if it is absolute, make it relative (e.g., /foo/bar becomes foo/bar).
*/
private static PathFragment relativize(String path) {
PathFragment entryPath = PathFragment.create(path);
private static PathFragment relativize(byte[] path) {
PathFragment entryPath = createPathFragment(path);
if (entryPath.isAbsolute()) {
entryPath = entryPath.toRelative();
}
Expand All @@ -79,10 +83,10 @@ private StripPrefixedPath(PathFragment pathFragment, boolean found, boolean skip
}

public static PathFragment maybeDeprefixSymlink(
PathFragment linkPathFragment, Optional<String> prefix, Path root) {
boolean wasAbsolute = linkPathFragment.isAbsolute();
byte[] rawTarget, Optional<String> prefix, Path root) {
boolean wasAbsolute = createPathFragment(rawTarget).isAbsolute();
// Strip the prefix from the link path if set.
linkPathFragment = maybeDeprefix(linkPathFragment.getPathString(), prefix).getPathFragment();
PathFragment linkPathFragment = maybeDeprefix(rawTarget, prefix).getPathFragment();
if (wasAbsolute) {
// Recover the path to an absolute path as maybeDeprefix() relativize the path
// even if the prefix is not set
Expand All @@ -103,4 +107,10 @@ public boolean skip() {
return skip;
}

static PathFragment createPathFragment(byte[] rawBytes) {
// Bazel internally represents paths as raw bytes by using the Latin-1 encoding, which has the
// property that (new String(bytes, ISO_8859_1)).getBytes(ISO_8859_1)) equals bytes for every
// byte array bytes.
return PathFragment.create(new String(rawBytes, ISO_8859_1));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
package com.google.devtools.build.lib.bazel.repository;

import static com.google.devtools.build.lib.bazel.repository.StripPrefixedPath.maybeDeprefixSymlink;
import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.io.ByteStreams;
import com.google.devtools.build.lib.bazel.repository.DecompressorValue.Decompressor;
Expand All @@ -29,11 +29,11 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import javax.annotation.Nullable;

Expand Down Expand Up @@ -89,7 +89,8 @@ public Path decompress(DecompressorDescriptor descriptor)
for (ZipFileEntry entry : entries) {
String entryName = entry.getName();
entryName = renameFiles.getOrDefault(entryName, entryName);
StripPrefixedPath entryPath = StripPrefixedPath.maybeDeprefix(entryName, prefix);
StripPrefixedPath entryPath =
StripPrefixedPath.maybeDeprefix(entryName.getBytes(UTF_8), prefix);
foundPrefix = foundPrefix || entryPath.foundPrefix();
if (entryPath.skip()) {
continue;
Expand All @@ -102,12 +103,9 @@ public Path decompress(DecompressorDescriptor descriptor)
Set<String> prefixes = new HashSet<>();
for (ZipFileEntry entry : entries) {
StripPrefixedPath entryPath =
StripPrefixedPath.maybeDeprefix(entry.getName(), Optional.absent());
Optional<String> suggestion =
CouldNotFindPrefixException.maybeMakePrefixSuggestion(entryPath.getPathFragment());
if (suggestion.isPresent()) {
prefixes.add(suggestion.get());
}
StripPrefixedPath.maybeDeprefix(entry.getName().getBytes(UTF_8), Optional.empty());
CouldNotFindPrefixException.maybeMakePrefixSuggestion(entryPath.getPathFragment())
.ifPresent(prefixes::add);
}
throw new CouldNotFindPrefixException(prefix.get(), prefixes);
}
Expand Down Expand Up @@ -146,17 +144,22 @@ private static void extractZipEntry(
// For symlinks, the "compressed data" is actually the target name.
int read = reader.getInputStream(entry).read(buffer);
Preconditions.checkState(read == buffer.length);
PathFragment target = PathFragment.create(new String(buffer, Charset.defaultCharset()));

PathFragment target = StripPrefixedPath.createPathFragment(buffer);
if (target.containsUplevelReferences()) {
PathFragment pointsTo = strippedRelativePath.getParentDirectory().getRelative(target);
if (pointsTo.containsUplevelReferences()) {
throw new IOException("Zip entries cannot refer to files outside of their directory: "
+ reader.getFilename() + " has a symlink " + strippedRelativePath + " pointing to "
+ target);
throw new IOException(
"Zip entries cannot refer to files outside of their directory: "
+ reader.getFilename()
+ " has a symlink "
+ strippedRelativePath
+ " pointing to "
+ new String(buffer, UTF_8));
}
}
target = maybeDeprefixSymlink(target, prefix, destinationDirectory);
symlinks.put(outputPath, target);

symlinks.put(outputPath, maybeDeprefixSymlink(buffer, prefix, destinationDirectory));
} else {
try (InputStream input = reader.getInputStream(entry);
OutputStream output = outputPath.getOutputStream()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,14 @@
package com.google.devtools.build.lib.bazel.repository;

import static com.google.common.truth.Truth.assertThat;
import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.base.Optional;
import com.google.devtools.build.lib.clock.BlazeClock;
import com.google.devtools.build.lib.util.OS;
import com.google.devtools.build.lib.vfs.DigestHashFunction;
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.devtools.build.lib.vfs.inmemoryfs.InMemoryFileSystem;
import java.util.Optional;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
Expand All @@ -33,31 +34,32 @@
public class StripPrefixedPathTest {
@Test
public void testStrip() {
StripPrefixedPath result = StripPrefixedPath.maybeDeprefix("foo/bar", Optional.of("foo"));
StripPrefixedPath result =
StripPrefixedPath.maybeDeprefix("foo/bar".getBytes(UTF_8), Optional.of("foo"));
assertThat(PathFragment.create("bar")).isEqualTo(result.getPathFragment());
assertThat(result.foundPrefix()).isTrue();
assertThat(result.skip()).isFalse();

result = StripPrefixedPath.maybeDeprefix("foo", Optional.of("foo"));
result = StripPrefixedPath.maybeDeprefix("foo".getBytes(UTF_8), Optional.of("foo"));
assertThat(result.skip()).isTrue();

result = StripPrefixedPath.maybeDeprefix("bar/baz", Optional.of("foo"));
result = StripPrefixedPath.maybeDeprefix("bar/baz".getBytes(UTF_8), Optional.of("foo"));
assertThat(result.foundPrefix()).isFalse();

result = StripPrefixedPath.maybeDeprefix("foof/bar", Optional.of("foo"));
result = StripPrefixedPath.maybeDeprefix("foof/bar".getBytes(UTF_8), Optional.of("foo"));
assertThat(result.foundPrefix()).isFalse();
}

@Test
public void testAbsolute() {
StripPrefixedPath result = StripPrefixedPath.maybeDeprefix(
"/foo/bar", Optional.<String>absent());
StripPrefixedPath result =
StripPrefixedPath.maybeDeprefix("/foo/bar".getBytes(UTF_8), Optional.empty());
assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("foo/bar"));

result = StripPrefixedPath.maybeDeprefix("///foo/bar/baz", Optional.<String>absent());
result = StripPrefixedPath.maybeDeprefix("///foo/bar/baz".getBytes(UTF_8), Optional.empty());
assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("foo/bar/baz"));

result = StripPrefixedPath.maybeDeprefix("/foo/bar/baz", Optional.of("/foo"));
result = StripPrefixedPath.maybeDeprefix("/foo/bar/baz".getBytes(UTF_8), Optional.of("/foo"));
assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("bar/baz"));
}

Expand All @@ -66,21 +68,21 @@ public void testWindowsAbsolute() {
if (OS.getCurrent() != OS.WINDOWS) {
return;
}
StripPrefixedPath result = StripPrefixedPath.maybeDeprefix(
"c:/foo/bar", Optional.<String>absent());
StripPrefixedPath result =
StripPrefixedPath.maybeDeprefix("c:/foo/bar".getBytes(UTF_8), Optional.empty());
assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("foo/bar"));
}

@Test
public void testNormalize() {
StripPrefixedPath result = StripPrefixedPath.maybeDeprefix(
"../bar", Optional.<String>absent());
StripPrefixedPath result =
StripPrefixedPath.maybeDeprefix("../bar".getBytes(UTF_8), Optional.empty());
assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("../bar"));

result = StripPrefixedPath.maybeDeprefix("foo/../baz", Optional.<String>absent());
result = StripPrefixedPath.maybeDeprefix("foo/../baz".getBytes(UTF_8), Optional.empty());
assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("baz"));

result = StripPrefixedPath.maybeDeprefix("foo/../baz", Optional.of("foo"));
result = StripPrefixedPath.maybeDeprefix("foo/../baz".getBytes(UTF_8), Optional.of("foo"));
assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("baz"));
}

Expand All @@ -91,23 +93,23 @@ public void testDeprefixSymlink() {

PathFragment relativeNoPrefix =
StripPrefixedPath.maybeDeprefixSymlink(
PathFragment.create("a/b"), Optional.absent(), fileSystem.getPath("/usr"));
"a/b".getBytes(UTF_8), Optional.empty(), fileSystem.getPath("/usr"));
// there is no attempt to get absolute path for the relative symlinks target path
assertThat(relativeNoPrefix).isEqualTo(PathFragment.create("a/b"));

PathFragment absoluteNoPrefix =
StripPrefixedPath.maybeDeprefixSymlink(
PathFragment.create("/a/b"), Optional.absent(), fileSystem.getPath("/usr"));
"/a/b".getBytes(UTF_8), Optional.empty(), fileSystem.getPath("/usr"));
assertThat(absoluteNoPrefix).isEqualTo(PathFragment.create("/usr/a/b"));

PathFragment absolutePrefix =
StripPrefixedPath.maybeDeprefixSymlink(
PathFragment.create("/root/a/b"), Optional.of("root"), fileSystem.getPath("/usr"));
"/root/a/b".getBytes(UTF_8), Optional.of("root"), fileSystem.getPath("/usr"));
assertThat(absolutePrefix).isEqualTo(PathFragment.create("/usr/a/b"));

PathFragment relativePrefix =
StripPrefixedPath.maybeDeprefixSymlink(
PathFragment.create("root/a/b"), Optional.of("root"), fileSystem.getPath("/usr"));
"root/a/b".getBytes(UTF_8), Optional.of("root"), fileSystem.getPath("/usr"));
// there is no attempt to get absolute path for the relative symlinks target path
assertThat(relativePrefix).isEqualTo(PathFragment.create("a/b"));
}
Expand Down
Loading

0 comments on commit cef4b5a

Please sign in to comment.