Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize representation of runfiles in compact execution log #23321

Closed
wants to merge 51 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
7b47753
Add RunfilesTree proto
fmeum Aug 16, 2024
09bd497
WIP
fmeum Aug 16, 2024
287b3a8
Fix build
fmeum Aug 16, 2024
86ff166
Basic test setup
fmeum Aug 16, 2024
bcfc317
WIP
fmeum Aug 16, 2024
528d1be
Almost passing tests
fmeum Aug 16, 2024
13564a1
Simplify and support legacyExternalRunfiles
fmeum Aug 16, 2024
6b93b38
Fix tests
fmeum Aug 21, 2024
3f9e234
Fix tests and add docs
fmeum Aug 21, 2024
0ac5e81
Explain order
fmeum Aug 21, 2024
1e02f91
Fix test
fmeum Aug 22, 2024
80f81ac
First comments
fmeum Aug 29, 2024
cf0df2b
WIP
fmeum Aug 29, 2024
181901e
Basic runfiles test
fmeum Aug 30, 2024
eafc3e7
Key on legacyExternalRunfiles
fmeum Aug 30, 2024
880b67c
Fix collisions
fmeum Aug 30, 2024
4efa0f4
Improve collision tests
fmeum Aug 30, 2024
3806f46
Add root symlink collision test
fmeum Aug 30, 2024
8989415
Add test for `.runfile` file
fmeum Aug 30, 2024
4f321a7
Fix .runfile logic
fmeum Aug 30, 2024
4711a24
Test for cross type collision
fmeum Aug 30, 2024
06aaf17
Test postorder collisions
fmeum Aug 30, 2024
d63c480
Do not distinguish IDs by type
fmeum Aug 30, 2024
9d3560b
Store dense IDs
fmeum Aug 30, 2024
595613d
Optimize IDs
fmeum Aug 31, 2024
152a60f
Add better postorder test
fmeum Aug 31, 2024
a85be80
Test symlinks
fmeum Aug 31, 2024
0953237
Fix post order
fmeum Aug 31, 2024
8f29296
Cleanup
fmeum Aug 31, 2024
e8f854b
Add logEntryWithoutId
fmeum Sep 1, 2024
7e3df9a
Simplify ID check
fmeum Sep 1, 2024
6c245cc
Sibling test
fmeum Sep 1, 2024
5d4642e
Test with sibling layout
fmeum Sep 2, 2024
64d2c82
Encode sibling
fmeum Sep 2, 2024
a66984b
Implement support for sibling layout
fmeum Sep 2, 2024
2fb002e
Add comments and tests
fmeum Sep 2, 2024
a4413d4
More docs and uint32
fmeum Sep 2, 2024
961218c
Don't retain test runfiles trees
fmeum Sep 2, 2024
56d7a30
Fix build
fmeum Sep 2, 2024
8637fe7
Add more docs
fmeum Sep 2, 2024
b5efe8a
Inline var
fmeum Sep 2, 2024
dab1a5a
Do not flatten symlinks
fmeum Sep 15, 2024
2d87ea9
Better empty files test
fmeum Sep 17, 2024
cc18347
Update docs
fmeum Sep 17, 2024
56b6861
Simplify InputMetadataProvider
fmeum Sep 17, 2024
818f3a0
Cleanup
fmeum Sep 17, 2024
8eadcde
Add docs and test
fmeum Sep 17, 2024
8eb2cef
Simplify supporting both versions
fmeum Sep 17, 2024
786440e
Remove unused methods
fmeum Sep 17, 2024
baa6b38
Fix build
fmeum Sep 18, 2024
f8ea5b2
Address comments
fmeum Sep 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add comments and tests
  • Loading branch information
fmeum committed Sep 17, 2024
commit 2fb002efe1fb2fdd8a8dfb4e7566cb847c3408da
Original file line number Diff line number Diff line change
Expand Up @@ -38,38 +38,50 @@
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.function.Consumer;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import javax.annotation.Nullable;

/** Reconstructs an execution log in expanded format from the compact format representation. */
public final class SpawnLogReconstructor implements MessageInputStream<SpawnExec> {
// pkg/source.txt --> repo: null, path: "pkg/source.txt"
// external/repo/pkg/source.txt --> repo: "repo", path: "pkg/source.txt"
@VisibleForTesting
static final Pattern DEFAULT_SOURCE_FILE_RUNFILES_PATH_PATTERN =
Pattern.compile("(?:external/(?<repo>[^/]+)/)?(?<path>.+)");

// bazel-out/k8-fastbuild/bin/pkg/source.txt --> repo: null, path: "pkg/source.txt"
// blaze-out/k8-fastbuild/bin/external/repo/pkg/source.txt --> repo: "repo", path:
// "pkg/source.txt"
@VisibleForTesting
static final Pattern DEFAULT_GENERATED_FILE_RUNFILES_PATH_PATTERN =
// Examples:
// * bazel-out/k8-fastbuild/bin/pkg/file.txt (repo: null, path: "pkg/file.txt")
// * bazel-out/k8-fastbuild/bin/external/some_repo/pkg/file.txt (repo: "some_repo", path:
// "pkg/file.txt")
private static final Pattern DEFAULT_GENERATED_FILE_RUNFILES_PATH_PATTERN =
Pattern.compile("(?:bazel|blaze)-out/[^/]+/[^/]+/(?:external/(?<repo>[^/]+)/)?(?<path>.+)");

// pkg/source.txt --> repo: null, path: "pkg/source.txt"
// ../repo/pkg/source.txt --> repo: "repo", path: "pkg/source.txt"
@VisibleForTesting
static final Pattern SIBLING_LAYOUT_SOURCE_FILE_RUNFILES_PATH_PATTERN =
Pattern.compile("(?:\\.\\./(?<repo>[^/]+)/)?(?<path>.+)");
// Examples:
// * pkg/file.txt (repo: null, path: "pkg/file.txt")
// * external/some_repo/pkg/file.txt (repo: "some_repo", path: "pkg/file.txt")
private static final Pattern DEFAULT_SOURCE_FILE_RUNFILES_PATH_PATTERN =
Pattern.compile("(?:external/(?<repo>[^/]+)/)?(?<path>.+)");
tjgq marked this conversation as resolved.
Show resolved Hide resolved

@VisibleForTesting
static final Pattern SIBLING_LAYOUT_GENERATED_FILE_RUNFILES_PATH_PATTERN =
Pattern.compile("(?:bazel|blaze)-out/(?:(?<repo>[^/]+(?!/coverage-metadata/)(?=/[^/]+-[^/]+/))/)?[^/]+/[^/]+/(?<path>.+)");
// Examples:
// * bazel-out/k8-fastbuild/bin/pkg/file.txt (repo: null, path: "pkg/file.txt")
// * bazel-out/some_repo/k8-fastbuild/bin/pkg/file.txt (repo: "some_repo", path: "pkg/file.txt")
// * bazel-out/k8-fastbuild/k8-fastbuild/bin/pkg/file.txt (repo: "k8-fastbuild", path:
// "pkg/file.txt")
//
// Repo names are distinguished from mnemonics via a positive lookahead on the following segment,
// which in the case of a repo name is a mnemonic and thus contains a hyphen, whereas a mnemonic
// is followed by an output directory name, which does not contain a hyphen unless it is
// "coverage-metadata" (which in turn is not likely to be a mnemonic).
private static final Pattern SIBLING_LAYOUT_GENERATED_FILE_RUNFILES_PATH_PATTERN =
Pattern.compile(
"(?:bazel|blaze)-out/(?:(?<repo>[^/]+(?=/[^/]+-[^/]+/)(?!/coverage-metadata/))/)?[^/]+/[^/]+/(?<path>.+)");
tjgq marked this conversation as resolved.
Show resolved Hide resolved

// Examples:
// * pkg/file.txt (repo: null, path: "pkg/file.txt")
// * ../some_repo/pkg/file.txt (repo: "some_repo", path: "pkg/file.txt")
private static final Pattern SIBLING_LAYOUT_SOURCE_FILE_RUNFILES_PATH_PATTERN =
Pattern.compile("(?:\\.\\./(?<repo>[^/]+)/)?(?<path>.+)");

private final ZstdInputStream in;

/** Represents a reconstructed input file, symlink, or directory. */
private sealed interface Input {
String path();

Expand All @@ -95,8 +107,7 @@ record Directory(String path, Collection<Protos.File> files) implements Input {}
private final ArrayList<Object> inputMap = new ArrayList<>();
private String hashFunctionName = "";
private String workspaceRunfilesDirectory = "";
private Pattern generatedFileRunfilesPathPattern;
private Pattern sourceFileRunfilesPathPattern;
private boolean siblingRepositoryLayout = false;

public SpawnLogReconstructor(InputStream in) throws IOException {
this.in = new ZstdInputStream(in);
Expand All @@ -113,13 +124,7 @@ public SpawnExec read() throws IOException {
case INVOCATION -> {
hashFunctionName = entry.getInvocation().getHashFunctionName();
workspaceRunfilesDirectory = entry.getInvocation().getWorkspaceRunfilesDirectory();
if (entry.getInvocation().getSiblingRepositoryLayout()) {
generatedFileRunfilesPathPattern = SIBLING_LAYOUT_GENERATED_FILE_RUNFILES_PATH_PATTERN;
sourceFileRunfilesPathPattern = SIBLING_LAYOUT_SOURCE_FILE_RUNFILES_PATH_PATTERN;
} else {
generatedFileRunfilesPathPattern = DEFAULT_GENERATED_FILE_RUNFILES_PATH_PATTERN;
sourceFileRunfilesPathPattern = DEFAULT_SOURCE_FILE_RUNFILES_PATH_PATTERN;
}
siblingRepositoryLayout = entry.getInvocation().getSiblingRepositoryLayout();
}
case FILE -> putInput(entry.getId(), reconstructFile(entry.getFile()));
case DIRECTORY -> putInput(entry.getId(), reconstructDir(entry.getDirectory()));
Expand Down Expand Up @@ -322,35 +327,44 @@ private Input.Directory reconstructRunfilesDir(ExecLogEntry.RunfilesTree runfile
"%s/%s/.runfile".formatted(runfilesTree.getPath(), workspaceRunfilesDirectory);
runfiles.put(dotRunfilePath, File.newBuilder().setPath(dotRunfilePath).build());
}
// Copy to avoid retaining the entire runfiles map.
return new Input.Directory(runfilesTree.getPath(), ImmutableList.copyOf(runfiles.values()));
}

private boolean hasWorkspaceRunfilesDirectory(String path) {
Matcher matcher = generatedFileRunfilesPathPattern.matcher(path);
@VisibleForTesting
static MatchResult extractRunfilesPath(String execPath, boolean siblingRepositoryLayout) {
Matcher matcher =
(siblingRepositoryLayout
? SIBLING_LAYOUT_GENERATED_FILE_RUNFILES_PATH_PATTERN
: DEFAULT_GENERATED_FILE_RUNFILES_PATH_PATTERN)
.matcher(execPath);
if (matcher.matches()) {
return matcher.group("repo") == null;
} else {
matcher = sourceFileRunfilesPathPattern.matcher(path);
Preconditions.checkState(matcher.matches());
return matcher.group("repo") == null;
return matcher;
}
matcher =
(siblingRepositoryLayout
? SIBLING_LAYOUT_SOURCE_FILE_RUNFILES_PATH_PATTERN
: DEFAULT_SOURCE_FILE_RUNFILES_PATH_PATTERN)
.matcher(execPath);
Preconditions.checkState(matcher.matches());
return matcher;
}

private Stream<String> getRunfilesPaths(String originalPath, boolean legacyExternalRunfiles) {
Matcher matcher = generatedFileRunfilesPathPattern.matcher(originalPath);
if (!matcher.matches()) {
matcher = sourceFileRunfilesPathPattern.matcher(originalPath);
}
Preconditions.checkState(matcher.matches());
String repo = matcher.group("repo");
String path = matcher.group("path");
private boolean hasWorkspaceRunfilesDirectory(String path) {
return extractRunfilesPath(path, siblingRepositoryLayout).group("repo") == null;
}

private Stream<String> getRunfilesPaths(String execPath, boolean legacyExternalRunfiles) {
MatchResult matchResult = extractRunfilesPath(execPath, siblingRepositoryLayout);
String repo = matchResult.group("repo");
String repoRelativePath = matchResult.group("path");
if (repo == null) {
return Stream.of(workspaceRunfilesDirectory + "/" + path);
return Stream.of(workspaceRunfilesDirectory + "/" + repoRelativePath);
} else {
Stream.Builder<String> paths = Stream.builder();
paths.add(repo + "/" + path);
paths.add(repo + "/" + repoRelativePath);
if (legacyExternalRunfiles) {
paths.add("%s/external/%s/%s".formatted(workspaceRunfilesDirectory, repo, path));
paths.add("%s/external/%s/%s".formatted(workspaceRunfilesDirectory, repo, repoRelativePath));
}
return paths.build();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package com.google.devtools.build.lib.exec;

import static com.google.common.truth.Truth.assertThat;

import java.util.regex.MatchResult;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

@RunWith(JUnit4.class)
public class SpawnLogReconstructorTest {

@Test
public void extractRunfilesPathDefault() {
assertThat(matchDefault("file.txt")).isEqualTo(new Result(null, "file.txt"));
assertThat(matchDefault("pkg/file.txt")).isEqualTo(new Result(null, "pkg/file.txt"));
assertThat(matchDefault("pkg/external/file.txt"))
.isEqualTo(new Result(null, "pkg/external/file.txt"));
assertThat(matchDefault("external/some_repo/pkg/file.txt"))
.isEqualTo(new Result("some_repo", "pkg/file.txt"));
assertThat(matchDefault("external/some-repo+/pkg/file.txt"))
.isEqualTo(new Result("some-repo+", "pkg/file.txt"));
assertThat(matchDefault("bazel-out/k8-fastbuild/bin/pkg/file.txt"))
.isEqualTo(new Result(null, "pkg/file.txt"));
assertThat(matchDefault("bazel-out/k8-fastbuild/bin/pkg/external/file.txt"))
.isEqualTo(new Result(null, "pkg/external/file.txt"));
assertThat(matchDefault("bazel-out/k8-fastbuild/bin/external/some_repo/pkg/file.txt"))
.isEqualTo(new Result("some_repo", "pkg/file.txt"));
assertThat(matchDefault("bazel-out/k8-fastbuild/bin/external/some-repo+/pkg/file.txt"))
.isEqualTo(new Result("some-repo+", "pkg/file.txt"));
}

@Test
public void extractRunfilesPathSibling() {
assertThat(matchSibling("file.txt")).isEqualTo(new Result(null, "file.txt"));
assertThat(matchSibling("pkg/file.txt")).isEqualTo(new Result(null, "pkg/file.txt"));
assertThat(matchSibling("pkg/external/file.txt"))
.isEqualTo(new Result(null, "pkg/external/file.txt"));
assertThat(matchSibling("external/pkg/file.txt"))
.isEqualTo(new Result(null, "external/pkg/file.txt"));
assertThat(matchSibling("../some_repo/pkg/file.txt"))
.isEqualTo(new Result("some_repo", "pkg/file.txt"));
assertThat(matchSibling("../some-repo+/pkg/file.txt"))
.isEqualTo(new Result("some-repo+", "pkg/file.txt"));
assertThat(matchSibling("bazel-out/k8-fastbuild/bin/pkg/file.txt"))
.isEqualTo(new Result(null, "pkg/file.txt"));
assertThat(matchSibling("bazel-out/k8-fastbuild/bin/pkg/external/file.txt"))
.isEqualTo(new Result(null, "pkg/external/file.txt"));
assertThat(matchSibling("bazel-out/k8-fastbuild/bin/external/pkg/file.txt"))
.isEqualTo(new Result(null, "external/pkg/file.txt"));
assertThat(matchSibling("bazel-out/some_repo/k8-fastbuild/bin/pkg/file.txt"))
.isEqualTo(new Result("some_repo", "pkg/file.txt"));
assertThat(matchSibling("bazel-out/some-repo+/k8-fastbuild/bin/pkg/file.txt"))
.isEqualTo(new Result("some-repo+", "pkg/file.txt"));
assertThat(matchSibling("bazel-out/k8-fastbuild/coverage-metadata/bin/other/pkg/gen.txt"))
.isEqualTo(new Result(null, "bin/other/pkg/gen.txt"));
assertThat(
matchSibling(
"bazel-out/some_repo/k8-fastbuild/coverage-metadata/bin/other/pkg/gen.txt"))
.isEqualTo(new Result("some_repo", "bin/other/pkg/gen.txt"));
}

private record Result(String repo, String path) {}

private static Result matchDefault(String path) {
MatchResult result =
SpawnLogReconstructor.extractRunfilesPath(path, /* siblingRepositoryLayout= */ false);
return new Result(result.group("repo"), result.group("path"));
}

private static Result matchSibling(String path) {
MatchResult result =
SpawnLogReconstructor.extractRunfilesPath(path, /* siblingRepositoryLayout= */ true);
return new Result(result.group("repo"), result.group("path"));
}
}