Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize representation of runfiles in compact execution log #23321

Closed
wants to merge 51 commits into from
Closed
Changes from 1 commit
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
7b47753
Add RunfilesTree proto
fmeum Aug 16, 2024
09bd497
WIP
fmeum Aug 16, 2024
287b3a8
Fix build
fmeum Aug 16, 2024
86ff166
Basic test setup
fmeum Aug 16, 2024
bcfc317
WIP
fmeum Aug 16, 2024
528d1be
Almost passing tests
fmeum Aug 16, 2024
13564a1
Simplify and support legacyExternalRunfiles
fmeum Aug 16, 2024
6b93b38
Fix tests
fmeum Aug 21, 2024
3f9e234
Fix tests and add docs
fmeum Aug 21, 2024
0ac5e81
Explain order
fmeum Aug 21, 2024
1e02f91
Fix test
fmeum Aug 22, 2024
80f81ac
First comments
fmeum Aug 29, 2024
cf0df2b
WIP
fmeum Aug 29, 2024
181901e
Basic runfiles test
fmeum Aug 30, 2024
eafc3e7
Key on legacyExternalRunfiles
fmeum Aug 30, 2024
880b67c
Fix collisions
fmeum Aug 30, 2024
4efa0f4
Improve collision tests
fmeum Aug 30, 2024
3806f46
Add root symlink collision test
fmeum Aug 30, 2024
8989415
Add test for `.runfile` file
fmeum Aug 30, 2024
4f321a7
Fix .runfile logic
fmeum Aug 30, 2024
4711a24
Test for cross type collision
fmeum Aug 30, 2024
06aaf17
Test postorder collisions
fmeum Aug 30, 2024
d63c480
Do not distinguish IDs by type
fmeum Aug 30, 2024
9d3560b
Store dense IDs
fmeum Aug 30, 2024
595613d
Optimize IDs
fmeum Aug 31, 2024
152a60f
Add better postorder test
fmeum Aug 31, 2024
a85be80
Test symlinks
fmeum Aug 31, 2024
0953237
Fix post order
fmeum Aug 31, 2024
8f29296
Cleanup
fmeum Aug 31, 2024
e8f854b
Add logEntryWithoutId
fmeum Sep 1, 2024
7e3df9a
Simplify ID check
fmeum Sep 1, 2024
6c245cc
Sibling test
fmeum Sep 1, 2024
5d4642e
Test with sibling layout
fmeum Sep 2, 2024
64d2c82
Encode sibling
fmeum Sep 2, 2024
a66984b
Implement support for sibling layout
fmeum Sep 2, 2024
2fb002e
Add comments and tests
fmeum Sep 2, 2024
a4413d4
More docs and uint32
fmeum Sep 2, 2024
961218c
Don't retain test runfiles trees
fmeum Sep 2, 2024
56d7a30
Fix build
fmeum Sep 2, 2024
8637fe7
Add more docs
fmeum Sep 2, 2024
b5efe8a
Inline var
fmeum Sep 2, 2024
dab1a5a
Do not flatten symlinks
fmeum Sep 15, 2024
2d87ea9
Better empty files test
fmeum Sep 17, 2024
cc18347
Update docs
fmeum Sep 17, 2024
56b6861
Simplify InputMetadataProvider
fmeum Sep 17, 2024
818f3a0
Cleanup
fmeum Sep 17, 2024
8eadcde
Add docs and test
fmeum Sep 17, 2024
8eb2cef
Simplify supporting both versions
fmeum Sep 17, 2024
786440e
Remove unused methods
fmeum Sep 17, 2024
baa6b38
Fix build
fmeum Sep 18, 2024
f8ea5b2
Address comments
fmeum Sep 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fix post order
  • Loading branch information
fmeum committed Sep 17, 2024
commit 09532374b5e202a079fd4b32be3d83e657c5d250
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,15 @@
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.function.Supplier;
import java.util.function.BiConsumer;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import javax.annotation.Nullable;
Expand Down Expand Up @@ -137,13 +138,15 @@ private SpawnExec reconstructSpawnExec(ExecLogEntry.Spawn entry) throws IOExcept
builder.setPlatform(entry.getPlatform());
}

SortedMap<String, File> inputs = reconstructInputs(entry.getInputSetId(), TreeMap::new).inputs;
SortedMap<String, File> toolInputs =
reconstructInputs(entry.getToolSetId(), TreeMap::new).inputs;
SortedMap<String, File> inputs = new TreeMap<>();
visitPostOrder(entry.getInputSetId(), inputs::put, /* hasMainRepoInput= */ true);
HashSet<String> toolInputs = new HashSet<>();
visitPostOrder(
entry.getToolSetId(), (path, file) -> toolInputs.add(path), /* hasMainRepoInput= */ false);

for (Map.Entry<String, File> e : inputs.entrySet()) {
File file = e.getValue();
if (toolInputs.containsKey(e.getKey())) {
if (toolInputs.contains(e.getKey())) {
file = file.toBuilder().setIsTool(true).build();
}
builder.addInputs(file);
Expand Down Expand Up @@ -182,44 +185,55 @@ private SpawnExec reconstructSpawnExec(ExecLogEntry.Spawn entry) throws IOExcept
private record FlattenedInputSet<T extends Map<String, File>>(
T inputs, boolean hasWorkspaceRunfilesDirectory) {}

private <T extends Map<String, File>> FlattenedInputSet<T> reconstructInputs(
int setId, Supplier<T> newMap) throws IOException {
T inputs = newMap.get();
ArrayDeque<Integer> setsToVisit = new ArrayDeque<>();
HashSet<Integer> visited = new HashSet<>();
boolean hasWorkspaceRunfilesDirectory = false;
if (setId != 0) {
setsToVisit.addLast(setId);
visited.add(setId);
private boolean visitPostOrder(
int setId, BiConsumer<String, File> consumer, boolean hasMainRepoInput) throws IOException {
if (setId == 0) {
return hasMainRepoInput;
}
ArrayDeque<Integer> setsToVisit = new ArrayDeque<>();
HashMap<Integer, Integer> previousVisitCount = new HashMap<>();
setsToVisit.push(setId);
while (!setsToVisit.isEmpty()) {
ExecLogEntry.InputSet set = getInputSet(setsToVisit.removeFirst());
for (int inputId : set.getInputIdsList()) {
if (!visited.add(inputId)) {
continue;
}
Input input = getInput(inputId);
switch (input) {
case Input.File(File file) -> inputs.put(file.getPath(), file);
case Input.Directory(String ignored, Collection<File> files) -> {
for (File dirFile : files) {
inputs.put(dirFile.getPath(), dirFile);
int currentSetId = setsToVisit.pop();
// In case order matters (it does for runfiles, but not for inputs), we visit the set in
// post-order (corresponds to Order#COMPILE_ORDER). Transitive sets are visited before direct
// children; both are visited in left-to-right order.
switch (previousVisitCount.merge(currentSetId, 0, (oldValue, newValue) -> 1)) {
case 0 -> {
// First visit, queue transitive sets for visit before revisiting the current set.
setsToVisit.push(currentSetId);
for (int transitiveSetId :
getInputSet(currentSetId).getTransitiveSetIdsList().reversed()) {
if (!previousVisitCount.containsKey(transitiveSetId)) {
setsToVisit.push(transitiveSetId);
}
}
case Input.Symlink(File symlink) -> inputs.put(symlink.getPath(), symlink);
}
// This is bug-for-bug compatible with the implementation in Runfiles by considering
// an empty non-external directory as a runfiles entry under the workspace runfiles
// directory even though it won't be materialized as one.
hasWorkspaceRunfilesDirectory |= !EXTERNAL_PREFIX_PATTERN.matcher(input.path()).matches();
}
for (int transitiveSetId : set.getTransitiveSetIdsList()) {
if (visited.add(transitiveSetId)) {
setsToVisit.addLast(transitiveSetId);
case 1 -> {
// Second visit, visit the direct inputs only.
for (int inputId : getInputSet(currentSetId).getInputIdsList()) {
if (previousVisitCount.put(inputId, 1) != null) {
continue;
}
Input input = getInput(inputId);
switch (input) {
case Input.File(File file) -> consumer.accept(file.getPath(), file);
case Input.Directory(String ignored, Collection<File> files) -> {
for (File dirFile : files) {
consumer.accept(dirFile.getPath(), dirFile);
}
}
case Input.Symlink(File symlink) -> consumer.accept(symlink.getPath(), symlink);
}
// This is bug-for-bug compatible with the implementation in Runfiles by considering
// an empty non-external directory as a runfiles entry under the workspace runfiles
// directory even though it won't be materialized as one.
hasMainRepoInput |= !EXTERNAL_PREFIX_PATTERN.matcher(input.path()).matches();
}
}
}
}
return new FlattenedInputSet<>(inputs, hasWorkspaceRunfilesDirectory);
return hasMainRepoInput;
}

private Input.Directory reconstructDir(ExecLogEntry.Directory dir) {
Expand Down Expand Up @@ -258,10 +272,8 @@ private Input.Directory reconstructRunfilesDir(ExecLogEntry.RunfilesTree runfile
throws IOException {
// Preserve the order of the inputs to resolve conflicts in the same order as the real Runfiles
// implementation.
var flattenedInputs = reconstructInputs(runfilesTree.getInputSetId(), LinkedHashMap::new);
boolean hasWorkspaceRunfilesDirectory = flattenedInputs.hasWorkspaceRunfilesDirectory;
LinkedHashMap<String, File> builder =
new LinkedHashMap<>(runfilesTree.getSymlinkTargetIdCount() + flattenedInputs.inputs.size());
LinkedHashMap<String, File> builder = new LinkedHashMap<>();
boolean hasWorkspaceRunfilesDirectory = false;
for (var symlink : runfilesTree.getSymlinkTargetIdMap().entrySet()) {
hasWorkspaceRunfilesDirectory |=
symlink.getKey().startsWith(workspaceRunfilesDirectory + "/");
Expand All @@ -270,7 +282,11 @@ private Input.Directory reconstructRunfilesDir(ExecLogEntry.RunfilesTree runfile
builder.put(newPath, file);
}
}
Collection<File> inputs = flattenedInputs.inputs.values();
LinkedHashMap<String, File> flattenedInputs = new LinkedHashMap<>();
hasWorkspaceRunfilesDirectory =
visitPostOrder(
runfilesTree.getInputSetId(), flattenedInputs::put, hasWorkspaceRunfilesDirectory);
Collection<File> inputs = flattenedInputs.values();
inputs.stream()
.flatMap(
file ->
Expand Down