Skip to content

Commit

Permalink
[ch] Dr. CI to use CH Part 2 (#5637)
Browse files Browse the repository at this point in the history
Follow up to #5634. These are
the one off queries that I needed to track down.

Surprisingly, none of the current PRs go through this path, so I used
some older PRs, like 135397, 114772, as tests
  • Loading branch information
clee2000 committed Sep 16, 2024
1 parent e4b265a commit 9a8f1a1
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 138 deletions.
28 changes: 6 additions & 22 deletions torchci/lib/commitUtils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import getRocksetClient from "lib/rockset";
import { queryClickhouse } from "./clickhouse";

const ELIGIBLE_COMMITS_FOR_SIMILAR_FAILURE_CHECK: { [sha: string]: boolean } =
{};
Expand All @@ -19,33 +19,17 @@ export async function isEligibleCommitForSimilarFailureCheck(
const query = `
SELECT DISTINCT
last_commit_sha,
merge_commit_sha,
_event_time
merge_commit_sha
FROM
commons.merges
default.merges
WHERE
(
last_commit_sha = : sha
last_commit_sha = {sha: String}
AND merge_commit_sha != ''
)
OR merge_commit_sha = : sha
OR merge_commit_sha = {sha: String}
`;

const rocksetClient = getRocksetClient();
const results = (
await rocksetClient.queries.query({
sql: {
query: query,
parameters: [
{
name: "sha",
type: "string",
value: sha,
},
],
},
})
).results;
const results = await queryClickhouse(query, { sha });

ELIGIBLE_COMMITS_FOR_SIMILAR_FAILURE_CHECK[sha] =
results !== undefined && results.length !== 0 ? true : false;
Expand Down
44 changes: 42 additions & 2 deletions torchci/lib/drciUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import { fetchIssuesByLabelCH } from "lib/fetchIssuesByLabel";
import {
hasS3Log,
isFailureFromPrevMergeCommit,
isSameAuthor,
isSameFailure,
} from "lib/jobUtils";
import { MAX_SIZE, OLDEST_FIRST, querySimilarFailures } from "lib/searchUtils";
Expand All @@ -15,6 +14,11 @@ import _ from "lodash";
import { Octokit } from "octokit";
import { isDrCIEnabled, isPyTorchPyTorch, isTime0, TIME_0 } from "./bot/utils";
import { queryClickhouseSaved } from "./clickhouse";
// Import itself to ensure that mocks can be applied, see
// https://stackoverflow.com/questions/51900413/jest-mock-function-doesnt-work-while-it-was-called-in-the-other-function
// https://stackoverflow.com/questions/45111198/how-to-mock-functions-in-the-same-module-using-jest
import * as thisModule from "./drciUtils";
import { getAuthors } from "./getAuthors";
import { IssueData } from "./types";
dayjs.extend(utc);

Expand Down Expand Up @@ -344,7 +348,7 @@ export async function hasSimilarFailures(
isSameFailure(job, failure) &&
// Run this check last because it costs one query to query for the commit
// author of the failure
!(await isSameAuthor(job, failure)) &&
!(await thisModule.isSameAuthor(job, failure)) &&
foundSimilarFailure === undefined
) {
// Save the first similar failure (the one with the highest score) and continue
Expand Down Expand Up @@ -489,3 +493,39 @@ export async function getPRMergeCommits(
return acc;
}, new Map<number, string[]>());
}

export async function isSameAuthor(
job: RecentWorkflowsData,
failure: RecentWorkflowsData
): Promise<boolean> {
const authors = await getAuthors([job, failure]);
// Extract the authors for each job
const jobAuthor =
job.head_sha in authors
? authors[job.head_sha]
: { email: "", commit_username: "", pr_username: "" };
const failureAuthor =
failure.head_sha in authors
? authors[failure.head_sha]
: { email: "", commit_username: "", pr_username: "" };

const isSameEmail =
jobAuthor.email !== "" &&
failureAuthor.email !== "" &&
jobAuthor.email === failureAuthor.email;
const isSameCommitUsername =
jobAuthor.commit_username !== "" &&
failureAuthor.commit_username !== "" &&
jobAuthor.commit_username === failureAuthor.commit_username;
const isSamePrUsername =
jobAuthor.pr_username !== "" &&
failureAuthor.pr_username !== "" &&
jobAuthor.pr_username === failureAuthor.pr_username;

// This function exists because we don't want to wrongly count similar failures
// from commits of the same author as flaky. Some common cases include:
// * ghstack
// * Draft commit
// * Cherry picking
return isSameEmail || isSameCommitUsername || isSamePrUsername;
}
86 changes: 11 additions & 75 deletions torchci/lib/getAuthors.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { RecentWorkflowsData } from "lib/types";
import _ from "lodash";
import getRocksetClient from "./rockset";
import { queryClickhouse } from "./clickhouse";

// NB: Surprisingly, jest cannot mock function in the same module so we need to
// keep this function here in its own module so that it can be mocked. See the
Expand All @@ -21,84 +21,20 @@ export async function getAuthors(jobs: RecentWorkflowsData[]): Promise<{
// events. We need both events because pull_request is when a PR is created while
// push is for a commit is pushed in PR or committed into trunk
const query = `
WITH email AS (
SELECT
w.head_commit.id AS sha,
w.head_commit.author.email
FROM
commons.workflow_run w
WHERE
ARRAY_CONTAINS(
SPLIT(: shas, ','),
w.head_commit.id
)
GROUP BY
sha,
email
),
commit_username AS (
SELECT
p.after AS sha,
p.head_commit.author.username
FROM
commons.push p
WHERE
ARRAY_CONTAINS(
SPLIT(: shas, ','),
p.after
)
GROUP BY
sha,
username
),
pr_username AS (
SELECT
pr.head.sha AS sha,
pr.user.login
FROM
commons.pull_request pr
WHERE
ARRAY_CONTAINS(
SPLIT(: shas, ','),
pr.head.sha
)
GROUP BY
sha,
login
)
SELECT
email.sha,
sha,
email,
IF(
commit_username.username IS NULL,
'', commit_username.username
) AS commit_username,
IF(
pr_username.login IS NULL, '', pr_username.login
) AS pr_username,
username AS commit_username,
login as pr_username
FROM
email
LEFT JOIN commit_username ON email.sha = commit_username.sha
LEFT JOIN pr_username ON email.sha = pr_username.sha
materialized_views.commit_authors final
where
sha in {shas: Array(String)}
`;

const rocksetClient = getRocksetClient();
const results = (
await rocksetClient.queries.query({
sql: {
query: query,
parameters: [
{
name: "shas",
type: "string",
value: _.map(jobs, (job: RecentWorkflowsData) => job.head_sha).join(
","
),
},
],
},
})
).results;
const results = await queryClickhouse(query, {
shas: _.map(jobs, (job: RecentWorkflowsData) => job.head_sha),
});

return results !== undefined ? _.keyBy(results, (record) => record.sha) : {};
return _.keyBy(results, (record) => record.sha);
}
37 changes: 0 additions & 37 deletions torchci/lib/jobUtils.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import dayjs from "dayjs";
import { jaroWinkler } from "jaro-winkler-typescript";
import { getAuthors } from "lib/getAuthors";
import {
BasicJobData,
IssueData,
Expand Down Expand Up @@ -331,42 +330,6 @@ export async function backfillMissingLog(
return res.status === 200;
}

export async function isSameAuthor(
job: RecentWorkflowsData,
failure: RecentWorkflowsData
): Promise<boolean> {
const authors = await getAuthors([job, failure]);
// Extract the authors for each job
const jobAuthor =
job.head_sha in authors
? authors[job.head_sha]
: { email: "", commit_username: "", pr_username: "" };
const failureAuthor =
failure.head_sha in authors
? authors[failure.head_sha]
: { email: "", commit_username: "", pr_username: "" };

const isSameEmail =
jobAuthor.email !== "" &&
failureAuthor.email !== "" &&
jobAuthor.email === failureAuthor.email;
const isSameCommitUsername =
jobAuthor.commit_username !== "" &&
failureAuthor.commit_username !== "" &&
jobAuthor.commit_username === failureAuthor.commit_username;
const isSamePrUsername =
jobAuthor.pr_username !== "" &&
failureAuthor.pr_username !== "" &&
jobAuthor.pr_username === failureAuthor.pr_username;

// This function exists because we don't want to wrongly count similar failures
// from commits of the same author as flaky. Some common cases include:
// * ghstack
// * Draft commit
// * Cherry picking
return isSameEmail || isSameCommitUsername || isSamePrUsername;
}

export function isFailureFromPrevMergeCommit(
failure: RecentWorkflowsData,
mergeCommits: String[]
Expand Down
3 changes: 2 additions & 1 deletion torchci/test/drciUtils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { TIME_0 } from "lib/bot/utils";
import { JobData, RecentWorkflowsData } from "lib/types";
import nock from "nock";
import * as commitUtils from "../lib/commitUtils";
import * as drciUtils from "../lib/drciUtils";
import {
getSuppressedLabels,
hasSimilarFailures,
Expand Down Expand Up @@ -47,7 +48,7 @@ describe("Test various utils used by Dr.CI", () => {

const mock = jest.spyOn(searchUtils, "searchSimilarFailures");
mock.mockImplementation(() => Promise.resolve({ jobs: [] }));
const mockJobUtils = jest.spyOn(jobUtils, "isSameAuthor");
const mockJobUtils = jest.spyOn(drciUtils, "isSameAuthor");
mockJobUtils.mockImplementation(() => Promise.resolve(false));
const mockCommitUtils = jest.spyOn(
commitUtils,
Expand Down
2 changes: 1 addition & 1 deletion torchci/test/jobUtils.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { TIME_0 } from "lib/bot/utils";
import { isSameAuthor } from "lib/drciUtils";
import {
BasicJobData,
IssueData,
Expand All @@ -13,7 +14,6 @@ import {
isDisabledTestMentionedInPR,
isFailureFromPrevMergeCommit,
isRecentlyCloseDisabledTest,
isSameAuthor,
isSameContext,
isSameFailure,
removeCancelledJobAfterRetry,
Expand Down

0 comments on commit 9a8f1a1

Please sign in to comment.