Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve chat context eval runner #5722

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions agent/src/cli/command-bench/command-bench.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import { isDefined, modelsService } from '@sourcegraph/cody-shared'
import { sleep } from '../../../../vscode/src/completions/utils'
import { setStaticResolvedConfigurationWithAuthCredentials } from '../../../../vscode/src/configuration'
import { localStorage } from '../../../../vscode/src/services/LocalStorageProvider'
import { createOrUpdateTelemetryRecorderProvider } from '../../../../vscode/src/services/telemetry-v2'
import { startPollyRecording } from '../../../../vscode/src/testutils/polly'
import { dotcomCredentials } from '../../../../vscode/src/testutils/testing-credentials'
import { allClientCapabilitiesEnabled } from '../../allClientCapabilitiesEnabled'
Expand Down Expand Up @@ -352,6 +353,8 @@ export const benchCommand = new commander.Command('bench')
async function evaluateWorkspace(options: CodyBenchOptions, recordingDirectory: string): Promise<void> {
console.log(`starting evaluation: fixture=${options.fixture.name} workspace=${options.workspace}`)

createOrUpdateTelemetryRecorderProvider(true)

const workspaceRootUri = vscode.Uri.from({ scheme: 'file', path: options.workspace })

const baseGlobalState: Record<string, any> = {}
Expand Down
34 changes: 34 additions & 0 deletions agent/src/cli/command-bench/strategy-chat-context-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,25 @@ import { parse } from 'csv-parse/sync'
import { createObjectCsvWriter } from 'csv-writer'
import { mkdirp } from 'fs-extra'
import isError from 'lodash/isError'
import { stringify as yamlStringify } from 'yaml'

/**
 * Client-side options for a chat context evaluation run. The same object is
 * recorded in the run's metadata (see `EvalOutput.clientOptions`).
 */
export interface ClientOptions {
/**
 * When true, the eval runner rewrites each example's query (via
 * `rewriteKeywordQuery`) before issuing the context search; when false the
 * original query is used as-is.
 */
rewrite: boolean
}

/**
 * Metadata describing one chat context evaluation run together with its
 * per-example results. This is the document that `writeYAMLMetadata`
 * serializes to YAML.
 */
export interface EvalOutput {
/** Timestamp of the run as a string (the eval runner passes an ISO 8601 value). */
evaluatedAt: string
/** Identifier of the Cody client build that produced the results. */
codyClientVersion: string
/** The client options the run was executed with. */
clientOptions: ClientOptions
/** Details of the site and user the evaluation ran against. */
siteUserMetadata: {
/** Endpoint URL of the site. */
url: string
/** Version string reported by the site. */
sourcegraphVersion: string
/** Name of the user the run was executed as. */
username: string
/** ID of the user the run was executed as. */
userId: string
/** Feature flag name mapped to its evaluated boolean value. */
evaluatedFeatureFlags: Record<string, boolean>
}
/**
 * Per-example outputs of the run. `writeYAMLMetadata` strips the `content`
 * of each `actualContext` item before writing the YAML file.
 */
examples: ExampleOutput[]
}

export interface EvalContextItem {
repoName: string
Expand Down Expand Up @@ -159,6 +178,21 @@ export async function readExamplesFromCSV(filePath: string): Promise<{
}
}

/**
 * Writes the evaluation metadata and per-example results to `outputFile` as
 * YAML, creating the parent directory first if it does not exist.
 *
 * The `content` of every `actualContext` item is left out of the written
 * document. This is done on a shallow copy, so the caller's `evalOutput`
 * (and the example objects it shares with other writers) is not modified.
 *
 * @param outputFile Path of the YAML file to write.
 * @param evalOutput Run metadata and example outputs to serialize.
 */
export async function writeYAMLMetadata(outputFile: string, evalOutput: EvalOutput): Promise<void> {
    await mkdirp(path.dirname(outputFile))

    // Copy only the levels we need to change; spreading keeps the original key
    // order, and `content: undefined` is dropped by the YAML serializer.
    const withoutContent: EvalOutput = {
        ...evalOutput,
        examples: evalOutput.examples.map(example => ({
            ...example,
            actualContext: example.actualContext.map(contextItem => ({
                ...contextItem,
                content: undefined,
            })),
        })),
    }

    await fs.writeFile(outputFile, yamlStringify(withoutContent))
}

export async function writeExamplesToCSV(outputFile: string, examples: ExampleOutput[]): Promise<void> {
await mkdirp(path.dirname(outputFile))
const csvWriter = createObjectCsvWriter({
Expand Down
94 changes: 77 additions & 17 deletions agent/src/cli/command-bench/strategy-chat-context.ts
Original file line number Diff line number Diff line change
@@ -1,55 +1,106 @@
import path from 'node:path'
import { graphqlClient, isError } from '@sourcegraph/cody-shared'
import {
PromptString,
SourcegraphCompletionsClient,

Check failure on line 4 in agent/src/cli/command-bench/strategy-chat-context.ts

View workflow job for this annotation

GitHub Actions / build

This import is unused.

Check failure on line 4 in agent/src/cli/command-bench/strategy-chat-context.ts

View workflow job for this annotation

GitHub Actions / test-unit (ubuntu, 20)

'SourcegraphCompletionsClient' is declared but its value is never read.

Check failure on line 4 in agent/src/cli/command-bench/strategy-chat-context.ts

View workflow job for this annotation

GitHub Actions / test-unit (windows, 20)

'SourcegraphCompletionsClient' is declared but its value is never read.

Check failure on line 4 in agent/src/cli/command-bench/strategy-chat-context.ts

View workflow job for this annotation

GitHub Actions / test-unit (ubuntu, 18)

'SourcegraphCompletionsClient' is declared but its value is never read.
graphqlClient,
isError,
} from '@sourcegraph/cody-shared'
import { SourcegraphNodeCompletionsClient } from '../../../../vscode/src/completions/nodeClient'
import { rewriteKeywordQuery } from '../../../../vscode/src/local-context/rewrite-keyword-query'
import { version } from '../../../package.json'
import type { RpcMessageHandler } from '../../jsonrpc-alias'
import type { CodyBenchOptions } from './command-bench'
import {
type ClientOptions,
type EvalContextItem,
type Example,
type ExampleOutput,
contextItemFromString,
contextItemToString,
readExamplesFromCSV,
writeExamplesToCSV,
writeYAMLMetadata,
} from './strategy-chat-context-types'

export async function evaluateChatContextStrategy(
client: RpcMessageHandler,
options: CodyBenchOptions
): Promise<void> {
const inputFilename = options.fixture.customConfiguration?.['cody-bench.chatContext.inputFile']
if (options.insecureTls) {
process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0'
}
if (!inputFilename) {
throw new Error(
'Missing cody-bench.chatContext.inputFile. To fix this problem, add "customConfiguration": { "cody-bench.chatContext.inputFile": "examples.csv" } to the cody-bench JSON config.'
)
}
const outputFilename = options.fixture.customConfiguration?.['cody-bench.chatContext.outputFile']
if (!outputFilename) {
throw new Error(
'Missing cody-bench.chatContext.outputFile. To fix this problem, add "customConfiguration": { "cody-bench.chatContext.outputFile": "output.csv" } to the cody-bench JSON config.'
)
const inputBasename = path.basename(inputFilename).replace(/\.csv$/, '')

const clientOptions: ClientOptions = options.fixture.customConfiguration?.[
'cody-bench.chatContext.clientOptions'
] ?? {
rewrite: false,
}
const inputFile = path.join(options.workspace, inputFilename)
const outputFile = path.join(options.snapshotDirectory, outputFilename)

if (options.insecureTls) {
process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0'
const siteVersion = await graphqlClient.getSiteVersion()
if (isError(siteVersion)) {
throw siteVersion
}
const userInfo = await graphqlClient.getCurrentUserInfo()
if (isError(userInfo)) {
throw userInfo
}
const evaluatedFeatureFlags = await graphqlClient.getEvaluatedFeatureFlags()
if (isError(evaluatedFeatureFlags)) {
throw evaluatedFeatureFlags
}
const shortSiteVersion = siteVersion.match(/-[0-9a-f]{7,40}$/)
? siteVersion.match(/-([0-9a-f]{7,40})$/)?.[1]
: siteVersion
const currentTimestamp = new Date().toISOString()

const outputBase = `${inputBasename}__${shortSiteVersion}`
const outputCSVFilename = `${outputBase}.csv`
const outputYAMLFilename = `${outputBase}.yaml`

const inputFile = path.join(options.workspace, inputFilename)
const outputCSVFile = path.join(options.snapshotDirectory, outputCSVFilename)
const outputYAMLFile = path.join(options.snapshotDirectory, outputYAMLFilename)

const { examples, ignoredRecords } = await readExamplesFromCSV(inputFile)

console.error(`ignoring ${ignoredRecords.length} malformed rows`)
if (!outputFile) {
throw new Error('no output file specified')
if (ignoredRecords.length > 0) {
console.log(`⚠ ignoring ${ignoredRecords.length} malformed rows`)
}

await runContextCommand(examples, outputFile)
const outputs = await runContextCommand({ rewrite: clientOptions.rewrite }, examples)
const codyClientVersion = process.env.CODY_COMMIT ?? version
await writeExamplesToCSV(outputCSVFile, outputs)
await writeYAMLMetadata(outputYAMLFile, {
evaluatedAt: currentTimestamp,
clientOptions,
codyClientVersion,
siteUserMetadata: {
url: options.srcEndpoint,
sourcegraphVersion: siteVersion,
username: userInfo?.username ?? '[none]',
userId: userInfo?.id ?? '[none]',
evaluatedFeatureFlags,
},
examples: outputs,
})
}

async function runContextCommand(examples: Example[], outputFile: string): Promise<void> {
async function runContextCommand(
clientOps: ClientOptions,
examples: Example[]
): Promise<ExampleOutput[]> {
const completionsClient = new SourcegraphNodeCompletionsClient()
const exampleOutputs: ExampleOutput[] = []

for (const example of examples) {
const { targetRepoRevs, query, essentialContext } = example
const { targetRepoRevs, query: origQuery, essentialContext } = example
const repoNames = targetRepoRevs.map(repoRev => repoRev.repoName)
const repoIDNames = await graphqlClient.getRepoIds(repoNames, repoNames.length + 10)
if (isError(repoIDNames)) {
Expand All @@ -63,6 +114,15 @@
)
}
const repoIDs = repoIDNames.map(repoIDName => repoIDName.id)

let query = origQuery
if (clientOps.rewrite) {
query = await rewriteKeywordQuery(
completionsClient,
PromptString.unsafe_fromUserQuery(origQuery)

Check failure on line 122 in agent/src/cli/command-bench/strategy-chat-context.ts

View workflow job for this annotation

GitHub Actions / safe-prompts-lint

New `unsafe_fromUserQuery` invocation found. This is not safe. Please use one of the PromptString helpers instead.
)
}

const resultsResp = await graphqlClient.contextSearch({
repoIDs,
query,
Expand Down Expand Up @@ -94,7 +154,7 @@
})
}

await writeExamplesToCSV(outputFile, exampleOutputs)
return exampleOutputs
}

function contextOverlaps(
Expand Down
Loading