Skip to content

Commit

Permalink
Add compare_patch_filename.py (#2934)
Browse files Browse the repository at this point in the history
  • Loading branch information
li-boxuan committed Jul 15, 2024
1 parent 214f728 commit b834b35
Showing 1 changed file with 55 additions and 0 deletions.
55 changes: 55 additions & 0 deletions evaluation/swe_bench/scripts/setup/compare_patch_filename.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""
This script compares gold patches with OpenDevin-generated patches and checks whether
OpenDevin found the right (set of) files to modify.
"""

import argparse
import json
import re


def extract_modified_files(patch):
    """Return the set of file paths touched by a git-style patch.

    A file is recognised by a ``diff --git a/<path> b/...`` header at the
    start of a line; the captured path is the ``a/`` side of that header.

    Args:
        patch: Text of a diff containing ``diff --git`` headers. ``None`` or
            an empty string is treated as a patch that touches no files.

    Returns:
        A set of path strings, one per distinct ``diff --git`` header found.
    """
    if not patch:
        # A run that produced no patch at all has simply modified nothing;
        # without this guard ``None`` would fail on ``.split``.
        return set()

    file_pattern = re.compile(r'^diff --git a/(.*?) b/')
    return {
        match.group(1)
        for match in map(file_pattern.match, patch.split('\n'))
        if match
    }


def process_report(od_output_file):
    """Report how often the generated patch touches the gold patch's file.

    Reads a JSON-lines file with one evaluated instance per line. Each record
    must provide ``instance_id``, ``swe_instance['patch']`` (the gold patch)
    and ``git_patch`` (the generated patch). An instance counts as a success
    when every file modified by the gold patch is also modified by the
    generated patch. Each mismatch is printed as it is found, followed by a
    one-line summary with the success rate.

    Args:
        od_output_file: Path to the JSON-lines output file.

    Raises:
        AssertionError: If a gold patch does not modify exactly one file.
    """
    succ = 0
    fail = 0
    # Context manager so the handle is closed even if a record is malformed.
    with open(od_output_file) as report:
        for raw_line in report:
            if not raw_line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            record = json.loads(raw_line)
            instance_id = record['instance_id']
            gold_patch = record['swe_instance']['patch']
            # A null patch means nothing was generated; treat it as empty.
            generated_patch = record['git_patch'] or ''

            gold_modified_files = extract_modified_files(gold_patch)
            # swe-bench lite only: a gold patch always contains exactly one
            # file. Raised explicitly so the check survives ``python -O``.
            if len(gold_modified_files) != 1:
                raise AssertionError(
                    f'{instance_id}: expected exactly one file in gold patch, got {gold_modified_files}'
                )
            generated_modified_files = extract_modified_files(generated_patch)

            # Check if all files in gold_patch are also in generated_patch
            if gold_modified_files.issubset(generated_modified_files):
                succ += 1
            else:
                fail += 1
                print(
                    f'{instance_id}: file mismatch, gold = {gold_modified_files}, generated = {generated_modified_files}'
                )

    total = succ + fail
    # An empty report would otherwise raise ZeroDivisionError here.
    success_rate = succ / float(total) if total else 0.0
    print(
        f'\nSUMMARY: {succ} out of {total} instances found correct files to edit, success rate = {success_rate}'
    )


if __name__ == '__main__':
    # Command-line entry point: parse the report path and print the comparison.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--od_output_file',
        # Without a path there is nothing to process; fail with a usage
        # message instead of passing None on to open().
        required=True,
        help='Path to the OD output file',
    )
    args = parser.parse_args()

    process_report(args.od_output_file)

0 comments on commit b834b35

Please sign in to comment.