Add compare_patch_filename.py

All-Hands-AI · xingyaoww · Jul 15, 2024 · Jul 15, 2024 · Jul 15, 2024 · f70bba9a43b51184903cff4d0e411a5affbfc79a
commit f70bba9a43b51184903cff4d0e411a5affbfc79a
diff --git a/evaluation/swe_bench/scripts/setup/compare_patch_filename.py b/evaluation/swe_bench/scripts/setup/compare_patch_filename.py
@@ -0,0 +1,55 @@
+"""
+This script compares gold patches with OpenDevin-generated patches and checks whether
+OpenDevin found the right (set of) files to modify.
+"""
+
+import argparse
+import json
+import re
+
+
+def extract_modified_files(patch):
+    """Return the set of file paths named in a git diff's file headers.
+
+    Every line starting with ``diff --git a/<path> b/`` contributes ``<path>``.
+    A ``None`` or empty ``patch`` yields an empty set instead of raising.
+    """
+    if not patch:
+        return set()
+    file_pattern = re.compile(r'^diff --git a/(.*?) b/', re.MULTILINE)
+    return set(file_pattern.findall(patch))
+
+
+def process_report(od_output_file):
+    """Print how often generated patches edit the file the gold patch edits.
+
+    Reads ``od_output_file`` as JSON Lines (``instance_id``, ``git_patch``,
+    ``swe_instance.patch`` per record); asserts one file per gold patch.
+    """
+    succ = fail = 0
+    with open(od_output_file) as f:  # close the file even if a record fails
+        for raw_line in f:
+            if not raw_line.strip():
+                continue  # json.loads would reject a blank line
+            record = json.loads(raw_line)
+            instance_id = record['instance_id']
+            gold = extract_modified_files(record['swe_instance']['patch'])
+            # swe-bench lite only: a gold patch always contains exactly one file
+            assert len(gold) == 1
+            generated = extract_modified_files(record['git_patch'])
+            if gold.issubset(generated):
+                succ += 1
+            else:
+                fail += 1
+                print(f'{instance_id}: file mismatch, gold = {gold}, generated = {generated}')
+    total = succ + fail
+    rate = succ / total if total else 0.0  # no records: avoid ZeroDivisionError
+    print(f'\nSUMMARY: {succ} out of {total} instances found correct files to edit, success rate = {rate}')
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    # required=True: a missing flag would otherwise pass None to open(), which
+    # fails with an unhelpful TypeError instead of a usage message.
+    parser.add_argument('--od_output_file', required=True, help='Path to the OD output file')
+    process_report(parser.parse_args().od_output_file)