From 2b5ad9812f24e5d89098d8d62e33eb32e19e7087 Mon Sep 17 00:00:00 2001 From: Jiaxin-Pei Date: Mon, 18 Mar 2024 23:50:55 -0400 Subject: [PATCH 1/6] Creating the folder for experiments, adding initial analysis of Devin's outputs on SWE-bench --- experiments/README.md | 11 + .../SWE-bench/data/devin_output_diffs.csv | 37687 ++++++++++++++++ .../notebooks/devin_eval_analysis.ipynb | 193 + 3 files changed, 37891 insertions(+) create mode 100644 experiments/README.md create mode 100644 experiments/SWE-bench/data/devin_output_diffs.csv create mode 100644 experiments/SWE-bench/notebooks/devin_eval_analysis.ipynb diff --git a/experiments/README.md b/experiments/README.md new file mode 100644 index 000000000000..d2b1bac65745 --- /dev/null +++ b/experiments/README.md @@ -0,0 +1,11 @@ +# Experiments and Evaluations + +This folder contains code and resources to run experiments and evaluations. Ideally +each subfolder contains a specific benchmark or experiment. + + +## SWE-bench +- data + - devin_output_diffs.csv: the outputs from devin +- notebooks + - devin_eval_analysis.ipynb: notebook analyzing devin's outputs diff --git a/experiments/SWE-bench/data/devin_output_diffs.csv b/experiments/SWE-bench/data/devin_output_diffs.csv new file mode 100644 index 000000000000..04e9899559ee --- /dev/null +++ b/experiments/SWE-bench/data/devin_output_diffs.csv @@ -0,0 +1,37687 @@ +repo,instance_id,content,pass or fail,subset +astropy/astropy,astropy__astropy-13745,"diff --git a/astropy/coordinates/angles.py b/astropy/coordinates/angles.py +index 8e3f56abd2..4abf34ee66 100644 +--- a/astropy/coordinates/angles.py ++++ b/astropy/coordinates/angles.py +@@ -591,11 +591,25 @@ class Latitude(Angle): + else: + limit = u.degree.to(angles.unit, 90.0) + +- # This invalid catch block can be removed when the minimum numpy +- # version is >= 1.19 (NUMPY_LT_1_19) +- with np.errstate(invalid='ignore'): +- invalid_angles = (np.any(angles.value < -limit) or +- np.any(angles.value > limit)) ++ # Determine the data type of the input angle ++ angle_dtype = angles.dtype ++ ++ # Set a tolerance level based on the data type ++ if angle_dtype == np.float32: ++ tolerance = 1e-6 # example tolerance for float32 ++ else: ++ tolerance = 0 # no tolerance for other data types ++ ++ # Ensure limit is treated as a float for the arithmetic operations ++ limit = float(limit) ++ ++ # Adjust the limit values based on the tolerance ++ adjusted_lower_limit = -limit - tolerance ++ adjusted_upper_limit = limit + tolerance ++ ++ # Perform the comparison with the adjusted limits ++ invalid_angles = (np.any(angles.value < adjusted_lower_limit) or ++ np.any(angles.value > adjusted_upper_limit)) + if invalid_angles: + raise ValueError('Latitude angle(s) must be within -90 deg <= angle <= 90 deg, ' + 'got {}'.format(angles.to(u.degree))) +",pass,devin +astropy/astropy,astropy__astropy-14995,"diff --git a/astropy/nddata/mixins/ndarithmetic.py b/astropy/nddata/mixins/ndarithmetic.py +index 4153dfccb2..8ce47ac370 100644 +--- a/astropy/nddata/mixins/ndarithmetic.py ++++ b/astropy/nddata/mixins/ndarithmetic.py +@@ -513,14 +513,10 @@ class NDArithmeticMixin: + ``handle_mask`` must create (and copy) the returned mask. + """""" + # If only one mask is present we need not bother about any type checks +- if ( +- self.mask is None and operand is not None and operand.mask is None +- ) or handle_mask is None: +- return None +- elif self.mask is None and operand is not None: ++ if self.mask is None and operand is not None: + # Make a copy so there is no reference in the result. + return deepcopy(operand.mask) +- elif operand is None: ++ elif operand is None or operand.mask is None: + return deepcopy(self.mask) + else: + # Now lets calculate the resulting mask (operation enforces copy) +",pass,devin +astropy/astropy,astropy__astropy-7606,"diff --git a/astropy/units/core.py b/astropy/units/core.py +index e9bd986d33..1b202395e9 100644 +--- a/astropy/units/core.py ++++ b/astropy/units/core.py +@@ -1710,6 +1710,8 @@ class UnrecognizedUnit(IrreducibleUnit): + _unrecognized_operator + + def __eq__(self, other): ++ if other is None: ++ return False + other = Unit(other, parse_strict='silent') + return isinstance(other, UnrecognizedUnit) and self.name == other.name + +",pass,devin +astropy/astropy,astropy__astropy-7671,"diff --git a/astropy/utils/collections.py b/astropy/utils/collections.py +deleted file mode 100644 +index 3a9b839548..0000000000 +--- a/astropy/utils/collections.py ++++ /dev/null +@@ -1,57 +0,0 @@ +-# Licensed under a 3-clause BSD style license - see LICENSE.rst +-"""""" +-A module containing specialized collection classes. +-"""""" +- +- +-class HomogeneousList(list): +- """""" +- A subclass of list that contains only elements of a given type or +- types. If an item that is not of the specified type is added to +- the list, a `TypeError` is raised. +- """""" +- def __init__(self, types, values=[]): +- """""" +- Parameters +- ---------- +- types : sequence of types +- The types to accept. +- +- values : sequence, optional +- An initial set of values. +- """""" +- self._types = types +- super().__init__() +- self.extend(values) +- +- def _assert(self, x): +- if not isinstance(x, self._types): +- raise TypeError( +- ""homogeneous list must contain only objects of "" +- ""type '{}'"".format(self._types)) +- +- def __iadd__(self, other): +- self.extend(other) +- return self +- +- def __setitem__(self, idx, value): +- if isinstance(idx, slice): +- value = list(value) +- for item in value: +- self._assert(item) +- else: +- self._assert(value) +- return super().__setitem__(idx, value) +- +- def append(self, x): +- self._assert(x) +- return super().append(x) +- +- def insert(self, i, x): +- self._assert(x) +- return super().insert(i, x) +- +- def extend(self, x): +- for item in x: +- self._assert(item) +- super().append(item) +diff --git a/astropy/utils/introspection.py b/astropy/utils/introspection.py +index 3e784f9fc3..a4f8ebc513 100644 +--- a/astropy/utils/introspection.py ++++ b/astropy/utils/introspection.py +@@ -139,10 +139,12 @@ def minversion(module, version, inclusive=True, version_path='__version__'): + else: + have_version = resolve_name(module.__name__, version_path) + ++ from pkg_resources import parse_version ++ + if inclusive: +- return LooseVersion(have_version) >= LooseVersion(version) ++ return parse_version(have_version) >= parse_version(version) + else: +- return LooseVersion(have_version) > LooseVersion(version) ++ return parse_version(have_version) > parse_version(version) + + + def find_current_module(depth=1, finddiff=False): +",pass,devin +django/django,django__django-10606,"diff --git a/django/db/models/fields/reverse_related.py b/django/db/models/fields/reverse_related.py +index 700410a086..71f940aeff 100644 +--- a/django/db/models/fields/reverse_related.py ++++ b/django/db/models/fields/reverse_related.py +@@ -114,7 +114,7 @@ class ForeignObjectRel(FieldCacheMixin): + self.related_model._meta.model_name, + ) + +- def get_choices(self, include_blank=True, blank_choice=BLANK_CHOICE_DASH, ordering=()): ++ def get_choices(self, include_blank=True, blank_choice=BLANK_CHOICE_DASH, limit_choices_to=None, ordering=()): + """""" + Return choices with a default blank choices included, for use + as