
Commit

Merge 6d0fa80 into 298df24
esantorella committed Jun 23, 2023
2 parents 298df24 + 6d0fa80 commit ca51f0f
Showing 3 changed files with 36 additions and 13 deletions.
12 changes: 9 additions & 3 deletions ax/benchmark/benchmark.py
@@ -127,7 +127,7 @@ def benchmark_replication(
     )


-def benchmark_test(
+def benchmark_one_method_problem(
     problem: BenchmarkProblem,
     method: BenchmarkMethod,
     seeds: Iterable[int],
@@ -140,12 +140,18 @@ def benchmark_test(
     )


-def benchmark_full_run(
+def benchmark_multiple_problems_methods(
     problems: Iterable[BenchmarkProblem],
     methods: Iterable[BenchmarkMethod],
     seeds: Iterable[int],
 ) -> List[AggregatedBenchmarkResult]:
+    """
+    For each `problem` and `method` in the Cartesian product of `problems` and
+    `methods`, run the replication on each seed in `seeds` and get the results
+    as an `AggregatedBenchmarkResult`, then return a list of each
+    `AggregatedBenchmarkResult`.
+    """
     return [
-        benchmark_test(problem=p, method=m, seeds=seeds)
+        benchmark_one_method_problem(problem=p, method=m, seeds=seeds)
         for p, m in product(problems, methods)
     ]
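
As the new docstring notes, the renamed benchmark_multiple_problems_methods simply calls benchmark_one_method_problem once per (problem, method) pair. Below is a minimal standalone sketch of that Cartesian-product loop, using placeholder strings rather than real BenchmarkProblem / BenchmarkMethod objects; the names and values are illustrative only.

from itertools import product

# Placeholders; in Ax these would be BenchmarkProblem / BenchmarkMethod instances.
problems = ["branin", "hartmann6"]
methods = ["sobol", "sobol+gpei"]
seeds = (0, 1)

# benchmark_multiple_problems_methods returns one AggregatedBenchmarkResult per pair,
# so two problems x two methods would yield four aggregated results.
for problem, method in product(problems, methods):
    print(f"would run benchmark_one_method_problem({problem!r}, {method!r}, seeds={seeds})")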
14 changes: 11 additions & 3 deletions ax/benchmark/benchmark_result.py
@@ -9,7 +9,7 @@
 import numpy as np
 from ax.core.experiment import Experiment
 from ax.utils.common.base import Base
-from numpy import nanmean, ndarray
+from numpy import nanmean, nanquantile, ndarray
 from pandas import DataFrame
 from scipy.stats import sem

@@ -18,6 +18,8 @@
 # `BenchmarkResult` as return type annotation, used for serialization and rendering
 # in the UI.

+PERCENTILES = [0.25, 0.5, 0.75]
+

 @dataclass(frozen=True, eq=False)
 class BenchmarkResult(Base):
@@ -78,7 +80,7 @@ def from_benchmark_results(
         trace_stats = {}
         for name in ("optimization_trace", "score_trace"):
             step_data = zip(*(getattr(res, name) for res in results))
-            stats = _get_stats(step_data=step_data)
+            stats = _get_stats(step_data=step_data, percentiles=PERCENTILES)
             trace_stats[name] = stats

         # Return aggregated results
@@ -91,9 +93,15 @@
         )


-def _get_stats(step_data: Iterable[np.ndarray]) -> Dict[str, List[float]]:
+def _get_stats(
+    step_data: Iterable[np.ndarray],
+    percentiles: List[float],
+) -> Dict[str, List[float]]:
+    quantiles = []
     stats = {"mean": [], "sem": []}
     for step_vals in step_data:
         stats["mean"].append(nanmean(step_vals))
         stats["sem"].append(sem(step_vals, ddof=1, nan_policy="propagate"))
+        quantiles.append(nanquantile(step_vals, q=percentiles))
+    stats.update({f"P{100 * p:.0f}": q for p, q in zip(percentiles, zip(*quantiles))})
     return stats
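
For reference, the percentile aggregation added to _get_stats can be exercised on its own: for each optimization step it records the mean, SEM, and the 25th/50th/75th percentiles across replications, then pivots the per-step quantiles into "P25"/"P50"/"P75" entries. A self-contained sketch with made-up data (two steps, three replications each):

import numpy as np
from numpy import nanmean, nanquantile
from scipy.stats import sem

percentiles = [0.25, 0.5, 0.75]
# One array per optimization step; each entry is one replication's value at that step.
step_data = [np.array([1.0, 2.0, 3.0]), np.array([2.0, 4.0, 6.0])]

quantiles = []
stats = {"mean": [], "sem": []}
for step_vals in step_data:
    stats["mean"].append(nanmean(step_vals))
    stats["sem"].append(sem(step_vals, ddof=1, nan_policy="propagate"))
    quantiles.append(nanquantile(step_vals, q=percentiles))
# zip(*quantiles) regroups the per-step triples by percentile, one sequence per key.
stats.update({f"P{100 * p:.0f}": list(q) for p, q in zip(percentiles, zip(*quantiles))})
print(stats)  # P25 per step: 1.5, 3.0; P50: 2.0, 4.0; P75: 2.5, 5.0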
23 changes: 16 additions & 7 deletions ax/benchmark/tests/test_benchmark.py
@@ -5,9 +5,9 @@

 import numpy as np
 from ax.benchmark.benchmark import (
-    benchmark_full_run,
+    benchmark_multiple_problems_methods,
+    benchmark_one_method_problem,
     benchmark_replication,
-    benchmark_test,
 )
 from ax.benchmark.benchmark_method import BenchmarkMethod
 from ax.benchmark.benchmark_problem import SingleObjectiveBenchmarkProblem
@@ -58,9 +58,9 @@ def test_replication_moo(self) -> None:

         self.assertTrue(np.all(res.score_trace <= 100))

-    def test_test(self) -> None:
+    def test_benchmark_one_method_problem(self) -> None:
         problem = get_single_objective_benchmark_problem()
-        agg = benchmark_test(
+        agg = benchmark_one_method_problem(
             problem=problem,
             method=get_sobol_benchmark_method(),
             seeds=(0, 1),
@@ -75,16 +75,23 @@ def test_test(self) -> None:
             "All experiments must have 4 trials",
         )

+        for col in ["mean", "P25", "P50", "P75"]:
+            self.assertTrue((agg.score_trace[col] <= 100).all())
+
     @fast_botorch_optimize
-    def test_full_run(self) -> None:
-        aggs = benchmark_full_run(
+    def test_benchmark_multiple_problems_methods(self) -> None:
+        aggs = benchmark_multiple_problems_methods(
             problems=[get_single_objective_benchmark_problem()],
             methods=[get_sobol_benchmark_method(), get_sobol_gpei_benchmark_method()],
             seeds=(0, 1),
         )

         self.assertEqual(len(aggs), 2)

+        for agg in aggs:
+            for col in ["mean", "P25", "P50", "P75"]:
+                self.assertTrue((agg.score_trace[col] <= 100).all())
+
     def test_timeout(self) -> None:
         problem = SingleObjectiveBenchmarkProblem.from_botorch_synthetic(
             test_problem_class=Branin,
@@ -116,7 +123,9 @@ def test_timeout(self) -> None:
         )

         # Each replication will have a different number of trials
-        result = benchmark_test(problem=problem, method=method, seeds=(0, 1, 2, 3))
+        result = benchmark_one_method_problem(
+            problem=problem, method=method, seeds=(0, 1, 2, 3)
+        )

         # Test the traces get composited correctly. The AggregatedResult's traces
         # should be the length of the shortest trace in the BenchmarkResults
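
With score_trace now carrying "P25"/"P50"/"P75" columns alongside "mean" and "sem", downstream code could, for example, plot the median score trace with an interquartile band. A hypothetical snippet, using a hand-built DataFrame in place of a real AggregatedBenchmarkResult.score_trace:

import matplotlib.pyplot as plt
import pandas as pd

# Hand-built stand-in for AggregatedBenchmarkResult.score_trace; values are invented.
score_trace = pd.DataFrame(
    {
        "mean": [20.0, 55.0, 80.0],
        "P25": [10.0, 45.0, 70.0],
        "P50": [18.0, 56.0, 82.0],
        "P75": [30.0, 65.0, 90.0],
    }
)
trials = range(len(score_trace))
plt.plot(trials, score_trace["P50"], label="median score")
plt.fill_between(trials, score_trace["P25"], score_trace["P75"], alpha=0.3, label="interquartile range")
plt.xlabel("trial")
plt.ylabel("score")
plt.legend()
plt.show()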
