forked from google-research/google-research
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bin_methods.py
88 lines (70 loc) · 2.79 KB
/
bin_methods.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# coding=utf-8
# Copyright 2022 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Binning methods."""
import abc
import numpy as np
class BinMethod(abc.ABC):
    """Abstract base for strategies that map confidence scores to bins.

    Concrete subclasses implement `compute_bin_indices`; the number of bins
    to produce is stored on the instance as `num_bins`.
    """

    def __init__(self, num_bins):
        """Store the number of bins.

        Args:
          num_bins: number of bins the scores are divided into.
        """
        self.num_bins = num_bins

    @abc.abstractmethod
    def compute_bin_indices(self, scores):
        """Assign a bin index for each score.

        Args:
          scores: np.array of shape (num_examples, num_classes) containing the
            model's confidence scores

        Returns:
          bin_indices: np.array of shape (num_examples, num_classes) containing
            the bin assignment for each score
        """
class BinEqualWidth(BinMethod):
    """Divide the scores into equal-width bins."""

    def compute_bin_indices(self, scores):
        """Assign a bin index for each score assuming equal width bins.

        The bin edges are `np.linspace(0.0, 1.0, self.num_bins + 1)`. Each bin
        is closed on the left and open on the right, except the last bin,
        which also contains 1.0.

        Args:
          scores: array-like of shape (num_examples, num_classes) containing
            the model's confidence scores, expected to lie in [0, 1].

        Returns:
          bin_indices: np.array with the same shape as `scores` containing the
            0-indexed bin assignment for each score. Every index lies in
            [0, num_bins - 1]; scores outside [0, 1] are placed in the nearest
            (first or last) bin.
        """
        scores = np.asarray(scores)
        edges = np.linspace(0.0, 1.0, self.num_bins + 1)
        # np.digitize uses one-indexed bins, switch to using 0-indexed.
        bin_indices = np.digitize(scores, edges, right=False) - 1
        # Clamp to the valid range. This puts scores equal to 1.0 in the last
        # bin and also keeps scores that fall marginally outside [0, 1] (e.g.
        # from floating-point rounding) from producing index -1 or num_bins.
        return np.clip(bin_indices, 0, self.num_bins - 1)
class BinEqualExamples(BinMethod):
    """Divide the scores into bins with equal number of examples."""

    def compute_bin_indices(self, scores):
        """Assign a bin index for each score with equal num examples per bin.

        Binning is done independently for every class (column). Within a
        column the examples are ranked by ascending score, and the example at
        0-based rank `r` is placed in bin `(r * num_bins) // num_examples`.
        When `num_examples` is not a multiple of `num_bins`, bin sizes differ
        by at most one.

        Args:
          scores: array-like of shape [N, K] containing the model's confidence

        Returns:
          bin_indices: np.ndarray of shape [N, K] containing the bin assignment
            for each score
        """
        scores = np.asarray(scores)
        num_examples, num_classes = scores.shape[:2]

        # Rank -> bin lookup, identical for every class, so build it once.
        # Integer arithmetic is used on purpose: the floating-point form
        # floor((r / N) * num_bins) can round just below the exact value
        # (e.g. 1 / 49 * 49 == 0.9999999999999999) and shift an example into
        # the previous bin. Since r < N, the result is always < num_bins.
        rank_to_bin = (np.arange(num_examples) * self.num_bins) // num_examples

        bin_indices = np.zeros((num_examples, num_classes), dtype=int)
        for k in range(num_classes):
            # Stable sort so tied scores are ranked by their original row
            # order, making the assignment reproducible.
            order = np.argsort(scores[:, k], kind="stable")
            bin_indices[order, k] = rank_to_bin
        return bin_indices