Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

metrics_impl fix 2 gpu hvd tests and ensure consistent detaching #1280

Merged
merged 51 commits into from
Sep 11, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
9f7daa1
update accuracy to accumulate _num_correct in a tensor on the right d…
n2cholas Aug 7, 2020
a87f93d
update loss metric to accumulate _sum in a tensor on the right device
n2cholas Aug 7, 2020
30b2e19
update mae metric to accumulate in a tensor on the right device
n2cholas Aug 7, 2020
a3e237c
update mpd metric to accumulate in a tensor on the right device
n2cholas Aug 7, 2020
7100176
update mse metric to accumulate in a tensor on the right device
n2cholas Aug 7, 2020
3228a0a
update top k accuracy metric to accumulate in a tensor on the right …
n2cholas Aug 7, 2020
412551e
update precision and recall metrics to accumulate in tensors on the r…
n2cholas Aug 8, 2020
4c4a76c
.....
n2cholas Aug 8, 2020
b1e6956
black formatting
n2cholas Aug 8, 2020
b081e92
reverted run*.sh
n2cholas Aug 10, 2020
a343c35
change all metrics default device to cpu except running_average
n2cholas Aug 16, 2020
8548601
Update ignite/metrics/precision.py
n2cholas Aug 16, 2020
b84226b
remove Optional type from metric devices since default is cpu
n2cholas Aug 16, 2020
685c23b
add comment explaining lack of detach in accuracy metrics
n2cholas Aug 16, 2020
0b4337d
update docstrings and docs
n2cholas Aug 17, 2020
b2fa213
Update ignite/metrics/accumulation.py
n2cholas Aug 17, 2020
90e0e9a
Update ignite/metrics/accumulation.py
n2cholas Aug 17, 2020
6c1fda4
Update ignite/metrics/accumulation.py
n2cholas Aug 17, 2020
c510e10
Update ignite/metrics/accuracy.py
n2cholas Aug 17, 2020
d5d4854
Update ignite/metrics/fbeta.py
n2cholas Aug 17, 2020
39515b7
Update ignite/metrics/loss.py
n2cholas Aug 17, 2020
3c49871
Update ignite/metrics/metric.py
n2cholas Aug 17, 2020
6de10dd
Update ignite/metrics/precision.py
n2cholas Aug 17, 2020
eca0bc3
Update ignite/metrics/recall.py
n2cholas Aug 17, 2020
ad7082e
add comment explaining lack of detach in metrics docs
n2cholas Aug 17, 2020
c057d52
Merge remote-tracking branch 'pytorch-ignite/metrics_impl' into metri…
n2cholas Aug 17, 2020
90b5b85
support device argument for running_average
n2cholas Aug 17, 2020
3481da1
update support for device argument for accumulation
n2cholas Aug 18, 2020
d340bb7
fix and improve device tests for metrics
n2cholas Aug 18, 2020
4824e24
fix and improve device tests for metrics
n2cholas Aug 18, 2020
1361866
fix TPU tests
n2cholas Aug 18, 2020
556262b
Apply suggestions from code review
vfdev-5 Aug 18, 2020
489620b
Apply suggestions from code review
vfdev-5 Aug 18, 2020
566e9bc
Merge branch 'metrics_impl' of https://github.com/pytorch/ignite into…
Aug 31, 2020
6edd30d
detach tensors earlier in update
Aug 31, 2020
375f91e
remove redundant to() call
Aug 31, 2020
960449c
ensure metrics aren't created on XLA devices
n2cholas Sep 7, 2020
96128fd
Merge branch 'metrics_impl' of https://github.com/pytorch/ignite into…
n2cholas Sep 7, 2020
d192a8f
Fixed isort
vfdev-5 Sep 8, 2020
23d72c0
move xla check to Metric.__init__ instead of individual metrics
n2cholas Sep 9, 2020
edc74a4
update xla tests
n2cholas Sep 9, 2020
2c6b7d2
replace deleted callable check
n2cholas Sep 9, 2020
6e64f37
remove redundant precision and recall __init__
n2cholas Sep 9, 2020
2828e74
replace precision/recall __init__ for docs rendering
n2cholas Sep 9, 2020
bcc3cbb
add support for metrics_lambda with components on diff devices
n2cholas Sep 9, 2020
bb257de
Merge branch 'metrics_impl' of https://github.com/pytorch/ignite into…
n2cholas Sep 9, 2020
2f7e542
fix epoch_metric xla test
n2cholas Sep 9, 2020
0d1ba42
Merge branch 'metrics_impl' of https://github.com/pytorch/ignite into…
n2cholas Sep 11, 2020
ebefd4b
detach output consistently for all metrics
n2cholas Sep 11, 2020
4ce863d
fix horovod two gpu tests
n2cholas Sep 11, 2020
8a94b8f
make confusion matrix detaches like other metrics
n2cholas Sep 11, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
update mpd metric to accumulate in a tensor on the right device
  • Loading branch information
n2cholas committed Aug 7, 2020
commit a3e237c42ecd1860ea31d6f539c9678cdbb36267
6 changes: 3 additions & 3 deletions ignite/metrics/mean_pairwise_distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,18 @@ def __init__(

@reinit__is_reduced
def reset(self) -> None:
    """Reset the metric to its initial state.

    The running sum is kept as a tensor on ``self._device`` (rather than a
    Python float) so it can participate in distributed all-reduce via
    ``sync_all_reduce``.
    """
    self._sum_of_distances = torch.tensor(0.0, device=self._device)
    self._num_examples = 0

@reinit__is_reduced
def update(self, output: Sequence[torch.Tensor]) -> None:
    """Accumulate pairwise distances for one batch.

    Args:
        output: pair ``(y_pred, y)`` of tensors with matching first dimension.
    """
    # Detach both tensors up front so the accumulator never retains an
    # autograd graph — keeps detaching consistent across all metrics.
    y_pred, y = output[0].detach(), output[1].detach()
    distances = pairwise_distance(y_pred, y, p=self._p, eps=self._eps)
    self._sum_of_distances += torch.sum(distances).to(self._device)
    self._num_examples += y.shape[0]

@sync_all_reduce("_sum_of_distances", "_num_examples")
def compute(self) -> Union[float, torch.Tensor]:
    """Return the mean pairwise distance over all examples seen so far.

    Raises:
        NotComputableError: if ``update`` has not been called at least once.
    """
    if self._num_examples == 0:
        # Fixed copy-paste error: the message previously named MeanAbsoluteError.
        raise NotComputableError(
            "MeanPairwiseDistance must have at least one example before it can be computed."
        )
    return self._sum_of_distances.item() / self._num_examples
29 changes: 29 additions & 0 deletions tests/ignite/metrics/test_mean_pairwise_distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,19 +78,43 @@ def update(engine, i):
assert pytest.approx(res) == true_res


def _test_distrib_accumulator_device(device):
    """Check that the accumulator tensor lives on the requested device."""
    target_device = torch.device(device)
    metric = MeanPairwiseDistance(device=target_device)
    assert metric._device == target_device

    predictions = torch.Tensor([[3.0, 4.0], [-3.0, -4.0]])
    targets = torch.zeros(2, 2)
    metric.update((predictions, targets))

    assert metric._sum_of_distances.device == target_device


def test_accumulator_detached():
    """Ensure the running sum is detached from the autograd graph."""
    metric = MeanPairwiseDistance()

    predictions = torch.tensor([[3.0, 4.0], [-3.0, -4.0]], requires_grad=True)
    targets = torch.zeros(2, 2)
    metric.update((predictions, targets))

    # A grad-requiring input must not leak into the accumulator.
    assert metric._sum_of_distances.requires_grad is False


@pytest.mark.distributed
@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
@pytest.mark.skipif(torch.cuda.device_count() < 1, reason="Skip if no GPU")
def test_distrib_gpu(local_rank, distributed_context_single_node_nccl):
    # Each rank runs both checks on its own GPU.
    dev = "cuda:{}".format(local_rank)
    _test_distrib_integration(dev)
    _test_distrib_accumulator_device(dev)


@pytest.mark.distributed
@pytest.mark.skipif(not idist.has_native_dist_support, reason="Skip if no native dist support")
def test_distrib_cpu(distributed_context_single_node_gloo):
    # Gloo backend: run both checks on CPU.
    dev = "cpu"
    _test_distrib_integration(dev)
    _test_distrib_accumulator_device(dev)


@pytest.mark.distributed
Expand All @@ -102,6 +126,7 @@ def test_distrib_hvd(gloo_hvd_executor):
nproc = 4 if not torch.cuda.is_available() else torch.cuda.device_count()

gloo_hvd_executor(_test_distrib_integration, (device,), np=nproc, do_init=True)
gloo_hvd_executor(_test_distrib_accumulator_device, (device,), np=nproc, do_init=True)


@pytest.mark.multinode_distributed
Expand All @@ -110,6 +135,7 @@ def test_distrib_hvd(gloo_hvd_executor):
def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
    # Multi-node gloo context runs on CPU only.
    dev = "cpu"
    _test_distrib_integration(dev)
    _test_distrib_accumulator_device(dev)


@pytest.mark.multinode_distributed
Expand All @@ -118,6 +144,7 @@ def test_multinode_distrib_cpu(distributed_context_multi_node_gloo):
def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
    # Pin this process to its node-local GPU, as reported by the context.
    dev = "cuda:{}".format(distributed_context_multi_node_nccl["local_rank"])
    _test_distrib_integration(dev)
    _test_distrib_accumulator_device(dev)


@pytest.mark.tpu
Expand All @@ -126,11 +153,13 @@ def test_multinode_distrib_gpu(distributed_context_multi_node_nccl):
def test_distrib_single_device_xla():
    # idist resolves the current XLA device for this process.
    dev = idist.device()
    _test_distrib_integration(dev)
    _test_distrib_accumulator_device(dev)


def _test_distrib_xla_nprocs(index):
    # `index` is the process index supplied by the XLA spawner; the device
    # itself is resolved per-process via idist.
    dev = idist.device()
    _test_distrib_integration(dev)
    _test_distrib_accumulator_device(dev)


@pytest.mark.tpu
Expand Down