This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[MXNET-1107] Fix CPUPinned unexpected behaviour #12031

Merged (20 commits) on Oct 19, 2018
address Haibin and Lin's comments
Carl Yang committed Oct 18, 2018
commit cfdcb297921e274bb779d46f659c7b3b79f8688d
src/common/cuda_utils.h: 6 additions, 9 deletions
@@ -283,24 +283,21 @@ inline DType __device__ CudaMin(DType a, DType b) {
   return a < b ? a : b;
 }
 
-class SetDevice {
+class DeviceStore {
  public:
   /*! \brief default constructor- only optionally restores previous device */
-  explicit SetDevice(bool restore = true) : restore_(restore) {
+  explicit DeviceStore(bool restore = true) : restore_(restore) {
     if (restore_)
       CUDA_CALL(cudaGetDevice(&restore_device_));
   }
 
-  /*! \brief standard constuctor- cudaSetDevice + optionally restore previous device */
-  explicit SetDevice(int device, bool restore = true) : restore_(restore) {
+  ~DeviceStore() {
     if (restore_)
-      CUDA_CALL(cudaGetDevice(&restore_device_));
-    CUDA_CALL(cudaSetDevice(device));
+      CUDA_CALL(cudaSetDevice(restore_device_));
   }
 
-  ~SetDevice() {
-    if (restore_)
-      CUDA_CALL(cudaSetDevice(restore_device_));
+  void SetDevice(int device) {
+    CUDA_CALL(cudaSetDevice(device));
   }
 
  private:
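For reference, a minimal usage sketch of the renamed class (illustrative, not part of the diff; it assumes a CUDA build with CUDA_CALL and the DeviceStore definition above in scope, and the device id and function name are arbitrary):

// RAII device switching: the constructor records the currently active device,
// SetDevice() switches it, and the destructor restores the original device
// when the object goes out of scope.
void CopyOnGpu1(float* dst, const float* src, size_t bytes) {
  mxnet::common::cuda::DeviceStore device_store;   // saves the current device
  device_store.SetDevice(1);                       // work below runs on GPU 1
  CUDA_CALL(cudaMemcpy(dst, src, bytes, cudaMemcpyDefault));
}                                                  // previous device restored here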
src/common/rtc.cc: 2 additions, 2 deletions
@@ -77,12 +77,12 @@ CUfunction CudaModule::Chunk::GetFunction(
   CHECK_EQ(ctx.dev_mask(), Context::kGPU)
       << "CUDA Runtime compilation only supports Nvidia GPU.";
   auto iter = mod_.find(ctx.dev_id);
-  mxnet::common::cuda::SetDevice set_device;
+  mxnet::common::cuda::DeviceStore device_store;
   CUmodule module;
   if (iter != mod_.end()) {
     module = iter->second;
   } else {
-    CUDA_CALL(cudaSetDevice(ctx.dev_id));
+    device_store.SetDevice(ctx.dev_id);
     CUDA_DRIVER_CALL(cuModuleLoadDataEx(&module, ptx_, 0, 0, 0));
     mod_[ctx.dev_id] = module;
   }
src/engine/stream_manager.h: 4 additions, 4 deletions
@@ -66,7 +66,7 @@ RunContext StreamManager<kNumGpus, kStreams>::GetRunContext(
     Context const& ctx) {
   RunContext ret;
 #if MXNET_USE_CUDA
-  mxnet::common::cuda::SetDevice set_device;
+  mxnet::common::cuda::DeviceStore device_store;
 #endif
   switch (ctx.dev_mask()) {
     case cpu::kDevMask:
@@ -75,7 +75,7 @@ RunContext StreamManager<kNumGpus, kStreams>::GetRunContext(
     case gpu::kDevMask: {
 #if MXNET_USE_CUDA
       std::size_t use_counter;
-      CUDA_CALL(cudaSetDevice(ctx.dev_id));
+      device_store.SetDevice(ctx.dev_id);
       {
         std::lock_guard<std::mutex> lock{mutex_};
         auto&& counter = gpu_cnt_.at(ctx.dev_id);
@@ -105,15 +105,15 @@ RunContext StreamManager<kNumGpus, kStreams>::GetIORunContext(
     Context const& ctx) {
   RunContext ret;
 #if MXNET_USE_CUDA
-  mxnet::common::cuda::SetDevice set_device;
+  mxnet::common::cuda::DeviceStore device_store;
 #endif
   switch (ctx.dev_mask()) {
     case cpu::kDevMask:
       ret = RunContext{ctx, nullptr};
       break;
     case gpu::kDevMask: {
 #if MXNET_USE_CUDA
-      CUDA_CALL(cudaSetDevice(ctx.dev_id));
+      device_store.SetDevice(ctx.dev_id);
       {
         std::lock_guard<std::mutex> lock{mutex_};
         if (gpu_io_streams_.at(ctx.dev_id) == nullptr) {
src/kvstore/comm.h: 2 additions, 2 deletions
@@ -725,9 +725,9 @@ class CommDevice : public Comm {
     std::vector<int> p2p(n*n);
 
     // Restores active device to what it was before EnableP2P
-    mxnet::common::cuda::SetDevice set_device;
+    mxnet::common::cuda::DeviceStore device_store;
     for (int i = 0; i < n; ++i) {
-      cudaSetDevice(gpus[i]);
+      device_store.SetDevice(gpus[i]);
       for (int j = 0; j < n; j++) {
         int access;
         cudaDeviceCanAccessPeer(&access, gpus[i], gpus[j]);
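The pattern above (repeated in comm_tree.h and kvstore_nccl.h below) is a single DeviceStore guarding a whole loop: each iteration switches devices through it, and the device that was active before EnableP2P is restored once, when the guard goes out of scope. A standalone sketch of the same save/loop/restore idea using only CUDA runtime calls (the function name and the printing are illustrative, not taken from the diff):

#include <cstdio>
#include <cuda_runtime.h>

// Probe peer-to-peer access between all visible GPUs without disturbing the
// caller's active device. The manual save/restore around the loop is exactly
// the bookkeeping DeviceStore automates.
void ProbePeerAccess() {
  int previous = 0, n = 0;
  cudaGetDevice(&previous);       // what DeviceStore's constructor records
  cudaGetDeviceCount(&n);
  for (int i = 0; i < n; ++i) {
    cudaSetDevice(i);             // what device_store.SetDevice(gpus[i]) does
    for (int j = 0; j < n; ++j) {
      int access = 0;
      cudaDeviceCanAccessPeer(&access, i, j);
      std::printf("GPU %d -> GPU %d: %s\n", i, j, access ? "peer access" : "no peer access");
    }
  }
  cudaSetDevice(previous);        // what DeviceStore's destructor does
}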
src/kvstore/comm_tree.h: 2 additions, 2 deletions
@@ -339,9 +339,9 @@ class CommDeviceTree : public CommDevice {
     int n = static_cast<int>(gpus.size());
     int enabled = 0;
     std::vector<int> p2p(n*n);
-    mxnet::common::cuda::SetDevice set_device;
+    mxnet::common::cuda::DeviceStore device_store;
     for (int i = 0; i < n; ++i) {
-      cudaSetDevice(gpus[i]);
+      device_store.SetDevice(gpus[i]);
       for (int j = 0; j < n; j++) {
         int access;
         cudaDeviceCanAccessPeer(&access, gpus[i], gpus[j]);
src/kvstore/kvstore_nccl.h: 4 additions, 4 deletions
@@ -428,9 +428,9 @@ class KVStoreNCCL : public KVStoreLocal {
       mutate_vars.push_back(ptr(dst[i])->var());
     }
     Engine::Get()->PushSync([this](RunContext rctx) {
-          mxnet::common::cuda::SetDevice set_device;
+          mxnet::common::cuda::DeviceStore device_store;
           for (auto cur : nccl_data_) {
-            CUDA_CALL(cudaSetDevice(cur.second.dev_id));
+            device_store.SetDevice(cur.second.dev_id);
            CUDA_CALL(cudaStreamSynchronize(cur.second.stream));
           }
         },
@@ -480,13 +480,13 @@
     std::lock_guard<std::mutex> l(Storage::Get()->GetMutex(Context::kGPU));
     std::vector<ncclComm_t> comms(devs.size());
     ncclCommInitAll(&(comms[0]), devs.size(), &(device_ids_[0]));
-    mxnet::common::cuda::SetDevice set_device;
+    mxnet::common::cuda::DeviceStore device_store;
     for (size_t i = 0; i < devs.size(); ++i) {
       NCCLEntry e;
       e.dev_id = device_ids_[i];
       e.comm = comms[i];
       e.rank = i;
-      cudaSetDevice(e.dev_id);
+      device_store.SetDevice(e.dev_id);
       cudaStreamCreate(&(e.stream));
       nccl_data_[device_ids_[i]] = e;
     }
src/storage/storage.cc: 3 additions, 3 deletions
@@ -153,7 +153,7 @@ void StorageImpl::Alloc(Storage::Handle* handle) {
   // Will restore gpu device to before ActivateDevice if necessary
   bool restore = handle->ctx.dev_type == Context::kCPUPinned ||
                  handle->ctx.dev_type == Context::kGPU;
-  mxnet::common::cuda::SetDevice set_device(restore);
+  mxnet::common::cuda::DeviceStore device_store(restore);
 #endif
   this->ActivateDevice(handle->ctx);
   manager->Alloc(handle);
@@ -172,7 +172,7 @@ void StorageImpl::Free(Storage::Handle handle) {
 #if MXNET_USE_CUDA
   // Will restore gpu device to before ActivateDevice if necessary
   bool restore = ctx.dev_type == Context::kCPUPinned || ctx.dev_type == Context::kGPU;
-  mxnet::common::cuda::SetDevice set_device(restore);
+  mxnet::common::cuda::DeviceStore device_store(restore);
 #endif
   this->ActivateDevice(ctx);
   manager->Free(handle);
@@ -191,7 +191,7 @@ void StorageImpl::DirectFree(Storage::Handle handle) {
 #if MXNET_USE_CUDA
   // Will restore gpu device to before ActivateDevice if necessary
   bool restore = ctx.dev_type == Context::kCPUPinned || ctx.dev_type == Context::kGPU;
-  mxnet::common::cuda::SetDevice set_device(restore);
+  mxnet::common::cuda::DeviceStore device_store(restore);
 #endif
   this->ActivateDevice(ctx);
   manager->DirectFree(handle);
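These hunks are the part that addresses the CPUPinned behaviour in the PR title: the device is saved and restored only for contexts that actually change it (kCPUPinned and kGPU). A sketch of the guarantee this change is meant to provide, with the MXNet allocation call left as a comment since its exact call site is not shown in this diff:

#include <cassert>
#include <cuda_runtime.h>

// After this change, allocating CPUPinned (or GPU) memory should leave the
// caller's active CUDA device exactly as it found it.
void CheckDevicePreserved() {
  int before = -1, after = -1;
  cudaSetDevice(0);               // caller is working on GPU 0
  cudaGetDevice(&before);
  // ... allocate a CPUPinned handle on another device here, e.g. through
  // StorageImpl::Alloc with a kCPUPinned context (illustrative placeholder) ...
  cudaGetDevice(&after);
  assert(before == after);        // holds because DeviceStore restores the device
}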