This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[MXNET-1107] Fix CPUPinned unexpected behaviour #12031

Merged (20 commits) on Oct 19, 2018
address Haibin and Lin's comments
Carl Yang committed Oct 18, 2018
commit cfdcb297921e274bb779d46f659c7b3b79f8688d
src/common/cuda_utils.h: 6 additions, 9 deletions
@@ -283,24 +283,21 @@ inline DType __device__ CudaMin(DType a, DType b) {
   return a < b ? a : b;
 }
 
-class SetDevice {
+class DeviceStore {
  public:
   /*! \brief default constructor- only optionally restores previous device */
-  explicit SetDevice(bool restore = true) : restore_(restore) {
+  explicit DeviceStore(bool restore = true) : restore_(restore) {
     if (restore_)
       CUDA_CALL(cudaGetDevice(&restore_device_));
   }
 
-  /*! \brief standard constuctor- cudaSetDevice + optionally restore previous device */
-  explicit SetDevice(int device, bool restore = true) : restore_(restore) {
+  ~DeviceStore() {
     if (restore_)
-      CUDA_CALL(cudaGetDevice(&restore_device_));
-    CUDA_CALL(cudaSetDevice(device));
+      CUDA_CALL(cudaSetDevice(restore_device_));
   }
 
-  ~SetDevice() {
-    if (restore_)
-      CUDA_CALL(cudaSetDevice(restore_device_));
+  void SetDevice(int device) {
+    CUDA_CALL(cudaSetDevice(device));
   }
 
  private:
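For reference, a minimal usage sketch of the renamed class (illustrative, not part of the diff; it assumes a CUDA build with CUDA_CALL and the DeviceStore definition above in scope, and the device id and function name are arbitrary):

// RAII device switching: the constructor records the currently active device,
// SetDevice() switches it, and the destructor restores the original device
// when the object goes out of scope.
void CopyOnGpu1(float* dst, const float* src, size_t bytes) {
  mxnet::common::cuda::DeviceStore device_store;   // saves the current device
  device_store.SetDevice(1);                       // work below runs on GPU 1
  CUDA_CALL(cudaMemcpy(dst, src, bytes, cudaMemcpyDefault));
}                                                  // previous device restored here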
src/common/rtc.cc: 2 additions, 2 deletions
@@ -77,12 +77,12 @@ CUfunction CudaModule::Chunk::GetFunction(
   CHECK_EQ(ctx.dev_mask(), Context::kGPU)
       << "CUDA Runtime compilation only supports Nvidia GPU.";
   auto iter = mod_.find(ctx.dev_id);
-  mxnet::common::cuda::SetDevice set_device;
+  mxnet::common::cuda::DeviceStore device_store;
   CUmodule module;
   if (iter != mod_.end()) {
     module = iter->second;
   } else {
-    CUDA_CALL(cudaSetDevice(ctx.dev_id));
+    device_store.SetDevice(ctx.dev_id);
     CUDA_DRIVER_CALL(cuModuleLoadDataEx(&module, ptx_, 0, 0, 0));
     mod_[ctx.dev_id] = module;
   }
src/engine/stream_manager.h: 4 additions, 4 deletions
@@ -66,7 +66,7 @@ RunContext StreamManager<kNumGpus, kStreams>::GetRunContext(
     Context const& ctx) {
   RunContext ret;
 #if MXNET_USE_CUDA
-  mxnet::common::cuda::SetDevice set_device;
+  mxnet::common::cuda::DeviceStore device_store;
 #endif
   switch (ctx.dev_mask()) {
     case cpu::kDevMask:
@@ -75,7 +75,7 @@ RunContext StreamManager<kNumGpus, kStreams>::GetRunContext(
     case gpu::kDevMask: {
 #if MXNET_USE_CUDA
       std::size_t use_counter;
-      CUDA_CALL(cudaSetDevice(ctx.dev_id));
+      device_store.SetDevice(ctx.dev_id);
       {
         std::lock_guard<std::mutex> lock{mutex_};
         auto&& counter = gpu_cnt_.at(ctx.dev_id);
@@ -105,15 +105,15 @@ RunContext StreamManager<kNumGpus, kStreams>::GetIORunContext(
     Context const& ctx) {
   RunContext ret;
 #if MXNET_USE_CUDA
-  mxnet::common::cuda::SetDevice set_device;
+  mxnet::common::cuda::DeviceStore device_store;
 #endif
   switch (ctx.dev_mask()) {
     case cpu::kDevMask:
       ret = RunContext{ctx, nullptr};
       break;
     case gpu::kDevMask: {
 #if MXNET_USE_CUDA
-      CUDA_CALL(cudaSetDevice(ctx.dev_id));
+      device_store.SetDevice(ctx.dev_id);
       {
         std::lock_guard<std::mutex> lock{mutex_};
         if (gpu_io_streams_.at(ctx.dev_id) == nullptr) {
src/kvstore/comm.h: 2 additions, 2 deletions
@@ -725,9 +725,9 @@ class CommDevice : public Comm {
     std::vector<int> p2p(n*n);
 
     // Restores active device to what it was before EnableP2P
-    mxnet::common::cuda::SetDevice set_device;
+    mxnet::common::cuda::DeviceStore device_store;
     for (int i = 0; i < n; ++i) {
-      cudaSetDevice(gpus[i]);
+      device_store.SetDevice(gpus[i]);
       for (int j = 0; j < n; j++) {
         int access;
         cudaDeviceCanAccessPeer(&access, gpus[i], gpus[j]);
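The pattern above (repeated in comm_tree.h and kvstore_nccl.h below) is a single DeviceStore guarding a whole loop: each iteration switches devices through it, and the device that was active before EnableP2P is restored once, when the guard goes out of scope. A standalone sketch of the same save/loop/restore idea using only CUDA runtime calls (the function name and the printing are illustrative, not taken from the diff):

#include <cstdio>
#include <cuda_runtime.h>

// Probe peer-to-peer access between all visible GPUs without disturbing the
// caller's active device. The manual save/restore around the loop is exactly
// the bookkeeping DeviceStore automates.
void ProbePeerAccess() {
  int previous = 0, n = 0;
  cudaGetDevice(&previous);       // what DeviceStore's constructor records
  cudaGetDeviceCount(&n);
  for (int i = 0; i < n; ++i) {
    cudaSetDevice(i);             // what device_store.SetDevice(gpus[i]) does
    for (int j = 0; j < n; ++j) {
      int access = 0;
      cudaDeviceCanAccessPeer(&access, i, j);
      std::printf("GPU %d -> GPU %d: %s\n", i, j, access ? "peer access" : "no peer access");
    }
  }
  cudaSetDevice(previous);        // what DeviceStore's destructor does
}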
src/kvstore/comm_tree.h: 2 additions, 2 deletions
@@ -339,9 +339,9 @@ class CommDeviceTree : public CommDevice {
     int n = static_cast<int>(gpus.size());
     int enabled = 0;
     std::vector<int> p2p(n*n);
-    mxnet::common::cuda::SetDevice set_device;
+    mxnet::common::cuda::DeviceStore device_store;
     for (int i = 0; i < n; ++i) {
-      cudaSetDevice(gpus[i]);
+      device_store.SetDevice(gpus[i]);
       for (int j = 0; j < n; j++) {
         int access;
         cudaDeviceCanAccessPeer(&access, gpus[i], gpus[j]);
src/kvstore/kvstore_nccl.h: 4 additions, 4 deletions
@@ -428,9 +428,9 @@ class KVStoreNCCL : public KVStoreLocal {
       mutate_vars.push_back(ptr(dst[i])->var());
     }
     Engine::Get()->PushSync([this](RunContext rctx) {
-          mxnet::common::cuda::SetDevice set_device;
+          mxnet::common::cuda::DeviceStore device_store;
           for (auto cur : nccl_data_) {
-            CUDA_CALL(cudaSetDevice(cur.second.dev_id));
+            device_store.SetDevice(cur.second.dev_id);
            CUDA_CALL(cudaStreamSynchronize(cur.second.stream));
           }
         },
@@ -480,13 +480,13 @@
     std::lock_guard<std::mutex> l(Storage::Get()->GetMutex(Context::kGPU));
     std::vector<ncclComm_t> comms(devs.size());
     ncclCommInitAll(&(comms[0]), devs.size(), &(device_ids_[0]));
-    mxnet::common::cuda::SetDevice set_device;
+    mxnet::common::cuda::DeviceStore device_store;
     for (size_t i = 0; i < devs.size(); ++i) {
       NCCLEntry e;
       e.dev_id = device_ids_[i];
       e.comm = comms[i];
       e.rank = i;
-      cudaSetDevice(e.dev_id);
+      device_store.SetDevice(e.dev_id);
       cudaStreamCreate(&(e.stream));
       nccl_data_[device_ids_[i]] = e;
     }
src/storage/storage.cc: 3 additions, 3 deletions
@@ -153,7 +153,7 @@ void StorageImpl::Alloc(Storage::Handle* handle) {
   // Will restore gpu device to before ActivateDevice if necessary
   bool restore = handle->ctx.dev_type == Context::kCPUPinned ||
                  handle->ctx.dev_type == Context::kGPU;
-  mxnet::common::cuda::SetDevice set_device(restore);
+  mxnet::common::cuda::DeviceStore device_store(restore);
 #endif
   this->ActivateDevice(handle->ctx);
   manager->Alloc(handle);
@@ -172,7 +172,7 @@ void StorageImpl::Free(Storage::Handle handle) {
 #if MXNET_USE_CUDA
   // Will restore gpu device to before ActivateDevice if necessary
   bool restore = ctx.dev_type == Context::kCPUPinned || ctx.dev_type == Context::kGPU;
-  mxnet::common::cuda::SetDevice set_device(restore);
+  mxnet::common::cuda::DeviceStore device_store(restore);
 #endif
   this->ActivateDevice(ctx);
   manager->Free(handle);
@@ -191,7 +191,7 @@ void StorageImpl::DirectFree(Storage::Handle handle) {
 #if MXNET_USE_CUDA
   // Will restore gpu device to before ActivateDevice if necessary
   bool restore = ctx.dev_type == Context::kCPUPinned || ctx.dev_type == Context::kGPU;
-  mxnet::common::cuda::SetDevice set_device(restore);
+  mxnet::common::cuda::DeviceStore device_store(restore);
 #endif
   this->ActivateDevice(ctx);
   manager->DirectFree(handle);
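These hunks are the part that addresses the CPUPinned behaviour in the PR title: the device is saved and restored only for contexts that actually change it (kCPUPinned and kGPU). A sketch of the guarantee this change is meant to provide, with the MXNet allocation call left as a comment since its exact call site is not shown in this diff:

#include <cassert>
#include <cuda_runtime.h>

// After this change, allocating CPUPinned (or GPU) memory should leave the
// caller's active CUDA device exactly as it found it.
void CheckDevicePreserved() {
  int before = -1, after = -1;
  cudaSetDevice(0);               // caller is working on GPU 0
  cudaGetDevice(&before);
  // ... allocate a CPUPinned handle on another device here, e.g. through
  // StorageImpl::Alloc with a kCPUPinned context (illustrative placeholder) ...
  cudaGetDevice(&after);
  assert(before == after);        // holds because DeviceStore restores the device
}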