Skip to content

Commit

Permalink
MIOpen Pooling and Convolution Bug Fixes (LLNL#1982)
Browse files Browse the repository at this point in the history
* fixed convolution workspace and added path for beta==1

* added fix for pooling index not matching tensor index

* changed the backward convolution filter pathway to handle arbitrary alpha and beta values
  • Loading branch information
mrwyattii committed Oct 7, 2021
1 parent 2350378 commit 3214f18
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 21 deletions.
49 changes: 36 additions & 13 deletions include/lbann/utils/dnn_lib/miopen/convolution.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,19 +268,42 @@ void convolution_backward_filter(
auto handle_manager = internal::make_default_handle_manager(si);
auto alpha = El::To<LibScalingParamT>(alpha_in);
auto beta = El::To<LibScalingParamT>(beta_in);
CHECK_MIOPEN(miopenConvolutionBackwardWeights(handle_manager.get(),
&alpha,
dyDesc,
dy.LockedBuffer(),
xDesc,
x.LockedBuffer(),
convDesc,
miopen::to_miopen(alg),
&beta,
dwDesc,
dw.Buffer(),
workSpace.Buffer(),
workSpace.Height()*sizeof(TensorDataType)));
auto one = El::TypeTraits<LibScalingParamT>::One();
auto zero = El::TypeTraits<LibScalingParamT>::Zero();
El::Matrix<TensorDataType, El::Device::GPU> dw_old;

if (alpha_in != El::TypeTraits<LibScalingParamT>::One() ||
beta_in != El::TypeTraits<LibScalingParamT>::Zero()) {
El::Copy(dw, dw_old);
CHECK_MIOPEN(miopenConvolutionBackwardWeights(handle_manager.get(),
&one,
dyDesc,
dy.LockedBuffer(),
xDesc,
x.LockedBuffer(),
convDesc,
miopen::to_miopen(alg),
&zero,
dwDesc,
dw.Buffer(),
workSpace.Buffer(),
workSpace.Height()*sizeof(TensorDataType)));
add_tensor(alpha_in, dwDesc, dw, beta_in, dwDesc, dw_old);
} else {
CHECK_MIOPEN(miopenConvolutionBackwardWeights(handle_manager.get(),
&alpha,
dyDesc,
dy.LockedBuffer(),
xDesc,
x.LockedBuffer(),
convDesc,
miopen::to_miopen(alg),
&beta,
dwDesc,
dw.Buffer(),
workSpace.Buffer(),
workSpace.Height()*sizeof(TensorDataType)));
}
}

template <typename TensorDataType, typename ScalarParameterType>
Expand Down
2 changes: 2 additions & 0 deletions include/lbann/utils/dnn_lib/miopen/pooling.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ using namespace miopen;
inline size_t get_pooling_ws_size(PoolingDescriptor const& poolingDesc,
TensorDescriptor const& yDesc)
{
CHECK_MIOPEN(miopenSetPoolingIndexType(poolingDesc,
miopenIndexUint32));
size_t size;
CHECK_MIOPEN(miopenPoolingGetWorkSpaceSizeV2(poolingDesc,
yDesc,
Expand Down
24 changes: 16 additions & 8 deletions src/layers/learning/base_convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -672,17 +672,17 @@ ::compute_gradients_dnn(bool using_transposed_convolution) {

// Get workspace size
auto multisync = El::MakeMultiSync(gpu::get_sync_info(workspace));
size_t workspace_size =
dnn_lib::get_bwd_weights_conv_workspace_size(gradient_wrt_output_desc,
input_desc,
m_convolution_dnn_desc,
m_kernel_dnn_desc,
multisync);
workspace.Resize(workspace_size / sizeof(TensorDataType), 1);
workspace_size = workspace.Height() * sizeof(TensorDataType);

// Determine algorithm and compute kernel gradient
if (using_transposed_convolution) {
size_t workspace_size =
dnn_lib::get_bwd_weights_conv_workspace_size(input_desc,
gradient_wrt_output_desc,
m_convolution_dnn_desc,
m_kernel_dnn_desc,
multisync);
workspace.Resize(workspace_size / sizeof(TensorDataType), 1);
workspace_size = workspace.Height() * sizeof(TensorDataType);
bwd_filter_conv_alg kernel_gradient_dnn_algorithm
= get_backward_filter_algo_dnn(
local_input.Width(),
Expand All @@ -704,6 +704,14 @@ ::compute_gradients_dnn(bool using_transposed_convolution) {
m_kernel_dnn_desc,
kernel_gradient.Matrix());
} else {
size_t workspace_size =
dnn_lib::get_bwd_weights_conv_workspace_size(gradient_wrt_output_desc,
input_desc,
m_convolution_dnn_desc,
m_kernel_dnn_desc,
multisync);
workspace.Resize(workspace_size / sizeof(TensorDataType), 1);
workspace_size = workspace.Height() * sizeof(TensorDataType);
bwd_filter_conv_alg kernel_gradient_dnn_algorithm
= get_backward_filter_algo_dnn(
local_input.Width(),
Expand Down

0 comments on commit 3214f18

Please sign in to comment.