Merge branch 'develop' into feature-layouttiled
* develop:
  Inline values to silence warnings
  Separate function to silence warning
  Remove unused layout
ndellingwood committed Sep 26, 2018
2 parents ffdcf99 + 0ee4749 commit 6efbf5d
Showing 3 changed files with 28 additions and 41 deletions.
1 change: 0 additions & 1 deletion containers/src/Kokkos_DynRankView.hpp
@@ -2014,7 +2014,6 @@ create_mirror_view_and_copy(const Space& , const Kokkos::DynRankView<T,P...> & s
, typename std::enable_if<!Impl::MirrorDRViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) {
using Mirror = typename Impl::MirrorDRViewType<Space,T,P ...>::view_type;
std::string label = name.empty() ? src.label() : name;
auto reconstructed_layout = Impl::reconstructLayout(src.layout(), src.rank());
auto mirror = Mirror( Kokkos::ViewAllocateWithoutInitializing(label), Impl::reconstructLayout(src.layout(), src.rank()) );
deep_copy(mirror, src);
return mirror;
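The single deletion above drops a local (`reconstructed_layout`) that was computed but never read, since the `Mirror` constructor on the next line already calls `Impl::reconstructLayout` directly. A minimal sketch of the kind of warning such a dead local typically triggers, and of the fix applied here (illustrative only; `compute_extent` is a hypothetical helper, not Kokkos code):

```cpp
int compute_extent() { return 42; }    // hypothetical helper

// Before: 'extent' is set but never read, so -Wunused-variable style
// diagnostics may flag it.
int buffer_bytes_before() {
  auto extent = compute_extent();      // may warn: unused variable 'extent'
  return 8 * compute_extent();         // the call is already made inline here
}

// After: drop the dead local and keep only the inline call, as in the diff.
int buffer_bytes_after() {
  return 8 * compute_extent();
}
```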
38 changes: 23 additions & 15 deletions core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
@@ -891,26 +891,23 @@ void cuda_intra_block_reduce_scan( const FunctorType & functor ,
*
* Global reduce result is in the last threads' 'shared_data' location.
*/

template< bool DoScan , class FunctorType , class ArgTag >
__device__
bool cuda_single_inter_block_reduce_scan( const FunctorType & functor ,
const Cuda::size_type block_id ,
const Cuda::size_type block_count ,
Cuda::size_type * const shared_data ,
Cuda::size_type * const global_data ,
Cuda::size_type * const global_flags )
bool cuda_single_inter_block_reduce_scan2( const FunctorType & functor ,
const Cuda::size_type block_id ,
const Cuda::size_type block_count ,
Cuda::size_type * const shared_data ,
Cuda::size_type * const global_data ,
Cuda::size_type * const global_flags )
{
typedef Cuda::size_type size_type ;
typedef FunctorValueTraits< FunctorType , ArgTag > ValueTraits ;
typedef FunctorValueJoin< FunctorType , ArgTag > ValueJoin ;
typedef FunctorValueInit< FunctorType , ArgTag > ValueInit ;
typedef FunctorValueOps< FunctorType , ArgTag > ValueOps ;

if(!DoScan && ValueTraits::StaticValueSize)
return Kokkos::Impl::CudaReductionsFunctor<FunctorType,ArgTag,false,(ValueTraits::StaticValueSize>16)>::scalar_inter_block_reduction(functor,block_id,block_count,shared_data,global_data,global_flags);

typedef typename ValueTraits::pointer_type pointer_type ;
//typedef typename ValueTraits::reference_type reference_type ;

// '__ffs' = position of the least significant bit set to 1.
// 'blockDim.y' is guaranteed to be a power of two so this
@@ -933,12 +930,7 @@ bool cuda_single_inter_block_reduce_scan( const FunctorType & functor ,
size_type * const shared = shared_data + word_count.value * BlockSizeMask ;
size_type * const global = global_data + word_count.value * block_id ;

//#if (__CUDA_ARCH__ < 500)
for ( int i = int(threadIdx.y) ; i < int(word_count.value) ; i += int(blockDim.y) ) { global[i] = shared[i] ; }
//#else
// for ( size_type i = 0 ; i < word_count.value ; i += 1 ) { global[i] = shared[i] ; }
//#endif

}

// Contributing blocks note that their contribution has been completed via an atomic-increment flag
@@ -980,6 +972,22 @@ bool cuda_single_inter_block_reduce_scan( const FunctorType & functor ,
return is_last_block ;
}

template< bool DoScan , class FunctorType , class ArgTag >
__device__
bool cuda_single_inter_block_reduce_scan( const FunctorType & functor ,
const Cuda::size_type block_id ,
const Cuda::size_type block_count ,
Cuda::size_type * const shared_data ,
Cuda::size_type * const global_data ,
Cuda::size_type * const global_flags )
{
typedef FunctorValueTraits< FunctorType , ArgTag > ValueTraits ;
if(!DoScan && ValueTraits::StaticValueSize)
return Kokkos::Impl::CudaReductionsFunctor<FunctorType,ArgTag,false,(ValueTraits::StaticValueSize>16)>::scalar_inter_block_reduction(functor,block_id,block_count,shared_data,global_data,global_flags);
else
return cuda_single_inter_block_reduce_scan2<DoScan, FunctorType, ArgTag>(functor, block_id, block_count, shared_data, global_data, global_flags);
}

// Size in bytes required for inter block reduce or scan
template< bool DoScan , class FunctorType , class ArgTag >
inline
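This reshuffle is the "Separate function to silence warning" commit: the general reduce/scan body moves into `cuda_single_inter_block_reduce_scan2`, while `cuda_single_inter_block_reduce_scan` becomes a thin wrapper that either takes the specialized `CudaReductionsFunctor` path or forwards to the general implementation. A minimal host-side sketch of that wrapper-plus-implementation split (illustrative only; the names and placeholder bodies are made up, and the commit message does not say which warning was being silenced):

```cpp
#include <cstdio>

// General path, factored into its own function (stand-in for "...scan2").
template <bool DoScan>
bool reduce_scan_general(int n) {
  return (n & 1) == 0;                       // placeholder work
}

// Thin wrapper: pick the specialized path or forward to the general one,
// mirroring the dispatch shape of the new wrapper in the diff above.
template <bool DoScan, bool HasStaticSize>
bool reduce_scan(int n) {
  if (!DoScan && HasStaticSize)
    return n > 0;                            // placeholder for the fast path
  else
    return reduce_scan_general<DoScan>(n);   // general path lives elsewhere now
}

int main() {
  std::printf("%d %d\n", (int)reduce_scan<false, true>(3),
                         (int)reduce_scan<true, false>(4));
  return 0;
}
```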
30 changes: 5 additions & 25 deletions core/unit_test/TestMDRange.hpp
@@ -360,9 +360,7 @@ struct TestMDRange_2D {
#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION )
{
typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<2>, Kokkos::IndexType<int> > range_type;
int s0 = 1;
int s1 = 1;
range_type range( {{ s0, s1 }}, {{ N0, N1 }}, {{ 3, 3 }} );
range_type range( {{ 1, 1 }}, {{ N0, N1 }}, {{ 3, 3 }} );

Kokkos::View< double**, ExecSpace > v_in("v_in", N0, N1 );

@@ -942,11 +940,8 @@ struct TestMDRange_3D {
#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION )
{
typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<3>, Kokkos::IndexType<int> > range_type;
int s0 = 1;
int s1 = 1;
int s2 = 1;

range_type range( {{ s0, s1, s2 }}, {{ N0, N1, N2 }}, {{ 3, 3, 3 }} );
range_type range( {{ 1, 1, 1 }}, {{ N0, N1, N2 }}, {{ 3, 3, 3 }} );

Kokkos::View< double***, ExecSpace > v_in("v_in", N0, N1, N2 );

@@ -1512,12 +1507,8 @@ struct TestMDRange_4D {
#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION )
{
typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<4>, Kokkos::IndexType<int> > range_type;
int s0 = 1;
int s1 = 1;
int s2 = 1;
int s3 = 1;

range_type range( {{ s0, s1, s2, s3 }}, {{ N0, N1, N2, N3 }}, {{ 3, 3, 3, 3 }} );
range_type range( {{ 1, 1, 1, 1 }}, {{ N0, N1, N2, N3 }}, {{ 3, 3, 3, 3 }} );

Kokkos::View< double****, ExecSpace > v_in("v_in", N0, N1, N2, N3 );

@@ -2103,13 +2094,8 @@ struct TestMDRange_5D {
#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION )
{
typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<5>, Kokkos::IndexType<int> > range_type;
int s0 = 1;
int s1 = 1;
int s2 = 1;
int s3 = 1;
int s4 = 1;

range_type range( {{ s0, s1, s2, s3, s4 }}, {{ N0, N1, N2, N3, N4 }}, {{ 3, 3, 3, 2, 2 }} );
range_type range( {{ 1, 1, 1, 1, 1 }}, {{ N0, N1, N2, N3, N4 }}, {{ 3, 3, 3, 2, 2 }} );

Kokkos::View< double*****, ExecSpace > v_in("v_in", N0, N1, N2, N3, N4 );

@@ -2626,14 +2612,8 @@ struct TestMDRange_6D {
#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION )
{
typedef typename Kokkos::MDRangePolicy< ExecSpace, Kokkos::Rank<6>, Kokkos::IndexType<int> > range_type;
int s0 = 1;
int s1 = 1;
int s2 = 1;
int s3 = 1;
int s4 = 1;
int s5 = 1;

range_type range( {{ s0, s1, s2, s3, s4, s5 }}, {{ N0, N1, N2, N3, N4, N5 }}, {{ 3, 3, 3, 2, 2, 1 }} );
range_type range( {{ 1, 1, 1, 1, 1, 1 }}, {{ N0, N1, N2, N3, N4, N5 }}, {{ 3, 3, 3, 2, 2, 1 }} );

Kokkos::View< double******, ExecSpace > v_in("v_in", N0, N1, N2, N3, N4, N5 );

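The TestMDRange hunks above all apply the "Inline values to silence warnings" commit: the begin bounds previously held in locals (`s0` ... `s5`) are now passed to the `MDRangePolicy` constructor as literals. A small self-contained usage sketch of that pattern, with assumed extents and the default execution space (not part of the test file):

```cpp
#include <Kokkos_Core.hpp>

int main(int argc, char* argv[]) {
  Kokkos::initialize(argc, argv);
  {
    const int N0 = 12, N1 = 12;              // assumed extents for illustration
    using range_type =
        Kokkos::MDRangePolicy<Kokkos::Rank<2>, Kokkos::IndexType<int>>;

    // begin = {1,1}, end = {N0,N1}, tile = {3,3}: literals passed directly,
    // matching the shape of the 2D case in the diff above.
    range_type range({{1, 1}}, {{N0, N1}}, {{3, 3}});

    Kokkos::View<double**> v_in("v_in", N0, N1);
    Kokkos::parallel_for("fill", range,
        KOKKOS_LAMBDA(const int i, const int j) {
          v_in(i, j) = double(i * N1 + j);
        });
    Kokkos::fence();
  }
  Kokkos::finalize();
  return 0;
}
```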
