Skip to content

Commit

Permalink
Merge pull request #12911 from jgfouca/jgfouca/ifpack2_block_spiluk
Browse files Browse the repository at this point in the history
Ifpack2: Add support for using kokkos kernels under RBILUK
  • Loading branch information
brian-kelley committed Apr 29, 2024
2 parents b740b09 + 3802f36 commit 7764616
Show file tree
Hide file tree
Showing 9 changed files with 550 additions and 360 deletions.
11 changes: 8 additions & 3 deletions packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ class RBILUK : virtual public Ifpack2::RILUK< Tpetra::RowMatrix< typename Matrix
local_ordinal_type,
global_ordinal_type,
node_type> crs_matrix_type;

using crs_graph_type = Tpetra::CrsGraph<local_ordinal_type,
global_ordinal_type,
node_type>;
Expand All @@ -175,11 +175,16 @@ class RBILUK : virtual public Ifpack2::RILUK< Tpetra::RowMatrix< typename Matrix

template <class NewMatrixType> friend class RBILUK;

typedef typename block_crs_matrix_type::nonconst_global_inds_host_view_type nonconst_global_inds_host_view_type;
typedef typename block_crs_matrix_type::nonconst_local_inds_host_view_type nonconst_local_inds_host_view_type;
typedef typename block_crs_matrix_type::nonconst_values_host_view_type nonconst_values_host_view_type;

//@}
//! \name Implementation of KK ILU(k).
//@{

typedef typename crs_matrix_type::local_matrix_device_type local_matrix_device_type;

typedef typename block_crs_matrix_type::local_matrix_device_type local_matrix_device_type;
typedef typename block_crs_matrix_type::local_matrix_host_type local_matrix_host_type;
typedef typename local_matrix_device_type::StaticCrsGraphType::row_map_type lno_row_view_t;
typedef typename local_matrix_device_type::StaticCrsGraphType::entries_type lno_nonzero_view_t;
typedef typename local_matrix_device_type::values_type scalar_nonzero_view_t;
Expand Down
723 changes: 428 additions & 295 deletions packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp

Large diffs are not rendered by default.

20 changes: 10 additions & 10 deletions packages/ifpack2/src/Ifpack2_IlukGraph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ class IlukGraph : public Teuchos::Describable {
size_t getNumGlobalDiagonals() const { return NumGlobalDiagonals_; }

private:
typedef typename GraphType::map_type map_type;
typedef typename GraphType::map_type map_type;

/// \brief Copy constructor (UNIMPLEMENTED; DO NOT USE).
///
Expand Down Expand Up @@ -313,7 +313,7 @@ void IlukGraph<GraphType, KKHandleType>::initialize()
: Kokkos::ceil(static_cast<double>(RowMaxNumIndices)
* Kokkos::pow(overalloc, levelfill));
});

};

bool insertError; // No error found yet while inserting entries
Expand Down Expand Up @@ -415,7 +415,7 @@ void IlukGraph<GraphType, KKHandleType>::initialize()
L_Graph_->getLocalRowCopy(i, CurrentRow_view, LenL); // Get L Indices
CurrentRow[LenL] = i; // Put in Diagonal
if (LenU > 0) {
ArrayView<local_ordinal_type> URowView = CurrentRow.view (LenL+1,LenU);
ArrayView<local_ordinal_type> URowView = CurrentRow.view (LenL+1,LenU);
nonconst_local_inds_host_view_type URowView_v(URowView.data(),URowView.size());

// Get U Indices
Expand Down Expand Up @@ -584,7 +584,7 @@ void IlukGraph<GraphType, KKHandleType>::initialize(const Teuchos::RCP<KKHandleT

typedef typename Kokkos::View<size_type*, array_layout, device_type> lno_row_view_t;
typedef typename Kokkos::View<data_type*, array_layout, device_type> lno_nonzero_view_t;

constructOverlapGraph();

// FIXME (mfh 23 Dec 2013) Use size_t or whatever
Expand Down Expand Up @@ -614,7 +614,7 @@ void IlukGraph<GraphType, KKHandleType>::initialize(const Teuchos::RCP<KKHandleT
catch (std::runtime_error &e) {
symbolicError = true;
data_type nnzL = static_cast<data_type>(Overalloc_)*L_entries.extent(0);
data_type nnzU = static_cast<data_type>(Overalloc_)*U_entries.extent(0);
data_type nnzU = static_cast<data_type>(Overalloc_)*U_entries.extent(0);
KernelHandle->get_spiluk_handle()->reset_handle(NumMyRows, nnzL, nnzU);
Kokkos::resize(L_entries, KernelHandle->get_spiluk_handle()->get_nnzL());
Kokkos::resize(U_entries, KernelHandle->get_spiluk_handle()->get_nnzU());
Expand All @@ -628,17 +628,17 @@ void IlukGraph<GraphType, KKHandleType>::initialize(const Teuchos::RCP<KKHandleT

Kokkos::resize(L_entries, KernelHandle->get_spiluk_handle()->get_nnzL());
Kokkos::resize(U_entries, KernelHandle->get_spiluk_handle()->get_nnzU());

RCP<Teuchos::ParameterList> params = Teuchos::parameterList ();
params->set ("Optimize Storage",false);

L_Graph_ = rcp (new crs_graph_type (OverlapGraph_->getRowMap (),
OverlapGraph_->getRowMap (),
OverlapGraph_->getRowMap (),
L_row_map, L_entries));
U_Graph_ = rcp (new crs_graph_type (OverlapGraph_->getRowMap (),
OverlapGraph_->getRowMap (),
OverlapGraph_->getRowMap (),
U_row_map, U_entries));

RCP<const map_type> L_DomainMap = OverlapGraph_->getRowMap ();
RCP<const map_type> L_RangeMap = Graph_->getRangeMap ();
RCP<const map_type> U_DomainMap = Graph_->getDomainMap ();
Expand Down
6 changes: 3 additions & 3 deletions packages/ifpack2/src/Ifpack2_RILUK_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ class RILUK:
<typename lno_row_view_t::const_value_type, typename lno_nonzero_view_t::const_value_type, typename scalar_nonzero_view_t::value_type,
HandleExecSpace, TemporaryMemorySpace,PersistentMemorySpace > kk_handle_type;
typedef Ifpack2::IlukGraph<Tpetra::CrsGraph<local_ordinal_type, global_ordinal_type, node_type>, kk_handle_type> iluk_graph_type;

/// \brief Constructor that takes a Tpetra::RowMatrix.
///
/// \param A_in [in] The input matrix.
Expand Down Expand Up @@ -402,7 +402,7 @@ class RILUK:
}

//! Get a rough estimate of cost per iteration
size_t getNodeSmootherComplexity() const;
size_t getNodeSmootherComplexity() const;



Expand Down Expand Up @@ -599,7 +599,7 @@ class RILUK:
/// may be computed using a crs_matrix_type that initialize() constructs
/// temporarily.
Teuchos::RCP<const row_matrix_type> A_local_;
lno_row_view_t A_local_rowmap_;
lno_row_view_t A_local_rowmap_;
lno_nonzero_view_t A_local_entries_;
scalar_nonzero_view_t A_local_values_;
std::vector<local_matrix_device_type> A_local_diagblks;
Expand Down
44 changes: 22 additions & 22 deletions packages/ifpack2/src/Ifpack2_RILUK_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ RILUK<MatrixType>::RILUK (const Teuchos::RCP<const crs_matrix_type>& Matrix_in)


template<class MatrixType>
RILUK<MatrixType>::~RILUK()
RILUK<MatrixType>::~RILUK()
{
if (!isKokkosKernelsStream_) {
if (Teuchos::nonnull (KernelHandle_)) {
Expand Down Expand Up @@ -315,12 +315,12 @@ void RILUK<MatrixType>::allocate_L_and_U ()
for (int i = 0; i < num_streams_; i++) {
L_v_[i] = null;
U_v_[i] = null;

L_v_[i] = rcp (new crs_matrix_type (Graph_v_[i]->getL_Graph ()));
U_v_[i] = rcp (new crs_matrix_type (Graph_v_[i]->getU_Graph ()));
L_v_[i]->setAllToScalar (STS::zero ()); // Zero out L and U matrices
U_v_[i]->setAllToScalar (STS::zero ());

L_v_[i]->fillComplete ();
U_v_[i]->fillComplete ();
}
Expand Down Expand Up @@ -620,19 +620,19 @@ void RILUK<MatrixType>::initialize ()
if (this->isKokkosKernelsSpiluk_) {
if (!isKokkosKernelsStream_) {
this->KernelHandle_ = Teuchos::rcp (new kk_handle_type ());
KernelHandle_->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
KernelHandle_->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
A_local_->getLocalNumRows(),
2*A_local_->getLocalNumEntries()*(LevelOfFill_+1),
2*A_local_->getLocalNumEntries()*(LevelOfFill_+1),
2*A_local_->getLocalNumEntries()*(LevelOfFill_+1) );
Graph_->initialize (KernelHandle_); // this calls spiluk_symbolic
}
else {
KernelHandle_v_ = std::vector< Teuchos::RCP<kk_handle_type> >(num_streams_);
for (int i = 0; i < num_streams_; i++) {
KernelHandle_v_[i] = Teuchos::rcp (new kk_handle_type ());
KernelHandle_v_[i]->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
KernelHandle_v_[i]->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
A_local_diagblks[i].numRows(),
2*A_local_diagblks[i].nnz()*(LevelOfFill_+1),
2*A_local_diagblks[i].nnz()*(LevelOfFill_+1),
2*A_local_diagblks[i].nnz()*(LevelOfFill_+1) );
Graph_v_[i]->initialize (KernelHandle_v_[i]); // this calls spiluk_symbolic
}
Expand All @@ -655,7 +655,7 @@ void RILUK<MatrixType>::initialize ()
L_solver_->setMatrices (L_v_);
}
L_solver_->initialize ();
//NOTE (Nov-09-2022):
//NOTE (Nov-09-2022):
//For Cuda >= 11.3 (using cusparseSpSV), skip trisolve computes here.
//Instead, call trisolve computes within RILUK compute
#if !defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || !defined(KOKKOS_ENABLE_CUDA) || (CUDA_VERSION < 11030)
Expand Down Expand Up @@ -925,7 +925,7 @@ void RILUK<MatrixType>::compute ()

nonconst_local_inds_host_view_type InI_sub(InI.data()+NumL+1,MaxNumEntries-NumL-1);
nonconst_values_host_view_type InV_sub(reinterpret_cast<IST*>(InV.data())+NumL+1,MaxNumEntries-NumL-1);

U_->getLocalRowCopy (local_row, InI_sub,InV_sub, NumU);
NumIn = NumL+NumU+1;

Expand All @@ -939,12 +939,12 @@ void RILUK<MatrixType>::compute ()
for (size_t jj = 0; jj < NumL; ++jj) {
local_ordinal_type j = InI[jj];
IST multiplier = InV[jj]; // current_mults++;

InV[jj] *= static_cast<scalar_type>(DV(j));

U_->getLocalRowView(j, UUI, UUV); // View of row above
NumUU = UUI.size();

if (RelaxValue_ == STM::zero ()) {
for (size_t k = 0; k < NumUU; ++k) {
const int kk = colflag[UUI[k]];
Expand Down Expand Up @@ -1001,7 +1001,7 @@ void RILUK<MatrixType>::compute ()
}

if (NumU) {
// Replace current row of L and U
// Replace current row of L and U
U_->replaceLocalValues (local_row, InI (NumL+1, NumU), InV (NumL+1, NumU));
}

Expand Down Expand Up @@ -1087,7 +1087,7 @@ void RILUK<MatrixType>::compute ()
for(int i = 0; i < num_streams_; i++) {
L_v_[i]->resumeFill ();
U_v_[i]->resumeFill ();

if (L_v_[i]->isStaticGraph () || L_v_[i]->isLocallyIndexed ()) {
L_v_[i]->setAllToScalar (STS::zero ()); // Zero out L and U matrices
U_v_[i]->setAllToScalar (STS::zero ());
Expand All @@ -1108,16 +1108,16 @@ void RILUK<MatrixType>::compute ()
auto U_entries = lclU.graph.entries;
auto U_values = lclU.values;

KokkosSparse::Experimental::spiluk_numeric( KernelHandle_.getRawPtr(), LevelOfFill_,
A_local_rowmap_, A_local_entries_, A_local_values_,
KokkosSparse::Experimental::spiluk_numeric( KernelHandle_.getRawPtr(), LevelOfFill_,
A_local_rowmap_, A_local_entries_, A_local_values_,
L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values );

L_->fillComplete (L_->getColMap (), A_local_->getRangeMap ());
U_->fillComplete (A_local_->getDomainMap (), U_->getRowMap ());

L_solver_->setMatrix (L_);
U_solver_->setMatrix (U_);
}
}
else {
std::vector<lno_row_view_t> L_rowmap_v(num_streams_);
std::vector<lno_nonzero_view_t> L_entries_v(num_streams_);
Expand All @@ -1131,7 +1131,7 @@ void RILUK<MatrixType>::compute ()
L_rowmap_v[i] = lclL.graph.row_map;
L_entries_v[i] = lclL.graph.entries;
L_values_v[i] = lclL.values;

auto lclU = U_v_[i]->getLocalMatrixDevice();
U_rowmap_v[i] = lclU.graph.row_map;
U_entries_v[i] = lclU.graph.entries;
Expand Down Expand Up @@ -1210,7 +1210,7 @@ apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_t

const scalar_type one = STS::one ();
const scalar_type zero = STS::zero ();

Teuchos::Time timer ("RILUK::apply");
double startTime = timer.wallTime();
{ // Start timing
Expand Down Expand Up @@ -1301,7 +1301,7 @@ apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_t
U_solver_->apply (Y, Y, mode); // Solve U Y = Y.
#endif
}
else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030)
//NOTE (Nov-15-2022):
//This is a workaround for Cuda >= 11.3 (using cusparseSpSV)
Expand Down Expand Up @@ -1425,8 +1425,8 @@ std::string RILUK<MatrixType>::description () const
os << "Level-of-fill: " << getLevelOfFill() << ", ";

if(isKokkosKernelsSpiluk_) os<<"KK-SPILUK, ";
if(isKokkosKernelsStream_) os<<"KK-Stream, ";
if(isKokkosKernelsStream_) os<<"KK-Stream, ";

if (A_.is_null ()) {
os << "Matrix: null";
}
Expand Down
Loading

0 comments on commit 7764616

Please sign in to comment.