Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ifpack2: Add support for using kokkos kernels under RBILUK #12911

Merged
merged 12 commits into from
Apr 29, 2024
Merged
Prev Previous commit
Next Next commit
Progress. Running nut not passing.
  • Loading branch information
jgfouca committed Apr 10, 2024
commit a1b021b79a8a4496fd9a3f8404aa05562bfe2085
91 changes: 51 additions & 40 deletions packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@

#include "Tpetra_BlockMultiVector.hpp"
#include "Tpetra_BlockView.hpp"
#include "Tpetra_BlockCrsMatrix_Helpers_decl.hpp"
#include "Ifpack2_OverlappingRowMatrix.hpp"
#include "Ifpack2_Details_getCrsMatrix.hpp"
#include "Ifpack2_LocalFilter.hpp"
#include "Ifpack2_Utilities.hpp"
#include "Ifpack2_RILUK.hpp"
Expand Down Expand Up @@ -383,18 +385,21 @@ void RBILUK<MatrixType>::initialize ()
this->Graph_ = rcp (new Ifpack2::IlukGraph<crs_graph_type,kk_handle_type> (matrixCrsGraph,
this->LevelOfFill_, 0));

this->Graph_->initialize ();
allocate_L_and_U_blocks ();

if (this->isKokkosKernelsSpiluk_) {
this->KernelHandle_ = Teuchos::rcp (new kk_handle_type ());
KernelHandle_->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
this->A_local_->getLocalNumRows(),
2*this->A_local_->getLocalNumEntries()*(this->LevelOfFill_+1),
2*this->A_local_->getLocalNumEntries()*(this->LevelOfFill_+1),
blockSize_);
this->Graph_->initialize(KernelHandle_); // this calls spiluk_symbolic
}
else {
this->Graph_->initialize ();
}

allocate_L_and_U_blocks ();

#ifdef IFPACK2_RBILUK_INITIAL
initAllValues ();
#endif
Expand Down Expand Up @@ -589,6 +594,7 @@ template<class MatrixType>
void RBILUK<MatrixType>::compute ()
{
using Teuchos::RCP;
using Teuchos::rcp;
using Teuchos::Array;

typedef impl_scalar_type IST;
Expand Down Expand Up @@ -907,50 +913,61 @@ void RBILUK<MatrixType>::compute ()
}
} // !this->isKokkosKernelsSpiluk_
else {
RCP<const block_crs_matrix_type> A_local_crs = Details::getBrsMatrix(this->A_local_);
if (A_local_crs.is_null()) {
local_ordinal_type numRows = this->A_local_->getLocalNumRows();
Array<size_t> entriesPerRow(numRows);
for(local_ordinal_type i = 0; i < numRows; i++) {
entriesPerRow[i] = this->A_local_->getNumEntriesInLocalRow(i);
}
RCP<block_crs_matrix_type> A_local_crs_nc =
Teuchos::rcp (new block_crs_matrix_type (this->A_local_->getRowMap (),
this->A_local_->getColMap (),
entriesPerRow(), blockSize_));
// copy entries into A_local_crs
nonconst_local_inds_host_view_type indices("indices", this->A_local_->getLocalMaxNumRowEntries());
nonconst_values_host_view_type values("values", this->A_local_->getLocalMaxNumRowEntries());
for(local_ordinal_type i = 0; i < numRows; i++) {
size_t numEntries = 0;
this->A_local_->getLocalRowCopy(i, indices, values, numEntries);
A_local_crs_nc->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()),indices.data());
RCP<const block_crs_matrix_type> A_local_bcrs = Details::getBcrsMatrix(this->A_local_);
RCP<const crs_matrix_type> A_local_crs = Details::getCrsMatrix(this->A_local_);
if (A_local_bcrs.is_null()) {
if (A_local_crs.is_null()) {
local_ordinal_type numRows = this->A_local_->getLocalNumRows();
Array<size_t> entriesPerRow(numRows);
for(local_ordinal_type i = 0; i < numRows; i++) {
entriesPerRow[i] = this->A_local_->getNumEntriesInLocalRow(i);
}
RCP<crs_matrix_type> A_local_crs_nc =
rcp (new crs_matrix_type (this->A_local_->getRowMap (),
this->A_local_->getColMap (),
entriesPerRow()));
// copy entries into A_local_crs
nonconst_local_inds_host_view_type indices("indices",this->A_local_->getLocalMaxNumRowEntries());
nonconst_values_host_view_type values("values",this->A_local_->getLocalMaxNumRowEntries());
for(local_ordinal_type i = 0; i < numRows; i++) {
size_t numEntries = 0;
this->A_local_->getLocalRowCopy(i, indices, values, numEntries);
A_local_crs_nc->insertLocalValues(i, numEntries, reinterpret_cast<scalar_type*>(values.data()),indices.data());
}
A_local_crs_nc->fillComplete (this->A_local_->getDomainMap (), this->A_local_->getRangeMap ());
A_local_crs = Teuchos::rcp_const_cast<const crs_matrix_type> (A_local_crs_nc);
}
A_local_crs_nc->fillComplete (this->A_local_->getDomainMap (), this->A_local_->getRangeMap ());
A_local_crs = Teuchos::rcp_const_cast<const block_crs_matrix_type> (A_local_crs_nc);
// Create bcrs from crs
// We can skip fillLogicalBlocks if we know the input is already filled
auto crs_matrix_block_filled = Tpetra::fillLogicalBlocks(*A_local_crs, blockSize_);
A_local_bcrs = Tpetra::convertToBlockCrsMatrix(*crs_matrix_block_filled, blockSize_);
}
auto lclMtx = A_local_crs->getLocalMatrixDevice();
assert(!this->isKokkosKernelsStream_); // Not yet supported

TEUCHOS_TEST_FOR_EXCEPTION(
this->isKokkosKernelsStream_, std::runtime_error, "Ifpack2::RBILUK::compute: "
"streams are not yet supported.");

auto lclMtx = A_local_bcrs->getLocalMatrixDevice();
this->A_local_rowmap_ = lclMtx.graph.row_map;
this->A_local_entries_ = lclMtx.graph.entries;
this->A_local_values_ = lclMtx.values;

this->L_->resumeFill ();
this->U_->resumeFill ();
// L_block_->resumeFill ();
// U_block_->resumeFill ();

if (this->L_->isStaticGraph () || this->L_->isLocallyIndexed ()) {
this->L_->setAllToScalar (STS::zero ()); // Zero out L and U matrices
this->U_->setAllToScalar (STS::zero ());
if (L_block_->isLocallyIndexed ()) {
L_block_->setAllToScalar (STS::zero ()); // Zero out L and U matrices
U_block_->setAllToScalar (STS::zero ());
}

using row_map_type = typename block_crs_matrix_type::local_matrix_device_type::row_map_type;

auto lclL = this->L_->getLocalMatrixDevice();
auto lclL = L_block_->getLocalMatrixDevice();
row_map_type L_rowmap = lclL.graph.row_map;
auto L_entries = lclL.graph.entries;
auto L_values = lclL.values;

auto lclU = this->U_->getLocalMatrixDevice();
auto lclU = U_block_->getLocalMatrixDevice();
row_map_type U_rowmap = lclU.graph.row_map;
auto U_entries = lclU.graph.entries;
auto U_values = lclU.values;
Expand All @@ -959,14 +976,8 @@ void RBILUK<MatrixType>::compute ()
this->A_local_rowmap_, this->A_local_entries_, this->A_local_values_,
L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values );

this->L_->fillComplete (this->L_->getColMap (), this->A_local_->getRangeMap ());
this->U_->fillComplete (this->A_local_->getDomainMap (), this->U_->getRowMap ());

this->L_solver_->setMatrix (this->L_);
this->U_solver_->setMatrix (this->U_);

this->L_solver_->compute ();//NOTE: Only do compute if the pointer changed. Otherwise, do nothing
this->U_solver_->compute ();//NOTE: Only do compute if the pointer changed. Otherwise, do nothing
// L_block_->fillComplete (L_block_->getColMap (), this->A_local_->getRangeMap ());
// U_block_->fillComplete (this->A_local_->getDomainMap (), U_block_->getRowMap ());
}
} // Stop timing

Expand Down
10 changes: 5 additions & 5 deletions packages/ifpack2/test/belos/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ TRIBITS_COPY_FILES_TO_BINARY_DIR(Ifpack2BelosCopyFiles
bcsstk14.hb
test_2_ILUT_nos1_hb.xml
test_2_RILUK_nos1_hb.xml
test_2_RILUK_nos1_hb_block.xml
test_RBILUK_nos1_hb_block.xml
test_2_RILUK_HTS_nos1_hb.xml
test_2_RILUK_2streams_nos1_hb.xml
test_2_RILUK_4streams_nos1_hb.xml
Expand Down Expand Up @@ -351,17 +351,17 @@ TRIBITS_ADD_TEST(

TRIBITS_ADD_TEST(
tif_belos
NAME RILUK_hb_belos_block
ARGS "--xml_file=test_2_RILUK_nos1_hb_block.xml"
NAME RBILUK_hb_belos_block
ARGS "--xml_file=test_RBILUK_nos1_hb_block.xml"
COMM serial mpi
NUM_MPI_PROCS 2
STANDARD_PASS_OUTPUT
)

TRIBITS_ADD_TEST(
tif_belos
NAME RILUK_hb_belos_block_serial
ARGS "--xml_file=test_2_RILUK_nos1_hb_block.xml"
NAME RBILUK_hb_belos_block_serial
ARGS "--xml_file=test_RBILUK_nos1_hb_block.xml"
COMM serial mpi
NUM_MPI_PROCS 1
STANDARD_PASS_OUTPUT
Expand Down
19 changes: 17 additions & 2 deletions packages/ifpack2/test/belos/belos_solve.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@

#include "Tpetra_Core.hpp"
#include "Tpetra_Map.hpp"
#include "Tpetra_BlockCrsMatrix_Helpers_decl.hpp"

#include "build_problem.hpp"
#include "build_solver.hpp"
Expand Down Expand Up @@ -95,6 +96,7 @@ int main (int argc, char* argv[])
typedef Tpetra::MultiVector<Scalar,LO,GO> TMV;
typedef Tpetra::Operator<Scalar,LO,GO> TOP;
typedef Tpetra::CrsMatrix<Scalar,LO,GO,Node> crs_matrix_type;
typedef Tpetra::BlockCrsMatrix<Scalar,LO,GO,Node> block_crs_matrix_type;
typedef Belos::LinearProblem<Scalar,TMV,TOP> BLinProb;
typedef Belos::SolverManager<Scalar,TMV,TOP> BSolverMgr;
using Teuchos::RCP;
Expand Down Expand Up @@ -171,8 +173,21 @@ int main (int argc, char* argv[])
std::string prec_side("Left");
Ifpack2::getParameter (test_params, "Preconditioner Side", prec_side);
if (tifpack_precond != "not specified") {
auto A = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(problem->getOperator());
RCP<TOP> precond = build_precond<Scalar,LO,GO,Node> (test_params, A);
RCP<TOP> precond;
if (tifpack_precond == "RBILUK") {
int blockSize = 0;
Teuchos::ParameterList& prec_params = test_params.sublist("Ifpack2");
Ifpack2::getParameter (prec_params, "fact: block size", blockSize);
assert(blockSize > 1);
auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(problem->getOperator());
auto crs_matrix_block_filled = Tpetra::fillLogicalBlocks(*A_crs, blockSize);
auto A = Teuchos::rcp_const_cast<const block_crs_matrix_type>(Tpetra::convertToBlockCrsMatrix(*crs_matrix_block_filled, blockSize));
precond = build_precond<Scalar,LO,GO,Node> (test_params, A);
}
else {
auto A = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(problem->getOperator());
precond = build_precond<Scalar,LO,GO,Node> (test_params, A);
}
if (prec_side == "Left")
problem->setLeftPrec (precond);
else if (prec_side == "Right")
Expand Down
4 changes: 2 additions & 2 deletions packages/ifpack2/test/belos/build_precond.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@

#include "Ifpack2_Factory.hpp"

template<class Scalar,class LocalOrdinal,class GlobalOrdinal,class Node>
template<class Scalar,class LocalOrdinal,class GlobalOrdinal,class Node,class MatrixType>
Teuchos::RCP<Tpetra::Operator<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
build_precond (Teuchos::ParameterList& test_params,
const Teuchos::RCP<const Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> >& A)
const Teuchos::RCP<const MatrixType>& A)
{
using Teuchos::FancyOStream;
using Teuchos::getFancyOStream;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
<ParameterList name="Ifpack2">
<Parameter name="fact: iluk level-of-fill" type="int" value="2"/>
<Parameter name="fact: type" type="string" value="KSPILUK"/>
<Parameter name="fact: block size" type="int" value="4"/>
<Parameter name="fact: block size" type="int" value="3"/>
<Parameter name="trisolver: type" type="string" value="KSPTRSV"/>
</ParameterList>

Expand Down