46 #ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP 47 #define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP 49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR 50 #include <Kokkos_CrsMatrix.hpp> 54 #include "MueLu_AmalgamationInfo.hpp" 57 #include "MueLu_LWGraph_kokkos.hpp" 60 #include "MueLu_Utilities_kokkos.hpp" 66 template<
class LO,
class RowType>
69 ScanFunctor(RowType rows_) : rows(rows_) { }
71 KOKKOS_INLINE_FUNCTION
72 void operator()(
const LO i,
LO& upd,
const bool&
final)
const {
82 template<
class MatrixType,
class GhostedDiagType,
class RowType>
83 class Stage1ScalarFunctor {
85 typedef typename MatrixType::ordinal_type
LO;
86 typedef typename MatrixType::value_type
SC;
87 typedef Kokkos::ArithTraits<SC> ATS;
88 typedef typename ATS::magnitudeType magnitudeType;
91 Stage1ScalarFunctor(MatrixType kokkosMatrix_,
double threshold_, GhostedDiagType ghostedDiag_, RowType rows_) :
92 kokkosMatrix(kokkosMatrix_),
93 threshold(threshold_),
94 ghostedDiag(ghostedDiag_),
98 KOKKOS_INLINE_FUNCTION
99 void operator()(
const LO row,
LO& nnz)
const {
100 auto rowView = kokkosMatrix.row (row);
101 auto length = rowView.length;
104 for (decltype(length) colID = 0; colID < length; colID++) {
105 LO col = rowView.colidx(colID);
108 magnitudeType aiiajj = threshold*threshold * ATS::magnitude(ghostedDiag(row, 0))*ATS::magnitude(ghostedDiag(col, 0));
109 magnitudeType aij2 = ATS::magnitude(rowView.value(colID)) * ATS::magnitude(rowView.value(colID));
111 if (aij2 > aiiajj || row == col)
114 rows(row+1) = rownnz;
119 MatrixType kokkosMatrix;
121 GhostedDiagType ghostedDiag;
125 template<
class MatrixType,
class GhostedDiagType,
class RowType,
class ColType,
class BndNodesType>
126 class Stage2ScalarFunctor {
128 typedef typename MatrixType::ordinal_type
LO;
129 typedef typename MatrixType::size_type
GO;
130 typedef typename MatrixType::value_type
SC;
131 typedef Kokkos::ArithTraits<SC> ATS;
132 typedef typename ATS::magnitudeType magnitudeType;
135 Stage2ScalarFunctor(MatrixType kokkosMatrix_, GhostedDiagType ghostedDiag_, RowType rows_, ColType cols_, BndNodesType bndNodes_,
double threshold_) :
136 kokkosMatrix(kokkosMatrix_),
137 ghostedDiag(ghostedDiag_),
141 threshold(threshold_)
144 KOKKOS_INLINE_FUNCTION
145 void operator()(
const LO row,
GO& dropped)
const {
146 auto rowView = kokkosMatrix.row (row);
147 auto length = rowView.length;
150 for (decltype(length) colID = 0; colID < length; colID++) {
151 LO col = rowView.colidx(colID);
154 magnitudeType aiiajj = threshold*threshold * ATS::magnitude(ghostedDiag(row, 0))*ATS::magnitude(ghostedDiag(col, 0));
155 magnitudeType aij2 = ATS::magnitude(rowView.value(colID)) * ATS::magnitude(rowView.value(colID));
157 if (aij2 > aiiajj || row == col) {
158 cols(rows(row) + rownnz) = col;
170 bndNodes(row) =
true;
176 MatrixType kokkosMatrix;
177 GhostedDiagType ghostedDiag;
180 BndNodesType bndNodes;
186 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
187 RCP<const ParameterList> CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::GetValidParameterList()
const {
188 RCP<ParameterList> validParamList =
rcp(
new ParameterList());
190 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) 196 validParamList->getEntry(
"aggregation: drop scheme").setValidator(
197 rcp(
new validatorType(Teuchos::tuple<std::string>(
"classical",
"distance laplacian"),
"aggregation: drop scheme")));
199 #undef SET_VALID_ENTRY 200 validParamList->set<
bool > (
"lightweight wrap",
true,
"Experimental option for lightweight graph access");
202 validParamList->set< RCP<const FactoryBase> >(
"A", Teuchos::null,
"Generating factory of the matrix A");
203 validParamList->set< RCP<const FactoryBase> >(
"UnAmalgamationInfo", Teuchos::null,
"Generating factory for UnAmalgamationInfo");
204 validParamList->set< RCP<const FactoryBase> >(
"Coordinates", Teuchos::null,
"Generating factory for Coordinates");
206 return validParamList;
209 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
210 void CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::DeclareInput(Level ¤tLevel)
const {
211 Input(currentLevel,
"A");
212 Input(currentLevel,
"UnAmalgamationInfo");
214 const ParameterList& pL = GetParameterList();
215 if (pL.get<
bool>(
"lightweight wrap") ==
true) {
216 if (pL.get<std::string>(
"aggregation: drop scheme") ==
"distance laplacian")
217 Input(currentLevel,
"Coordinates");
221 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
222 void CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::Build(Level& currentLevel)
const {
223 FactoryMonitor m(*
this,
"Build", currentLevel);
226 SC zero = STS::zero();
228 auto A = Get< RCP<Matrix> >(currentLevel,
"A");
229 LO blkSize = A->GetFixedBlockSize();
230 GO indexBase = A->getRowMap()->getIndexBase();
232 auto amalInfo = Get< RCP<AmalgamationInfo> >(currentLevel,
"UnAmalgamationInfo");
234 const ParameterList& pL = GetParameterList();
237 GetOStream(
Warnings0) <<
"lightweight wrap is deprecated" << std::endl;
239 std::string algo = pL.get<std::string>(
"aggregation: drop scheme");
241 double threshold = pL.get<
double>(
"aggregation: drop tol");
242 GetOStream(
Runtime0) <<
"algorithm = \"" << algo <<
"\": threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() << std::endl;
244 Set(currentLevel,
"Filtering", (threshold != zero));
246 const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<
double>(
"aggregation: Dirichlet threshold")));
248 GO numDropped = 0, numTotal = 0;
250 RCP<LWGraph_kokkos> graph;
253 typedef typename LWGraph_kokkos::boundary_nodes_type boundary_nodes_type;
254 boundary_nodes_type boundaryNodes;
256 if (blkSize == 1 && threshold == zero) {
259 graph =
rcp(
new LWGraph_kokkos(A->getLocalMatrix().graph, A->getRowMap(), A->getColMap(),
"graph of A"));
263 graph->SetBoundaryNodeMap(boundaryNodes);
265 numTotal = A->getNodeNumEntries();
269 }
else if (blkSize > 1 && threshold == zero) {
277 }
else if (algo ==
"classical") {
279 if (blkSize == 1 && threshold != zero) {
282 RCP<Vector> ghostedDiagVec = Utilities_kokkos::GetMatrixOverlappedDiagonal(*A);
283 auto ghostedDiag = ghostedDiagVec->template getLocalView<typename NO::device_type>();
285 typedef typename Matrix::local_matrix_type local_matrix_type;
286 typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type;
287 typedef typename kokkos_graph_type::row_map_type row_map_type;
288 typedef typename kokkos_graph_type::entries_type entries_type;
290 LO numRows = A->getNodeNumRows();
291 auto kokkosMatrix = A->getLocalMatrix();
293 typedef Kokkos::ArithTraits<SC> ATS;
294 typedef typename ATS::magnitudeType magnitudeType;
297 typename row_map_type::non_const_type rows(
"row_map", numRows+1);
301 Stage1ScalarFunctor<decltype(kokkosMatrix), decltype(ghostedDiag), decltype(rows)> stage1Functor(kokkosMatrix, threshold, ghostedDiag, rows);
302 Kokkos::parallel_reduce(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1_reduce", numRows, stage1Functor, realnnz);
305 ScanFunctor<LO,decltype(rows)> scanFunctor(rows);
306 Kokkos::parallel_scan(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", numRows+1, scanFunctor);
310 typename boundary_nodes_type::non_const_type bndNodes(
"boundaryNodes", numRows);
311 typename entries_type::non_const_type cols (
"entries", realnnz);
313 typename decltype(kokkosMatrix)::size_type kokkos_numDropped;
315 Stage2ScalarFunctor<decltype(kokkosMatrix), decltype(ghostedDiag), decltype(rows), decltype(cols), decltype(bndNodes)>
316 stage2Functor(kokkosMatrix, ghostedDiag, rows, cols, bndNodes, threshold);
317 Kokkos::parallel_reduce(
"MueLu:CoalesceDropF:Build:scalar_filter:stage2_reduce", numRows, stage2Functor, kokkos_numDropped);
318 numDropped = kokkos_numDropped;
320 boundaryNodes = bndNodes;
322 kokkos_graph_type kokkosGraph(cols, rows);
324 graph =
rcp(
new LWGraph_kokkos(kokkosGraph, A->getRowMap(), A->getColMap(),
"filtered graph of A"));
325 graph->SetBoundaryNodeMap(boundaryNodes);
327 numTotal = A->getNodeNumEntries();
331 }
else if (blkSize > 1 && threshold != zero) {
334 throw Exceptions::RuntimeError(
"Block systems with filtering are not implemented");
340 }
else if (algo ==
"distance laplacian") {
343 auto coords = Get<RCP<doubleMultiVector> >(currentLevel,
"Coordinates");
345 if (blkSize == 1 && threshold != zero) {
348 throw Exceptions::RuntimeError(
"not implemented");
350 }
else if (blkSize > 1 && threshold != zero) {
353 throw Exceptions::RuntimeError(
"Block systems with filtering are not implemented");
358 GO numLocalBoundaryNodes = 0;
359 GO numGlobalBoundaryNodes = 0;
362 Kokkos::parallel_reduce(
"MueLu:CoalesceDropF:Build:bnd", boundaryNodes.dimension_0(), KOKKOS_LAMBDA(
const LO i,
GO& n) {
363 if (boundaryNodes(i))
365 }, numLocalBoundaryNodes);
368 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
369 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
370 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
373 if ((GetVerbLevel() &
Statistics1) && threshold != zero) {
374 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
375 GO numGlobalTotal, numGlobalDropped;
378 if (numGlobalTotal != 0) {
379 GetOStream(
Statistics1) <<
"Number of dropped entries: " 380 << numGlobalDropped <<
"/" << numGlobalTotal
381 <<
" (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) <<
"%)" << std::endl;
385 Set(currentLevel,
"DofsPerNode", dofsPerNode);
386 Set(currentLevel,
"Graph", graph);
389 #endif // HAVE_MUELU_KOKKOS_REFACTOR 390 #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP Important warning messages (one line)
#define MueLu_sumAll(rcpComm, in, out)
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
One-liner description of what is happening.
Namespace for MueLu classes and methods.
Kokkos::View< const bool *, typename NO::device_type > DetectDirichletRows(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
#define SET_VALID_ENTRY(name)