MueLu  Version of the Day
MueLu_CoalesceDropFactory_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
47 #define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
48 
49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
50 #include <Kokkos_CrsMatrix.hpp>
51 
53 
54 #include "MueLu_AmalgamationInfo.hpp"
55 #include "MueLu_Exceptions.hpp"
56 #include "MueLu_Level.hpp"
57 #include "MueLu_LWGraph_kokkos.hpp"
58 #include "MueLu_MasterList.hpp"
59 #include "MueLu_Monitor.hpp"
60 #include "MueLu_Utilities_kokkos.hpp"
61 
62 namespace MueLu {
63 
64  namespace { // anonymous
65 
66  template<class LO, class RowType>
67  class ScanFunctor {
68  public:
69  ScanFunctor(RowType rows_) : rows(rows_) { }
70 
71  KOKKOS_INLINE_FUNCTION
72  void operator()(const LO i, LO& upd, const bool& final) const {
73  upd += rows(i);
74  if (final)
75  rows(i) = upd;
76  }
77 
78  private:
79  RowType rows;
80  };
81 
82  template<class MatrixType, class GhostedDiagType, class RowType>
83  class Stage1ScalarFunctor {
84  private:
85  typedef typename MatrixType::ordinal_type LO;
86  typedef typename MatrixType::value_type SC;
87  typedef Kokkos::ArithTraits<SC> ATS;
88  typedef typename ATS::magnitudeType magnitudeType;
89 
90  public:
91  Stage1ScalarFunctor(MatrixType kokkosMatrix_, double threshold_, GhostedDiagType ghostedDiag_, RowType rows_) :
92  kokkosMatrix(kokkosMatrix_),
93  threshold(threshold_),
94  ghostedDiag(ghostedDiag_),
95  rows(rows_)
96  { }
97 
98  KOKKOS_INLINE_FUNCTION
99  void operator()(const LO row, LO& nnz) const {
100  auto rowView = kokkosMatrix.row (row);
101  auto length = rowView.length;
102 
103  LO rownnz = 0;
104  for (decltype(length) colID = 0; colID < length; colID++) {
105  LO col = rowView.colidx(colID);
106 
107  // Avoid square root by using squared values
108  magnitudeType aiiajj = threshold*threshold * ATS::magnitude(ghostedDiag(row, 0))*ATS::magnitude(ghostedDiag(col, 0)); // eps^2*|a_ii|*|a_jj|
109  magnitudeType aij2 = ATS::magnitude(rowView.value(colID)) * ATS::magnitude(rowView.value(colID)); // |a_ij|^2
110 
111  if (aij2 > aiiajj || row == col)
112  rownnz++;
113  }
114  rows(row+1) = rownnz;
115  nnz += rownnz;
116  }
117 
118  private:
119  MatrixType kokkosMatrix;
120  double threshold;
121  GhostedDiagType ghostedDiag;
122  RowType rows;
123  };
124 
125  template<class MatrixType, class GhostedDiagType, class RowType, class ColType, class BndNodesType>
126  class Stage2ScalarFunctor {
127  private:
128  typedef typename MatrixType::ordinal_type LO;
129  typedef typename MatrixType::size_type GO;
130  typedef typename MatrixType::value_type SC;
131  typedef Kokkos::ArithTraits<SC> ATS;
132  typedef typename ATS::magnitudeType magnitudeType;
133 
134  public:
135  Stage2ScalarFunctor(MatrixType kokkosMatrix_, GhostedDiagType ghostedDiag_, RowType rows_, ColType cols_, BndNodesType bndNodes_, double threshold_) :
136  kokkosMatrix(kokkosMatrix_),
137  ghostedDiag(ghostedDiag_),
138  rows(rows_),
139  cols(cols_),
140  bndNodes(bndNodes_),
141  threshold(threshold_)
142  { }
143 
144  KOKKOS_INLINE_FUNCTION
145  void operator()(const LO row, GO& dropped) const {
146  auto rowView = kokkosMatrix.row (row);
147  auto length = rowView.length;
148 
149  LO rownnz = 0;
150  for (decltype(length) colID = 0; colID < length; colID++) {
151  LO col = rowView.colidx(colID);
152 
153  // Avoid square root by using squared values
154  magnitudeType aiiajj = threshold*threshold * ATS::magnitude(ghostedDiag(row, 0))*ATS::magnitude(ghostedDiag(col, 0)); // eps^2*|a_ii|*|a_jj|
155  magnitudeType aij2 = ATS::magnitude(rowView.value(colID)) * ATS::magnitude(rowView.value(colID)); // |a_ij|^2
156 
157  if (aij2 > aiiajj || row == col) {
158  cols(rows(row) + rownnz) = col;
159  rownnz++;
160  } else {
161  dropped++;
162  }
163  if (rownnz == 1) {
164  // If the only element remaining after filtering is diagonal, mark node as boundary
165  // FIXME: this should really be replaced by the following
166  // if (indices.size() == 1 && indices[0] == row)
167  // boundaryNodes[row] = true;
168  // We do not do it this way now because there is no framework for distinguishing isolated
169  // and boundary nodes in the aggregation algorithms
170  bndNodes(row) = true;
171  }
172  }
173  }
174 
175  private:
176  MatrixType kokkosMatrix;
177  GhostedDiagType ghostedDiag;
178  RowType rows;
179  ColType cols;
180  BndNodesType bndNodes;
181  double threshold;
182  };
183 
184  } // namespace
185 
186  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
187  RCP<const ParameterList> CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::GetValidParameterList() const {
188  RCP<ParameterList> validParamList = rcp(new ParameterList());
189 
190 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
191  SET_VALID_ENTRY("aggregation: drop tol");
192  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
193  SET_VALID_ENTRY("aggregation: drop scheme");
194  {
196  validParamList->getEntry("aggregation: drop scheme").setValidator(
197  rcp(new validatorType(Teuchos::tuple<std::string>("classical", "distance laplacian"), "aggregation: drop scheme")));
198  }
199 #undef SET_VALID_ENTRY
200  validParamList->set< bool > ("lightweight wrap", true, "Experimental option for lightweight graph access");
201 
202  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
203  validParamList->set< RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
204  validParamList->set< RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for Coordinates");
205 
206  return validParamList;
207  }
208 
209  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class DeviceType>
210  void CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::DeclareInput(Level &currentLevel) const {
211  Input(currentLevel, "A");
212  Input(currentLevel, "UnAmalgamationInfo");
213 
214  const ParameterList& pL = GetParameterList();
215  if (pL.get<bool>("lightweight wrap") == true) {
216  if (pL.get<std::string>("aggregation: drop scheme") == "distance laplacian")
217  Input(currentLevel, "Coordinates");
218  }
219  }
220 
221  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class DeviceType>
222  void CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::Build(Level& currentLevel) const {
223  FactoryMonitor m(*this, "Build", currentLevel);
224 
225  typedef Teuchos::ScalarTraits<SC> STS;
226  SC zero = STS::zero();
227 
228  auto A = Get< RCP<Matrix> >(currentLevel, "A");
229  LO blkSize = A->GetFixedBlockSize();
230  GO indexBase = A->getRowMap()->getIndexBase();
231 
232  auto amalInfo = Get< RCP<AmalgamationInfo> >(currentLevel, "UnAmalgamationInfo");
233 
234  const ParameterList& pL = GetParameterList();
235 
236  // bool doLightWeightWrap = pL.get<bool>("lightweight wrap");
237  GetOStream(Warnings0) << "lightweight wrap is deprecated" << std::endl;
238 
239  std::string algo = pL.get<std::string>("aggregation: drop scheme");
240 
241  double threshold = pL.get<double>("aggregation: drop tol");
242  GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
243 
244  Set(currentLevel, "Filtering", (threshold != zero));
245 
246  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
247 
248  GO numDropped = 0, numTotal = 0;
249 
250  RCP<LWGraph_kokkos> graph;
251  LO dofsPerNode = -1;
252 
253  typedef typename LWGraph_kokkos::boundary_nodes_type boundary_nodes_type;
254  boundary_nodes_type boundaryNodes;
255 
256  if (blkSize == 1 && threshold == zero) {
257  // Case 1: scalar problem without dropping
258 
259  graph = rcp(new LWGraph_kokkos(A->getLocalMatrix().graph, A->getRowMap(), A->getColMap(), "graph of A"));
260 
261  // Detect and record rows that correspond to Dirichlet boundary conditions
262  boundaryNodes = Utilities_kokkos::DetectDirichletRows(*A, dirichletThreshold);
263  graph->SetBoundaryNodeMap(boundaryNodes);
264 
265  numTotal = A->getNodeNumEntries();
266 
267  dofsPerNode = 1;
268 
269  } else if (blkSize > 1 && threshold == zero) {
270  // Case 3: block problem without filtering
271 
272  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Block systems without filtering are not implemented");
273 
274  // Detect and record rows that correspond to Dirichlet boundary conditions
275  // boundary_nodes_type pointBoundaryNodes = Utilities_kokkos::DetectDirichletRows(*A, dirichletThreshold);
276 
277  } else if (algo == "classical") {
278 
279  if (blkSize == 1 && threshold != zero) {
280  // Case 2: scalar problem with filtering
281 
282  RCP<Vector> ghostedDiagVec = Utilities_kokkos::GetMatrixOverlappedDiagonal(*A);
283  auto ghostedDiag = ghostedDiagVec->template getLocalView<typename NO::device_type>();
284 
285  typedef typename Matrix::local_matrix_type local_matrix_type;
286  typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type;
287  typedef typename kokkos_graph_type::row_map_type row_map_type;
288  typedef typename kokkos_graph_type::entries_type entries_type;
289 
290  LO numRows = A->getNodeNumRows();
291  auto kokkosMatrix = A->getLocalMatrix();
292 
293  typedef Kokkos::ArithTraits<SC> ATS;
294  typedef typename ATS::magnitudeType magnitudeType;
295 
296  // Stage 1: calculate the number of remaining entries per row
297  typename row_map_type::non_const_type rows("row_map", numRows+1); // rows(0) = 0 automatically
298 
299  LO realnnz = 0;
300 
301  Stage1ScalarFunctor<decltype(kokkosMatrix), decltype(ghostedDiag), decltype(rows)> stage1Functor(kokkosMatrix, threshold, ghostedDiag, rows);
302  Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:stage1_reduce", numRows, stage1Functor, realnnz);
303 
304  // parallel_scan (exclusive)
305  ScanFunctor<LO,decltype(rows)> scanFunctor(rows);
306  Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", numRows+1, scanFunctor);
307 
308 
309  // Stage 2: fill in the column indices
310  typename boundary_nodes_type::non_const_type bndNodes("boundaryNodes", numRows);
311  typename entries_type::non_const_type cols ("entries", realnnz);
312 
313  typename decltype(kokkosMatrix)::size_type kokkos_numDropped;
314 
315  Stage2ScalarFunctor<decltype(kokkosMatrix), decltype(ghostedDiag), decltype(rows), decltype(cols), decltype(bndNodes)>
316  stage2Functor(kokkosMatrix, ghostedDiag, rows, cols, bndNodes, threshold);
317  Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:stage2_reduce", numRows, stage2Functor, kokkos_numDropped);
318  numDropped = kokkos_numDropped;
319 
320  boundaryNodes = bndNodes;
321 
322  kokkos_graph_type kokkosGraph(cols, rows);
323 
324  graph = rcp(new LWGraph_kokkos(kokkosGraph, A->getRowMap(), A->getColMap(), "filtered graph of A"));
325  graph->SetBoundaryNodeMap(boundaryNodes);
326 
327  numTotal = A->getNodeNumEntries();
328 
329  dofsPerNode = 1;
330 
331  } else if (blkSize > 1 && threshold != zero) {
332  // Case 4: block problem with filtering
333 
334  throw Exceptions::RuntimeError("Block systems with filtering are not implemented");
335 
336  // Detect and record rows that correspond to Dirichlet boundary conditions
337  // boundary_nodes_type pointBoundaryNodes = Utilities_kokkos::DetectDirichletRows(*A, dirichletThreshold);
338  }
339 
340  } else if (algo == "distance laplacian") {
341  typedef Xpetra::MultiVector<double,LO,GO,NO> doubleMultiVector;
342 
343  auto coords = Get<RCP<doubleMultiVector> >(currentLevel, "Coordinates");
344 
345  if (blkSize == 1 && threshold != zero) {
346  // Case 2: scalar problem with filtering
347 
348  throw Exceptions::RuntimeError("not implemented");
349 
350  } else if (blkSize > 1 && threshold != zero) {
351  // Case 4: block problem with filtering
352 
353  throw Exceptions::RuntimeError("Block systems with filtering are not implemented");
354  }
355  }
356 
357  if (GetVerbLevel() & Statistics1) {
358  GO numLocalBoundaryNodes = 0;
359  GO numGlobalBoundaryNodes = 0;
360 #if 0
361  // Convert to functors later
362  Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:bnd", boundaryNodes.dimension_0(), KOKKOS_LAMBDA(const LO i, GO& n) {
363  if (boundaryNodes(i))
364  n++;
365  }, numLocalBoundaryNodes);
366 #endif
367 
368  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
369  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
370  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
371  }
372 
373  if ((GetVerbLevel() & Statistics1) && threshold != zero) {
374  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
375  GO numGlobalTotal, numGlobalDropped;
376  MueLu_sumAll(comm, numTotal, numGlobalTotal);
377  MueLu_sumAll(comm, numDropped, numGlobalDropped);
378  if (numGlobalTotal != 0) {
379  GetOStream(Statistics1) << "Number of dropped entries: "
380  << numGlobalDropped << "/" << numGlobalTotal
381  << " (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) << "%)" << std::endl;
382  }
383  }
384 
385  Set(currentLevel, "DofsPerNode", dofsPerNode);
386  Set(currentLevel, "Graph", graph);
387  }
388 }
389 #endif // HAVE_MUELU_KOKKOS_REFACTOR
390 #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
Important warning messages (one line)
#define MueLu_sumAll(rcpComm, in, out)
GlobalOrdinal GO
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Print more statistics.
LocalOrdinal LO
One-liner description of what is happening.
Namespace for MueLu classes and methods.
Kokkos::View< const bool *, typename NO::device_type > DetectDirichletRows(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Scalar SC
#define SET_VALID_ENTRY(name)