// fenl_assembly (fragment): per-trial setup of the data used for an assembly
// run -- node graph, Jacobian, solution vector and residual vector.
//
// Parameters visible in this listing:
//   use_trials - number of iterations of the trial loop below.
//   use_nodes  - entries [0], [1] and [2] are read and forwarded together
//                with the three bubble factors.
//   residual   - output view; reassigned to a new "residual" view each trial.
// `jacobian` is likewise reassigned each trial from the freshly built graph.
//
// NOTE(review): only part of the definition is visible here. The remaining
// template parameters, the function head, the call that receives
// use_nodes/bubble_*, the element fill itself and the return statement are
// not shown -- confirm against the complete file before relying on this.
71 template <
typename Scalar,
typename Device,
76 const int use_trials ,
77 const int use_nodes[] ,
78 Kokkos::View< Scalar* , Device >& residual,
83 using Teuchos::rcpFromRef;
84 using Teuchos::arrayView;
90 typedef typename LocalMatrixType::StaticCrsGraphType LocalGraphType ;
96 typedef typename ElementComputationType::vector_type VectorType ;
// All three bubble factors are fixed at 1.0.
// NOTE(review): presumably these are arguments of the `fixture` constructor;
// the head of that call is not visible.
102 const double bubble_x = 1.0 ;
103 const double bubble_y = 1.0 ;
104 const double bubble_z = 1.0 ;
108 use_nodes[0] , use_nodes[1] , use_nodes[2] ,
109 bubble_x , bubble_y , bubble_z );
113 Kokkos::Impl::Timer wall_clock ;
// Everything below is redone from scratch on each trial.
117 for (
int itrial = 0 ; itrial < use_trials ; ++itrial ) {
129 typename NodeNodeGraphType::Times graph_times;
// Build the node-node graph from the fixture's element-node data; the
// remaining constructor arguments are not visible in this listing.
130 const NodeNodeGraphType
131 mesh_to_graph( fixture.elem_node() , fixture.node_count_owned(),
// The caller's jacobian is replaced by a matrix built on the new graph.
135 jacobian = LocalMatrixType( mesh_to_graph.graph );
// solution is sized by node_count(), residual by node_count_owned().
140 VectorType solution(
"solution" , fixture.node_count() );
141 residual = VectorType(
"residual" , fixture.node_count_owned() );
144 const ElementComputationType elemcomp( fixture , solution ,
145 mesh_to_graph.elem_graph ,
146 jacobian , residual );
// Every solution entry is set to the same constant, 1.2345.
148 Kokkos::deep_copy( solution , Scalar(1.2345) );
// Zero the outputs.
// NOTE(review): presumably because the fill accumulates into them; the fill
// call is not visible here.
153 Kokkos::deep_copy( residual , Scalar(0) );
154 Kokkos::deep_copy( jacobian.coeff , Scalar(0) );
// compareValues (fragment): the visible part is the failure report written to
// `out`. It prints the two operand names, the two operand values, the
// computed error `err` and the tolerance `tol`.
//
// NOTE(review): the `a1`, `a2` and `out` parameters, the computation of `err`
// and `tol` (presumably from rel_tol and abs_tol), the condition guarding this
// message and the return value are not visible in this listing -- confirm
// against the complete file.
172 template<
class ValueType>
174 const std::string& a1_name,
176 const std::string& a2_name,
177 const ValueType& rel_tol,
const ValueType& abs_tol,
// Failure message: names first, then values, then error versus tolerance.
185 out <<
"\nError, relErr(" << a1_name <<
"," 186 << a2_name <<
") = relErr(" << a1 <<
"," << a2 <<
") = " 187 << err <<
" <= tol = " <<
tol <<
": failed!\n";
// check_assembly (fragment): compares an analytically assembled residual and
// Jacobian against their Fad-assembled counterparts.
//
// For each pair the device data is copied into host mirror views, the leading
// dimensions are compared (a mismatch message goes to `fbuf`), and the
// entries are then visited one by one.
//
// Parameters visible in this listing:
//   analytic_jacobian, fad_jacobian - indexable views read via dimension_0()
//                                     and operator()(i).
//   fad_residual                    - Fad residual view.
//   test_name                       - label printed at the top of the report.
//
// NOTE(review): the `analytic_residual` parameter declaration, the declaration
// of `fbuf`, the head of the per-entry comparison calls (presumably
// compareValues with `tol`), the closing of the loops and branches and the
// returned result are not visible in this listing -- confirm.
194 template <
typename VectorType,
typename MatrixType>
196 const MatrixType& analytic_jacobian,
197 const VectorType& fad_residual,
198 const MatrixType& fad_jacobian,
199 const std::string& test_name)
// Comparison tolerance; how it is handed to the comparison is not visible.
201 const double tol = 1e-14;
205 std::stringstream buf;
// Copy both residuals into host mirror views so they can be indexed here.
208 typename VectorType::HostMirror host_analytic_residual =
209 Kokkos::create_mirror_view(analytic_residual);
210 typename VectorType::HostMirror host_fad_residual =
211 Kokkos::create_mirror_view(fad_residual);
212 Kokkos::deep_copy( host_analytic_residual, analytic_residual );
213 Kokkos::deep_copy( host_fad_residual, fad_residual );
// NOTE(review): `fbuf` is not declared in the visible lines; presumably an
// output stream layered over `buf` -- confirm.
215 fbuf << test_name <<
":" << std::endl;
// Report a size mismatch between the two residuals.
217 if (host_analytic_residual.dimension_0() != host_fad_residual.dimension_0()) {
218 fbuf <<
"Analytic residual dimension " 219 << host_analytic_residual.dimension_0()
220 <<
" does not match Fad residual dimension " 221 << host_fad_residual.dimension_0() << std::endl;
// Entry-by-entry pass over the residuals; the loop bound is the analytic
// residual's length.
225 const size_t num_node = host_analytic_residual.dimension_0();
226 for (
size_t i=0; i<num_node; ++i) {
228 host_analytic_residual(i),
"analytic residual",
229 host_fad_residual(i),
"Fad residual",
// Same procedure for the Jacobians: host mirrors, size check, entry pass.
234 typename MatrixType::HostMirror host_analytic_jacobian =
235 Kokkos::create_mirror_view(analytic_jacobian);
236 typename MatrixType::HostMirror host_fad_jacobian =
237 Kokkos::create_mirror_view(fad_jacobian);
238 Kokkos::deep_copy( host_analytic_jacobian, analytic_jacobian );
239 Kokkos::deep_copy( host_fad_jacobian, fad_jacobian );
241 if (host_analytic_jacobian.dimension_0() != host_fad_jacobian.dimension_0()) {
242 fbuf <<
"Analytic Jacobian dimension " 243 << host_analytic_jacobian.dimension_0()
244 <<
" does not match Fad Jacobian dimension " 245 << host_fad_jacobian.dimension_0() << std::endl;
// The Jacobians are indexed with a single index i here.
249 const size_t num_entry = host_analytic_jacobian.dimension_0();
250 for (
size_t i=0; i<num_entry; ++i) {
252 host_analytic_jacobian(i),
"analytic Jacobian",
253 host_fad_jacobian(i),
"Fad Jacobian",
// performance_test_driver (fragment): sweeps a range of grid sizes, runs
// fenl_assembly with the Analytic, FadElement, FadElementOptimized and
// FadQuadPoint variants for each size, and prints a comma-separated table to
// std::cout.
//
// Parameters visible in this listing:
//   use_print, use_trials - forwarded unchanged to every fenl_assembly call.
//   quadratic             - NOTE(review): presumably selects the ElemQuadratic
//                           group of calls over the ElemLinear group; the
//                           branch itself is not visible -- confirm.
// `n_begin`, `n_end` and `n_step` drive the size loop; their declarations are
// not visible here. The loop only terminates if n_step is positive.
//
// NOTE(review): the assignments of the fenl_assembly results to perf_*, the
// heads of the consistency-check calls, the definition of `s` and the end of
// the function are not visible in this listing.
264 template <
class Device>
266 const int use_print ,
267 const int use_trials ,
271 const bool quadratic ,
// Table header: quoted column titles separated by " , ".
280 std::cout.precision(8);
281 std::cout << std::endl
282 <<
"\"Grid Size\" , " 284 <<
"\"Analytic Fill Time\" , " 285 <<
"\"Fad Element Fill Slowdown\" , " 286 <<
"\"Fad Optimized Element Fill Slowdown\" , " 287 <<
"\"Fad QP Fill Slowdown\" , " 290 typedef Kokkos::View< double* , Device > vector_type ;
// One residual/Jacobian pair per assembly variant; they are filled by the
// fenl_assembly calls below.
292 vector_type analytic_residual, fad_residual, fad_opt_residual,
294 matrix_type analytic_jacobian, fad_jacobian, fad_opt_jacobian,
// One pass per grid size n, from n_begin to n_end inclusive in steps of
// n_step.
297 for (
int n=n_begin;
n<=n_end;
n+=n_step) {
// The same size n is used for all three use_nodes entries.
298 const int use_nodes[] = {
n,
n,
n };
299 Perf perf_analytic, perf_fad, perf_fad_opt, perf_fad_qp;
// ElemQuadratic group: analytic, Fad element, optimized Fad element and Fad
// quadrature-point variants.
303 fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,Analytic>(
304 use_print, use_trials, use_nodes,
305 analytic_residual, analytic_jacobian );
308 fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadElement>(
309 use_print, use_trials, use_nodes,
310 fad_residual, fad_jacobian);
313 fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadElementOptimized>(
314 use_print, use_trials, use_nodes,
315 fad_opt_residual, fad_opt_jacobian);
318 fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadQuadPoint>(
319 use_print, use_trials, use_nodes,
320 fad_qp_residual, fad_qp_jacobian);
// ElemLinear group: the same four variants writing into the same outputs.
324 fenl_assembly<double,Device,BoxElemPart::ElemLinear,Analytic>(
325 use_print, use_trials, use_nodes,
326 analytic_residual, analytic_jacobian );
329 fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadElement>(
330 use_print, use_trials, use_nodes,
331 fad_residual, fad_jacobian);
334 fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadElementOptimized>(
335 use_print, use_trials, use_nodes,
336 fad_opt_residual, fad_opt_jacobian);
339 fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadQuadPoint>(
340 use_print, use_trials, use_nodes,
341 fad_qp_residual, fad_qp_jacobian);
// Arguments of three calls whose heads are not visible: each passes a Fad
// residual together with the matching Jacobian's `coeff` member.
// NOTE(review): presumably check_assembly calls -- confirm.
345 fad_residual, fad_jacobian.coeff,
348 fad_opt_residual, fad_opt_jacobian.coeff,
351 fad_qp_residual, fad_qp_jacobian.coeff,
// Scale the collected timings by `s`, which is defined outside the visible
// lines.
357 perf_analytic.
scale(s);
359 perf_fad_opt.
scale(s);
360 perf_fad_qp.
scale(s);
// Table row for this grid size, printed with precision 3 in fixed notation.
362 std::cout.precision(3);
363 std::cout <<
n <<
" , " 368 << std::fixed << std::setw(6)
Perf fenl_assembly(const int use_print, const int use_trials, const int use_nodes[], Kokkos::View< Scalar *, Device > &residual, Kokkos::Example::FENL::CrsMatrix< Scalar, Device > &jacobian)
int check(Epetra_CrsGraph &A, int NumMyRows1, int NumGlobalRows1, int NumMyNonzeros1, int NumGlobalNonzeros1, int *MyGlobalElements, bool verbose)
bool check_assembly(const VectorType &analytic_residual, const MatrixType &analytic_jacobian, const VectorType &fad_residual, const MatrixType &fad_jacobian, const std::string &test_name)
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
static RCP< FancyOStream > getDefaultOStream()
void increment(const Perf &p)
bool compareValues(const ValueType &a1, const std::string &a1_name, const ValueType &a2, const std::string &a2_name, const ValueType &rel_tol, const ValueType &abs_tol, Teuchos::FancyOStream &out)
SimpleFad< ValueT > max(const SimpleFad< ValueT > &a, const SimpleFad< ValueT > &b)
Partition a box of hexahedral elements among subdomains.
KOKKOS_INLINE_FUNCTION Expr< AbsOp< Expr< T > > > abs(const Expr< T > &expr)
void performance_test_driver(const int use_print, const int use_trials, const int n_begin, const int n_end, const int n_step, const bool quadratic, const bool check)
Generate a distributed unstructured finite element mesh from a partitioned NX*NY*NZ box of elements...