Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
TestStochastic.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 #include <utility>
42 #include <cmath>
43 #include <iostream>
44 
45 #include "Kokkos_Core.hpp"
46 #include "impl/Kokkos_Timer.hpp"
47 
48 #include "Stokhos_Update.hpp"
49 #include "Stokhos_CrsMatrix.hpp"
61 
62 #if defined(HAVE_MPI) && 0
63 #include "Epetra_MpiComm.h"
64 #else
65 #include "Epetra_SerialComm.h"
66 #endif
68 #include "Stokhos_JacobiBasis.hpp"
74 
75 #ifdef HAVE_STOKHOS_KOKKOSLINALG
76 #include "Kokkos_Sparse.hpp"
77 #include "Kokkos_Blas1_MV.hpp"
78 #endif
79 
80 namespace unit_test {
81 
82 template< typename IntType >
83 inline
84 IntType map_fem_graph_coord( const IntType & N ,
85  const IntType & i ,
86  const IntType & j ,
87  const IntType & k )
88 {
89  return k + N * ( j + N * i );
90 }
91 
92 inline
93 size_t generate_fem_graph( size_t N ,
94  std::vector< std::vector<size_t> > & graph )
95 {
96  graph.resize( N * N * N , std::vector<size_t>() );
97 
98  size_t total = 0 ;
99 
100  for ( int i = 0 ; i < (int) N ; ++i ) {
101  for ( int j = 0 ; j < (int) N ; ++j ) {
102  for ( int k = 0 ; k < (int) N ; ++k ) {
103 
104  const size_t row = map_fem_graph_coord((int)N,i,j,k);
105 
106  graph[row].reserve(27);
107 
108  for ( int ii = -1 ; ii < 2 ; ++ii ) {
109  for ( int jj = -1 ; jj < 2 ; ++jj ) {
110  for ( int kk = -1 ; kk < 2 ; ++kk ) {
111  if ( 0 <= i + ii && i + ii < (int) N &&
112  0 <= j + jj && j + jj < (int) N &&
113  0 <= k + kk && k + kk < (int) N ) {
114  size_t col = map_fem_graph_coord((int)N,i+ii,j+jj,k+kk);
115 
116  graph[row].push_back(col);
117  }
118  }}}
119  total += graph[row].size();
120  }}}
121 
122  return total ;
123 }
124 
125 template <typename Scalar> struct ScalarTolerances {};
126 
127 template <> struct ScalarTolerances<float> {
128  typedef float scalar_type;
129  static scalar_type sparse_cijk_tol() { return 1e-5; }
130 };
131 
132 template <> struct ScalarTolerances<double> {
133  typedef double scalar_type;
134  static scalar_type sparse_cijk_tol() { return 1e-12; }
135 };
136 
137 template< typename ScalarType , typename TensorType, class Device >
138 std::vector<double>
140  const std::vector<int> & var_degree ,
141  const int nGrid ,
142  const int iterCount ,
143  const bool symmetric )
144 {
145  typedef ScalarType value_type ;
146  typedef Kokkos::View< value_type** ,
147  Kokkos::LayoutLeft ,
148  Device > block_vector_type ;
149 
151 
153  typedef typename matrix_type::graph_type graph_type ;
154 
155  typedef ScalarType value_type ;
156  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
159  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
161 
162  using Teuchos::rcp;
163  using Teuchos::RCP;
164  using Teuchos::Array;
165 
166  // Create Stochastic Galerkin basis and expansion
167  const size_t num_KL = var_degree.size();
168  Array< RCP<const abstract_basis_type> > bases(num_KL);
169  for (size_t i=0; i<num_KL; i++) {
170  if (symmetric)
171  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
172  else
173  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
174  }
175  RCP<const product_basis_type> basis =
176  rcp(new product_basis_type(
178  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
179 
180  //------------------------------
181  // Generate graph for "FEM" box structure:
182 
183  std::vector< std::vector<size_t> > graph ;
184 
185  const size_t outer_length = nGrid * nGrid * nGrid ;
186  const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
187 
188  //------------------------------
189  // Generate CRS block-tensor matrix:
190 
191  matrix_type matrix ;
192 
193  matrix.block =
194  Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
195  *Cijk );
196  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
197 
198  const size_t inner_length = matrix.block.dimension();
199  const size_t inner_length_aligned = matrix.block.aligned_dimension();
200 
201  matrix.values =
202  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length_aligned , graph_length );
203 
204  block_vector_type x =
205  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length_aligned , outer_length );
206  block_vector_type y =
207  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length_aligned , outer_length );
208 
209  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
210 
211  //------------------------------
212  // Generate input multivector:
213 
214  Kokkos::deep_copy( x , ScalarType(1.0) );
215  block_vector_type x0 =
216  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"),
217  inner_length_aligned , outer_length );
218  Kokkos::deep_copy( x0 , ScalarType(1.0) );
219 
220  //------------------------------
221 
222  Device::fence();
223  Kokkos::Impl::Timer clock ;
224  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
225  Kokkos::deep_copy( x, x0 ); // akin to import
226  Stokhos::multiply( matrix , x , y );
227  }
228  Device::fence();
229 
230  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
231  const double flops_per_block = matrix.block.tensor().num_flops();
232  const double flops = 1.0e-9*graph_length*flops_per_block;
233 
234  std::vector<double> perf(6) ;
235 
236  perf[0] = outer_length * inner_length ;
237  perf[1] = seconds_per_iter ;
238  perf[2] = flops / seconds_per_iter;
239  perf[3] = matrix.block.tensor().entry_count();
240  perf[4] = inner_length ;
241  perf[5] = flops_per_block;
242 
243  return perf ;
244 }
245 
246 template< typename ScalarType , class Device >
247 std::vector<double>
249  const std::vector<int> & var_degree ,
250  const int nGrid ,
251  const int iterCount ,
252  const bool symmetric )
253 {
254  typedef ScalarType value_type ;
255  typedef Kokkos::View< value_type**,
256  Kokkos::LayoutLeft ,
257  Device > block_vector_type ;
258 
259  //------------------------------
260 
262  value_type , Device > matrix_type ;
263 
264  typedef typename matrix_type::graph_type graph_type ;
265 
266  typedef ScalarType value_type ;
267  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
270  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
272 
273  using Teuchos::rcp;
274  using Teuchos::RCP;
275  using Teuchos::Array;
276 
277  // Create Stochastic Galerkin basis and expansion
278  const size_t num_KL = var_degree.size();
279  Array< RCP<const abstract_basis_type> > bases(num_KL);
280  for (size_t i=0; i<num_KL; i++) {
281  if (symmetric)
282  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
283  else
284  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
285  }
286  RCP<const product_basis_type> basis =
287  rcp(new product_basis_type(
289  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
290 
291  //------------------------------
292  // Generate FEM graph:
293 
294  std::vector< std::vector<size_t> > fem_graph ;
295 
296  const size_t fem_length = nGrid * nGrid * nGrid ;
297  const size_t fem_graph_length = unit_test::generate_fem_graph( nGrid , fem_graph );
298 
299  const size_t stoch_length = basis->size();
300 
301  //------------------------------
302 
303  block_vector_type x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), stoch_length , fem_length );
304  block_vector_type y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), stoch_length , fem_length );
305 
306  Kokkos::deep_copy( x , ScalarType(1.0) );
307 
308  //------------------------------
309  // Generate CRS matrix of blocks with symmetric diagonal storage
310 
311  matrix_type matrix ;
312 
313  matrix.block = Stokhos::SymmetricDiagonalSpec< Device >( stoch_length );
314  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>(
315  std::string("test product tensor graph") , fem_graph );
316  matrix.values = block_vector_type(
317  Kokkos::ViewAllocateWithoutInitializing("matrix"), matrix.block.matrix_size() , fem_graph_length );
318 
319  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
320 
321  //------------------------------
322 
323  Device::fence();
324  Kokkos::Impl::Timer clock ;
325  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
326  Stokhos::multiply( matrix , x , y );
327  }
328  Device::fence();
329 
330  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
331  const double flops_per_block = 2.0*stoch_length*stoch_length;
332  const double flops = 1e-9*fem_graph_length*flops_per_block;
333 
334  std::vector<double> perf(6);
335  perf[0] = fem_length * stoch_length ;
336  perf[1] = seconds_per_iter;
337  perf[2] = flops / seconds_per_iter;
338  perf[3] = Cijk->num_entries();
339  perf[4] = stoch_length;
340  perf[5] = flops_per_block;
341  return perf;
342 }
343 
344 //----------------------------------------------------------------------------
345 // Flatten to a plain CRS matrix
346 //
347 // Outer DOF == fem
348 // Inner DOF == stochastic
349 
350 template< typename ScalarType , class Device >
351 std::vector<double>
353  const std::vector<int> & var_degree ,
354  const int nGrid ,
355  const int iterCount ,
356  const bool symmetric )
357 {
358  typedef ScalarType value_type ;
359  typedef Kokkos::View< value_type* , Device > vector_type ;
360 
361  //------------------------------
362 
363  typedef Stokhos::CrsMatrix<value_type,Device> matrix_type;
364  typedef typename matrix_type::values_type matrix_values_type;
365  typedef typename matrix_type::graph_type matrix_graph_type;
366 
367  typedef ScalarType value_type ;
368  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
371  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
373 
374  using Teuchos::rcp;
375  using Teuchos::RCP;
376  using Teuchos::Array;
377 
378  // Create Stochastic Galerkin basis and expansion
379  const size_t num_KL = var_degree.size();
380  Array< RCP<const abstract_basis_type> > bases(num_KL);
381  for (size_t i=0; i<num_KL; i++) {
382  if (symmetric)
383  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
384  else
385  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
386  }
387  RCP<const product_basis_type> basis =
388  rcp(new product_basis_type(
390  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
391 
392  //------------------------------
393  // Generate FEM graph:
394 
395  std::vector< std::vector<size_t> > fem_graph ;
396 
397  const size_t fem_length = nGrid * nGrid * nGrid ;
398 
399  unit_test::generate_fem_graph( nGrid , fem_graph );
400 
401  //------------------------------
402  // Stochastic graph
403 
404  const size_t stoch_length = basis->size();
405  std::vector< std::vector< int > > stoch_graph( stoch_length );
406 #if defined(HAVE_MPI) && 0
407  Epetra_MpiComm comm(MPI_COMM_WORLD);
408 #else
409  Epetra_SerialComm comm;
410 #endif
411  Teuchos::RCP<Epetra_CrsGraph> cijk_graph = Stokhos::sparse3Tensor2CrsGraph(
412  *basis, *Cijk, comm);
413  for ( size_t i = 0 ; i < stoch_length ; ++i ) {
414  int len = cijk_graph->NumGlobalIndices(i);
415  stoch_graph[i].resize(len);
416  int len2;
417  cijk_graph->ExtractGlobalRowCopy(i, len, len2, &stoch_graph[i][0]);
418  }
419 
420  //------------------------------
421  // Generate flattened graph with FEM outer and stochastic inner
422 
423  const size_t flat_length = fem_length * stoch_length ;
424 
425  std::vector< std::vector<size_t> > flat_graph( flat_length );
426 
427  for ( size_t iOuterRow = 0 ; iOuterRow < fem_length ; ++iOuterRow ) {
428 
429  const size_t iOuterRowNZ = fem_graph[iOuterRow].size();
430 
431  for ( size_t iInnerRow = 0 ; iInnerRow < stoch_length ; ++iInnerRow ) {
432 
433  const size_t iInnerRowNZ = stoch_graph[ iInnerRow ].size(); ;
434  const size_t iFlatRowNZ = iOuterRowNZ * iInnerRowNZ ;
435  const size_t iFlatRow = iInnerRow + iOuterRow * stoch_length ;
436 
437  flat_graph[iFlatRow].resize( iFlatRowNZ );
438 
439  size_t iFlatEntry = 0 ;
440 
441  for ( size_t iOuterEntry = 0 ; iOuterEntry < iOuterRowNZ ; ++iOuterEntry ) {
442 
443  const size_t iOuterCol = fem_graph[iOuterRow][iOuterEntry];
444 
445  for ( size_t iInnerEntry = 0 ; iInnerEntry < iInnerRowNZ ; ++iInnerEntry ) {
446 
447  const size_t iInnerCol = stoch_graph[iInnerRow][iInnerEntry] ;
448  const size_t iFlatColumn = iInnerCol + iOuterCol * stoch_length ;
449 
450  flat_graph[iFlatRow][iFlatEntry] = iFlatColumn ;
451 
452  ++iFlatEntry ;
453  }
454  }
455  }
456  }
457 
458  //------------------------------
459 
460  vector_type x = vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), flat_length );
461  vector_type y = vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), flat_length );
462 
463  Kokkos::deep_copy( x , ScalarType(1.0) );
464 
465  //------------------------------
466 
467  matrix_type matrix ;
468 
469  matrix.graph = Kokkos::create_staticcrsgraph<matrix_graph_type>(
470  std::string("testing") , flat_graph );
471 
472  const size_t flat_graph_length = matrix.graph.entries.dimension_0();
473 
474  matrix.values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), flat_graph_length );
475 
476  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
477 
478  //Kokkos::write_matrix_market(matrix, "flat_commuted.mm");
479 
480  //------------------------------
481 
482  Device::fence();
483  Kokkos::Impl::Timer clock ;
484  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
485  Stokhos::multiply( matrix , x , y );
486  }
487  Device::fence();
488 
489  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
490  const double flops = 2.0*1e-9*flat_graph_length / seconds_per_iter;
491 
492  std::vector<double> perf(4);
493  perf[0] = flat_length ;
494  perf[1] = seconds_per_iter;
495  perf[2] = flops;
496  perf[3] = flat_graph_length ;
497  return perf;
498 }
499 
500 //----------------------------------------------------------------------------
501 //----------------------------------------------------------------------------
502 // Flatten to a plain CRS matrix
503 //
504 // Outer DOF == stochastic
505 // Inner DOF == fem
506 
507 template< typename ScalarType , class Device >
508 std::vector<double>
510  const std::vector<int> & var_degree ,
511  const int nGrid ,
512  const int iterCount ,
513  const bool symmetric )
514 {
515  typedef ScalarType value_type ;
516  typedef Kokkos::View< value_type* , Device > vector_type ;
517 
518  //------------------------------
519 
520  typedef Stokhos::CrsMatrix<value_type,Device> matrix_type;
521  typedef typename matrix_type::values_type matrix_values_type;
522  typedef typename matrix_type::graph_type matrix_graph_type;
523 
524  typedef ScalarType value_type ;
525  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
528  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
530 
531  using Teuchos::rcp;
532  using Teuchos::RCP;
533  using Teuchos::Array;
534 
535  // Create Stochastic Galerkin basis and expansion
536  const size_t num_KL = var_degree.size();
537  Array< RCP<const abstract_basis_type> > bases(num_KL);
538  for (size_t i=0; i<num_KL; i++) {
539  if (symmetric)
540  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
541  else
542  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
543  }
544  RCP<const product_basis_type> basis =
545  rcp(new product_basis_type(
547  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
548 
549  //------------------------------
550  // Generate FEM graph:
551 
552  std::vector< std::vector<size_t> > fem_graph ;
553 
554  const size_t fem_length = nGrid * nGrid * nGrid ;
555 
556  unit_test::generate_fem_graph( nGrid , fem_graph );
557 
558  //------------------------------
559  // Stochastic graph
560 
561  const size_t stoch_length = basis->size();
562  std::vector< std::vector< int > > stoch_graph( stoch_length );
563 #if defined(HAVE_MPI) && 0
564  Epetra_MpiComm comm(MPI_COMM_WORLD);
565 #else
566  Epetra_SerialComm comm;
567 #endif
568  Teuchos::RCP<Epetra_CrsGraph> cijk_graph = Stokhos::sparse3Tensor2CrsGraph(
569  *basis, *Cijk, comm);
570  for ( size_t i = 0 ; i < stoch_length ; ++i ) {
571  int len = cijk_graph->NumGlobalIndices(i);
572  stoch_graph[i].resize(len);
573  int len2;
574  cijk_graph->ExtractGlobalRowCopy(i, len, len2, &stoch_graph[i][0]);
575  }
576 
577  //------------------------------
578  // Generate flattened graph with stochastic outer and FEM inner
579 
580  const size_t flat_length = fem_length * stoch_length ;
581 
582  std::vector< std::vector<size_t> > flat_graph( flat_length );
583 
584  for ( size_t iOuterRow = 0 ; iOuterRow < stoch_length ; ++iOuterRow ) {
585 
586  const size_t iOuterRowNZ = stoch_graph[iOuterRow].size();
587 
588  for ( size_t iInnerRow = 0 ; iInnerRow < fem_length ; ++iInnerRow ) {
589 
590  const size_t iInnerRowNZ = fem_graph[iInnerRow].size();
591  const size_t iFlatRowNZ = iOuterRowNZ * iInnerRowNZ ;
592  const size_t iFlatRow = iInnerRow + iOuterRow * fem_length ;
593 
594  flat_graph[iFlatRow].resize( iFlatRowNZ );
595 
596  size_t iFlatEntry = 0 ;
597 
598  for ( size_t iOuterEntry = 0 ; iOuterEntry < iOuterRowNZ ; ++iOuterEntry ) {
599 
600  const size_t iOuterCol = stoch_graph[ iOuterRow ][ iOuterEntry ];
601 
602  for ( size_t iInnerEntry = 0 ; iInnerEntry < iInnerRowNZ ; ++iInnerEntry ) {
603 
604  const size_t iInnerCol = fem_graph[ iInnerRow][iInnerEntry];
605  const size_t iFlatColumn = iInnerCol + iOuterCol * fem_length ;
606 
607  flat_graph[iFlatRow][iFlatEntry] = iFlatColumn ;
608  ++iFlatEntry ;
609  }
610  }
611  }
612  }
613 
614  //------------------------------
615 
616  vector_type x = vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), flat_length );
617  vector_type y = vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), flat_length );
618 
619  Kokkos::deep_copy( x , ScalarType(1.0) );
620 
621  //------------------------------
622 
623  matrix_type matrix ;
624 
625  matrix.graph = Kokkos::create_staticcrsgraph<matrix_graph_type>( std::string("testing") , flat_graph );
626 
627  const size_t flat_graph_length = matrix.graph.entries.dimension_0();
628 
629  matrix.values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), flat_graph_length );
630 
631  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
632 
633  //Kokkos::write_matrix_market(matrix, "flat_original.mm");
634 
635  //------------------------------
636 
637  Device::fence();
638  Kokkos::Impl::Timer clock ;
639  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
640  Stokhos::multiply( matrix , x , y );
641  }
642  Device::fence();
643 
644  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
645  const double flops = 2.0*1e-9*flat_graph_length / seconds_per_iter;
646 
647  std::vector<double> perf(4);
648  perf[0] = flat_length ;
649  perf[1] = seconds_per_iter;
650  perf[2] = flops;
651  perf[3] = flat_graph_length ;
652  return perf;
653 }
654 
655 template< typename ScalarType , class Device >
656 std::vector<double>
658  const std::vector<int> & var_degree ,
659  const int nGrid ,
660  const int iterCount ,
661  const bool symmetric )
662 {
663  typedef ScalarType value_type ;
664  typedef Kokkos::View< value_type** ,
665  Kokkos::LayoutLeft ,
666  Device > block_vector_type ;
667 
670 
672  typedef typename matrix_type::graph_type graph_type ;
673 
674  typedef ScalarType value_type ;
675  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
678  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
680 
681  using Teuchos::rcp;
682  using Teuchos::RCP;
683  using Teuchos::Array;
684 
685  // Create Stochastic Galerkin basis and expansion
686  const size_t num_KL = var_degree.size();
687  Array< RCP<const abstract_basis_type> > bases(num_KL);
688  for (size_t i=0; i<num_KL; i++) {
689  if (symmetric)
690  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
691  else
692  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
693  }
694  RCP<const product_basis_type> basis =
695  rcp(new product_basis_type(
697  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
698 
699  //------------------------------
700  // Generate graph for "FEM" box structure:
701 
702  std::vector< std::vector<size_t> > graph ;
703 
704  const size_t outer_length = nGrid * nGrid * nGrid ;
705  const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
706 
707  //------------------------------
708  // Generate CRS block-tensor matrix:
709 
710  matrix_type matrix ;
711 
712  Teuchos::ParameterList params;
713  params.set("Tile Size", 128);
714  params.set("Max Tiles", 10000);
715  matrix.block =
716  Stokhos::create_stochastic_product_tensor< TensorType >( *basis, *Cijk,
717  params);
718  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
719 
720  const size_t inner_length = matrix.block.dimension();
721  const size_t inner_length_aligned = matrix.block.aligned_dimension();
722 
723  matrix.values =
724  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length_aligned , graph_length );
725 
726  block_vector_type x =
727  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length_aligned , outer_length );
728  block_vector_type y =
729  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length_aligned , outer_length );
730 
731  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
732 
733  //------------------------------
734  // Generate input multivector:
735 
736  Kokkos::deep_copy( x , ScalarType(1.0) );
737 
738  //------------------------------
739 
740  Device::fence();
741  Kokkos::Impl::Timer clock ;
742  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
743  Stokhos::multiply( matrix , x , y );
744  }
745  Device::fence();
746 
747  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
748  const double flops_per_block = matrix.block.tensor().num_flops();
749  const double flops = 1.0e-9*graph_length*flops_per_block;
750 
751  // std::cout << "tensor: flops = " << flops
752  // << " time = " << seconds_per_iter << std::endl;
753 
754  std::vector<double> perf(6) ;
755 
756  perf[0] = outer_length * inner_length ;
757  perf[1] = seconds_per_iter ;
758  perf[2] = flops / seconds_per_iter;
759  perf[3] = matrix.block.tensor().entry_count();
760  perf[4] = inner_length ;
761  perf[5] = flops_per_block;
762 
763  return perf ;
764 }
765 
766 template< typename ScalarType , class Device >
767 std::vector<double>
769  const std::vector<int> & var_degree ,
770  const int nGrid ,
771  const int iterCount ,
772  const bool symmetric )
773 {
774  typedef ScalarType value_type ;
775  typedef Kokkos::View< value_type** ,
776  Kokkos::LayoutLeft ,
777  Device > block_vector_type ;
778 
781 
783  typedef typename matrix_type::graph_type graph_type ;
784 
785  typedef ScalarType value_type ;
786  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
789  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
791 
792  using Teuchos::rcp;
793  using Teuchos::RCP;
794  using Teuchos::Array;
795 
796  // Create Stochastic Galerkin basis and expansion
797  const size_t num_KL = var_degree.size();
798  Array< RCP<const abstract_basis_type> > bases(num_KL);
799  for (size_t i=0; i<num_KL; i++) {
800  if (symmetric)
801  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
802  else
803  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
804  }
805  RCP<const product_basis_type> basis =
806  rcp(new product_basis_type(
808  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
809 
810  //------------------------------
811  // Generate graph for "FEM" box structure:
812 
813  std::vector< std::vector<size_t> > graph ;
814 
815  const size_t outer_length = nGrid * nGrid * nGrid ;
816  const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
817 
818  //------------------------------
819  // Generate CRS block-tensor matrix:
820 
821  matrix_type matrix ;
822 
823  Teuchos::ParameterList params;
824  params.set("Tile Size", 128);
825  matrix.block =
826  Stokhos::create_stochastic_product_tensor< TensorType >( *basis, *Cijk,
827  params);
828  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
829 
830  const size_t inner_length = matrix.block.dimension();
831  const size_t inner_length_aligned = matrix.block.aligned_dimension();
832 
833  matrix.values =
834  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length_aligned , graph_length );
835 
836  block_vector_type x =
837  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length_aligned , outer_length );
838  block_vector_type y =
839  block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length_aligned , outer_length );
840 
841  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
842 
843  //------------------------------
844  // Generate input multivector:
845 
846  Kokkos::deep_copy( x , ScalarType(1.0) );
847 
848  //------------------------------
849 
850  Device::fence();
851  Kokkos::Impl::Timer clock ;
852  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
853  Stokhos::multiply( matrix , x , y );
854  }
855  Device::fence();
856 
857  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
858  const double flops_per_block = matrix.block.tensor().num_flops();
859  const double flops = 1.0e-9*graph_length*flops_per_block;
860 
861  // std::cout << "tensor: flops = " << flops
862  // << " time = " << seconds_per_iter << std::endl;
863 
864  std::vector<double> perf(6) ;
865 
866  perf[0] = outer_length * inner_length ;
867  perf[1] = seconds_per_iter ;
868  perf[2] = flops / seconds_per_iter;
869  perf[3] = matrix.block.tensor().entry_count();
870  perf[4] = inner_length ;
871  perf[5] = flops_per_block;
872 
873  return perf ;
874 }
875 
876 template< typename ScalarType , class Device >
877 std::vector<double>
879  const std::vector<int> & var_degree ,
880  const int nGrid ,
881  const int iterCount ,
882  const bool symmetric )
883 {
884  typedef ScalarType value_type ;
885  typedef Kokkos::View< value_type** ,
886  Kokkos::LayoutLeft ,
887  Device > block_vector_type ;
888 
891 
893  typedef typename matrix_type::graph_type graph_type ;
894 
895  typedef ScalarType value_type ;
896  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
899  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
901 
902  using Teuchos::rcp;
903  using Teuchos::RCP;
904  using Teuchos::Array;
905 
906  // Create Stochastic Galerkin basis and expansion
907  const size_t num_KL = var_degree.size();
908  Array< RCP<const abstract_basis_type> > bases(num_KL);
909  for (size_t i=0; i<num_KL; i++) {
910  if (symmetric)
911  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
912  else
913  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
914  }
915  RCP<const product_basis_type> basis =
916  rcp(new product_basis_type(
918  RCP<Cijk_type> Cijk =
920 
921  //------------------------------
922  // Generate graph for "FEM" box structure:
923 
924  std::vector< std::vector<size_t> > graph ;
925 
926  const size_t outer_length = nGrid * nGrid * nGrid ;
927  const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
928 
929  //------------------------------
930  // Generate CRS block-tensor matrix:
931 
932  matrix_type matrix ;
933 
934  matrix.block =
935  Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
936  *Cijk );
937  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
938 
939  const size_t inner_length = matrix.block.dimension();
940 
941  matrix.values = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length , graph_length );
942 
943  block_vector_type x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length , outer_length );
944  block_vector_type y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length , outer_length );
945 
946  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
947 
948  //------------------------------
949  // Generate input multivector:
950 
951  Kokkos::deep_copy( x , ScalarType(1.0) );
952 
953  //------------------------------
954 
955  Device::fence();
956  Kokkos::Impl::Timer clock ;
957  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
958  Stokhos::multiply( matrix , x , y );
959  }
960  Device::fence();
961 
962  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
963  const double flops_per_block = matrix.block.tensor().num_flops();
964  const double flops = 1.0e-9*graph_length*flops_per_block;
965 
966  // std::cout << "tensor: flops = " << flops
967  // << " time = " << seconds_per_iter << std::endl;
968 
969  std::vector<double> perf(6) ;
970 
971  perf[0] = outer_length * inner_length ;
972  perf[1] = seconds_per_iter ;
973  perf[2] = flops / seconds_per_iter;
974  perf[3] = matrix.block.tensor().num_non_zeros();
975  perf[4] = inner_length ;
976  perf[5] = flops_per_block;
977 
978  return perf ;
979 }
980 
981 template< typename ScalarType , class Device >
982 std::vector<double>
984  const std::vector<int> & var_degree ,
985  const int nGrid ,
986  const int iterCount ,
987  const bool symmetric )
988 {
989  typedef ScalarType value_type ;
990  typedef Kokkos::View< value_type** ,
991  Kokkos::LayoutLeft ,
992  Device > block_vector_type ;
993 
996 
998  typedef typename matrix_type::graph_type graph_type ;
999 
1000  typedef ScalarType value_type ;
1001  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
1004  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
1006 
1007  using Teuchos::rcp;
1008  using Teuchos::RCP;
1009  using Teuchos::Array;
1010 
1011  // Create Stochastic Galerkin basis and expansion
1012  const size_t num_KL = var_degree.size();
1013  Array< RCP<const abstract_basis_type> > bases(num_KL);
1014  for (size_t i=0; i<num_KL; i++) {
1015  if (symmetric)
1016  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
1017  else
1018  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
1019  }
1020  RCP<const product_basis_type> basis =
1021  rcp(new product_basis_type(
1023  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1024 
1025  //------------------------------
1026  // Generate graph for "FEM" box structure:
1027 
1028  std::vector< std::vector<size_t> > graph ;
1029 
1030  const size_t outer_length = nGrid * nGrid * nGrid ;
1031  const size_t graph_length = unit_test::generate_fem_graph( nGrid , graph );
1032 
1033  //------------------------------
1034  // Generate CRS block-tensor matrix:
1035 
1036  matrix_type matrix ;
1037 
1038  Teuchos::ParameterList params;
1039  params.set("Symmetric", symmetric);
1040  matrix.block =
1041  Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
1042  *Cijk,
1043  params );
1044  matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string("test crs graph") , graph );
1045 
1046  const size_t inner_length = matrix.block.tensor().dimension();
1047  const size_t inner_length_aligned = matrix.block.tensor().aligned_dimension();
1048 
1049  matrix.values = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), inner_length_aligned , graph_length );
1050 
1051  block_vector_type x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length_aligned , outer_length );
1052  block_vector_type y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length_aligned , outer_length );
1053 
1054  Kokkos::deep_copy( matrix.values , ScalarType(1.0) );
1055 
1056  //------------------------------
1057  // Generate input multivector:
1058 
1059  Kokkos::deep_copy( x , ScalarType(1.0) );
1060 
1061  //------------------------------
1062 
1063  Device::fence();
1064  Kokkos::Impl::Timer clock ;
1065  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
1066  Stokhos::multiply( matrix , x , y );
1067  }
1068  Device::fence();
1069 
1070  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
1071  const double flops_per_block = matrix.block.tensor().num_flops();
1072  const double flops = 1.0e-9*graph_length*flops_per_block;
1073 
1074  // std::cout << "tensor: flops = " << flops
1075  // << " time = " << seconds_per_iter << std::endl;
1076 
1077  std::vector<double> perf(6) ;
1078 
1079  perf[0] = outer_length * inner_length ;
1080  perf[1] = seconds_per_iter ;
1081  perf[2] = flops / seconds_per_iter;
1082  perf[3] = matrix.block.tensor().num_non_zeros();
1083  perf[4] = inner_length ;
1084  perf[5] = flops_per_block;
1085 
1086  return perf ;
1087 }
1088 
1089 template< typename ScalarType , class Device , class SparseMatOps >
1090 std::vector<double>
1092  const std::vector<int> & var_degree ,
1093  const int nGrid ,
1094  const int iterCount ,
1095  const bool test_block ,
1096  const bool symmetric )
1097 {
1098  typedef ScalarType value_type ;
1099  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
1102  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
1104 
1105  using Teuchos::rcp;
1106  using Teuchos::RCP;
1107  using Teuchos::Array;
1108 
1109  // Create Stochastic Galerkin basis and expansion
1110  const size_t num_KL = var_degree.size();
1111  Array< RCP<const abstract_basis_type> > bases(num_KL);
1112  for (size_t i=0; i<num_KL; i++) {
1113  if (symmetric)
1114  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
1115  else
1116  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
1117  }
1118  RCP<const product_basis_type> basis =
1119  rcp(new product_basis_type(
1121  const size_t outer_length = basis->size();
1122  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1123 
1124  //------------------------------
1125 
1126  typedef Stokhos::CrsMatrix<value_type,Device> matrix_type;
1127  typedef typename matrix_type::values_type matrix_values_type;
1128  typedef typename matrix_type::graph_type matrix_graph_type;
1129 
1130  //------------------------------
1131  // Generate FEM graph:
1132 
1133  std::vector< std::vector<size_t> > fem_graph ;
1134 
1135  const size_t inner_length = nGrid * nGrid * nGrid ;
1136  const size_t graph_length =
1137  unit_test::generate_fem_graph( nGrid , fem_graph );
1138 
1139  //------------------------------
1140 
1141  typedef Kokkos::View<value_type*,Device> vec_type ;
1142 
1143  std::vector<matrix_type> matrix( outer_length ) ;
1144  std::vector<vec_type> x( outer_length ) ;
1145  std::vector<vec_type> y( outer_length ) ;
1146  std::vector<vec_type> tmp( outer_length ) ;
1147 
1148  for (size_t block=0; block<outer_length; ++block) {
1149  matrix[block].graph = Kokkos::create_staticcrsgraph<matrix_graph_type>( std::string("testing") , fem_graph );
1150 
1151  matrix[block].values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length );
1152 
1153  x[block] = vec_type( Kokkos::ViewAllocateWithoutInitializing("x"), inner_length );
1154  y[block] = vec_type( Kokkos::ViewAllocateWithoutInitializing("y"), inner_length );
1155  tmp[block] = vec_type( Kokkos::ViewAllocateWithoutInitializing("tmp"), inner_length );
1156 
1157  Kokkos::deep_copy( matrix[block].values , ScalarType(1.0) );
1158 
1159  Kokkos::deep_copy( x[block] , ScalarType(1.0) );
1160  Kokkos::deep_copy( y[block] , ScalarType(1.0) );
1161  }
1162 
1163  Device::fence();
1164  SparseMatOps smo;
1165  Kokkos::Impl::Timer clock ;
1166  int n_apply = 0;
1167  int n_add = 0;
1168  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
1169 
1170  // Original matrix-free multiply algorithm using a block apply
1171  n_apply = 0;
1172  n_add = 0;
1173  typename Cijk_type::k_iterator k_begin = Cijk->k_begin();
1174  typename Cijk_type::k_iterator k_end = Cijk->k_end();
1175  for (typename Cijk_type::k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1176  int nj = Cijk->num_j(k_it);
1177  if (nj > 0) {
1178  int k = index(k_it);
1179  typename Cijk_type::kj_iterator j_begin = Cijk->j_begin(k_it);
1180  typename Cijk_type::kj_iterator j_end = Cijk->j_end(k_it);
1181  std::vector<vec_type> xx(nj), yy(nj);
1182  int jdx = 0;
1183  for (typename Cijk_type::kj_iterator j_it = j_begin; j_it != j_end;
1184  ++j_it) {
1185  int j = index(j_it);
1186  xx[jdx] = x[j];
1187  yy[jdx] = tmp[j];
1188  jdx++;
1189  }
1190  Stokhos::multiply( matrix[k] , xx , yy, smo );
1191  n_apply += nj;
1192  jdx = 0;
1193  for (typename Cijk_type::kj_iterator j_it = j_begin; j_it != j_end;
1194  ++j_it) {
1195  typename Cijk_type::kji_iterator i_begin = Cijk->i_begin(j_it);
1196  typename Cijk_type::kji_iterator i_end = Cijk->i_end(j_it);
1197  for (typename Cijk_type::kji_iterator i_it = i_begin; i_it != i_end;
1198  ++i_it) {
1199  int i = index(i_it);
1200  value_type c = value(i_it);
1201  Stokhos::update( value_type(1.0) , y[i] , c , yy[jdx] );
1202  ++n_add;
1203  }
1204  jdx++;
1205  }
1206  }
1207  }
1208 
1209  }
1210  Device::fence();
1211 
1212  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
1213  const double flops = 1.0e-9*(2.0*static_cast<double>(n_apply)*graph_length+
1214  static_cast<double>(n_add)*inner_length);
1215 
1216  // std::cout << "mat-free: flops = " << flops
1217  // << " time = " << seconds_per_iter << std::endl;
1218 
1219  std::vector<double> perf(4);
1220  perf[0] = outer_length * inner_length;
1221  perf[1] = seconds_per_iter ;
1222  perf[2] = flops/seconds_per_iter;
1223  perf[3] = flops;
1224 
1225  return perf;
1226 }
1227 
1228 template< typename ScalarType , class Device , class SparseMatOps >
1229 std::vector<double>
1231  const std::vector<int> & var_degree ,
1232  const int nGrid ,
1233  const int iterCount ,
1234  const bool test_block ,
1235  const bool symmetric )
1236 {
1237  typedef ScalarType value_type ;
1238  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
1241  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
1243 
1244  using Teuchos::rcp;
1245  using Teuchos::RCP;
1246  using Teuchos::Array;
1247 
1248  // Create Stochastic Galerkin basis and expansion
1249  const size_t num_KL = var_degree.size();
1250  Array< RCP<const abstract_basis_type> > bases(num_KL);
1251  for (size_t i=0; i<num_KL; i++) {
1252  if (symmetric)
1253  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
1254  else
1255  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
1256  }
1257  RCP<const product_basis_type> basis =
1258  rcp(new product_basis_type(
1260  const size_t outer_length = basis->size();
1261  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1262 
1263  //------------------------------
1264 
1265  typedef Stokhos::CrsMatrix<value_type,Device> matrix_type;
1266  typedef typename matrix_type::values_type matrix_values_type;
1267  typedef typename matrix_type::graph_type matrix_graph_type;
1268 
1269  //------------------------------
1270  // Generate FEM graph:
1271 
1272  std::vector< std::vector<size_t> > fem_graph ;
1273 
1274  const size_t inner_length = nGrid * nGrid * nGrid ;
1275  const size_t graph_length =
1276  unit_test::generate_fem_graph( nGrid , fem_graph );
1277 
1278  //------------------------------
1279 
1280  typedef Kokkos::View<value_type*, Kokkos::LayoutLeft, Device, Kokkos::MemoryUnmanaged> vec_type ;
1281  typedef Kokkos::View<value_type**, Kokkos::LayoutLeft, Device> multi_vec_type ;
1282 
1283  std::vector<matrix_type> matrix( outer_length ) ;
1284  multi_vec_type x( Kokkos::ViewAllocateWithoutInitializing("x"),
1285  inner_length, outer_length ) ;
1286  multi_vec_type y("y", inner_length, outer_length ) ;
1287  multi_vec_type tmp_x( "tmp_x", inner_length, outer_length ) ;
1288  multi_vec_type tmp_y( "tmp_y", inner_length, outer_length ) ;
1289 
1290  Kokkos::deep_copy( x , ScalarType(1.0) );
1291 
1292  for (size_t block=0; block<outer_length; ++block) {
1293  matrix[block].graph = Kokkos::create_staticcrsgraph<matrix_graph_type>(
1294  std::string("testing") , fem_graph );
1295 
1296  matrix[block].values = matrix_values_type( "matrix" , graph_length );
1297 
1298  Kokkos::deep_copy( matrix[block].values , ScalarType(1.0) );
1299  }
1300 
1301  Device::fence();
1302  SparseMatOps smo;
1303  Kokkos::Impl::Timer clock ;
1304  int n_apply = 0;
1305  int n_add = 0;
1306  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
1307 
1308  // Original matrix-free multiply algorithm using a block apply
1309  typedef typename Cijk_type::k_iterator k_iterator;
1310  typedef typename Cijk_type::kj_iterator kj_iterator;
1311  typedef typename Cijk_type::kji_iterator kji_iterator;
1312  n_apply = 0;
1313  n_add = 0;
1314  k_iterator k_begin = Cijk->k_begin();
1315  k_iterator k_end = Cijk->k_end();
1316  for (k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1317  unsigned nj = Cijk->num_j(k_it);
1318  if (nj > 0) {
1319  int k = index(k_it);
1320  kj_iterator j_begin = Cijk->j_begin(k_it);
1321  kj_iterator j_end = Cijk->j_end(k_it);
1322  std::vector<int> j_indices(nj);
1323  unsigned jdx = 0;
1324  for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1325  int j = index(j_it);
1326  vec_type xx = Kokkos::subview( x, Kokkos::ALL(), j );
1327  vec_type tt = Kokkos::subview( tmp_x, Kokkos::ALL(), jdx++ );
1328  Kokkos::deep_copy(tt, xx);
1329  }
1330  multi_vec_type tmp_x_view =
1331  Kokkos::subview( tmp_x, Kokkos::ALL(),
1332  std::make_pair(0u,nj));
1333  multi_vec_type tmp_y_view =
1334  Kokkos::subview( tmp_y, Kokkos::ALL(),
1335  std::make_pair(0u,nj));
1336  Stokhos::multiply( matrix[k] , tmp_x_view , tmp_y_view, smo );
1337  n_apply += nj;
1338  jdx = 0;
1339  for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1340  vec_type tmp_y_view =
1341  Kokkos::subview( tmp_y, Kokkos::ALL(), jdx++ );
1342  kji_iterator i_begin = Cijk->i_begin(j_it);
1343  kji_iterator i_end = Cijk->i_end(j_it);
1344  for (kji_iterator i_it = i_begin; i_it != i_end; ++i_it) {
1345  int i = index(i_it);
1346  value_type c = value(i_it);
1347  vec_type y_view = Kokkos::subview( y, Kokkos::ALL(), i );
1348  Stokhos::update( value_type(1.0) , y_view , c , tmp_y_view );
1349  ++n_add;
1350  }
1351  }
1352  }
1353  }
1354 
1355  }
1356  Device::fence();
1357 
1358  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
1359  const double flops = 1.0e-9*(2.0*static_cast<double>(n_apply)*graph_length+
1360  static_cast<double>(n_add)*inner_length);
1361 
1362  // std::cout << "mat-free: flops = " << flops
1363  // << " time = " << seconds_per_iter << std::endl;
1364 
1365  std::vector<double> perf(4);
1366  perf[0] = outer_length * inner_length;
1367  perf[1] = seconds_per_iter ;
1368  perf[2] = flops/seconds_per_iter;
1369  perf[3] = flops;
1370 
1371  return perf;
1372 }
1373 
1374 #ifdef HAVE_STOKHOS_KOKKOSLINALG
1375 template< typename ScalarType , class Device >
1376 std::vector<double>
1377 test_original_matrix_free_kokkos(
1378  const std::vector<int> & var_degree ,
1379  const int nGrid ,
1380  const int iterCount ,
1381  const bool test_block ,
1382  const bool symmetric )
1383 {
1384  typedef ScalarType value_type ;
1385  typedef Stokhos::OneDOrthogPolyBasis<int,value_type> abstract_basis_type;
1388  typedef Stokhos::TotalOrderBasis<int,value_type,order_type> product_basis_type;
1390 
1391  using Teuchos::rcp;
1392  using Teuchos::RCP;
1393  using Teuchos::Array;
1394 
1395  // Create Stochastic Galerkin basis and expansion
1396  const size_t num_KL = var_degree.size();
1397  Array< RCP<const abstract_basis_type> > bases(num_KL);
1398  for (size_t i=0; i<num_KL; i++) {
1399  if (symmetric)
1400  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,1.0,true));
1401  else
1402  bases[i] = Teuchos::rcp(new basis_type(var_degree[i],1.0,2.0,true));
1403  }
1404  RCP<const product_basis_type> basis =
1405  rcp(new product_basis_type(
1406  bases, ScalarTolerances<value_type>::sparse_cijk_tol()));
1407  const size_t outer_length = basis->size();
1408  RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1409 
1410  //------------------------------
1411 
1412  typedef int ordinal_type;
1413  typedef KokkosSparse::CrsMatrix<value_type,ordinal_type,Device> matrix_type;
1414  typedef typename matrix_type::values_type matrix_values_type;
1415  typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
1416 
1417  //------------------------------
1418  // Generate FEM graph:
1419 
1420  std::vector< std::vector<size_t> > fem_graph ;
1421 
1422  const size_t inner_length = nGrid * nGrid * nGrid ;
1423  const size_t graph_length =
1424  unit_test::generate_fem_graph( nGrid , fem_graph );
1425 
1426  //------------------------------
1427 
1428  typedef Kokkos::View<value_type*,Kokkos::LayoutLeft,Device, Kokkos::MemoryUnmanaged> vec_type ;
1429  typedef Kokkos::View<value_type**, Kokkos::LayoutLeft, Device> multi_vec_type;
1430 
1431  std::vector<matrix_type> matrix( outer_length ) ;
1432  multi_vec_type x( Kokkos::ViewAllocateWithoutInitializing("x"),
1433  inner_length, outer_length ) ;
1434  multi_vec_type y( "y", inner_length, outer_length ) ;
1435  multi_vec_type tmp_x( "tmp_x", inner_length, outer_length ) ;
1436  multi_vec_type tmp_y( "tmp_y", inner_length, outer_length ) ;
1437 
1438  Kokkos::deep_copy( x , ScalarType(1.0) );
1439 
1440  for (size_t block=0; block<outer_length; ++block) {
1441  matrix_graph_type matrix_graph =
1442  Kokkos::create_staticcrsgraph<matrix_graph_type>(
1443  std::string("test crs graph") , fem_graph );
1444 
1445  matrix_values_type matrix_values = matrix_values_type(
1446  Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length );
1447  Kokkos::deep_copy(matrix_values , ScalarType(1.0) );
1448  matrix[block] = matrix_type("matrix", outer_length, matrix_values,
1449  matrix_graph);
1450  }
1451 
1452  Device::fence();
1453  Kokkos::Impl::Timer clock ;
1454  int n_apply = 0;
1455  int n_add = 0;
1456  for ( int iter = 0 ; iter < iterCount ; ++iter ) {
1457 
1458  // Original matrix-free multiply algorithm using a block apply
1459  typedef typename Cijk_type::k_iterator k_iterator;
1460  typedef typename Cijk_type::kj_iterator kj_iterator;
1461  typedef typename Cijk_type::kji_iterator kji_iterator;
1462  n_apply = 0;
1463  n_add = 0;
1464  k_iterator k_begin = Cijk->k_begin();
1465  k_iterator k_end = Cijk->k_end();
1466  for (k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1467  unsigned nj = Cijk->num_j(k_it);
1468  if (nj > 0) {
1469  int k = index(k_it);
1470  kj_iterator j_begin = Cijk->j_begin(k_it);
1471  kj_iterator j_end = Cijk->j_end(k_it);
1472  unsigned jdx = 0;
1473  for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1474  int j = index(j_it);
1475  vec_type xx = Kokkos::subview( x, Kokkos::ALL(), j );
1476  vec_type tt = Kokkos::subview( tmp_x, Kokkos::ALL(), jdx++ );
1477  Kokkos::deep_copy(tt, xx);
1478  }
1479  multi_vec_type tmp_x_view =
1480  Kokkos::subview( tmp_x, Kokkos::ALL(),
1481  std::make_pair(0u,nj));
1482  multi_vec_type tmp_y_view =
1483  Kokkos::subview( tmp_y, Kokkos::ALL(),
1484  std::make_pair(0u,nj));
1485  KokkosSparse::spmv( "N" , value_type(1.0) , matrix[k] , tmp_x_view , value_type(0.0) , tmp_y_view );
1486  n_apply += nj;
1487  jdx = 0;
1488  for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1489  vec_type tmp_y_view =
1490  Kokkos::subview( tmp_y, Kokkos::ALL(), jdx++ );
1491  kji_iterator i_begin = Cijk->i_begin(j_it);
1492  kji_iterator i_end = Cijk->i_end(j_it);
1493  for (kji_iterator i_it = i_begin; i_it != i_end; ++i_it) {
1494  int i = index(i_it);
1495  value_type c = value(i_it);
1496  vec_type y_view = Kokkos::subview( y, Kokkos::ALL(), i );
1497  //Stokhos::update( value_type(1.0) , y_view , c , tmp_y_view );
1498  KokkosBlas::update(value_type(1.0) , y_view, c, tmp_y_view, value_type(0.0), y_view);
1499  ++n_add;
1500  }
1501  }
1502  }
1503  }
1504 
1505  }
1506  Device::fence();
1507 
1508  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
1509  const double flops = 1.0e-9*(2.0*static_cast<double>(n_apply)*graph_length+
1510  static_cast<double>(n_add)*inner_length);
1511 
1512  // std::cout << "mat-free: flops = " << flops
1513  // << " time = " << seconds_per_iter << std::endl;
1514 
1515  std::vector<double> perf(4);
1516  perf[0] = outer_length * inner_length;
1517  perf[1] = seconds_per_iter ;
1518  perf[2] = flops/seconds_per_iter;
1519  perf[3] = flops;
1520 
1521  return perf;
1522 }
1523 #endif
1524 
1525 template< class Scalar, class Device >
1526 void performance_test_driver_all( const int pdeg ,
1527  const int minvar ,
1528  const int maxvar ,
1529  const int nGrid ,
1530  const int nIter ,
1531  const bool test_block ,
1532  const bool symmetric )
1533 {
1534  //------------------------------
1535  // Generate FEM graph:
1536 
1537  std::vector< std::vector<size_t> > fem_graph ;
1538 
1539  const size_t fem_nonzeros =
1540  unit_test::generate_fem_graph( nGrid , fem_graph );
1541 
1542  //------------------------------
1543 
1544  std::cout.precision(8);
1545 
1546  //------------------------------
1547 
1548  std::cout << std::endl << "\"FEM NNZ = " << fem_nonzeros << "\"" << std::endl;
1549 
1550  std::cout << std::endl
1551  << "\"#nGrid\" , "
1552  << "\"#Variable\" , "
1553  << "\"PolyDegree\" , "
1554  << "\"#Bases\" , "
1555  << "\"#TensorEntry\" , "
1556  << "\"VectorSize\" , "
1557  << "\"Original-Flat MXV-Time\" , "
1558  << "\"Original-Flat MXV-Speedup\" , "
1559  << "\"Original-Flat MXV-GFLOPS\" , "
1560  << "\"Commuted-Flat MXV-Speedup\" , "
1561  << "\"Commuted-Flat MXV-GFLOPS\" , "
1562  << "\"Block-Diagonal MXV-Speedup\" , "
1563  << "\"Block-Diagonal MXV-GFLOPS\" , "
1564  << "\"Block-Crs-Tensor MXV-Speedup\" , "
1565  << "\"Block-Crs-Tensor MXV-GFLOPS\" , "
1566  << std::endl ;
1567 
1568  for ( int nvar = minvar ; nvar <= maxvar ; ++nvar ) {
1569 
1570  std::vector<int> var_degree( nvar , pdeg );
1571 
1572  //------------------------------
1573 
1574  const std::vector<double> perf_flat_original =
1575  test_product_flat_original_matrix<Scalar,Device>(
1576  var_degree , nGrid , nIter , symmetric );
1577 
1578  const std::vector<double> perf_flat_commuted =
1579  test_product_flat_commuted_matrix<Scalar,Device>(
1580  var_degree , nGrid , nIter , symmetric );
1581 
1582  const std::vector<double> perf_matrix =
1583  test_product_tensor_diagonal_matrix<Scalar,Device>(
1584  var_degree , nGrid , nIter , symmetric );
1585 
1586  const std::vector<double> perf_crs_tensor =
1587  test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1588  var_degree , nGrid , nIter , symmetric );
1589 
1590  if ( perf_flat_commuted[0] != perf_flat_original[0] ||
1591  perf_flat_commuted[3] != perf_flat_original[3] ) {
1592  std::cout << "ERROR: Original and commuted matrix sizes do not match"
1593  << std::endl
1594  << " original size = " << perf_flat_original[0]
1595  << " , nonzero = " << perf_flat_original[3]
1596  << std::endl
1597  << " commuted size = " << perf_flat_commuted[0]
1598  << " , nonzero = " << perf_flat_commuted[3]
1599  << std::endl ;
1600  }
1601 
1602  std::cout << nGrid << " , "
1603  << nvar << " , "
1604  << pdeg << " , "
1605  << perf_crs_tensor[4] << " , "
1606  << perf_crs_tensor[3] << " , "
1607  << perf_flat_original[0] << " , "
1608  << perf_flat_original[1] << " , "
1609  << perf_flat_original[1] / perf_flat_original[1] << " , "
1610  << perf_flat_original[2] << " , "
1611  << perf_flat_original[1] / perf_flat_commuted[1] << " , "
1612  << perf_flat_commuted[2] << " , "
1613  << perf_flat_original[1] / perf_matrix[1] << " , "
1614  << perf_matrix[2] << " , "
1615  << perf_flat_original[1] / perf_crs_tensor[1] << " , "
1616  << perf_crs_tensor[2] << " , "
1617  << std::endl ;
1618  }
1619 
1620  //------------------------------
1621 }
1622 
1623 template< class Scalar, class Device , class SparseMatOps >
1624 void performance_test_driver_poly( const int pdeg ,
1625  const int minvar ,
1626  const int maxvar ,
1627  const int nGrid ,
1628  const int nIter ,
1629  const bool test_block ,
1630  const bool symmetric )
1631 {
1632  std::cout.precision(8);
1633 
1634  //------------------------------
1635 
1636  std::vector< std::vector<size_t> > fem_graph ;
1637  const size_t graph_length =
1638  unit_test::generate_fem_graph( nGrid , fem_graph );
1639  std::cout << std::endl << "\"FEM NNZ = " << graph_length << "\"" << std::endl;
1640 
1641  std::cout << std::endl
1642  << "\"#nGrid\" , "
1643  << "\"#Variable\" , "
1644  << "\"PolyDegree\" , "
1645  << "\"#Bases\" , "
1646  << "\"#TensorEntry\" , "
1647  << "\"VectorSize\" , "
1648  << "\"Original-Matrix-Free-Block-MXV-Time\" , "
1649  << "\"Original-Matrix-Free-Block-MXV-Speedup\" , "
1650  << "\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
1651  << "\"Block-Crs-Tensor MXV-Speedup\" , "
1652  << "\"Block-Crs-Tensor MXV-GFLOPS\" , "
1653  // << "\"Block-Coo-Tensor MXV-Speedup\" , "
1654  // << "\"Block-Coo-Tensor MXV-GFLOPS\" , "
1655  // << "\"Block-Tiled Crs-Tensor MXV-Speedup\" , "
1656  // << "\"Block-Tiled Crs-3-Tensor MXV-GFLOPS\" , "
1657  << std::endl ;
1658 
1659  for ( int nvar = minvar ; nvar <= maxvar ; ++nvar ) {
1660  std::vector<int> var_degree( nvar , pdeg );
1661 
1662  const std::vector<double> perf_crs_tensor =
1663  test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1664  var_degree , nGrid , nIter , symmetric );
1665 
1666  // const bool Pack = Kokkos::Impl::is_same<Device,Kokkos::Cuda>::value;
1667  // const std::vector<double> perf_coo_tensor =
1668  // test_product_tensor_matrix<Scalar,Stokhos::CooProductTensor<Scalar,Device,Pack>,Device>(
1669  // var_degree , nGrid , nIter , symmetric );
1670 
1671  // const std::vector<double> perf_tiled_crs_tensor =
1672  // test_simple_tiled_product_tensor_matrix<Scalar,Device>(
1673  // var_degree , nGrid , nIter , symmetric );
1674 
1675  std::vector<double> perf_original_mat_free_block;
1676 #if defined(HAVE_STOKHOS_KOKKOSLINALG)
1677 #if defined( KOKKOS_HAVE_CUDA )
1678  enum { is_cuda = Kokkos::Impl::is_same<Device,Kokkos::Cuda>::value };
1679 #else
1680  enum { is_cuda = false };
1681 #endif
1682  if ( is_cuda )
1683  perf_original_mat_free_block =
1684  test_original_matrix_free_kokkos<Scalar,Device>(
1685  var_degree , nGrid , nIter , test_block , symmetric );
1686  else
1687  perf_original_mat_free_block =
1688  test_original_matrix_free_view<Scalar,Device,SparseMatOps>(
1689  var_degree , nGrid , nIter , test_block , symmetric );
1690 #else
1691  perf_original_mat_free_block =
1692  test_original_matrix_free_view<Scalar,Device,SparseMatOps>(
1693  var_degree , nGrid , nIter , test_block , symmetric );
1694 #endif
1695 
1696  std::cout << nGrid << " , "
1697  << nvar << " , "
1698  << pdeg << " , "
1699  << perf_crs_tensor[4] << " , "
1700  << perf_crs_tensor[3] << " , "
1701  << perf_original_mat_free_block[0] << " , "
1702  << perf_original_mat_free_block[1] << " , "
1703  << perf_original_mat_free_block[1] /
1704  perf_original_mat_free_block[1] << " , "
1705  << perf_original_mat_free_block[2] << " , "
1706  << perf_original_mat_free_block[1] / perf_crs_tensor[1] << " , "
1707  << perf_crs_tensor[2] << " , "
1708  // << perf_original_mat_free_block[1] / perf_coo_tensor[1] << " , "
1709  // << perf_coo_tensor[2] << " , "
1710  // << perf_original_mat_free_block[1] / perf_tiled_crs_tensor[1] << " , "
1711  // << perf_tiled_crs_tensor[2]
1712  << std::endl ;
1713  }
1714 
1715  //------------------------------
1716 }
1717 
1718 template< class Scalar, class Device , class SparseMatOps >
1719 void performance_test_driver_poly_deg( const int nvar ,
1720  const int minp ,
1721  const int maxp ,
1722  const int nGrid ,
1723  const int nIter ,
1724  const bool test_block ,
1725  const bool symmetric )
1726 {
1727  bool do_flat_sparse =
1728  Kokkos::Impl::is_same<typename Device::memory_space,Kokkos::HostSpace>::value ;
1729 
1730  std::cout.precision(8);
1731 
1732  //------------------------------
1733 
1734  std::vector< std::vector<size_t> > fem_graph ;
1735  const size_t graph_length =
1736  unit_test::generate_fem_graph( nGrid , fem_graph );
1737  std::cout << std::endl << "\"FEM NNZ = " << graph_length << "\"" << std::endl;
1738 
1739  std::cout << std::endl
1740  << "\"#nGrid\" , "
1741  << "\"#Variable\" , "
1742  << "\"PolyDegree\" , "
1743  << "\"#Bases\" , "
1744  << "\"#TensorEntry\" , "
1745  << "\"VectorSize\" , "
1746  << "\"Original-Matrix-Free-Block-MXV-Time\" , "
1747  << "\"Original-Matrix-Free-Block-MXV-Speedup\" , "
1748  << "\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
1749  << "\"Block-Crs-Tensor MXV-Speedup\" , "
1750  << "\"Block-Crs-Tensor MXV-GFLOPS\" , ";
1751  if (do_flat_sparse)
1752  std::cout << "\"Block-Lexicographic-Sparse-3-Tensor MXV-Speedup\" , "
1753  << "\"Block-Lexicographic-Sparse-3-Tensor MXV-GFLOPS\" , "
1754  << "\"Lexicographic FLOPS / Crs FLOPS\" , ";
1755  std::cout << std::endl ;
1756 
1757  for ( int p = minp ; p <= maxp ; ++p ) {
1758  std::vector<int> var_degree( nvar , p );
1759 
1760  const std::vector<double> perf_crs_tensor =
1761  test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1762  var_degree , nGrid , nIter , symmetric );
1763 
1764  std::vector<double> perf_lexo_sparse_3_tensor;
1765  if (do_flat_sparse) {
1766  perf_lexo_sparse_3_tensor =
1767  test_lexo_block_tensor<Scalar,Device>( var_degree , nGrid , nIter , symmetric );
1768  }
1769 
1770  const std::vector<double> perf_original_mat_free_block =
1771  test_original_matrix_free_vec<Scalar,Device,SparseMatOps>(
1772  var_degree , nGrid , nIter , test_block , symmetric );
1773 
1774  std::cout << nGrid << " , "
1775  << nvar << " , "
1776  << p << " , "
1777  << perf_crs_tensor[4] << " , "
1778  << perf_crs_tensor[3] << " , "
1779  << perf_original_mat_free_block[0] << " , "
1780  << perf_original_mat_free_block[1] << " , "
1781  << perf_original_mat_free_block[1] / perf_original_mat_free_block[1] << " , "
1782  << perf_original_mat_free_block[2] << " , "
1783  << perf_original_mat_free_block[1] / perf_crs_tensor[1] << " , "
1784  << perf_crs_tensor[2] << " , ";
1785  if (do_flat_sparse) {
1786  std::cout << perf_original_mat_free_block[1] / perf_lexo_sparse_3_tensor[1] << " , "
1787  << perf_lexo_sparse_3_tensor[2] << " , "
1788  << perf_lexo_sparse_3_tensor[5] / perf_crs_tensor[5];
1789  }
1790 
1791 
1792  std::cout << std::endl ;
1793  }
1794 
1795  //------------------------------
1796 }
1797 
1798 template< class Scalar, class Device , class SparseMatOps >
1799 void performance_test_driver_linear( const int minvar ,
1800  const int maxvar ,
1801  const int varinc ,
1802  const int nGrid ,
1803  const int nIter ,
1804  const bool test_block ,
1805  const bool symmetric )
1806 {
1807  std::cout.precision(8);
1808 
1809  //------------------------------
1810 
1811  std::vector< std::vector<size_t> > fem_graph ;
1812  const size_t graph_length =
1813  unit_test::generate_fem_graph( nGrid , fem_graph );
1814  std::cout << std::endl << "\"FEM NNZ = " << graph_length << "\"" << std::endl;
1815 
1816  std::cout << std::endl
1817  << "\"#nGrid\" , "
1818  << "\"#Variable\" , "
1819  << "\"PolyDegree\" , "
1820  << "\"#Bases\" , "
1821  << "\"#TensorEntry\" , "
1822  << "\"VectorSize\" , "
1823  << "\"Original-Matrix-Free-Block-MXV-Time\" , "
1824  << "\"Original-Matrix-Free-Block-MXV-Speedup\" , "
1825  << "\"Original-Matrix-Free-Block-MXV-GFLOPS\" , "
1826  << "\"Block-Crs-Tensor MXV-Speedup\" , "
1827  << "\"Block-Crs-Tensor MXV-GFLOPS\" , "
1828  << "\"Linear-Sparse-3-Tensor MXV-Speedup\" , "
1829  << "\"Linear-Sparse-3-Tensor MXV-GFLOPS\" , "
1830  << std::endl ;
1831 
1832  for ( int nvar = minvar ; nvar <= maxvar ; nvar+=varinc ) {
1833  std::vector<int> var_degree( nvar , 1 );
1834 
1835  const std::vector<double> perf_crs_tensor =
1836  test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1837  var_degree , nGrid , nIter , symmetric );
1838 
1839  const std::vector<double> perf_linear_sparse_3_tensor =
1840  test_linear_tensor<Scalar,Device>( var_degree , nGrid , nIter , symmetric );
1841 
1842  const std::vector<double> perf_original_mat_free_block =
1843  test_original_matrix_free_vec<Scalar,Device,SparseMatOps>(
1844  var_degree , nGrid , nIter , test_block , symmetric );
1845 
1846  std::cout << nGrid << " , "
1847  << nvar << " , "
1848  << 1 << " , "
1849  << perf_crs_tensor[4] << " , "
1850  << perf_crs_tensor[3] << " , "
1851  << perf_original_mat_free_block[0] << " , "
1852  << perf_original_mat_free_block[1] << " , "
1853  << perf_original_mat_free_block[1] / perf_original_mat_free_block[1] << " , "
1854  << perf_original_mat_free_block[2] << " , "
1855  << perf_original_mat_free_block[1] / perf_crs_tensor[1] << " , "
1856  << perf_crs_tensor[2] << " , "
1857  << perf_original_mat_free_block[1] / perf_linear_sparse_3_tensor[1] << " , "
1858  << perf_linear_sparse_3_tensor[2] << " , "
1859  << std::endl ;
1860  }
1861 
1862  //------------------------------
1863 }
1864 
1865 template< class Scalar, class Device >
1867 
1868 //----------------------------------------------------------------------------
1869 
1870 }
std::vector< double > test_tiled_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
void performance_test_driver_linear(const int minvar, const int maxvar, const int varinc, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
Bases defined by combinatorial product of polynomial bases.
std::vector< double > test_product_tensor_diagonal_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Multivariate orthogonal polynomial basis generated from a total order tensor product of univariate po...
Data structure storing a sparse 3-tensor C(i,j,k) in a a compressed format.
size_t generate_fem_graph(size_t N, std::vector< std::vector< size_t > > &graph)
void performance_test_driver_poly_deg(const int nvar, const int minp, const int maxp, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
Symmetric diagonal storage for a dense matrix.
std::vector< double > test_product_flat_original_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Teuchos::RCP< LTBSparse3Tensor< ordinal_type, value_type > > computeTripleProductTensorLTBBlockLeaf(const TotalOrderBasis< ordinal_type, value_type, LexographicLess< MultiIndex< ordinal_type > > > &product_basis, bool symmetric=false)
std::vector< double > test_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Stokhos::LegendreBasis< int, double > basis_type
Data structure storing a sparse 3-tensor C(i,j,k) in a a tree-based format for lexicographically orde...
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
Definition: csr_vector.h:260
std::vector< double > test_lexo_block_tensor(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
void performance_test_driver_all(const int pdeg, const int minvar, const int maxvar, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
Jacobi polynomial basis.
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
std::vector< double > test_simple_tiled_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
expr expr expr expr j
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< XD, XP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< YD, YP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< ZD, ZP... > >::value >::type update(const typename Kokkos::View< XD, XP... >::array_type::non_const_value_type &alpha, const Kokkos::View< XD, XP... > &x, const typename Kokkos::View< YD, YP... >::array_type::non_const_value_type &beta, const Kokkos::View< YD, YP... > &y, const typename Kokkos::View< ZD, ZP... >::array_type::non_const_value_type &gamma, const Kokkos::View< ZD, ZP... > &z)
Stokhos::Sparse3Tensor< int, double > Cijk_type
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
Abstract base class for 1-D orthogonal polynomials.
Teuchos::RCP< Epetra_CrsGraph > sparse3Tensor2CrsGraph(const Stokhos::OrthogPolyBasis< ordinal_type, value_type > &basis, const Stokhos::Sparse3Tensor< ordinal_type, value_type > &Cijk, const Epetra_Comm &comm)
Build an Epetra_CrsGraph from a sparse 3 tensor.
CRS matrix of dense blocks.
A comparison functor implementing a strict weak ordering based lexographic ordering.
Sacado::MP::Vector< storage_type > vec_type
std::vector< double > test_linear_tensor(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
std::vector< double > test_original_matrix_free_vec(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool test_block, const bool symmetric)
void performance_test_driver_poly(const int pdeg, const int minvar, const int maxvar, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
std::vector< double > test_product_flat_commuted_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y
Definition: csr_vector.h:267
void update(const ValueType &alpha, VectorType &x, const ValueType &beta, const VectorType &y)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)
std::vector< double > test_original_matrix_free_view(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool test_block, const bool symmetric)