Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
TestMeanMultiply.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #include <iostream>
43 
44 // PCE scalar type
46 
47 // Kokkos CrsMatrix
48 #include "Kokkos_Sparse_CrsMatrix.hpp"
51 
52 // Stokhos
56 
57 // Utilities
58 #include "impl/Kokkos_Timer.hpp"
59 
60 template< typename IntType >
61 inline
62 IntType map_fem_graph_coord( const IntType & N ,
63  const IntType & i ,
64  const IntType & j ,
65  const IntType & k )
66 {
67  return k + N * ( j + N * i );
68 }
69 
70 inline
71 size_t generate_fem_graph( size_t N ,
72  std::vector< std::vector<size_t> > & graph )
73 {
74  graph.resize( N * N * N , std::vector<size_t>() );
75 
76  size_t total = 0 ;
77 
78  for ( int i = 0 ; i < (int) N ; ++i ) {
79  for ( int j = 0 ; j < (int) N ; ++j ) {
80  for ( int k = 0 ; k < (int) N ; ++k ) {
81 
82  const size_t row = map_fem_graph_coord((int)N,i,j,k);
83 
84  graph[row].reserve(27);
85 
86  for ( int ii = -1 ; ii < 2 ; ++ii ) {
87  for ( int jj = -1 ; jj < 2 ; ++jj ) {
88  for ( int kk = -1 ; kk < 2 ; ++kk ) {
89  if ( 0 <= i + ii && i + ii < (int) N &&
90  0 <= j + jj && j + jj < (int) N &&
91  0 <= k + kk && k + kk < (int) N ) {
92  size_t col = map_fem_graph_coord((int)N,i+ii,j+jj,k+kk);
93 
94  graph[row].push_back(col);
95  }
96  }}}
97  total += graph[row].size();
98  }}}
99 
100  return total ;
101 }
102 
103 template <typename ScalarType, typename OrdinalType, typename Device>
104 void
105 test_mean_multiply(const OrdinalType order,
106  const OrdinalType dim,
107  const OrdinalType nGrid,
108  const OrdinalType iterCount,
109  std::vector<double>& scalar_perf,
110  std::vector<double>& block_left_perf,
111  std::vector<double>& block_right_perf,
112  std::vector<double>& pce_perf,
113  std::vector<double>& block_pce_perf)
114 {
115  typedef ScalarType value_type;
116  typedef OrdinalType ordinal_type;
117  typedef Device execution_space;
118 
121 
122  typedef Kokkos::View< value_type*, Kokkos::LayoutLeft, execution_space > scalar_vector_type;
123  typedef Kokkos::View< value_type**, Kokkos::LayoutLeft, execution_space > scalar_left_multi_vector_type;
124  typedef Kokkos::View< value_type**, Kokkos::LayoutRight, execution_space > scalar_right_multi_vector_type;
125  typedef Kokkos::View< pce_type*, Kokkos::LayoutLeft, execution_space > pce_vector_type;
126  typedef Kokkos::View< pce_type**, Kokkos::LayoutLeft, execution_space > pce_multi_vector_type;
127 
128  typedef KokkosSparse::CrsMatrix< value_type, ordinal_type, execution_space > scalar_matrix_type;
129  typedef KokkosSparse::CrsMatrix< pce_type, ordinal_type, execution_space > pce_matrix_type;
130  typedef typename scalar_matrix_type::StaticCrsGraphType matrix_graph_type;
131  typedef typename scalar_matrix_type::values_type scalar_matrix_values_type;
132  typedef typename pce_matrix_type::values_type pce_matrix_values_type;
133 
134  typedef Stokhos::OneDOrthogPolyBasis<ordinal_type,value_type> abstract_basis_type;
139  typedef typename pce_type::cijk_type kokkos_cijk_type;
140 
141  using Teuchos::rcp;
142  using Teuchos::RCP;
143  using Teuchos::Array;
144 
145  // Number of columns for PCE multi-vector apply
146  const ordinal_type num_pce_col = 5;
147 
148  // Create Stochastic Galerkin basis and expansion
149  Array< RCP<const abstract_basis_type> > bases(dim);
150  for (ordinal_type i=0; i<dim; ++i) {
151  bases[i] = Teuchos::rcp(new basis_type(order, true));
152  }
153  RCP<const product_basis_type> basis = rcp(new product_basis_type(bases));
154  RCP<cijk_type> cijk = basis->computeTripleProductTensor();
155  kokkos_cijk_type kokkos_cijk =
156  Stokhos::create_product_tensor<execution_space>(*basis, *cijk);
157  Kokkos::setGlobalCijkTensor(kokkos_cijk);
158 
159  //------------------------------
160  // Generate graph for "FEM" box structure:
161 
162  std::vector< std::vector<size_t> > fem_graph;
163  const size_t fem_length = nGrid * nGrid * nGrid;
164  const size_t graph_length = generate_fem_graph( nGrid , fem_graph );
165 
166  //------------------------------
167  // Generate input vectors:
168 
169  ordinal_type pce_size = basis->size();
170  scalar_left_multi_vector_type xl(Kokkos::ViewAllocateWithoutInitializing("scalar left x"), fem_length, pce_size);
171  scalar_left_multi_vector_type yl(Kokkos::ViewAllocateWithoutInitializing("scalar right y"), fem_length, pce_size);
172  scalar_right_multi_vector_type xr(Kokkos::ViewAllocateWithoutInitializing("scalar right x"), fem_length, pce_size);
173  scalar_right_multi_vector_type yr(Kokkos::ViewAllocateWithoutInitializing("scalar right y"), fem_length, pce_size);
174  std::vector<scalar_vector_type> x_col(pce_size), y_col(pce_size);
175  for (ordinal_type i=0; i<pce_size; ++i) {
176  x_col[i] = scalar_vector_type (Kokkos::ViewAllocateWithoutInitializing("scalar x col"), fem_length);
177  y_col[i] = scalar_vector_type(Kokkos::ViewAllocateWithoutInitializing("scalar y col"), fem_length);
178  Kokkos::deep_copy( x_col[i] , value_type(1.0) );
179  Kokkos::deep_copy( y_col[i] , value_type(0.0) );
180  }
181  pce_vector_type x_pce =
182  Kokkos::make_view<pce_vector_type>(Kokkos::ViewAllocateWithoutInitializing("pce x"),
183  kokkos_cijk, fem_length, pce_size);
184  pce_vector_type y_pce =
185  Kokkos::make_view<pce_vector_type>(Kokkos::ViewAllocateWithoutInitializing("pce y"),
186  kokkos_cijk, fem_length, pce_size);
187  pce_multi_vector_type x_multi_pce =
188  Kokkos::make_view<pce_multi_vector_type>(Kokkos::ViewAllocateWithoutInitializing("pce multi x"),
189  kokkos_cijk, fem_length,
190  num_pce_col, pce_size);
191  pce_multi_vector_type y_multi_pce =
192  Kokkos::make_view<pce_multi_vector_type>(Kokkos::ViewAllocateWithoutInitializing("pce multi y"),
193  kokkos_cijk, fem_length,
194  num_pce_col, pce_size);
195 
196  Kokkos::deep_copy( xl , value_type(1.0) );
197  Kokkos::deep_copy( yl , value_type(0.0) );
198  Kokkos::deep_copy( xr , value_type(1.0) );
199  Kokkos::deep_copy( yr , value_type(0.0) );
200  Kokkos::deep_copy( x_pce , value_type(1.0) );
201  Kokkos::deep_copy( y_pce , value_type(0.0) );
202  Kokkos::deep_copy( x_multi_pce , value_type(1.0) );
203  Kokkos::deep_copy( y_multi_pce , value_type(0.0) );
204 
205  //------------------------------
206  // Generate matrix
207 
208  matrix_graph_type matrix_graph =
209  Kokkos::create_staticcrsgraph<matrix_graph_type>(
210  std::string("test crs graph"), fem_graph);
211  scalar_matrix_values_type scalar_matrix_values =
212  scalar_matrix_values_type(Kokkos::ViewAllocateWithoutInitializing("scalar matrix"), graph_length);
213  pce_matrix_values_type pce_matrix_values =
214  Kokkos::make_view<pce_matrix_values_type>(Kokkos::ViewAllocateWithoutInitializing("pce matrix"), kokkos_cijk, graph_length, 1);
215  scalar_matrix_type scalar_matrix("scalar matrix", fem_length,
216  scalar_matrix_values, matrix_graph);
217  pce_matrix_type pce_matrix("pce matrix", fem_length,
218  pce_matrix_values, matrix_graph);
219 
220  Kokkos::deep_copy( scalar_matrix_values , value_type(1.0) );
221  Kokkos::deep_copy( pce_matrix_values , value_type(1.0) );
222 
223  //------------------------------
224  // Scalar multiply
225 
226  {
227  // warm up
228  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
229  for (ordinal_type col=0; col<pce_size; ++col) {
230  // scalar_vector_type xc =
231  // Kokkos::subview(x, Kokkos::ALL(), col);
232  // scalar_vector_type yc =
233  // Kokkos::subview(y, Kokkos::ALL(), col);
234  // Kokkos::MV_Multiply( yc, scalar_matrix, xc );
235  KokkosSparse::spmv( "N" , value_type(1.0) , scalar_matrix, x_col[col] , value_type(0.0) ,y_col[col]);
236  }
237  }
238 
239  execution_space::fence();
240  Kokkos::Impl::Timer clock ;
241  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
242  for (ordinal_type col=0; col<pce_size; ++col) {
243  // scalar_vector_type xc =
244  // Kokkos::subview(x, Kokkos::ALL(), col);
245  // scalar_vector_type yc =
246  // Kokkos::subview(y, Kokkos::ALL(), col);
247  // Kokkos::MV_Multiply( yc, scalar_matrix, xc );
248  KokkosSparse::spmv( "N" , value_type(1.0) , scalar_matrix, x_col[col] , value_type(0.0) ,y_col[col]);
249  }
250  }
251  execution_space::fence();
252 
253  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
254  const double flops = 1.0e-9 * 2.0 * graph_length * pce_size;
255 
256  scalar_perf.resize(5);
257  scalar_perf[0] = fem_length;
258  scalar_perf[1] = pce_size;
259  scalar_perf[2] = graph_length;
260  scalar_perf[3] = seconds_per_iter;
261  scalar_perf[4] = flops / seconds_per_iter;
262  }
263 
264  //------------------------------
265  // Block-left multiply
266 
267  {
268  // warm up
269  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
270  KokkosSparse::spmv( "N" , value_type(1.0) , scalar_matrix, xl , value_type(0.0) ,yl);
271  }
272 
273  execution_space::fence();
274  Kokkos::Impl::Timer clock ;
275  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
276  KokkosSparse::spmv( "N" , value_type(1.0) , scalar_matrix, xl , value_type(0.0) ,yl);
277  }
278  execution_space::fence();
279 
280  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
281  const double flops = 1.0e-9 * 2.0 * graph_length * pce_size;
282 
283  block_left_perf.resize(5);
284  block_left_perf[0] = fem_length;
285  block_left_perf[1] = pce_size;
286  block_left_perf[2] = graph_length;
287  block_left_perf[3] = seconds_per_iter;
288  block_left_perf[4] = flops / seconds_per_iter;
289  }
290 
291  //------------------------------
292  // Block-right multiply
293 
294  {
295  // warm up
296  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
297  KokkosSparse::spmv( "N" , value_type(1.0) , scalar_matrix, xr , value_type(0.0) ,yr);
298  }
299 
300  execution_space::fence();
301  Kokkos::Impl::Timer clock ;
302  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
303  KokkosSparse::spmv( "N" , value_type(1.0) , scalar_matrix, xr , value_type(0.0) ,yr);
304  }
305  execution_space::fence();
306 
307  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
308  const double flops = 1.0e-9 * 2.0 * graph_length * pce_size;
309 
310  block_right_perf.resize(5);
311  block_right_perf[0] = fem_length;
312  block_right_perf[1] = pce_size;
313  block_right_perf[2] = graph_length;
314  block_right_perf[3] = seconds_per_iter;
315  block_right_perf[4] = flops / seconds_per_iter;
316  }
317 
318  //------------------------------
319  // PCE multiply
320 
321  {
322  // warm up
323  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
324  KokkosSparse::spmv( "N" , value_type(1.0) , pce_matrix, x_pce , value_type(0.0) ,y_pce);
325  }
326 
327  execution_space::fence();
328  Kokkos::Impl::Timer clock ;
329  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
330  KokkosSparse::spmv( "N" , value_type(1.0) , pce_matrix, x_pce , value_type(0.0) ,y_pce);
331  }
332  execution_space::fence();
333 
334  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
335  const double flops = 1.0e-9 * 2.0 * graph_length * pce_size;
336 
337  pce_perf.resize(5);
338  pce_perf[0] = fem_length;
339  pce_perf[1] = pce_size;
340  pce_perf[2] = graph_length;
341  pce_perf[3] = seconds_per_iter;
342  pce_perf[4] = flops / seconds_per_iter;
343  }
344 
345  //------------------------------
346  // PCE multi-vector multiply
347 
348  {
349  // warm up
350  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
351  KokkosSparse::spmv( "N" , value_type(1.0) , pce_matrix, x_multi_pce , value_type(0.0) ,y_multi_pce);
352  }
353 
354  execution_space::fence();
355  Kokkos::Impl::Timer clock ;
356  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
357  KokkosSparse::spmv( "N" , value_type(1.0) , pce_matrix, x_multi_pce , value_type(0.0) ,y_multi_pce);
358  }
359  execution_space::fence();
360 
361  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
362  const double flops = 1.0e-9 * 2.0 * graph_length * pce_size * num_pce_col;
363 
364  block_pce_perf.resize(5);
365  block_pce_perf[0] = fem_length;
366  block_pce_perf[1] = pce_size;
367  block_pce_perf[2] = graph_length;
368  block_pce_perf[3] = seconds_per_iter;
369  block_pce_perf[4] = flops / seconds_per_iter;
370  }
371 
372 }
373 
374 template <typename Scalar, typename Ordinal, typename Device>
376  const Ordinal nIter,
377  const Ordinal order,
378  const Ordinal min_var,
379  const Ordinal max_var )
380 {
381  std::cout.precision(8);
382  std::cout << std::endl
383  << "\"Grid Size\" , "
384  << "\"FEM Size\" , "
385  << "\"FEM Graph Size\" , "
386  << "\"Dimension\" , "
387  << "\"Order\" , "
388  << "\"PCE Size\" , "
389  << "\"Scalar SpMM Time\" , "
390  << "\"Scalar SpMM Speedup\" , "
391  << "\"Scalar SpMM GFLOPS\" , "
392  << "\"Block-Left SpMM Speedup\" , "
393  << "\"Block-Left SpMM GFLOPS\" , "
394  << "\"Block-Right SpMM Speedup\" , "
395  << "\"Block-Right SpMM GFLOPS\" , "
396  << "\"PCE SpMM Speedup\" , "
397  << "\"PCE SpMM GFLOPS\" , "
398  << "\"Block PCE SpMM Speedup\" , "
399  << "\"Block PCE SpMM GFLOPS\" , "
400  << std::endl;
401 
402  std::vector<double> perf_scalar, perf_block_left, perf_block_right,
403  perf_pce, perf_block_pce;
404  for (Ordinal dim=min_var; dim<=max_var; ++dim) {
405 
406  test_mean_multiply<Scalar,Ordinal,Device>(
407  order, dim, nGrid, nIter, perf_scalar, perf_block_left, perf_block_right,
408  perf_pce, perf_block_pce );
409 
410  std::cout << nGrid << " , "
411  << perf_scalar[0] << " , "
412  << perf_scalar[2] << " , "
413  << dim << " , "
414  << order << " , "
415  << perf_scalar[1] << " , "
416  << perf_scalar[3] << " , "
417  << perf_scalar[4] / perf_scalar[4] << " , "
418  << perf_scalar[4] << " , "
419  << perf_block_left[4]/ perf_scalar[4] << " , "
420  << perf_block_left[4] << " , "
421  << perf_block_right[4]/ perf_scalar[4] << " , "
422  << perf_block_right[4] << " , "
423  << perf_pce[4]/ perf_scalar[4] << " , "
424  << perf_pce[4] << " , "
425  << perf_block_pce[4]/ perf_scalar[4] << " , "
426  << perf_block_pce[4] << " , "
427  << std::endl;
428 
429  }
430 }
431 
432 #define INST_PERF_DRIVER(SCALAR, ORDINAL, DEVICE) \
433  template void performance_test_driver< SCALAR, ORDINAL, DEVICE >( \
434  const ORDINAL nGrid, const ORDINAL nIter, const ORDINAL order, \
435  const ORDINAL min_var, const ORDINAL max_var);
Stokhos::StandardStorage< int, double > storage_type
Multivariate orthogonal polynomial basis generated from a total order tensor product of univariate po...
Data structure storing a sparse 3-tensor C(i,j,k) in a a compressed format.
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
void performance_test_driver(const Ordinal nGrid, const Ordinal nIter, const Ordinal order, const Ordinal min_var, const Ordinal max_var)
Kokkos::DefaultExecutionSpace execution_space
Sacado::PCE::OrthogPoly< double, Storage > pce_type
void test_mean_multiply(const OrdinalType order, const OrdinalType dim, const OrdinalType nGrid, const OrdinalType iterCount, std::vector< double > &scalar_perf, std::vector< double > &block_left_perf, std::vector< double > &block_right_perf, std::vector< double > &pce_perf, std::vector< double > &block_pce_perf)
Stokhos::LegendreBasis< int, double > basis_type
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
expr expr expr expr j
void setGlobalCijkTensor(const cijk_type &cijk)
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< view_type >::value, typename CijkType< view_type >::type >::type cijk(const view_type &view)
Legendre polynomial basis.
Abstract base class for 1-D orthogonal polynomials.
size_t generate_fem_graph(size_t N, std::vector< std::vector< size_t > > &graph)
A comparison functor implementing a strict weak ordering based lexographic ordering.
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)