Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
TestSpMM.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 #include <iostream>
42 
43 // Kokkos CrsMatrix
44 #include "Kokkos_Sparse.hpp"
45 
46 // Utilities
47 #include "impl/Kokkos_Timer.hpp"
48 
// Flattens the grid coordinate (i, j, k) of an N x N x N box into a single
// linear index.  k is the fastest-varying coordinate and i the slowest.
template< typename IntType >
inline
IntType map_fem_graph_coord( const IntType & N ,
                             const IntType & i ,
                             const IntType & j ,
                             const IntType & k )
{
  // Offset contributed by the slowest coordinate, in units of grid lines.
  const IntType plane_offset = N * i ;

  // Offset of the (i, j) grid line, in units of grid points.
  const IntType line_offset = N * ( j + plane_offset ) ;

  return k + line_offset ;
}
58 
59 inline
60 size_t generate_fem_graph( size_t N ,
61  std::vector< std::vector<size_t> > & graph )
62 {
63  graph.resize( N * N * N , std::vector<size_t>() );
64 
65  size_t total = 0 ;
66 
67  for ( int i = 0 ; i < (int) N ; ++i ) {
68  for ( int j = 0 ; j < (int) N ; ++j ) {
69  for ( int k = 0 ; k < (int) N ; ++k ) {
70 
71  const size_t row = map_fem_graph_coord((int)N,i,j,k);
72 
73  graph[row].reserve(27);
74 
75  for ( int ii = -1 ; ii < 2 ; ++ii ) {
76  for ( int jj = -1 ; jj < 2 ; ++jj ) {
77  for ( int kk = -1 ; kk < 2 ; ++kk ) {
78  if ( 0 <= i + ii && i + ii < (int) N &&
79  0 <= j + jj && j + jj < (int) N &&
80  0 <= k + kk && k + kk < (int) N ) {
81  size_t col = map_fem_graph_coord((int)N,i+ii,j+jj,k+kk);
82 
83  graph[row].push_back(col);
84  }
85  }}}
86  total += graph[row].size();
87  }}}
88 
89  return total ;
90 }
91 
92 template <typename ScalarType, typename OrdinalType, typename Device>
93 void
94 test_spmm(const OrdinalType ensemble_length,
95  const OrdinalType nGrid,
96  const OrdinalType iterCount,
97  std::vector<double>& scalar_perf,
98  std::vector<double>& block_left_perf,
99  std::vector<double>& block_right_perf)
100 {
101  typedef ScalarType value_type;
102  typedef OrdinalType ordinal_type;
103  typedef Device execution_space;
104  typedef Kokkos::View< value_type*, execution_space > vector_type;
105  typedef Kokkos::View< value_type**, Kokkos::LayoutLeft, execution_space > left_multivec_type;
106  //typedef Kokkos::View< value_type**, Kokkos::LayoutRight, execution_space > right_multivec_type;
107  typedef KokkosSparse::CrsMatrix< value_type, ordinal_type, execution_space > matrix_type;
108  typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
109  typedef typename matrix_type::values_type matrix_values_type;
110 
111  //------------------------------
112  // Generate graph for "FEM" box structure:
113 
114  std::vector< std::vector<size_t> > fem_graph;
115  const size_t fem_length = nGrid * nGrid * nGrid;
116  const size_t graph_length = generate_fem_graph( nGrid , fem_graph );
117 
118  //------------------------------
119  // Generate input vectors:
120 
121  std::vector<vector_type> x(ensemble_length);
122  std::vector<vector_type> y(ensemble_length);
123  for (ordinal_type e=0; e<ensemble_length; ++e) {
124  x[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing("x"), fem_length);
125  y[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing("y"), fem_length);
126 
127  Kokkos::deep_copy( x[e] , value_type(1.0) );
128  Kokkos::deep_copy( y[e] , value_type(0.0) );
129  }
130  left_multivec_type xl(Kokkos::ViewAllocateWithoutInitializing("xl"), fem_length, ensemble_length);
131  left_multivec_type yl(Kokkos::ViewAllocateWithoutInitializing("yl"), fem_length, ensemble_length);
132  // right_multivec_type xr(Kokkos::ViewAllocateWithoutInitializing("xr"), fem_length, ensemble_length);
133  // right_multivec_type yr(Kokkos::ViewAllocateWithoutInitializing("yr"), fem_length, ensemble_length);
134  Kokkos::deep_copy(xl, value_type(1.0));
135  //Kokkos::deep_copy(xr, value_type(1.0));
136  Kokkos::deep_copy(yl, value_type(0.0));
137  //Kokkos::deep_copy(yr, value_type(0.0));
138 
139  //------------------------------
140  // Generate matrix
141 
142  matrix_graph_type matrix_graph =
143  Kokkos::create_staticcrsgraph<matrix_graph_type>(
144  std::string("test crs graph"), fem_graph);
145  matrix_values_type matrix_values =
146  matrix_values_type(Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length);
147  matrix_type matrix("matrix", fem_length, matrix_values, matrix_graph);
148  Kokkos::deep_copy( matrix_values , value_type(1.0) );
149 
150  //------------------------------
151  // Scalar multiply
152 
153  {
154  // warm up
155  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
156  for (ordinal_type e=0; e<ensemble_length; ++e) {
157  KokkosSparse::spmv( "N", value_type(1.0), matrix, x[e] , value_type(0.0) , y[e]);
158  }
159  }
160 
161  execution_space::fence();
162  Kokkos::Impl::Timer clock ;
163  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
164  for (ordinal_type e=0; e<ensemble_length; ++e) {
165  KokkosSparse::spmv( "N", value_type(1.0), matrix, x[e] , value_type(0.0) , y[e]);
166  }
167  }
168  execution_space::fence();
169 
170  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
171  const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
172 
173  scalar_perf.resize(5);
174  scalar_perf[0] = fem_length;
175  scalar_perf[1] = ensemble_length;
176  scalar_perf[2] = graph_length;
177  scalar_perf[3] = seconds_per_iter;
178  scalar_perf[4] = flops / seconds_per_iter;
179  }
180 
181  //------------------------------
182  // Block-left multiply
183 
184  {
185  // warm up
186  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
187  KokkosSparse::spmv( "N", value_type(1.0), matrix, xl , value_type(0.0) , yl);
188  }
189 
190  execution_space::fence();
191  Kokkos::Impl::Timer clock ;
192  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
193  KokkosSparse::spmv( "N", value_type(1.0), matrix, xl , value_type(0.0) , yl);
194  }
195  execution_space::fence();
196 
197  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
198  const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
199 
200  block_left_perf.resize(5);
201  block_left_perf[0] = fem_length;
202  block_left_perf[1] = ensemble_length;
203  block_left_perf[2] = graph_length;
204  block_left_perf[3] = seconds_per_iter;
205  block_left_perf[4] = flops / seconds_per_iter;
206  }
207 
208 #if 0
209  //------------------------------
210  // Block-right multiply
211 
212  {
213  // warm up
214  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
215  KokkosSparse::spmv( "N", value_type(1.0), matrix, xr , value_type(0.0) , yr);
216  }
217 
218  execution_space::fence();
219  Kokkos::Impl::Timer clock ;
220  for (ordinal_type iter = 0; iter < iterCount; ++iter) {
221  KokkosSparse::spmv( "N", value_type(1.0), matrix, xr , value_type(0.0) , yr);
222  }
223  execution_space::fence();
224 
225  const double seconds_per_iter = clock.seconds() / ((double) iterCount );
226  const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
227 
228  block_right_perf.resize(5);
229  block_right_perf[0] = fem_length;
230  block_right_perf[1] = ensemble_length;
231  block_right_perf[2] = graph_length;
232  block_right_perf[3] = seconds_per_iter;
233  block_right_perf[4] = flops / seconds_per_iter;
234  }
235 #endif
236 
237 }
238 
239 template <typename Scalar, typename Ordinal, typename Device>
241  const Ordinal nIter,
242  const Ordinal ensemble_min,
243  const Ordinal ensemble_max,
244  const Ordinal ensemble_step )
245 {
246  std::cout.precision(8);
247  std::cout << std::endl
248  << "\"Grid Size\" , "
249  << "\"FEM Size\" , "
250  << "\"FEM Graph Size\" , "
251  << "\"Ensemble Size\" , "
252  << "\"Scalar SpMM Time\" , "
253  << "\"Scalar SpMM Speedup\" , "
254  << "\"Scalar SpMM GFLOPS\" , "
255  << "\"Block-Left SpMM Speedup\" , "
256  << "\"Block-Left SpMM GFLOPS\" , "
257  //<< "\"Block_Right SpMM Speedup\" , "
258  //<< "\"Block_Right SpMM GFLOPS\" , "
259  << std::endl;
260 
261  std::vector<double> perf_scalar, perf_block_left, perf_block_right;
262  for (Ordinal e=ensemble_min; e<=ensemble_max; e+=ensemble_step) {
263 
264  test_spmm<Scalar,Ordinal,Device>(
265  e, nGrid, nIter, perf_scalar, perf_block_left, perf_block_right );
266 
267  std::cout << nGrid << " , "
268  << perf_scalar[0] << " , "
269  << perf_scalar[2] << " , "
270  << perf_scalar[1] << " , "
271  << perf_scalar[3] << " , "
272  << perf_scalar[4] / perf_scalar[4] << " , "
273  << perf_scalar[4] << " , "
274  << perf_block_left[4]/ perf_scalar[4] << " , "
275  << perf_block_left[4] << " , "
276  //<< perf_block_right[4]/ perf_scalar[4] << " , "
277  //<< perf_block_right[4] << " , "
278  << std::endl;
279 
280  }
281 }
size_t generate_fem_graph(size_t N, std::vector< std::vector< size_t > > &graph)
Definition: TestSpMM.hpp:60
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
Definition: TestSpMM.hpp:51
void test_spmm(const OrdinalType ensemble_length, const OrdinalType nGrid, const OrdinalType iterCount, std::vector< double > &scalar_perf, std::vector< double > &block_left_perf, std::vector< double > &block_right_perf)
Definition: TestSpMM.hpp:94
Kokkos::DefaultExecutionSpace execution_space
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
Definition: csr_vector.h:260
void performance_test_driver(const Ordinal nGrid, const Ordinal nIter, const Ordinal ensemble_min, const Ordinal ensemble_max, const Ordinal ensemble_step)
Definition: TestSpMM.hpp:240
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
expr expr expr expr j
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y
Definition: csr_vector.h:267
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)