Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
csr.h
Go to the documentation of this file.
1 /*
2  * Copyright 2008-2009 NVIDIA Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <cusp/array1d.h>
18 
19 #include <cusp/detail/format_utils.h>
20 
21 //#include <iostream>
22 //#include <cusp/print.h>
23 namespace cusp
24 {
25 namespace detail
26 {
27 namespace device
28 {
29 
30 //Row-wise AX where X is a 2d array with one thread per row (based on scalar model)
31 template <typename IndexType,
32  typename ValueType>
33 __global__ void
34 row_spmm_csr_scalar_kernel(const IndexType Anum_rows,
35  const IndexType xnum_row,
36  const IndexType xnum_cols,
37  const IndexType * Ar,
38  const IndexType * Ac,
39  const ValueType * Aval,
40  const ValueType * x,
41  ValueType * y)
42 {
43  const IndexType thread_id = blockDim.x * blockIdx.x + threadIdx.x;
44  const IndexType grid_size = gridDim.x * blockDim.x;
45 
46 
47  for(IndexType row = thread_id; row < Anum_rows; row += grid_size)
48  {
49  const IndexType row_start = Ar[row];
50  const IndexType row_end = Ar[row+1];
51  const IndexType r = row_end - row_start;
52 
53  for (IndexType j = 0; j < xnum_cols; j++){
54 
55  ValueType sum = 0.0;
56  for (IndexType jj = row_start; jj < row_end; jj++)
57  sum += Aval[jj] * x[j+xnum_cols*Ac[jj]];
58  y[j+xnum_cols*row]=sum;
59  }
60 
61  }
62 }
63 
64 
65 
66 //Col-wise with one thread per row
67 template <typename IndexType,
68  typename ValueType>
69 __global__ void
70 column_spmm_csr_scalar_kernel(const IndexType Anum_rows,
71  const IndexType xnum_rows,
72  const IndexType xnum_cols,
73  const IndexType * Ar,
74  const IndexType * Ac,
75  const ValueType * Aval,
76  const ValueType * x,
77  ValueType * y)
78 {
79  const IndexType thread_id = blockDim.x * blockIdx.x + threadIdx.x;
80  const IndexType grid_size = gridDim.x * blockDim.x;
81  for(IndexType row = thread_id; row < Anum_rows; row += grid_size){
82  const IndexType row_start = Ar[row];
83  const IndexType row_end = Ar[row+1];
84 
85  for (IndexType j = 0; j < xnum_cols; j++){
86 
87  ValueType sum = 0;
88  for (IndexType jj = row_start; jj < row_end; jj++)
89  sum += Aval[jj] * x[Ac[jj]+xnum_rows*j];//x(Ac[jj], j)
90  y[j*Anum_rows+row]=sum;
91  }
92  }
93 }
94 
95 
104 
105 
106 template <typename Matrix1,
107  typename Vector2,
108  typename Vector3>
109 void __spmm_csr_scalar(const Matrix1& A,
110  const Vector2& x,
111  Vector3& y, cusp::row_major)
112 {
113  CUSP_PROFILE_SCOPED();
114  typedef typename Vector3::index_type IndexType;
115  typedef typename Vector3::value_type ValueType;
116  typedef typename Vector3::memory_space MemorySpace;
117  const size_t BLOCK_SIZE = 256;
118  const size_t MAX_BLOCKS = cusp::detail::device::arch::max_active_blocks(row_spmm_csr_scalar_kernel<IndexType, ValueType>, BLOCK_SIZE, (size_t) 0);
119  const size_t NUM_BLOCKS = std::min(MAX_BLOCKS, DIVIDE_INTO(A.num_rows, BLOCK_SIZE));
120 
121 
122 
123  row_spmm_csr_scalar_kernel<IndexType,ValueType> <<<NUM_BLOCKS, BLOCK_SIZE >>>
124  (A.num_rows, x.num_rows, x.num_cols,
125  thrust::raw_pointer_cast(&A.row_offsets[0]),
126  thrust::raw_pointer_cast(&A.column_indices[0]),
127  thrust::raw_pointer_cast(&A.values[0]),
128  thrust::raw_pointer_cast(&(x.values)[0]),
129  thrust::raw_pointer_cast(&(y.values)[0]));
130 
131 }
132 
133 template <typename Matrix1,
134  typename Vector2,
135  typename Vector3>
136 void __spmm_csr_scalar(const Matrix1& A,
137  const Vector2& x,
138  Vector3& y, cusp::column_major)
139 {
140  CUSP_PROFILE_SCOPED();
141  typedef typename Vector3::index_type IndexType;
142  typedef typename Vector3::value_type ValueType;
143  typedef typename Vector3::memory_space MemorySpace;
144  const size_t BLOCK_SIZE = 256;
145  const size_t MAX_BLOCKS = cusp::detail::device::arch::max_active_blocks(column_spmm_csr_scalar_kernel<IndexType, ValueType>, BLOCK_SIZE, (size_t) 0);
146  const size_t NUM_BLOCKS = std::min(MAX_BLOCKS, DIVIDE_INTO(A.num_rows, BLOCK_SIZE));
147  column_spmm_csr_scalar_kernel<IndexType,ValueType> <<<NUM_BLOCKS, BLOCK_SIZE>>>
148  (A.num_rows, x.num_rows, x.num_cols,
149  thrust::raw_pointer_cast(&A.row_offsets[0]),
150  thrust::raw_pointer_cast(&A.column_indices[0]),
151  thrust::raw_pointer_cast(&A.values[0]),
152  thrust::raw_pointer_cast(&(x.values)[0]),
153  thrust::raw_pointer_cast(&(y.values)[0]));
154 
155 }
156 
157 
158 template <typename Matrix1,
159  typename Vector2,
160  typename Vector3>
161 void spmm_csr_scalar(const Matrix1& A,
162  const Vector2& x,
163  Vector3& y)
164 {
165 //Determine if row-wise or col-wise then call appropriate multiply
166  __spmm_csr_scalar(A, x, y, typename Vector2::orientation());
167 }
168 } // end namespace device
169 } // end namespace detail
170 } // end namespace cusp
171 
void spmm_csr_scalar(const Matrix1 &A, const Vector2 &x, Vector3 &y)
Definition: csr.h:161
const IndexType xnum_rows
Definition: csr_vector.h:260
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< RD, RP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< XD, XP... > >::value >::type sum(const Kokkos::View< RD, RP... > &r, const Kokkos::View< XD, XP... > &x)
const IndexType const IndexType xnum_cols
Definition: csr_vector.h:260
KOKKOS_INLINE_FUNCTION PCE< Storage > min(const typename PCE< Storage >::value_type &a, const PCE< Storage > &b)
__global__ void row_spmm_csr_scalar_kernel(const IndexType Anum_rows, const IndexType xnum_row, const IndexType xnum_cols, const IndexType *Ar, const IndexType *Ac, const ValueType *Aval, const ValueType *x, ValueType *y)
Definition: csr.h:34
__global__ void column_spmm_csr_scalar_kernel(const IndexType Anum_rows, const IndexType xnum_rows, const IndexType xnum_cols, const IndexType *Ar, const IndexType *Ac, const ValueType *Aval, const ValueType *x, ValueType *y)
Definition: csr.h:70
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
Definition: csr_vector.h:260
expr expr expr expr j
Kokkos::DefaultExecutionSpace device
const IndexType thread_id
Definition: csr_vector.h:273
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y
Definition: csr_vector.h:267
void __spmm_csr_scalar(const Matrix1 &A, const Vector2 &x, Vector3 &y, cusp::row_major)
Definition: csr.h:109