// Include guard, required headers, and the optional KOKKOS_MDRANGE_IVDEP switch.
// The switch is defined only when KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION and
// KOKKOS_HAVE_PRAGMA_IVDEP are both defined and __CUDA_ARCH__ is not.
// NOTE(review): this listing has its own line numbers fused into the text, and
// brace-only lines are not visible in this excerpt.
44 #ifndef KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP 45 #define KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP 47 #include <Kokkos_ExecPolicy.hpp> 49 #include <initializer_list> 51 #if defined(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_HAVE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__) 52 #define KOKKOS_MDRANGE_IVDEP 65 template <
typename ExecSpace>
// Trait giving the outer iteration direction for an execution space. The policy
// class later in this file falls back to it when the pattern's outer direction
// is Iterate::Default or Iterate::Flat.
66 struct default_outer_direction
// The visible definition selects Iterate::Right regardless of ExecSpace.
69 static constexpr Iterate value = Iterate::Right;
// Trait giving the inner iteration direction for an execution space. The policy
// class later in this file falls back to it when the pattern's inner direction
// is Iterate::Default.
72 template <
typename ExecSpace>
73 struct default_inner_direction
// The visible definition selects Iterate::Right regardless of ExecSpace.
76 static constexpr Iterate value = Iterate::Right;
// Compile-time description of an iteration pattern: a rank N plus outer and
// inner iteration directions, both defaulting to Iterate::Default.
// NOTE(review): the start of the template parameter list (the declaration of N)
// and the struct name are not visible in this excerpt; the alias below suggests
// the type is Rank<N, OuterDir, InnerDir> -- confirm against the full header.
82 , Iterate OuterDir = Iterate::Default
83 , Iterate InnerDir = Iterate::Default
// Only ranks 2 and 3 pass the three assertions below (N != 0, N != 1, N < 4).
87 static_assert( N != 0u,
"Kokkos Error: rank 0 undefined");
88 static_assert( N != 1u,
"Kokkos Error: rank 1 is not a multi-dimensional range");
89 static_assert( N < 4u,
"Kokkos Error: Unsupported rank...");
// Self-referential alias so a pattern can be queried via ::iteration_pattern.
91 using iteration_pattern = Rank<N, OuterDir, InnerDir>;
// Re-export the template arguments as static constants.
93 static constexpr
int rank = N;
94 static constexpr Iterate outer_direction = OuterDir;
95 static constexpr Iterate inner_direction = InnerDir;
// Type aliases and compile-time constants of the multi-dimensional range policy.
// NOTE(review): the struct header itself is not visible in this excerpt; the
// constructors that follow are named MDRangePolicy.
101 template <
typename... Properties>
// The underlying one-dimensional policy that all Properties are forwarded to.
104 using range_policy = RangePolicy<Properties...>;
// NOTE(review): the condition tests range_policy against void while the message
// talks about the iteration pattern -- verify that this assertion can ever fire.
106 static_assert( !std::is_same<range_policy,void>::value
107 ,
"Kokkos Error: MD iteration pattern not defined" );
// Both aliases are taken from the underlying range policy.
109 using iteration_pattern =
typename range_policy::iteration_pattern;
110 using work_tag =
typename range_policy::work_tag;
112 static constexpr
int rank = iteration_pattern::rank;
// Outer direction as an int: the pattern's own value unless it is Default or
// Flat, in which case the execution space's default_outer_direction is used.
114 static constexpr
int outer_direction =
static_cast<int> (
115 (iteration_pattern::outer_direction != Iterate::Default && iteration_pattern::outer_direction != Iterate::Flat)
116 ? iteration_pattern::outer_direction
117 : default_outer_direction< typename range_policy::execution_space>::value );
// Inner direction as an int: the pattern's own value unless it is Default, in
// which case the execution space's default_inner_direction is used. Unlike the
// outer direction, Flat is kept as-is here.
119 static constexpr
int inner_direction =
static_cast<int> (
120 iteration_pattern::inner_direction != Iterate::Default
121 ? iteration_pattern::inner_direction
122 : default_inner_direction< typename range_policy::execution_space>::value ) ;
// Integer copies of two Iterate enumerators, for comparison with the int
// direction constants above.
126 static constexpr
int Flat =
static_cast<int>( Iterate::Flat );
127 static constexpr
int Right =
static_cast<int>( Iterate::Right );
// size_type is the range policy's index_type; index_type is its signed counterpart.
130 using size_type =
typename range_policy::index_type;
131 using index_type =
typename std::make_signed<size_type>::type;
// Constructs a policy from an upper corner only: every offset is zero and every
// extent is the corresponding upper_corner entry.
// NOTE(review): no check that upper_corner holds at least `rank` entries is
// visible in this excerpt, yet the loop reads u[0..rank-1] -- confirm.
134 template <
typename I>
135 MDRangePolicy( std::initializer_list<I> upper_corner )
137 static_assert( std::is_integral<I>::value,
"Kokkos Error: corner defined with non-integral type" );
// Pointer to the first element, used for indexed access below.
144 const auto u = upper_corner.begin();
147 for (
int i=0; i<
rank; ++i) {
148 m_offset[i] =
static_cast<index_type
>(0);
149 m_dim[i] =
static_cast<index_type
>(u[i]);
// NOTE(review): the statements that choose m_tile[i] for the Flat and non-Flat
// cases are not visible in this excerpt.
150 if (inner_direction != Flat) {
// Number of tiles along dimension i, rounded up so a partial tile is counted.
156 m_tile_dim[i] = (m_dim[i] + (m_tile[i] - 1)) / m_tile[i];
// Running product of per-dimension tile counts. NOTE(review): the initial value
// of m_num_tiles is not visible here.
157 m_num_tiles *= m_tile_dim[i];
// Constructs a policy from two opposite corners. Per dimension, the smaller
// coordinate becomes the offset and the absolute difference becomes the extent,
// so the corners may be given in either order.
// NOTE(review): no check that either list holds at least `rank` entries is
// visible in this excerpt -- confirm.
161 template <
typename IA,
typename IB>
162 MDRangePolicy( std::initializer_list<IA> corner_a
163 , std::initializer_list<IB> corner_b
166 static_assert( std::is_integral<IA>::value,
"Kokkos Error: corner A defined with non-integral type" );
167 static_assert( std::is_integral<IB>::value,
"Kokkos Error: corner B defined with non-integral type" );
// Signed counterparts of the corner element types, used for the comparisons
// and subtractions below.
175 using A =
typename std::make_signed<IA>::type;
176 using B =
typename std::make_signed<IB>::type;
// Accessors returning the i-th coordinate of each corner, cast to signed.
178 const auto a = [=](
int i) {
return static_cast<A
>(corner_a.begin()[i]); };
179 const auto b = [=](
int i) {
return static_cast<B
>(corner_b.begin()[i]); };
182 for (
int i=0; i<
rank; ++i) {
// offset = min(a, b); extent = |b - a|.
183 m_offset[i] =
static_cast<index_type
>(a(i) <= b(i) ? a(i) : b(i));
184 m_dim[i] =
static_cast<index_type
>(a(i) <= b(i) ? b(i) - a(i) : a(i) - b(i));
// NOTE(review): the statements that choose m_tile[i] for the Flat and non-Flat
// cases are not visible in this excerpt.
185 if (inner_direction != Flat) {
// Number of tiles along dimension i, rounded up so a partial tile is counted.
191 m_tile_dim[i] = (m_dim[i] + (m_tile[i] - 1)) / m_tile[i];
// Running product of per-dimension tile counts. NOTE(review): the initial value
// of m_num_tiles is not visible here.
192 m_num_tiles *= m_tile_dim[i];
// Constructs a policy from two opposite corners plus explicit tile sizes.
// Rejected at compile time when the inner direction is Flat.
// NOTE(review): no check that the three lists hold at least `rank` entries is
// visible in this excerpt -- confirm.
196 template <
typename IA,
typename IB,
typename T>
197 MDRangePolicy( std::initializer_list<IA> corner_a
198 , std::initializer_list<IB> corner_b
199 , std::initializer_list<T> tile
202 static_assert( std::is_integral<IA>::value,
"Kokkos Error: corner A defined with non-integral type" );
203 static_assert( std::is_integral<IB>::value,
"Kokkos Error: corner B defined with non-integral type" );
204 static_assert( std::is_integral<T>::value,
"Kokkos Error: tile defined with non-integral type" );
205 static_assert( inner_direction != Flat,
"Kokkos Error: tiling not support with flat iteration" );
// Signed counterparts of the corner element types, used for the comparisons
// and subtractions below.
213 using A =
typename std::make_signed<IA>::type;
214 using B =
typename std::make_signed<IB>::type;
// Accessors returning the i-th coordinate of each corner, cast to signed.
216 const auto a = [=](
int i) {
return static_cast<A
>(corner_a.begin()[i]); };
217 const auto b = [=](
int i) {
return static_cast<B
>(corner_b.begin()[i]); };
// Pointer to the first tile size, used for indexed access below.
218 const auto t = tile.begin();
221 for (
int i=0; i<
rank; ++i) {
// offset = min(a, b); extent = |b - a|.
222 m_offset[i] =
static_cast<index_type
>(a(i) <= b(i) ? a(i) : b(i));
223 m_dim[i] =
static_cast<index_type
>(a(i) <= b(i) ? b(i) - a(i) : a(i) - b(i));
// Non-positive tile sizes are replaced by 1, which also keeps the division
// below well defined.
// NOTE(review): uses C-style casts and narrows the tile size to int.
224 m_tile[i] =
static_cast<int>(t[i] > (T)0 ? t[i] : (T)1 );
// Number of tiles along dimension i, rounded up so a partial tile is counted.
225 m_tile_dim[i] = (m_dim[i] + (m_tile[i] - 1)) / m_tile[i];
// Running product of per-dimension tile counts. NOTE(review): the initial value
// of m_num_tiles is not visible here.
226 m_num_tiles *= m_tile_dim[i];
// Per-dimension state filled in by the constructors.
// m_offset: lower corner of the range in each dimension.
230 index_type m_offset[
rank];
// m_dim: extent of the range in each dimension.
231 index_type m_dim[
rank];
// NOTE(review): the declaration of m_tile (per-dimension tile size, used by the
// constructors) is not visible in this excerpt.
// m_tile_dim: number of tiles in each dimension (extent / tile size, rounded up).
233 index_type m_tile_dim[
rank];
// m_num_tiles: accumulated product of the m_tile_dim entries.
234 size_type m_num_tiles;
// Adapter that stores an MDRange policy and a user functor. Its operator()
// overloads (defined after these constructors) turn a single linear index into
// multi-dimensional indices and invoke the functor with them.
// NOTE(review): the struct header and the member declarations (m_range, m_func)
// are not visible in this excerpt.
241 template <
typename MDRange,
typename Functor,
typename Enable =
void >
// Types re-exported from the policy.
244 using work_tag =
typename MDRange::work_tag;
245 using index_type =
typename MDRange::index_type;
246 using size_type =
typename MDRange::size_type;
// Four constructors cover every const& / && combination of (range, functor).
// NOTE(review): MDRange and Functor appear to be the class template parameters
// rather than deduced per constructor, so `&&` is an rvalue reference here and
// std::forward acts as a move for non-reference types -- confirm intent.
251 KOKKOS_INLINE_FUNCTION
252 MDForFunctor( MDRange
const& range, Functor
const& f )
257 KOKKOS_INLINE_FUNCTION
258 MDForFunctor( MDRange
const& range, Functor && f )
260 , m_func( std::forward<Functor>(f) )
263 KOKKOS_INLINE_FUNCTION
264 MDForFunctor( MDRange && range, Functor
const& f )
265 : m_range( std::forward<MDRange>(range) )
269 KOKKOS_INLINE_FUNCTION
270 MDForFunctor( MDRange && range, Functor && f )
271 : m_range( std::forward<MDRange>(range) )
272 , m_func( std::forward<Functor>(f) )
// Copy and move construction/assignment are explicitly defaulted.
276 KOKKOS_INLINE_FUNCTION
277 MDForFunctor( MDForFunctor
const& ) =
default;
279 KOKKOS_INLINE_FUNCTION
280 MDForFunctor& operator=( MDForFunctor
const& ) =
default;
282 KOKKOS_INLINE_FUNCTION
283 MDForFunctor( MDForFunctor && ) =
default;
285 KOKKOS_INLINE_FUNCTION
286 MDForFunctor& operator=( MDForFunctor && ) =
default;
// Rank-2, Flat inner direction, no work tag: maps linear index t directly to
// one (i0, i1) pair and calls the functor once.
// Selected only when Idx is integral, work_tag is void, rank == 2 and
// inner_direction == Flat. NOTE(review): the closing part of the enable_if
// (and its result type) is not visible in this excerpt.
289 template <
typename Idx>
290 KOKKOS_FORCEINLINE_FUNCTION
291 typename std::enable_if<( std::is_integral<Idx>::value
292 && std::is_same<void, work_tag>::value
293 && MDRange::rank == 2
294 && MDRange::inner_direction == MDRange::Flat
296 operator()(Idx t)
// Outer direction Right: dimension 1 varies fastest (i0 = t / dim1, i1 = t % dim1).
const 298 if ( MDRange::outer_direction == MDRange::Right ) {
299 m_func( m_range.m_offset[0] + ( t / m_range.m_dim[1] )
300 , m_range.m_offset[1] + ( t % m_range.m_dim[1] ) );
// Presumably the else branch (its `else` line is not visible): dimension 0
// varies fastest (i0 = t % dim0, i1 = t / dim0).
302 m_func( m_range.m_offset[0] + ( t % m_range.m_dim[0] )
303 , m_range.m_offset[1] + ( t / m_range.m_dim[0] ) );
// Rank-2, Flat inner direction, with a work tag: same index mapping as the
// untagged overload, but a default-constructed work_tag is passed as the
// functor's first argument.
// Selected only when Idx is integral, work_tag is not void, rank == 2 and
// inner_direction == Flat. NOTE(review): the closing part of the enable_if
// (and its result type) is not visible in this excerpt.
308 template <
typename Idx>
309 KOKKOS_FORCEINLINE_FUNCTION
310 typename std::enable_if<( std::is_integral<Idx>::value
311 && !std::is_same<void, work_tag>::value
312 && MDRange::rank == 2
313 && MDRange::inner_direction == MDRange::Flat
315 operator()(Idx t)
// Outer direction Right: dimension 1 varies fastest (i0 = t / dim1, i1 = t % dim1).
const 317 if ( MDRange::outer_direction == MDRange::Right ) {
318 m_func( work_tag{}, m_range.m_offset[0] + ( t / m_range.m_dim[1] )
319 , m_range.m_offset[1] + ( t % m_range.m_dim[1] ) );
// Presumably the else branch (its `else` line is not visible): dimension 0
// varies fastest (i0 = t % dim0, i1 = t / dim0).
321 m_func( work_tag{}, m_range.m_offset[0] + ( t % m_range.m_dim[0] )
322 , m_range.m_offset[1] + ( t / m_range.m_dim[0] ) );
// Rank-2, tiled (inner direction not Flat), no work tag: treats t as a tile
// index, converts it to tile coordinates (t0, t1), then loops over every index
// inside that tile.
// NOTE(review): the declaration of t0/t1, the closing part of the enable_if and
// the functor calls inside the loops are not visible in this excerpt.
327 template <
typename Idx>
328 KOKKOS_FORCEINLINE_FUNCTION
329 typename std::enable_if<( std::is_integral<Idx>::value
330 && std::is_same<void, work_tag>::value
331 && MDRange::rank == 2
332 && MDRange::inner_direction != MDRange::Flat
334 operator()(Idx t)
// Outer direction Right: the tile coordinate of dimension 1 varies fastest.
const 337 if ( MDRange::outer_direction == MDRange::Right ) {
338 t0 = t / m_range.m_tile_dim[1];
339 t1 = t % m_range.m_tile_dim[1];
// Presumably the else branch: the tile coordinate of dimension 0 varies fastest.
341 t0 = t % m_range.m_tile_dim[0];
342 t1 = t / m_range.m_tile_dim[0];
// First index covered by this tile in each dimension.
345 const index_type b0 = t0 * m_range.m_tile[0] + m_range.m_offset[0];
346 const index_type b1 = t1 * m_range.m_tile[1] + m_range.m_offset[1];
// One-past-last index of the tile, clamped to the end of the range
// (offset + dim) so a partial trailing tile does not run past it.
348 const index_type e0 = b0 + m_range.m_tile[0] <= (m_range.m_dim[0] + m_range.m_offset[0] ) ? b0 + m_range.m_tile[0] : ( m_range.m_dim[0] + m_range.m_offset[0] );
349 const index_type e1 = b1 + m_range.m_tile[1] <= (m_range.m_dim[1] + m_range.m_offset[1] ) ? b1 + m_range.m_tile[1] : ( m_range.m_dim[1] + m_range.m_offset[1] );
// Inner direction Right: i1 is the innermost loop; otherwise i0 is.
// NOTE(review): the loop counters are int but are initialised from index_type
// bounds; this narrows if index_type is wider than int -- confirm.
351 if ( MDRange::inner_direction == MDRange::Right ) {
352 for (
int i0=b0; i0<e0; ++i0) {
353 #if defined(KOKKOS_MDRANGE_IVDEP) 356 for (
int i1=b1; i1<e1; ++i1) {
360 for (
int i1=b1; i1<e1; ++i1) {
361 #if defined(KOKKOS_MDRANGE_IVDEP) 364 for (
int i0=b0; i0<e0; ++i0) {
// Rank-2, tiled (inner direction not Flat), with a work tag: treats t as a tile
// index, converts it to tile coordinates (t0, t1), then calls the functor with
// (tag, i0, i1) for every index inside that tile.
// NOTE(review): the declarations of `tag` and t0/t1 and the closing part of the
// enable_if are not visible in this excerpt.
371 template <
typename Idx>
372 KOKKOS_FORCEINLINE_FUNCTION
373 typename std::enable_if<( std::is_integral<Idx>::value
374 && !std::is_same<void, work_tag>::value
375 && MDRange::rank == 2
376 && MDRange::inner_direction != MDRange::Flat
378 operator()(Idx t)
// Outer direction Right: the tile coordinate of dimension 1 varies fastest.
const 383 if ( MDRange::outer_direction == MDRange::Right ) {
384 t0 = t / m_range.m_tile_dim[1];
385 t1 = t % m_range.m_tile_dim[1];
// Presumably the else branch: the tile coordinate of dimension 0 varies fastest.
387 t0 = t % m_range.m_tile_dim[0];
388 t1 = t / m_range.m_tile_dim[0];
// First index covered by this tile in each dimension.
391 const index_type b0 = t0 * m_range.m_tile[0] + m_range.m_offset[0];
392 const index_type b1 = t1 * m_range.m_tile[1] + m_range.m_offset[1];
// One-past-last index of the tile, clamped to the end of the range
// (offset + dim) so a partial trailing tile does not run past it.
394 const index_type e0 = b0 + m_range.m_tile[0] <= (m_range.m_dim[0] + m_range.m_offset[0] ) ? b0 + m_range.m_tile[0] : ( m_range.m_dim[0] + m_range.m_offset[0] );
395 const index_type e1 = b1 + m_range.m_tile[1] <= (m_range.m_dim[1] + m_range.m_offset[1] ) ? b1 + m_range.m_tile[1] : ( m_range.m_dim[1] + m_range.m_offset[1] );
// Inner direction Right: i1 is the innermost loop; otherwise i0 is.
// NOTE(review): the loop counters are int but are initialised from index_type
// bounds; this narrows if index_type is wider than int -- confirm.
397 if ( MDRange::inner_direction == MDRange::Right ) {
398 for (
int i0=b0; i0<e0; ++i0) {
399 #if defined(KOKKOS_MDRANGE_IVDEP) 402 for (
int i1=b1; i1<e1; ++i1) {
403 m_func( tag, i0, i1 );
406 for (
int i1=b1; i1<e1; ++i1) {
407 #if defined(KOKKOS_MDRANGE_IVDEP) 410 for (
int i0=b0; i0<e0; ++i0) {
411 m_func( tag, i0, i1 );
// Rank-3, Flat inner direction, no work tag: maps linear index t directly to
// one (i0, i1, i2) triple and calls the functor once.
// Selected only when Idx is integral, work_tag is void, rank == 3 and
// inner_direction == Flat. NOTE(review): the closing part of the enable_if and
// the closing of each m_func call are not visible in this excerpt.
419 template <
typename Idx>
420 KOKKOS_FORCEINLINE_FUNCTION
421 typename std::enable_if<( std::is_integral<Idx>::value
422 && std::is_same<void, work_tag>::value
423 && MDRange::rank == 3
424 && MDRange::inner_direction == MDRange::Flat
426 operator()(Idx t)
// Outer direction Right: dimension 2 varies fastest, then 1, then 0.
const 428 if ( MDRange::outer_direction == MDRange::Right ) {
// Size of one slab of dimensions 1 and 2.
// NOTE(review): declared as int64_t here, while the tiled rank-3 overloads use
// index_type for the analogous product -- confirm which is intended.
429 const int64_t tmp_prod = m_range.m_dim[1]*m_range.m_dim[2];
430 m_func( m_range.m_offset[0] + ( t / tmp_prod )
431 , m_range.m_offset[1] + ( (t % tmp_prod) / m_range.m_dim[2] )
432 , m_range.m_offset[2] + ( (t % tmp_prod) % m_range.m_dim[2] )
// Presumably the else branch: dimension 0 varies fastest, then 1, then 2.
435 const int64_t tmp_prod = m_range.m_dim[0]*m_range.m_dim[1];
436 m_func( m_range.m_offset[0] + ( (t % tmp_prod) % m_range.m_dim[0] )
437 , m_range.m_offset[1] + ( (t % tmp_prod) / m_range.m_dim[0] )
438 , m_range.m_offset[2] + ( t / tmp_prod )
// Rank-3, Flat inner direction, with a work tag: same index mapping as the
// untagged rank-3 flat overload.
// Selected only when Idx is integral, work_tag is not void, rank == 3 and
// inner_direction == Flat. NOTE(review): the lines that open each functor call
// (presumably passing the tag first) and the closing part of the enable_if are
// not visible in this excerpt; only the index arguments are shown.
444 template <
typename Idx>
445 KOKKOS_FORCEINLINE_FUNCTION
446 typename std::enable_if<( std::is_integral<Idx>::value
447 && !std::is_same<void, work_tag>::value
448 && MDRange::rank == 3
449 && MDRange::inner_direction == MDRange::Flat
451 operator()(Idx t)
// Outer direction Right: dimension 2 varies fastest, then 1, then 0.
const 453 if ( MDRange::outer_direction == MDRange::Right ) {
// Size of one slab of dimensions 1 and 2.
// NOTE(review): declared as int64_t here, while the tiled rank-3 overloads use
// index_type for the analogous product -- confirm which is intended.
454 const int64_t tmp_prod = m_range.m_dim[1]*m_range.m_dim[2];
456 , m_range.m_offset[0] + ( t / tmp_prod )
457 , m_range.m_offset[1] + ( (t % tmp_prod) / m_range.m_dim[2] )
458 , m_range.m_offset[2] + ( (t % tmp_prod) % m_range.m_dim[2] )
// Presumably the else branch: dimension 0 varies fastest, then 1, then 2.
461 const int64_t tmp_prod = m_range.m_dim[0]*m_range.m_dim[1];
463 , m_range.m_offset[0] + ( (t % tmp_prod) % m_range.m_dim[0] )
464 , m_range.m_offset[1] + ( (t % tmp_prod) / m_range.m_dim[0] )
465 , m_range.m_offset[2] + ( t / tmp_prod )
// Rank-3, tiled (inner direction not Flat), no work tag: treats t as a tile
// index, converts it to tile coordinates (t0, t1, t2), then calls the functor
// with (i0, i1, i2) for every index inside that tile.
// NOTE(review): the closing part of the enable_if, the assignment of t0 in the
// first branch and of t2 in the second branch are not visible in this excerpt.
471 template <
typename Idx>
472 KOKKOS_FORCEINLINE_FUNCTION
473 typename std::enable_if<( std::is_integral<Idx>::value
474 && std::is_same<void, work_tag>::value
475 && MDRange::rank == 3
476 && MDRange::inner_direction != MDRange::Flat
478 operator()(Idx t)
const 480 index_type t0, t1, t2;
// Outer direction Right: the tile coordinate of dimension 2 varies fastest.
481 if ( MDRange::outer_direction == MDRange::Right ) {
// Number of tiles in one slab of dimensions 1 and 2.
482 const index_type tmp_prod = ( m_range.m_tile_dim[1]*m_range.m_tile_dim[2]);
484 t1 = ( t % tmp_prod ) / m_range.m_tile_dim[2];
485 t2 = ( t % tmp_prod ) % m_range.m_tile_dim[2];
// Presumably the else branch: the tile coordinate of dimension 0 varies fastest.
487 const index_type tmp_prod = ( m_range.m_tile_dim[0]*m_range.m_tile_dim[1]);
488 t0 = ( t % tmp_prod ) % m_range.m_tile_dim[0];
489 t1 = ( t % tmp_prod ) / m_range.m_tile_dim[0];
// First index covered by this tile in each dimension.
493 const index_type b0 = t0 * m_range.m_tile[0] + m_range.m_offset[0];
494 const index_type b1 = t1 * m_range.m_tile[1] + m_range.m_offset[1];
495 const index_type b2 = t2 * m_range.m_tile[2] + m_range.m_offset[2];
// One-past-last index of the tile, clamped to the end of the range
// (offset + dim) so a partial trailing tile does not run past it.
497 const index_type e0 = b0 + m_range.m_tile[0] <= (m_range.m_dim[0] + m_range.m_offset[0] ) ? b0 + m_range.m_tile[0] : ( m_range.m_dim[0] + m_range.m_offset[0] );
498 const index_type e1 = b1 + m_range.m_tile[1] <= (m_range.m_dim[1] + m_range.m_offset[1] ) ? b1 + m_range.m_tile[1] : ( m_range.m_dim[1] + m_range.m_offset[1] );
499 const index_type e2 = b2 + m_range.m_tile[2] <= (m_range.m_dim[2] + m_range.m_offset[2] ) ? b2 + m_range.m_tile[2] : ( m_range.m_dim[2] + m_range.m_offset[2] );
// Inner direction Right: loops nest i0 > i1 > i2 (i2 innermost); otherwise the
// nesting is i2 > i1 > i0 (i0 innermost).
// NOTE(review): the loop counters are int but are initialised from index_type
// bounds; this narrows if index_type is wider than int -- confirm.
501 if ( MDRange::inner_direction == MDRange::Right ) {
502 for (
int i0=b0; i0<e0; ++i0) {
503 for (
int i1=b1; i1<e1; ++i1) {
504 #if defined(KOKKOS_MDRANGE_IVDEP) 507 for (
int i2=b2; i2<e2; ++i2) {
508 m_func( i0, i1, i2 );
511 for (
int i2=b2; i2<e2; ++i2) {
512 for (
int i1=b1; i1<e1; ++i1) {
513 #if defined(KOKKOS_MDRANGE_IVDEP) 516 for (
int i0=b0; i0<e0; ++i0) {
517 m_func( i0, i1, i2 );
// Rank-3, tiled (inner direction not Flat), with a work tag: treats t as a tile
// index, converts it to tile coordinates (t0, t1, t2), then calls the functor
// with (tag, i0, i1, i2) for every index inside that tile.
// NOTE(review): the declaration of `tag`, the closing part of the enable_if,
// the assignment of t0 in the first branch and of t2 in the second branch are
// not visible in this excerpt.
523 template <
typename Idx>
524 KOKKOS_FORCEINLINE_FUNCTION
525 typename std::enable_if<( std::is_integral<Idx>::value
526 && !std::is_same<void, work_tag>::value
527 && MDRange::rank == 3
528 && MDRange::inner_direction != MDRange::Flat
530 operator()(Idx t)
const 534 index_type t0, t1, t2;
// Outer direction Right: the tile coordinate of dimension 2 varies fastest.
535 if ( MDRange::outer_direction == MDRange::Right ) {
// Number of tiles in one slab of dimensions 1 and 2.
536 const index_type tmp_prod = ( m_range.m_tile_dim[1]*m_range.m_tile_dim[2]);
538 t1 = ( t % tmp_prod ) / m_range.m_tile_dim[2];
539 t2 = ( t % tmp_prod ) % m_range.m_tile_dim[2];
// Presumably the else branch: the tile coordinate of dimension 0 varies fastest.
541 const index_type tmp_prod = ( m_range.m_tile_dim[0]*m_range.m_tile_dim[1]);
542 t0 = ( t % tmp_prod ) % m_range.m_tile_dim[0];
543 t1 = ( t % tmp_prod ) / m_range.m_tile_dim[0];
// First index covered by this tile in each dimension.
547 const index_type b0 = t0 * m_range.m_tile[0] + m_range.m_offset[0];
548 const index_type b1 = t1 * m_range.m_tile[1] + m_range.m_offset[1];
549 const index_type b2 = t2 * m_range.m_tile[2] + m_range.m_offset[2];
// One-past-last index of the tile, clamped to the end of the range
// (offset + dim) so a partial trailing tile does not run past it.
551 const index_type e0 = b0 + m_range.m_tile[0] <= (m_range.m_dim[0] + m_range.m_offset[0] ) ? b0 + m_range.m_tile[0] : ( m_range.m_dim[0] + m_range.m_offset[0] );
552 const index_type e1 = b1 + m_range.m_tile[1] <= (m_range.m_dim[1] + m_range.m_offset[1] ) ? b1 + m_range.m_tile[1] : ( m_range.m_dim[1] + m_range.m_offset[1] );
553 const index_type e2 = b2 + m_range.m_tile[2] <= (m_range.m_dim[2] + m_range.m_offset[2] ) ? b2 + m_range.m_tile[2] : ( m_range.m_dim[2] + m_range.m_offset[2] );
// Inner direction Right: loops nest i0 > i1 > i2 (i2 innermost); otherwise the
// nesting is i2 > i1 > i0 (i0 innermost).
// NOTE(review): the loop counters are int but are initialised from index_type
// bounds; this narrows if index_type is wider than int -- confirm.
555 if ( MDRange::inner_direction == MDRange::Right ) {
556 for (
int i0=b0; i0<e0; ++i0) {
557 for (
int i1=b1; i1<e1; ++i1) {
558 #if defined(KOKKOS_MDRANGE_IVDEP) 561 for (
int i2=b2; i2<e2; ++i2) {
562 m_func( tag, i0, i1, i2 );
565 for (
int i2=b2; i2<e2; ++i2) {
566 for (
int i1=b1; i1<e1; ++i1) {
567 #if defined(KOKKOS_MDRANGE_IVDEP) 570 for (
int i0=b0; i0<e0; ++i0) {
571 m_func( tag, i0, i1, i2 );
// Entry point taking the policy first and an optional label (default "") last.
// Wraps `range` and `f` in an Impl::MDForFunctor.
// NOTE(review): the declaration of parameter `f` and the statements that follow
// the range_policy alias (presumably the actual parallel dispatch) are not
// visible in this excerpt.
582 template <
typename MDRange,
typename Functor>
583 void md_parallel_for( MDRange
const& range
585 ,
const std::string& str =
"" 588 Impl::MDForFunctor<MDRange, Functor> g(range, f);
// Underlying one-dimensional policy type of the MD range.
590 using range_policy =
typename MDRange::range_policy;
// Overload taking the label first, then the policy.
// Wraps `range` and `f` in an Impl::MDForFunctor, like the policy-first overload.
// NOTE(review): the declaration of parameter `f` and the statements that follow
// the range_policy alias (presumably the actual parallel dispatch) are not
// visible in this excerpt.
595 template <
typename MDRange,
typename Functor>
596 void md_parallel_for(
const std::string& str
597 , MDRange
const& range
601 Impl::MDForFunctor<MDRange, Functor> g(range, f);
// Underlying one-dimensional policy type of the MD range.
603 using range_policy =
typename MDRange::range_policy;
610 #endif //KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP
Declaration of parallel operators.
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type *=0)
Execute functor in parallel according to the execution policy.
KOKKOS_INLINE_FUNCTION constexpr unsigned rank(const View< D, P... > &V)
Temporary free function rank() until rank() is implemented in the View.