#ifndef KOKKOS_CUDASPACE_HPP
#define KOKKOS_CUDASPACE_HPP

#include <Kokkos_Core_fwd.hpp>

#if defined( KOKKOS_HAVE_CUDA )

#include <iosfwd>
#include <string>

#include <Kokkos_HostSpace.hpp>

#include <Cuda/Kokkos_Cuda_abort.hpp>

namespace Kokkos {

/** \brief  Cuda on-device memory management */
class CudaSpace {
public:

  //! Tag this class as a Kokkos memory space
  typedef CudaSpace     memory_space ;
  typedef Kokkos::Cuda  execution_space ;
  typedef unsigned int  size_type ;
  CudaSpace();
  CudaSpace( CudaSpace && rhs ) = default ;
  CudaSpace( const CudaSpace & rhs ) = default ;
  CudaSpace & operator = ( CudaSpace && rhs ) = default ;
  CudaSpace & operator = ( const CudaSpace & rhs ) = default ;
  ~CudaSpace() = default ;
  /**\brief  Allocate untracked memory in the cuda space */
  void * allocate( const size_t arg_alloc_size ) const ;

  /**\brief  Deallocate untracked memory in the cuda space */
  void deallocate( void * const arg_alloc_ptr
                 , const size_t arg_alloc_size ) const ;
  /**\brief  Error reporting for a host-side attempt to access CudaSpace memory */
  static void access_error();
  static void access_error( const void * const );
};

} // namespace Kokkos
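/* Example (illustrative sketch, not part of the original header): raw,
 * untracked allocation through CudaSpace.  Assumes Kokkos has been
 * initialized with a CUDA-capable device.
 *
 *   Kokkos::CudaSpace space ;
 *   void * ptr = space.allocate( 1024 );   // 1024 bytes of device memory
 *   // ... hand ptr to device kernels ...
 *   space.deallocate( ptr , 1024 );        // size must match the allocation
 */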
namespace Kokkos {
namespace Impl {

/// \brief Initialize the lock arrays needed by CUDA-side atomics.
void init_lock_arrays_cuda_space();

/// \brief Retrieve the pointer to the lock array used by arbitrary-type atomics.
/// Passing deallocate = true releases the array instead.
int * atomic_lock_array_cuda_space_ptr( bool deallocate = false );

/// \brief Retrieve the pointer to the lock array used for scratch allocation.
int * scratch_lock_array_cuda_space_ptr( bool deallocate = false );

/// \brief Retrieve the pointer to the lock array used to obtain unique thread ids.
int * threadid_lock_array_cuda_space_ptr( bool deallocate = false );

} // namespace Impl
} // namespace Kokkos
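/* Example (illustrative sketch of the intended call pattern): the lock
 * arrays are primed once during backend initialization and released at
 * shutdown by passing deallocate = true to the accessors.
 *
 *   Kokkos::Impl::init_lock_arrays_cuda_space();
 *   int * locks = Kokkos::Impl::atomic_lock_array_cuda_space_ptr();
 *   // ... pass `locks` to device-side atomic implementations ...
 *   Kokkos::Impl::atomic_lock_array_cuda_space_ptr( true );  // free at finalize
 */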
namespace Kokkos {

/** \brief  Cuda unified virtual memory (UVM) management,
 *          addressable from both host and device.
 */
class CudaUVMSpace {
public:

  //! Tag this class as a Kokkos memory space
  typedef CudaUVMSpace  memory_space ;
  typedef Cuda          execution_space ;
  typedef unsigned int  size_type ;

  CudaUVMSpace();
  CudaUVMSpace( CudaUVMSpace && rhs ) = default ;
  CudaUVMSpace( const CudaUVMSpace & rhs ) = default ;
  CudaUVMSpace & operator = ( CudaUVMSpace && rhs ) = default ;
  CudaUVMSpace & operator = ( const CudaUVMSpace & rhs ) = default ;
  ~CudaUVMSpace() = default ;
  /**\brief  Allocate untracked memory in the cuda UVM space */
  void * allocate( const size_t arg_alloc_size ) const ;

  /**\brief  Deallocate untracked memory in the cuda UVM space */
  void deallocate( void * const arg_alloc_ptr
                 , const size_t arg_alloc_size ) const ;
};

} // namespace Kokkos
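/* Example (illustrative sketch): a UVM allocation is addressable from both
 * host and device, so the same pointer may be dereferenced on either side,
 * subject to the usual synchronization caveats.
 *
 *   Kokkos::CudaUVMSpace uvm ;
 *   double * p = static_cast<double*>( uvm.allocate( 100 * sizeof(double) ) );
 *   p[0] = 1.0 ;   // host write, no explicit copy needed
 *   // ... launch kernels reading p, then fence before host reads results ...
 *   uvm.deallocate( p , 100 * sizeof(double) );
 */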
namespace Kokkos {

/** \brief  Host memory that is pinned (page-locked) for fast
 *          asynchronous transfers to and from the Cuda device.
 */
class CudaHostPinnedSpace {
public:

  //! Tag this class as a Kokkos memory space
  /** \brief  Memory is in HostSpace so use the Host execution space */
  typedef HostSpace::execution_space  execution_space ;
  typedef CudaHostPinnedSpace         memory_space ;
  typedef unsigned int                size_type ;

  CudaHostPinnedSpace();
  CudaHostPinnedSpace( CudaHostPinnedSpace && rhs ) = default ;
  CudaHostPinnedSpace( const CudaHostPinnedSpace & rhs ) = default ;
  CudaHostPinnedSpace & operator = ( CudaHostPinnedSpace && rhs ) = default ;
  CudaHostPinnedSpace & operator = ( const CudaHostPinnedSpace & rhs ) = default ;
  ~CudaHostPinnedSpace() = default ;
  /**\brief  Allocate untracked memory in the space */
  void * allocate( const size_t arg_alloc_size ) const ;

  /**\brief  Deallocate untracked memory in the space */
  void deallocate( void * const arg_alloc_ptr
                 , const size_t arg_alloc_size ) const ;
};

} // namespace Kokkos
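/* Example (illustrative sketch): pinned host memory lives on the host but
 * is page-locked, so CUDA transfers to and from it can run asynchronously.
 *
 *   Kokkos::CudaHostPinnedSpace pinned ;
 *   void * buf = pinned.allocate( 1 << 20 );   // 1 MiB staging buffer
 *   // ... asynchronous copies between buf and device memory ...
 *   pinned.deallocate( buf , 1 << 20 );
 */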
namespace Kokkos {
namespace Impl {

//----------------------------------------
// HostSpace accessibility to Cuda spaces

template<>
struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaSpace > {
  enum { assignable = false };
  enum { accessible = false };
  enum { deepcopy   = true };
};

template<>
struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaUVMSpace > {
  // HostSpace::execution_space can access CudaUVMSpace, but
  // HostSpace::execution_space != CudaUVMSpace::execution_space
  enum { assignable = false };
  enum { accessible = true };
  enum { deepcopy   = true };
};

template<>
struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace > {
  // HostSpace::execution_space == CudaHostPinnedSpace::execution_space
  enum { assignable = true };
  enum { accessible = true };
  enum { deepcopy   = true };
};

//----------------------------------------
// CudaSpace accessibility to other spaces
template<>
struct MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::HostSpace > {
  enum { assignable = false };
  enum { accessible = false };
  enum { deepcopy   = true };
};
template<>
struct MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaUVMSpace > {
  // CudaSpace::execution_space == CudaUVMSpace::execution_space
  enum { assignable = true };
  enum { accessible = true };
  enum { deepcopy   = true };
};

template<>
struct MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace > {
  // CudaSpace::execution_space can access CudaHostPinnedSpace, but
  // CudaSpace::execution_space != CudaHostPinnedSpace::execution_space
  enum { assignable = false };
  enum { accessible = true };
  enum { deepcopy   = true };
};

//----------------------------------------
// CudaUVMSpace accessibility to other spaces
template<>
struct MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::HostSpace > {
  enum { assignable = false };
  enum { accessible = false };  // Cuda cannot access HostSpace
  enum { deepcopy   = true };
};
template<>
struct MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaSpace > {
  // CudaUVMSpace can be accessed from Host but CudaSpace cannot,
  // so a CudaUVMSpace handle is not assignable from a CudaSpace handle.
  enum { assignable = false };

  // CudaUVMSpace::execution_space can access CudaSpace
  enum { accessible = true };
  enum { deepcopy   = true };
};

template<>
struct MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaHostPinnedSpace > {
  // CudaUVMSpace::execution_space != CudaHostPinnedSpace::execution_space
  enum { assignable = false };
  enum { accessible = true };
  enum { deepcopy   = true };
};

//----------------------------------------
// CudaHostPinnedSpace accessibility to other spaces
template<>
struct MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace > {
  enum { assignable = false };
  enum { accessible = false };
  enum { deepcopy   = true };
};
template<>
struct MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaSpace > {
  enum { assignable = false };
  enum { accessible = false };
  enum { deepcopy   = true };
};

template<>
struct MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaUVMSpace > {
  enum { assignable = false };
  enum { accessible = true };
  enum { deepcopy   = true };
};
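/* Example (illustrative sketch): the trait triple can be queried at compile
 * time to guard view assignments and copies.
 *
 *   using Access =
 *     Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaUVMSpace > ;
 *   static_assert( Access::accessible , "host can dereference UVM pointers" );
 *   static_assert( ! Access::assignable , "but views are not assignable" );
 */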
//----------------------------------------

void DeepCopyAsyncCuda( void * dst , const void * src , size_t n );
template<> struct DeepCopy< CudaSpace , CudaSpace , Cuda >
{
  DeepCopy( void * dst , const void * src , size_t );
  DeepCopy( const Cuda & , void * dst , const void * src , size_t );
};
template<> struct DeepCopy< CudaSpace , HostSpace , Cuda >
{
  DeepCopy( void * dst , const void * src , size_t );
  DeepCopy( const Cuda & , void * dst , const void * src , size_t );
};
template<> struct DeepCopy< HostSpace , CudaSpace , Cuda >
{
  DeepCopy( void * dst , const void * src , size_t );
  DeepCopy( const Cuda & , void * dst , const void * src , size_t );
};
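/* Example (illustrative sketch; dev_ptr, host_ptr, and n are hypothetical):
 * these Cuda-execution-space specializations are the primitive copies the
 * generic versions below funnel into.
 *
 *   // copy n bytes from a host buffer to a device buffer
 *   Kokkos::Impl::DeepCopy< Kokkos::CudaSpace , Kokkos::HostSpace , Kokkos::Cuda >
 *     ( dev_ptr , host_ptr , n );
 */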
template< class ExecutionSpace >
struct DeepCopy< CudaSpace , CudaSpace , ExecutionSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }

  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    // Complete work in the executing space before the asynchronous copy
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};
template< class ExecutionSpace >
struct DeepCopy< CudaSpace , HostSpace , ExecutionSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }

  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};
template< class ExecutionSpace >
struct DeepCopy< HostSpace , CudaSpace , ExecutionSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }

  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};
template< class ExecutionSpace >
struct DeepCopy< CudaSpace , CudaUVMSpace , ExecutionSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }

  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};
template< class ExecutionSpace >
struct DeepCopy< CudaSpace , CudaHostPinnedSpace , ExecutionSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }

  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};
template< class ExecutionSpace >
struct DeepCopy< CudaUVMSpace , CudaSpace , ExecutionSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }

  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};
template< class ExecutionSpace >
struct DeepCopy< CudaUVMSpace , CudaUVMSpace , ExecutionSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }

  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};
template< class ExecutionSpace >
struct DeepCopy< CudaUVMSpace , CudaHostPinnedSpace , ExecutionSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }

  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};
template< class ExecutionSpace >
struct DeepCopy< CudaUVMSpace , HostSpace , ExecutionSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }

  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};
template< class ExecutionSpace >
struct DeepCopy< CudaHostPinnedSpace , CudaSpace , ExecutionSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }

  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};
template< class ExecutionSpace >
struct DeepCopy< CudaHostPinnedSpace , CudaUVMSpace , ExecutionSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }

  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};
template< class ExecutionSpace >
struct DeepCopy< CudaHostPinnedSpace , CudaHostPinnedSpace , ExecutionSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }

  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};
template< class ExecutionSpace >
struct DeepCopy< CudaHostPinnedSpace , HostSpace , ExecutionSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }

  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};
template< class ExecutionSpace >
struct DeepCopy< HostSpace , CudaUVMSpace , ExecutionSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }

  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};
template< class ExecutionSpace >
struct DeepCopy< HostSpace , CudaHostPinnedSpace , ExecutionSpace >
{
  DeepCopy( void * dst , const void * src , size_t n )
  { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }

  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    DeepCopyAsyncCuda( dst , src , n );
  }
};
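/* Example (illustrative sketch): user code does not call these functors
 * directly; Kokkos::deep_copy dispatches to them based on the source and
 * destination memory spaces of the views involved.
 *
 *   Kokkos::View<double*, Kokkos::CudaSpace> dev ( "dev" , 100 );
 *   Kokkos::View<double*, Kokkos::HostSpace> host( "host", 100 );
 *   Kokkos::deep_copy( host , dev );   // DeepCopy< HostSpace , CudaSpace , ... >
 */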
/** Running in CudaSpace attempting to access HostSpace: error */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::HostSpace >
{
  enum { value = false };
  KOKKOS_INLINE_FUNCTION static void verify( void )
    { Kokkos::abort("Cuda code attempted to access HostSpace memory"); }

  KOKKOS_INLINE_FUNCTION static void verify( const void * )
    { Kokkos::abort("Cuda code attempted to access HostSpace memory"); }
};
/** Running in CudaSpace accessing CudaUVMSpace: ok */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::CudaUVMSpace >
{
  enum { value = true };
  KOKKOS_INLINE_FUNCTION static void verify( void ) { }
  KOKKOS_INLINE_FUNCTION static void verify( const void * ) { }
};
/** Running in CudaSpace accessing CudaHostPinnedSpace: ok */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace >
{
  enum { value = true };
  KOKKOS_INLINE_FUNCTION static void verify( void ) { }
  KOKKOS_INLINE_FUNCTION static void verify( const void * ) { }
};
/** Running in CudaSpace attempting to access an unknown space: error */
template< class OtherSpace >
struct VerifyExecutionCanAccessMemorySpace<
  typename enable_if< ! is_same< Kokkos::CudaSpace , OtherSpace >::value , Kokkos::CudaSpace >::type ,
  OtherSpace >
{
  enum { value = false };
  KOKKOS_INLINE_FUNCTION static void verify( void )
    { Kokkos::abort("Cuda code attempted to access unknown Space memory"); }

  KOKKOS_INLINE_FUNCTION static void verify( const void * )
    { Kokkos::abort("Cuda code attempted to access unknown Space memory"); }
};
/** Running in HostSpace attempting to access CudaSpace: error */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaSpace >
{
  enum { value = false };
  inline static void verify( void ) { CudaSpace::access_error(); }
  inline static void verify( const void * p ) { CudaSpace::access_error(p); }
};
/** Running in HostSpace accessing CudaUVMSpace: ok */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaUVMSpace >
{
  enum { value = true };
  inline static void verify( void ) { }
  inline static void verify( const void * ) { }
};
/** Running in HostSpace accessing CudaHostPinnedSpace: ok */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace >
{
  enum { value = true };
  KOKKOS_INLINE_FUNCTION static void verify( void ) {}
  KOKKOS_INLINE_FUNCTION static void verify( const void * ) {}
};
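/* Example (illustrative sketch): these checks back Kokkos' access-violation
 * diagnostics.  Dereferencing a CudaSpace view from host code, for instance,
 * funnels through VerifyExecutionCanAccessMemorySpace< HostSpace , CudaSpace >
 * and raises CudaSpace::access_error() rather than crashing silently
 * (assuming a build with access verification enabled).
 *
 *   Kokkos::View<int*, Kokkos::CudaSpace> v( "v" , 10 );
 *   // v(0) = 1;   // host access: verify() reports the error
 */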
template<>
class SharedAllocationRecord< Kokkos::CudaSpace , void >
  : public SharedAllocationRecord< void , void >
{
private:

  friend class SharedAllocationRecord< Kokkos::CudaUVMSpace , void > ;

  typedef SharedAllocationRecord< void , void >  RecordBase ;

  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;

  static void deallocate( RecordBase * );

  static ::cudaTextureObject_t
  attach_texture_object( const unsigned sizeof_alias
                       , void * const   alloc_ptr
                       , const size_t   alloc_size );

  static RecordBase s_root_record ;

  ::cudaTextureObject_t    m_tex_obj ;
  const Kokkos::CudaSpace  m_space ;

protected:

  ~SharedAllocationRecord();
  SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}
  SharedAllocationRecord( const Kokkos::CudaSpace        & arg_space
                        , const std::string              & arg_label
                        , const size_t                     arg_alloc_size
                        , const RecordBase::function_type  arg_dealloc = & deallocate
                        );

public:

  std::string get_label() const ;
  static SharedAllocationRecord * allocate( const Kokkos::CudaSpace & arg_space
                                          , const std::string       & arg_label
                                          , const size_t              arg_alloc_size );

  /**\brief  Allocate tracked memory in the space */
  static
  void * allocate_tracked( const Kokkos::CudaSpace & arg_space
                         , const std::string       & arg_label
                         , const size_t              arg_alloc_size );

  /**\brief  Reallocate tracked memory in the space */
  static
  void * reallocate_tracked( void * const arg_alloc_ptr
                           , const size_t arg_alloc_size );

  /**\brief  Deallocate tracked memory in the space */
  static
  void deallocate_tracked( void * const arg_alloc_ptr );

  static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
  template< typename AliasType >
  inline
  ::cudaTextureObject_t attach_texture_object()
    {
      static_assert( ( std::is_same< AliasType , int >::value ||
                       std::is_same< AliasType , ::int2 >::value ||
                       std::is_same< AliasType , ::int4 >::value )
                   , "Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );

      if ( m_tex_obj == 0 ) {
        // Attach the texture object to the entire allocation on first use
        m_tex_obj = attach_texture_object( sizeof(AliasType)
                                         , (void*) RecordBase::m_alloc_ptr
                                         , RecordBase::m_alloc_size );
      }

      return m_tex_obj ;
    }
  template< typename AliasType >
  inline
  int attach_texture_object_offset( const AliasType * const ptr )
    {
      // The texture object covers the whole allocation; compute the
      // offset of ptr from the beginning of the allocation.
      return ptr - reinterpret_cast<AliasType*>( RecordBase::m_alloc_ptr );
    }
  static void print_records( std::ostream & , const Kokkos::CudaSpace & , bool detail = false );
};
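/* Example (illustrative sketch; "my_buffer" is a hypothetical label):
 * tracked allocations pair a label with the memory so usage can be audited.
 *
 *   void * p = Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >
 *                ::allocate_tracked( Kokkos::CudaSpace() , "my_buffer" , 256 );
 *   Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void >
 *     ::deallocate_tracked( p );
 */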
template<>
class SharedAllocationRecord< Kokkos::CudaUVMSpace , void >
  : public SharedAllocationRecord< void , void >
{
private:

  typedef SharedAllocationRecord< void , void >  RecordBase ;

  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;

  static void deallocate( RecordBase * );

  static RecordBase s_root_record ;

  ::cudaTextureObject_t       m_tex_obj ;
  const Kokkos::CudaUVMSpace  m_space ;

protected:

  ~SharedAllocationRecord();
  SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}
  SharedAllocationRecord( const Kokkos::CudaUVMSpace     & arg_space
                        , const std::string              & arg_label
                        , const size_t                     arg_alloc_size
                        , const RecordBase::function_type  arg_dealloc = & deallocate
                        );

public:

  std::string get_label() const ;
  static SharedAllocationRecord * allocate( const Kokkos::CudaUVMSpace & arg_space
                                          , const std::string          & arg_label
                                          , const size_t                 arg_alloc_size );

  /**\brief  Allocate tracked memory in the space */
  static
  void * allocate_tracked( const Kokkos::CudaUVMSpace & arg_space
                         , const std::string          & arg_label
                         , const size_t                 arg_alloc_size );

  /**\brief  Reallocate tracked memory in the space */
  static
  void * reallocate_tracked( void * const arg_alloc_ptr
                           , const size_t arg_alloc_size );

  /**\brief  Deallocate tracked memory in the space */
  static
  void deallocate_tracked( void * const arg_alloc_ptr );

  static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
  template< typename AliasType >
  inline
  ::cudaTextureObject_t attach_texture_object()
    {
      static_assert( ( std::is_same< AliasType , int >::value ||
                       std::is_same< AliasType , ::int2 >::value ||
                       std::is_same< AliasType , ::int4 >::value )
                   , "Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );

      if ( m_tex_obj == 0 ) {
        // Reuse the CudaSpace implementation to create the texture object
        m_tex_obj = SharedAllocationRecord< Kokkos::CudaSpace , void >::
          attach_texture_object( sizeof(AliasType)
                               , (void*) RecordBase::m_alloc_ptr
                               , RecordBase::m_alloc_size );
      }

      return m_tex_obj ;
    }
  template< typename AliasType >
  inline
  int attach_texture_object_offset( const AliasType * const ptr )
    {
      // Offset of ptr from the beginning of the allocation
      return ptr - reinterpret_cast<AliasType*>( RecordBase::m_alloc_ptr );
    }
  static void print_records( std::ostream & , const Kokkos::CudaUVMSpace & , bool detail = false );
};
template<>
class SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >
  : public SharedAllocationRecord< void , void >
{
private:

  typedef SharedAllocationRecord< void , void >  RecordBase ;

  SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
  SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;

  static void deallocate( RecordBase * );

  static RecordBase s_root_record ;

  const Kokkos::CudaHostPinnedSpace m_space ;

protected:

  ~SharedAllocationRecord();
  SharedAllocationRecord() : RecordBase(), m_space() {}
  SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace & arg_space
                        , const std::string                 & arg_label
                        , const size_t                        arg_alloc_size
                        , const RecordBase::function_type     arg_dealloc = & deallocate
                        );

public:

  std::string get_label() const ;
  static SharedAllocationRecord * allocate( const Kokkos::CudaHostPinnedSpace & arg_space
                                          , const std::string                 & arg_label
                                          , const size_t                        arg_alloc_size );

  /**\brief  Allocate tracked memory in the space */
  static
  void * allocate_tracked( const Kokkos::CudaHostPinnedSpace & arg_space
                         , const std::string                 & arg_label
                         , const size_t                        arg_alloc_size );

  /**\brief  Reallocate tracked memory in the space */
  static
  void * reallocate_tracked( void * const arg_alloc_ptr
                           , const size_t arg_alloc_size );

  /**\brief  Deallocate tracked memory in the space */
  static
  void deallocate_tracked( void * const arg_alloc_ptr );

  static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
  static void print_records( std::ostream & , const Kokkos::CudaHostPinnedSpace & , bool detail = false );
};
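/* Example (illustrative sketch): auditing live allocations in a space.
 * Passing detail = true prints per-record information such as label,
 * pointer, and size.
 *
 *   Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >
 *     ::print_records( std::cout , Kokkos::CudaHostPinnedSpace() , true );
 */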
} // namespace Impl
} // namespace Kokkos

#endif /* defined( KOKKOS_HAVE_CUDA ) */
#endif /* #ifndef KOKKOS_CUDASPACE_HPP */