9 #ifndef stk_algsup_CudaMemoryMgr_hpp 10 #define stk_algsup_CudaMemoryMgr_hpp 16 #include <stk_algsup/CudaCall.hpp> 35 : host_to_device_map(),
// Declaration of the host-keyed get_buffer overload.
// The definition later in this file looks host_ptr up in host_to_device_map
// and, on a miss, cudaMalloc's sizeof(T)*buf_size bytes and records the
// host/device pairing in both bookkeeping maps. buf_size is therefore an
// element count, not a byte count.
// NOTE(review): T is presumably introduced by a template header that is not
// visible in this excerpt -- confirm.
56 T* get_buffer(
const T* host_ptr,
size_t buf_size);
// Declaration of the get_buffer overload that takes no host pointer.
// The definition later in this file cudaMalloc's sizeof(T)*buf_size bytes and
// registers the new device pointer in device_to_host_map with a NULL host
// entry; nothing is added to host_to_device_map in the visible code.
// buf_size is an element count, not a byte count.
63 T* get_buffer(
size_t buf_size);
// Declaration of destroy_buffer. device_ptr is taken by non-const reference.
// The definition later in this file looks device_ptr up in device_to_host_map,
// removes the matching host_to_device_map entry when the recorded host pointer
// is non-NULL, calls cudaFree(device_ptr) and erases the device_to_host_map
// entry.
// NOTE(review): whether device_ptr is reset through the reference cannot be
// seen in this excerpt -- confirm against the full definition.
70 void destroy_buffer(T*& device_ptr);
// Declaration of copy_to_buffer: host -> device transfer.
// The definition later in this file throws std::runtime_error when device_ptr
// is not a key of device_to_host_map, and issues a cudaMemcpy of
// sizeof(T)*buf_size bytes from host_ptr to device_ptr with
// cudaMemcpyHostToDevice. buf_size is an element count.
77 void copy_to_buffer(
const T* host_ptr,
size_t buf_size, T* device_ptr);
// Declaration of copy_from_buffer: device -> host transfer.
// The definition later in this file throws std::runtime_error when device_ptr
// is not a key of device_to_host_map, and issues a cudaMemcpy of
// sizeof(T)*buf_size bytes from device_ptr to host_ptr with
// cudaMemcpyDeviceToHost. buf_size is an element count.
84 void copy_from_buffer(T* host_ptr,
size_t buf_size,
const T* device_ptr);
// Bookkeeping maps. Both are keyed by untyped pointers, so buffers of any
// element type share the same two maps.
// host_to_device_map: host pointer -> device pointer that
// get_buffer(host_ptr, buf_size) allocated for it.
91 std::map<const void*,void*> host_to_device_map;
// device_to_host_map: device pointer -> host pointer it was allocated for.
// get_buffer(buf_size) registers its buffers here with a NULL host pointer.
// Only pointers are stored; allocation sizes are not recorded.
92 std::map<const void*,const void*> device_to_host_map;
// Definition of get_buffer(host_ptr, buf_size): obtain the device buffer
// associated with a host pointer, allocating and registering one on first use.
// NOTE(review): this excerpt omits some original lines (braces, the branch
// joining the two assignments to device_ptr, and the return statement), so the
// comments below describe only the statements that are visible.
100 T* CudaMemoryMgr::get_buffer(
const T* host_ptr,
size_t buf_size)
102 T* device_ptr = NULL;
// Has a device buffer already been registered for this host pointer?
104 std::map<const void*,void*>::iterator iter = host_to_device_map.find(host_ptr);
// Miss: allocate room for buf_size elements of T on the device.
106 if (iter == host_to_device_map.end()) {
107 void* void_device_ptr = NULL;
108 CUDA_CALL( cudaMalloc( &void_device_ptr,
sizeof(T)*buf_size) );
// cudaMalloc fills in an untyped pointer; view it as T* from here on.
109 device_ptr =
reinterpret_cast<T*
>(void_device_ptr);
// Record the pairing in both directions so later calls can go from the host
// pointer to the buffer and from the buffer back to its host pointer.
111 host_to_device_map.insert( std::make_pair(host_ptr, device_ptr) );
112 device_to_host_map.insert( std::make_pair(device_ptr, host_ptr) );
// Presumably the hit branch (the `else` is not visible in this excerpt):
// reuse the device pointer stored in the map. The maps hold pointers only, so
// buf_size is not compared with the size of the earlier allocation here.
115 device_ptr =
reinterpret_cast<T*
>(iter->second);
// Definition of get_buffer(buf_size): allocate a device buffer that has no
// associated host pointer and start tracking it.
// buf_size is an element count; sizeof(T)*buf_size bytes are requested.
// NOTE(review): this excerpt omits some original lines (braces and the return
// statement); only the visible statements are reproduced and documented here.
124 T* CudaMemoryMgr::get_buffer(
size_t buf_size)
126 T* device_ptr = NULL;
128 CUDA_CALL( cudaMalloc( (
void**)&device_ptr,
sizeof(T)*buf_size) );
// Track the buffer with a null host entry so destroy_buffer and the copy
// routines recognise it. The null is given an explicit pointer type: when NULL
// is an integer-typed null pointer constant (0, or GCC's __null),
// std::make_pair(device_ptr, NULL) yields pair<T*, integer>, which cannot be
// converted to the map's pair<const void* const, const void*> value_type, so
// the insert would not compile once this template is instantiated.
130 device_to_host_map.insert( std::make_pair(device_ptr, static_cast<const T*>(NULL)) );
// Definition of destroy_buffer: free a tracked device buffer and drop its
// bookkeeping entries.
// NOTE(review): the closing braces and original line 150 are not visible in
// this excerpt, so the exact nesting of the last two statements, and whether
// device_ptr is reset through the reference, should be confirmed.
138 void CudaMemoryMgr::destroy_buffer(T*& device_ptr)
// Only buffers registered in device_to_host_map are acted on.
140 std::map<const void*,const void*>::iterator iter = device_to_host_map.find(device_ptr);
141 if (iter != device_to_host_map.end()) {
142 const void* host_ptr = iter->second;
// A NULL host pointer marks a buffer created without a host pointer; such a
// buffer has no host_to_device_map entry to remove.
143 if (host_ptr != NULL) {
144 std::map<const void*,void*>::iterator iter2 = host_to_device_map.find(host_ptr);
145 if (iter2 != host_to_device_map.end()) {
146 host_to_device_map.erase(iter2);
// Release the device allocation, then forget the device pointer.
149 CUDA_CALL( cudaFree(device_ptr) );
151 device_to_host_map.erase(iter);
// Definition of copy_to_buffer: copy buf_size elements of T from host memory
// into a tracked device buffer.
// Throws std::runtime_error when device_ptr is not a key of
// device_to_host_map.
// NOTE(review): braces are not visible in this excerpt; the copy presumably
// follows the check rather than sitting inside it -- confirm.
158 void CudaMemoryMgr::copy_to_buffer(
const T* host_ptr,
size_t buf_size, T* device_ptr)
// Reject device pointers that are not registered with this manager. host_ptr
// is not compared with the host pointer recorded for the buffer.
160 std::map<const void*,const void*>::iterator iter = device_to_host_map.find(device_ptr);
161 if (iter == device_to_host_map.end()) {
163 throw std::runtime_error(
"CudaMemoryMgr::copy_to_buffer ERROR, device_ptr not known.");
// Host -> device transfer of sizeof(T)*buf_size bytes.
166 CUDA_CALL( cudaMemcpy( device_ptr, host_ptr,
sizeof(T)*buf_size, cudaMemcpyHostToDevice) );
// Definition of copy_from_buffer: copy buf_size elements of T from a tracked
// device buffer back into host memory.
// Throws std::runtime_error when device_ptr is not a key of
// device_to_host_map.
// NOTE(review): braces are not visible in this excerpt; the copy presumably
// follows the check rather than sitting inside it -- confirm.
172 void CudaMemoryMgr::copy_from_buffer(T* host_ptr,
size_t buf_size,
const T* device_ptr)
// Reject device pointers that are not registered with this manager. host_ptr
// is not compared with the host pointer recorded for the buffer.
174 std::map<const void*,const void*>::iterator iter = device_to_host_map.find(device_ptr);
175 if (iter == device_to_host_map.end()) {
177 throw std::runtime_error(
"CudaMemoryMgr::copy_from_buffer ERROR, device_ptr not known.");
// Device -> host transfer of sizeof(T)*buf_size bytes.
180 CUDA_CALL( cudaMemcpy( host_ptr, device_ptr,
sizeof(T)*buf_size, cudaMemcpyDeviceToHost) );