ViennaCL - The Vienna Computing Library
1.5.2
#ifndef VIENNACL_BACKEND_CUDA_HPP_
#define VIENNACL_BACKEND_CUDA_HPP_

/* =========================================================================
   Copyright (c) 2010-2014, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

#include <iostream>
#include <vector>
#include <cassert>
#include "viennacl/tools/shared_ptr.hpp"

// includes CUDA
#include <cuda_runtime.h>

#define VIENNACL_CUDA_ERROR_CHECK(err) detail::cuda_error_check(err, __FILE__, __LINE__)

namespace viennacl
{
  namespace backend
  {
    namespace cuda
    {
      typedef viennacl::tools::shared_ptr<char>  handle_type;

      // Requirements for backend:
      //
      // * memory_create(size, host_ptr)
      // * memory_copy(src, dest, offset_src, offset_dest, size)
      // * memory_write_from_main_memory(src, offset, size,
      //                                 dest, offset, size)
      // * memory_read_to_main_memory(src, offset, size,
      //                              dest, offset, size)
      //

      namespace detail
      {

        /** Checks a CUDA runtime return code and throws on failure, reporting file and line. */
        inline void cuda_error_check(cudaError error_code, const char *file, const int line)
        {
          if (cudaSuccess != error_code)
          {
            std::cerr << file << "(" << line << "): CUDA Runtime API error " << error_code << ": " << cudaGetErrorString(error_code) << std::endl;
            throw "CUDA error";
          }
        }

        /** Deleter functor which releases the CUDA buffer when the owning handle goes out of scope. */
        template <typename U>
        struct cuda_deleter
        {
          void operator()(U * p) const
          {
            //std::cout << "Freeing handle " << reinterpret_cast<void *>(p) << std::endl;
            cudaFree(p);
          }
        };

      }

      /** Creates a CUDA buffer of the requested size in bytes and, if host_ptr is given, initializes it from host memory. */
      inline handle_type memory_create(vcl_size_t size_in_bytes, const void * host_ptr = NULL)
      {
        void * dev_ptr = NULL;
        VIENNACL_CUDA_ERROR_CHECK( cudaMalloc(&dev_ptr, size_in_bytes) );
        //std::cout << "Allocated new dev_ptr " << dev_ptr << " of size " << size_in_bytes << std::endl;

        if (!host_ptr)
          return handle_type(reinterpret_cast<char *>(dev_ptr), detail::cuda_deleter<char>());

        handle_type new_handle(reinterpret_cast<char *>(dev_ptr), detail::cuda_deleter<char>());

        // copy data:
        //std::cout << "Filling new handle from host_ptr " << host_ptr << std::endl;
        cudaMemcpy(new_handle.get(), host_ptr, size_in_bytes, cudaMemcpyHostToDevice);

        return new_handle;
      }

      /** Copies bytes_to_copy bytes from one CUDA buffer to another (device-to-device), honoring byte offsets. */
      inline void memory_copy(handle_type const & src_buffer,
                              handle_type & dst_buffer,
                              vcl_size_t src_offset,
                              vcl_size_t dst_offset,
                              vcl_size_t bytes_to_copy)
      {
        assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));
        assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));

        cudaMemcpy(reinterpret_cast<void *>(dst_buffer.get() + dst_offset),
                   reinterpret_cast<void *>(src_buffer.get() + src_offset),
                   bytes_to_copy,
                   cudaMemcpyDeviceToDevice);
      }
      /** Writes data from main RAM (identified by ptr) to the CUDA buffer at the given byte offset, optionally asynchronously. */
      inline void memory_write(handle_type & dst_buffer,
                               vcl_size_t dst_offset,
                               vcl_size_t bytes_to_copy,
                               const void * ptr,
                               bool async = false)
      {
        assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));

        if (async)
          cudaMemcpyAsync(reinterpret_cast<char *>(dst_buffer.get()) + dst_offset,
                          reinterpret_cast<const char *>(ptr),
                          bytes_to_copy,
                          cudaMemcpyHostToDevice);
        else
          cudaMemcpy(reinterpret_cast<char *>(dst_buffer.get()) + dst_offset,
                     reinterpret_cast<const char *>(ptr),
                     bytes_to_copy,
                     cudaMemcpyHostToDevice);
      }

      /** Reads data from the CUDA buffer at the given byte offset back to main RAM (identified by ptr), optionally asynchronously. */
      inline void memory_read(handle_type const & src_buffer,
                              vcl_size_t src_offset,
                              vcl_size_t bytes_to_copy,
                              void * ptr,
                              bool async = false)
      {
        assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));

        if (async)
          cudaMemcpyAsync(reinterpret_cast<char *>(ptr),
                          reinterpret_cast<char *>(src_buffer.get()) + src_offset,
                          bytes_to_copy,
                          cudaMemcpyDeviceToHost);
        else
          cudaMemcpy(reinterpret_cast<char *>(ptr),
                     reinterpret_cast<char *>(src_buffer.get()) + src_offset,
                     bytes_to_copy,
                     cudaMemcpyDeviceToHost);
      }

    } //cuda
  } //backend
} //viennacl

#endif
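For orientation, below is a minimal usage sketch that exercises the four routines declared in this header (memory_create, memory_copy, memory_write, memory_read) directly. This is an illustration, not the usual user-facing path: in practice these functions are reached through ViennaCL's generic backend layer and container types. The sketch assumes a CUDA-capable device, compilation with the CUDA runtime available, and that "viennacl/forwards.h" supplies vcl_size_t before this header is included; the buffer and variable names (buf_a, buf_b, host_in, host_out) are purely illustrative.

// Hypothetical sketch: round trip through the low-level CUDA backend routines.
#include <cstddef>
#include <iostream>
#include <vector>

#include "viennacl/forwards.h"          // assumed to provide vcl_size_t
#include "viennacl/backend/cuda.hpp"

int main()
{
  std::size_t const N = 16;
  std::vector<double> host_in(N, 42.0);
  std::vector<double> host_out(N, 0.0);

  using namespace viennacl::backend::cuda;

  // Allocate a device buffer and initialize it from host memory:
  handle_type buf_a = memory_create(N * sizeof(double), &host_in[0]);

  // Allocate a second, uninitialized device buffer and copy device-to-device:
  handle_type buf_b = memory_create(N * sizeof(double));
  memory_copy(buf_a, buf_b, 0, 0, N * sizeof(double));

  // Overwrite the first entry of buf_b from the host (synchronous write),
  // then read the whole buffer back to main RAM:
  double const first = 3.14;
  memory_write(buf_b, 0, sizeof(double), &first);
  memory_read(buf_b, 0, N * sizeof(double), &host_out[0]);

  std::cout << host_out[0] << " " << host_out[1] << std::endl;  // expected: 3.14 42

  return 0;  // buffers are freed automatically via cuda_deleter when the handles go out of scope
}

Note that the synchronous cudaMemcpy calls used by these routines already imply completion on return; the async = false default is therefore the safe choice when the host pointer may be reused immediately afterwards.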