ViennaCL - The Vienna Computing Library  1.6.2
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
cuda.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_BACKEND_CUDA_HPP_
2 #define VIENNACL_BACKEND_CUDA_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
26 #include <iostream>
27 #include <vector>
28 #include <cassert>
30 
31 // includes CUDA
32 #include <cuda_runtime.h>
33 
34 #define VIENNACL_CUDA_ERROR_CHECK(err) detail::cuda_error_check (err, __FILE__, __LINE__)
35 
36 namespace viennacl
37 {
38 namespace backend
39 {
40 namespace cuda
41 {
42 
44 // Requirements for backend:
45 
46 // * memory_create(size, host_ptr)
47 // * memory_copy(src, dest, offset_src, offset_dest, size)
48 // * memory_write_from_main_memory(src, offset, size,
49 // dest, offset, size)
50 // * memory_read_to_main_memory(src, offset, size
51 // dest, offset, size)
52 // *
53 //
54 
55 namespace detail
56 {
57 
58 
59  inline void cuda_error_check(cudaError error_code, const char *file, const int line )
60  {
61  if (cudaSuccess != error_code)
62  {
63  std::cerr << file << "(" << line << "): " << ": CUDA Runtime API error " << error_code << ": " << cudaGetErrorString( error_code ) << std::endl;
64  throw "CUDA error";
65  }
66  }
67 
68 
70  template<typename U>
71  struct cuda_deleter
72  {
73  void operator()(U * p) const
74  {
75  //std::cout << "Freeing handle " << reinterpret_cast<void *>(p) << std::endl;
76  cudaFree(p);
77  }
78  };
79 
80 }
81 
88 inline handle_type memory_create(vcl_size_t size_in_bytes, const void * host_ptr = NULL)
89 {
90  void * dev_ptr = NULL;
91  VIENNACL_CUDA_ERROR_CHECK( cudaMalloc(&dev_ptr, size_in_bytes) );
92  //std::cout << "Allocated new dev_ptr " << dev_ptr << " of size " << size_in_bytes << std::endl;
93 
94  if (!host_ptr)
95  return handle_type(reinterpret_cast<char *>(dev_ptr), detail::cuda_deleter<char>());
96 
97  handle_type new_handle(reinterpret_cast<char*>(dev_ptr), detail::cuda_deleter<char>());
98 
99  // copy data:
100  //std::cout << "Filling new handle from host_ptr " << host_ptr << std::endl;
101  cudaMemcpy(new_handle.get(), host_ptr, size_in_bytes, cudaMemcpyHostToDevice);
102 
103  return new_handle;
104 }
105 
106 
115 inline void memory_copy(handle_type const & src_buffer,
116  handle_type & dst_buffer,
117  vcl_size_t src_offset,
118  vcl_size_t dst_offset,
119  vcl_size_t bytes_to_copy)
120 {
121  assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));
122  assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));
123 
124  cudaMemcpy(reinterpret_cast<void *>(dst_buffer.get() + dst_offset),
125  reinterpret_cast<void *>(src_buffer.get() + src_offset),
126  bytes_to_copy,
127  cudaMemcpyDeviceToDevice);
128 }
129 
130 
139 inline void memory_write(handle_type & dst_buffer,
140  vcl_size_t dst_offset,
141  vcl_size_t bytes_to_copy,
142  const void * ptr,
143  bool async = false)
144 {
145  assert( (dst_buffer.get() != NULL) && bool("Memory not initialized!"));
146 
147  if (async)
148  cudaMemcpyAsync(reinterpret_cast<char *>(dst_buffer.get()) + dst_offset,
149  reinterpret_cast<const char *>(ptr),
150  bytes_to_copy,
151  cudaMemcpyHostToDevice);
152  else
153  cudaMemcpy(reinterpret_cast<char *>(dst_buffer.get()) + dst_offset,
154  reinterpret_cast<const char *>(ptr),
155  bytes_to_copy,
156  cudaMemcpyHostToDevice);
157 }
158 
159 
168 inline void memory_read(handle_type const & src_buffer,
169  vcl_size_t src_offset,
170  vcl_size_t bytes_to_copy,
171  void * ptr,
172  bool async = false)
173 {
174  assert( (src_buffer.get() != NULL) && bool("Memory not initialized!"));
175 
176  if (async)
177  cudaMemcpyAsync(reinterpret_cast<char *>(ptr),
178  reinterpret_cast<char *>(src_buffer.get()) + src_offset,
179  bytes_to_copy,
180  cudaMemcpyDeviceToHost);
181  else
182  cudaMemcpy(reinterpret_cast<char *>(ptr),
183  reinterpret_cast<char *>(src_buffer.get()) + src_offset,
184  bytes_to_copy,
185  cudaMemcpyDeviceToHost);
186 }
187 
188 } //cuda
189 } //backend
190 } //viennacl
191 #endif
void cuda_error_check(cudaError error_code, const char *file, const int line)
Definition: cuda.hpp:59
void memory_write(handle_type &dst_buffer, vcl_size_t dst_offset, vcl_size_t bytes_to_copy, const void *ptr, bool async=false)
Writes data from main RAM identified by 'ptr' to the CUDA buffer identified by 'dst_buffer'.
Definition: cuda.hpp:139
void memory_copy(handle_type const &src_buffer, handle_type &dst_buffer, vcl_size_t src_offset, vcl_size_t dst_offset, vcl_size_t bytes_to_copy)
Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' on the CUDA device to device memory starting at address 'dst_buffer + dst_offset'.
Definition: cuda.hpp:115
Implementation of a shared pointer class (cf. std::shared_ptr, boost::shared_ptr). Will be used until C++11 is widely available.
viennacl::tools::shared_ptr< char > handle_type
Definition: cuda.hpp:43
std::size_t vcl_size_t
Definition: forwards.h:74
#define VIENNACL_CUDA_ERROR_CHECK(err)
Definition: cuda.hpp:34
Functor for deleting a CUDA handle. Used within the smart pointer class.
Definition: cuda.hpp:71
handle_type memory_create(vcl_size_t size_in_bytes, const void *host_ptr=NULL)
Creates an array of the specified size on the CUDA device. If the second argument is provided, the new device buffer is initialized with a copy of the host data.
Definition: cuda.hpp:88
void memory_read(handle_type const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_copy, void *ptr, bool async=false)
Reads data from a CUDA buffer back to main RAM.
Definition: cuda.hpp:168