29 #ifndef VIENNACL_WITH_OPENCL
30 #define VIENNACL_WITH_OPENCL
60 static const char * my_compute_program =
61 "__kernel void elementwise_prod(\n"
62 " __global const float * vec1,\n"
63 " __global const float * vec2, \n"
64 " __global float * result,\n"
65 " unsigned int size) \n"
67 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
68 " result[i] = vec1[i] * vec2[i];\n"
88 std::vector<cl_device_id> device_id_array;
92 std::cout <<
"Platform info: " << pf.
info() << std::endl;
93 std::vector<viennacl::ocl::device> devices = pf.
devices(CL_DEVICE_TYPE_DEFAULT);
94 std::cout << devices[0].name() << std::endl;
95 std::cout <<
"Number of devices for custom context: " << devices.size() << std::endl;
98 for (std::size_t i=0; i<devices.size(); ++i)
100 device_id_array.push_back(devices[i].
id());
103 std::cout <<
"Creating context..." << std::endl;
105 cl_context my_context = clCreateContext(0, cl_uint(device_id_array.size()), &(device_id_array[0]), NULL, NULL, &err);
110 unsigned int vector_size = 10;
111 std::vector<ScalarType> vec1(vector_size);
112 std::vector<ScalarType> vec2(vector_size);
113 std::vector<ScalarType> result(vector_size);
118 for (
unsigned int i=0; i<vector_size; ++i)
120 vec1[i] =
static_cast<ScalarType
>(i);
121 vec2[i] =
static_cast<ScalarType
>(vector_size-i);
127 cl_mem mem_vec1 = clCreateBuffer(my_context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, vector_size *
sizeof(ScalarType), &(vec1[0]), &err);
129 cl_mem mem_vec2 = clCreateBuffer(my_context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, vector_size *
sizeof(ScalarType), &(vec2[0]), &err);
131 cl_mem mem_result = clCreateBuffer(my_context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, vector_size *
sizeof(ScalarType), &(result[0]), &err);
138 std::vector<cl_command_queue> queues(devices.size());
139 for (std::size_t i=0; i<devices.size(); ++i)
141 queues[i] = clCreateCommandQueue(my_context, devices[i].
id(), 0, &err);
148 std::size_t source_len = std::string(my_compute_program).length();
149 cl_program my_prog = clCreateProgramWithSource(my_context, 1, &my_compute_program, &source_len, &err);
150 err = clBuildProgram(my_prog, 0, NULL, NULL, NULL, NULL);
164 const char * kernel_name =
"elementwise_prod";
165 cl_kernel my_kernel = clCreateKernel(my_prog, kernel_name, &err);
172 err = clSetKernelArg(my_kernel, 0,
sizeof(cl_mem), (
void*)&mem_vec1);
174 err = clSetKernelArg(my_kernel, 1,
sizeof(cl_mem), (
void*)&mem_vec2);
176 err = clSetKernelArg(my_kernel, 2,
sizeof(cl_mem), (
void*)&mem_result);
178 err = clSetKernelArg(my_kernel, 3,
sizeof(
unsigned int), (
void*)&vector_size);
180 std::size_t global_size = vector_size;
181 std::size_t local_size = vector_size;
182 err = clEnqueueNDRangeKernel(queues[0], my_kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
189 err = clEnqueueReadBuffer(queues[0], mem_vec1, CL_TRUE, 0,
sizeof(ScalarType)*vector_size, &(vec1[0]), 0, NULL, NULL);
191 err = clEnqueueReadBuffer(queues[0], mem_result, CL_TRUE, 0,
sizeof(ScalarType)*vector_size, &(result[0]), 0, NULL, NULL);
194 std::cout <<
"vec1 : ";
195 for (std::size_t i=0; i<vec1.size(); ++i)
196 std::cout << vec1[i] <<
" ";
197 std::cout << std::endl;
199 std::cout <<
"vec2 : ";
200 for (std::size_t i=0; i<vec2.size(); ++i)
201 std::cout << vec2[i] <<
" ";
202 std::cout << std::endl;
204 std::cout <<
"result: ";
205 for (std::size_t i=0; i<result.size(); ++i)
206 std::cout << result[i] <<
" ";
207 std::cout << std::endl;
221 std::cout <<
"Existing context: " << my_context << std::endl;
232 std::cout <<
"Standard vector operations within ViennaCL:" << std::endl;
233 vcl_result = vcl_s * vcl_vec1 + vcl_vec2;
235 std::cout <<
"vec1 : ";
236 std::cout << vcl_vec1 << std::endl;
238 std::cout <<
"vec2 : ";
239 std::cout << vcl_vec2 << std::endl;
241 std::cout <<
"result: ";
242 std::cout << vcl_result << std::endl;
249 std::cout <<
"Using existing kernel within the OpenCL backend of ViennaCL:" << std::endl;
252 viennacl::ocl::enqueue(my_vcl_kernel(vcl_vec1, vcl_vec2, vcl_result, static_cast<cl_uint>(vcl_vec1.size())));
254 std::cout <<
"vec1 : ";
255 std::cout << vcl_vec1 << std::endl;
257 std::cout <<
"vec2 : ";
258 std::cout << vcl_vec2 << std::endl;
260 std::cout <<
"result: ";
261 std::cout << vcl_result << std::endl;
272 vcl_result.resize(3);
275 std::cout <<
"result of matrix-vector product: ";
276 std::cout << vcl_result << std::endl;
282 std::cout <<
"!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;
viennacl::ocl::kernel & add_kernel(cl_kernel kernel_handle, std::string const &kernel_name)
Adds a kernel to the program.
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
Implementations of dense matrix related operations including matrix-vector products.
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Represents an OpenCL kernel within ViennaCL.
Implementation of the dense matrix class.
viennacl::ocl::context & current_context()
Convenience function for returning the current context.
const viennacl::ocl::handle< cl_context > & handle() const
Returns the context handle.
#define VIENNACL_ERR_CHECK(err)
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
viennacl::ocl::program & add_program(cl_program p, std::string const &prog_name)
Adds a program to the context.
const OCL_TYPE & get() const
Wrapper class for an OpenCL program.
Implementations of the OpenCL backend, in which all contexts are stored.
void switch_context(long i)
Convenience function for switching the current context.
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void setup_context(long i, std::vector< cl_device_id > const &devices)
Convenience function for setting devices for a context.