ViennaCL - The Vienna Computing Library
1.5.2
|
00001 #ifndef VIENNACL_LINALG_OPENCL_VECTOR_OPERATIONS_HPP_ 00002 #define VIENNACL_LINALG_OPENCL_VECTOR_OPERATIONS_HPP_ 00003 00004 /* ========================================================================= 00005 Copyright (c) 2010-2014, Institute for Microelectronics, 00006 Institute for Analysis and Scientific Computing, 00007 TU Wien. 00008 Portions of this software are copyright by UChicago Argonne, LLC. 00009 00010 ----------------- 00011 ViennaCL - The Vienna Computing Library 00012 ----------------- 00013 00014 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 00015 00016 (A list of authors and contributors can be found in the PDF manual) 00017 00018 License: MIT (X11), see file LICENSE in the base directory 00019 ============================================================================= */ 00020 00025 #include <cmath> 00026 00027 #include "viennacl/forwards.h" 00028 #include "viennacl/ocl/device.hpp" 00029 #include "viennacl/ocl/handle.hpp" 00030 #include "viennacl/ocl/kernel.hpp" 00031 #include "viennacl/scalar.hpp" 00032 #include "viennacl/tools/tools.hpp" 00033 #include "viennacl/linalg/opencl/common.hpp" 00034 #include "viennacl/linalg/opencl/kernels/vector.hpp" 00035 #include "viennacl/linalg/opencl/kernels/vector_element.hpp" 00036 #include "viennacl/meta/predicate.hpp" 00037 #include "viennacl/meta/enable_if.hpp" 00038 #include "viennacl/traits/size.hpp" 00039 #include "viennacl/traits/start.hpp" 00040 #include "viennacl/traits/handle.hpp" 00041 #include "viennacl/traits/stride.hpp" 00042 00043 namespace viennacl 00044 { 00045 namespace linalg 00046 { 00047 namespace opencl 00048 { 00049 // 00050 // Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here! 00051 // 00052 00053 00054 template <typename T, typename ScalarType1> 00055 void av(vector_base<T> & vec1, 00056 vector_base<T> const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) 00057 { 00058 assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); 00059 00060 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(vec1).context()); 00061 viennacl::linalg::opencl::kernels::vector<T>::init(ctx); 00062 00063 cl_uint options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha); 00064 00065 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector<T>::program_name(), 00066 (viennacl::is_cpu_scalar<ScalarType1>::value ? "av_cpu" : "av_gpu")); 00067 k.global_work_size(0, std::min<vcl_size_t>(128 * k.local_work_size(), 00068 viennacl::tools::align_to_multiple<vcl_size_t>(viennacl::traits::size(vec1), k.local_work_size()) ) ); 00069 00070 viennacl::ocl::packed_cl_uint size_vec1; 00071 size_vec1.start = cl_uint(viennacl::traits::start(vec1)); 00072 size_vec1.stride = cl_uint(viennacl::traits::stride(vec1)); 00073 size_vec1.size = cl_uint(viennacl::traits::size(vec1)); 00074 size_vec1.internal_size = cl_uint(viennacl::traits::internal_size(vec1)); 00075 00076 viennacl::ocl::packed_cl_uint size_vec2; 00077 size_vec2.start = cl_uint(viennacl::traits::start(vec2)); 00078 size_vec2.stride = cl_uint(viennacl::traits::stride(vec2)); 00079 size_vec2.size = cl_uint(viennacl::traits::size(vec2)); 00080 size_vec2.internal_size = cl_uint(viennacl::traits::internal_size(vec2)); 00081 00082 00083 viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(vec1), 00084 size_vec1, 00085 00086 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<T>(alpha)), 00087 options_alpha, 00088 viennacl::traits::opencl_handle(vec2), 00089 size_vec2 ) 00090 ); 00091 } 00092 00093 00094 template <typename T, typename ScalarType1, typename ScalarType2> 00095 void avbv(vector_base<T> & vec1, 00096 vector_base<T> const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, 00097 vector_base<T> const & vec3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) 00098 { 00099 assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); 00100 assert(viennacl::traits::opencl_handle(vec2).context() == viennacl::traits::opencl_handle(vec3).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); 00101 00102 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(vec1).context()); 00103 viennacl::linalg::opencl::kernels::vector<T>::init(ctx); 00104 00105 std::string kernel_name; 00106 if (viennacl::is_cpu_scalar<ScalarType1>::value && viennacl::is_cpu_scalar<ScalarType2>::value) 00107 kernel_name = "avbv_cpu_cpu"; 00108 else if (viennacl::is_cpu_scalar<ScalarType1>::value && !viennacl::is_cpu_scalar<ScalarType2>::value) 00109 kernel_name = "avbv_cpu_gpu"; 00110 else if (!viennacl::is_cpu_scalar<ScalarType1>::value && viennacl::is_cpu_scalar<ScalarType2>::value) 00111 kernel_name = "avbv_gpu_cpu"; 00112 else 00113 kernel_name = "avbv_gpu_gpu"; 00114 00115 cl_uint options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha); 00116 cl_uint options_beta = detail::make_options(len_beta, reciprocal_beta, flip_sign_beta); 00117 00118 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector<T>::program_name(), kernel_name); 00119 k.global_work_size(0, std::min<vcl_size_t>(128 * k.local_work_size(), 00120 viennacl::tools::align_to_multiple<vcl_size_t>(viennacl::traits::size(vec1), k.local_work_size()) ) ); 00121 00122 viennacl::ocl::packed_cl_uint size_vec1; 00123 size_vec1.start = cl_uint(viennacl::traits::start(vec1)); 00124 size_vec1.stride = cl_uint(viennacl::traits::stride(vec1)); 00125 size_vec1.size = cl_uint(viennacl::traits::size(vec1)); 00126 size_vec1.internal_size = cl_uint(viennacl::traits::internal_size(vec1)); 00127 00128 viennacl::ocl::packed_cl_uint size_vec2; 00129 size_vec2.start = cl_uint(viennacl::traits::start(vec2)); 00130 size_vec2.stride = cl_uint(viennacl::traits::stride(vec2)); 00131 size_vec2.size = cl_uint(viennacl::traits::size(vec2)); 00132 size_vec2.internal_size = cl_uint(viennacl::traits::internal_size(vec2)); 00133 00134 viennacl::ocl::packed_cl_uint size_vec3; 00135 size_vec3.start = cl_uint(viennacl::traits::start(vec3)); 00136 size_vec3.stride = cl_uint(viennacl::traits::stride(vec3)); 00137 size_vec3.size = cl_uint(viennacl::traits::size(vec3)); 00138 size_vec3.internal_size = cl_uint(viennacl::traits::internal_size(vec3)); 00139 00140 viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(vec1), 00141 size_vec1, 00142 00143 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<T>(alpha)), 00144 options_alpha, 00145 viennacl::traits::opencl_handle(vec2), 00146 size_vec2, 00147 00148 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<T>(beta)), 00149 options_beta, 00150 viennacl::traits::opencl_handle(vec3), 00151 size_vec3 ) 00152 ); 00153 } 00154 00155 00156 template <typename T, typename ScalarType1, typename ScalarType2> 00157 void avbv_v(vector_base<T> & vec1, 00158 vector_base<T> const & vec2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, 00159 vector_base<T> const & vec3, ScalarType2 const & beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) 00160 { 00161 assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); 00162 assert(viennacl::traits::opencl_handle(vec2).context() == viennacl::traits::opencl_handle(vec3).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); 00163 00164 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(vec1).context()); 00165 viennacl::linalg::opencl::kernels::vector<T>::init(ctx); 00166 00167 std::string kernel_name; 00168 if (viennacl::is_cpu_scalar<ScalarType1>::value && viennacl::is_cpu_scalar<ScalarType2>::value) 00169 kernel_name = "avbv_v_cpu_cpu"; 00170 else if (viennacl::is_cpu_scalar<ScalarType1>::value && !viennacl::is_cpu_scalar<ScalarType2>::value) 00171 kernel_name = "avbv_v_cpu_gpu"; 00172 else if (!viennacl::is_cpu_scalar<ScalarType1>::value && viennacl::is_cpu_scalar<ScalarType2>::value) 00173 kernel_name = "avbv_v_gpu_cpu"; 00174 else 00175 kernel_name = "avbv_v_gpu_gpu"; 00176 00177 cl_uint options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha); 00178 cl_uint options_beta = detail::make_options(len_beta, reciprocal_beta, flip_sign_beta); 00179 00180 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector<T>::program_name(), kernel_name); 00181 k.global_work_size(0, std::min<vcl_size_t>(128 * k.local_work_size(), 00182 viennacl::tools::align_to_multiple<vcl_size_t>(viennacl::traits::size(vec1), k.local_work_size()) ) ); 00183 00184 viennacl::ocl::packed_cl_uint size_vec1; 00185 size_vec1.start = cl_uint(viennacl::traits::start(vec1)); 00186 size_vec1.stride = cl_uint(viennacl::traits::stride(vec1)); 00187 size_vec1.size = cl_uint(viennacl::traits::size(vec1)); 00188 size_vec1.internal_size = cl_uint(viennacl::traits::internal_size(vec1)); 00189 00190 viennacl::ocl::packed_cl_uint size_vec2; 00191 size_vec2.start = cl_uint(viennacl::traits::start(vec2)); 00192 size_vec2.stride = cl_uint(viennacl::traits::stride(vec2)); 00193 size_vec2.size = cl_uint(viennacl::traits::size(vec2)); 00194 size_vec2.internal_size = cl_uint(viennacl::traits::internal_size(vec2)); 00195 00196 viennacl::ocl::packed_cl_uint size_vec3; 00197 size_vec3.start = cl_uint(viennacl::traits::start(vec3)); 00198 size_vec3.stride = cl_uint(viennacl::traits::stride(vec3)); 00199 size_vec3.size = cl_uint(viennacl::traits::size(vec3)); 00200 size_vec3.internal_size = cl_uint(viennacl::traits::internal_size(vec3)); 00201 00202 viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(vec1), 00203 size_vec1, 00204 00205 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<T>(alpha)), 00206 options_alpha, 00207 viennacl::traits::opencl_handle(vec2), 00208 size_vec2, 00209 00210 viennacl::traits::opencl_handle(viennacl::tools::promote_if_host_scalar<T>(beta)), 00211 options_beta, 00212 viennacl::traits::opencl_handle(vec3), 00213 size_vec3 ) 00214 ); 00215 } 00216 00217 00224 template <typename T> 00225 void vector_assign(vector_base<T> & vec1, const T & alpha, bool up_to_internal_size = false) 00226 { 00227 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(vec1).context()); 00228 viennacl::linalg::opencl::kernels::vector<T>::init(ctx); 00229 00230 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector<T>::program_name(), "assign_cpu"); 00231 k.global_work_size(0, std::min<vcl_size_t>(128 * k.local_work_size(), 00232 viennacl::tools::align_to_multiple<vcl_size_t>(viennacl::traits::size(vec1), k.local_work_size()) ) ); 00233 00234 cl_uint size = up_to_internal_size ? cl_uint(vec1.internal_size()) : cl_uint(viennacl::traits::size(vec1)); 00235 viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(vec1), 00236 cl_uint(viennacl::traits::start(vec1)), 00237 cl_uint(viennacl::traits::stride(vec1)), 00238 size, 00239 cl_uint(vec1.internal_size()), //Note: Do NOT use traits::internal_size() here, because vector proxies don't require padding. 00240 viennacl::traits::opencl_handle(T(alpha)) ) 00241 ); 00242 } 00243 00244 00250 template <typename T> 00251 void vector_swap(vector_base<T> & vec1, vector_base<T> & vec2) 00252 { 00253 assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); 00254 00255 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(vec1).context()); 00256 viennacl::linalg::opencl::kernels::vector<T>::init(ctx); 00257 00258 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector<T>::program_name(), "swap"); 00259 00260 viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(vec1), 00261 cl_uint(viennacl::traits::start(vec1)), 00262 cl_uint(viennacl::traits::stride(vec1)), 00263 cl_uint(viennacl::traits::size(vec1)), 00264 viennacl::traits::opencl_handle(vec2), 00265 cl_uint(viennacl::traits::start(vec2)), 00266 cl_uint(viennacl::traits::stride(vec2)), 00267 cl_uint(viennacl::traits::size(vec2))) 00268 ); 00269 } 00270 00272 00278 template <typename T, typename OP> 00279 void element_op(vector_base<T> & vec1, 00280 vector_expression<const vector_base<T>, const vector_base<T>, op_element_binary<OP> > const & proxy) 00281 { 00282 assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(proxy.lhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); 00283 assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(proxy.rhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); 00284 00285 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(vec1).context()); 00286 viennacl::linalg::opencl::kernels::vector_element<T>::init(ctx); 00287 00288 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector_element<T>::program_name(), "element_op"); 00289 00290 cl_uint op_type = 2; //0: product, 1: division, 2: power 00291 if (viennacl::is_division<OP>::value) 00292 op_type = 1; 00293 else if (viennacl::is_product<OP>::value) 00294 op_type = 0; 00295 00296 viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(vec1), 00297 cl_uint(viennacl::traits::start(vec1)), 00298 cl_uint(viennacl::traits::stride(vec1)), 00299 cl_uint(viennacl::traits::size(vec1)), 00300 00301 viennacl::traits::opencl_handle(proxy.lhs()), 00302 cl_uint(viennacl::traits::start(proxy.lhs())), 00303 cl_uint(viennacl::traits::stride(proxy.lhs())), 00304 00305 viennacl::traits::opencl_handle(proxy.rhs()), 00306 cl_uint(viennacl::traits::start(proxy.rhs())), 00307 cl_uint(viennacl::traits::stride(proxy.rhs())), 00308 00309 op_type) 00310 ); 00311 } 00312 00314 00320 template <typename T, typename OP> 00321 void element_op(vector_base<T> & vec1, 00322 vector_expression<const vector_base<T>, const vector_base<T>, op_element_unary<OP> > const & proxy) 00323 { 00324 assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(proxy.lhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); 00325 assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(proxy.rhs()).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); 00326 00327 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(vec1).context()); 00328 viennacl::linalg::opencl::kernels::vector_element<T>::init(ctx); 00329 00330 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector_element<T>::program_name(), detail::op_to_string(OP()) + "_assign"); 00331 00332 viennacl::ocl::packed_cl_uint size_vec1; 00333 size_vec1.start = cl_uint(viennacl::traits::start(vec1)); 00334 size_vec1.stride = cl_uint(viennacl::traits::stride(vec1)); 00335 size_vec1.size = cl_uint(viennacl::traits::size(vec1)); 00336 size_vec1.internal_size = cl_uint(viennacl::traits::internal_size(vec1)); 00337 00338 viennacl::ocl::packed_cl_uint size_vec2; 00339 size_vec2.start = cl_uint(viennacl::traits::start(proxy.lhs())); 00340 size_vec2.stride = cl_uint(viennacl::traits::stride(proxy.lhs())); 00341 size_vec2.size = cl_uint(viennacl::traits::size(proxy.lhs())); 00342 size_vec2.internal_size = cl_uint(viennacl::traits::internal_size(proxy.lhs())); 00343 00344 viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(vec1), 00345 size_vec1, 00346 viennacl::traits::opencl_handle(proxy.lhs()), 00347 size_vec2) 00348 ); 00349 } 00350 00352 00359 template <typename T> 00360 void inner_prod_impl(vector_base<T> const & vec1, 00361 vector_base<T> const & vec2, 00362 vector_base<T> & partial_result) 00363 { 00364 assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); 00365 assert(viennacl::traits::opencl_handle(vec2).context() == viennacl::traits::opencl_handle(partial_result).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); 00366 00367 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(vec1).context()); 00368 viennacl::linalg::opencl::kernels::vector<T>::init(ctx); 00369 00370 assert( (viennacl::traits::size(vec1) == viennacl::traits::size(vec2)) 00371 && bool("Incompatible vector sizes in inner_prod_impl()!")); 00372 00373 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector<T>::program_name(), "inner_prod1"); 00374 00375 assert( (k.global_work_size() / k.local_work_size() <= partial_result.size()) && bool("Size mismatch for partial reduction in inner_prod_impl()") ); 00376 00377 viennacl::ocl::packed_cl_uint size_vec1; 00378 size_vec1.start = cl_uint(viennacl::traits::start(vec1)); 00379 size_vec1.stride = cl_uint(viennacl::traits::stride(vec1)); 00380 size_vec1.size = cl_uint(viennacl::traits::size(vec1)); 00381 size_vec1.internal_size = cl_uint(viennacl::traits::internal_size(vec1)); 00382 00383 viennacl::ocl::packed_cl_uint size_vec2; 00384 size_vec2.start = cl_uint(viennacl::traits::start(vec2)); 00385 size_vec2.stride = cl_uint(viennacl::traits::stride(vec2)); 00386 size_vec2.size = cl_uint(viennacl::traits::size(vec2)); 00387 size_vec2.internal_size = cl_uint(viennacl::traits::internal_size(vec2)); 00388 00389 viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(vec1), 00390 size_vec1, 00391 viennacl::traits::opencl_handle(vec2), 00392 size_vec2, 00393 viennacl::ocl::local_mem(sizeof(typename viennacl::result_of::cl_type<T>::type) * k.local_work_size()), 00394 viennacl::traits::opencl_handle(partial_result) 00395 ) 00396 ); 00397 } 00398 00399 00400 //implementation of inner product: 00401 //namespace { 00408 template <typename T> 00409 void inner_prod_impl(vector_base<T> const & vec1, 00410 vector_base<T> const & vec2, 00411 scalar<T> & result) 00412 { 00413 assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); 00414 assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!")); 00415 00416 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(vec1).context()); 00417 00418 vcl_size_t work_groups = 128; 00419 viennacl::vector<T> temp(work_groups, viennacl::traits::context(vec1)); 00420 temp.resize(work_groups, ctx); // bring default-constructed vectors to the correct size: 00421 00422 // Step 1: Compute partial inner products for each work group: 00423 inner_prod_impl(vec1, vec2, temp); 00424 00425 // Step 2: Sum partial results: 00426 viennacl::ocl::kernel & ksum = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector<T>::program_name(), "sum"); 00427 00428 ksum.local_work_size(0, work_groups); 00429 ksum.global_work_size(0, work_groups); 00430 viennacl::ocl::enqueue(ksum(viennacl::traits::opencl_handle(temp), 00431 cl_uint(viennacl::traits::start(temp)), 00432 cl_uint(viennacl::traits::stride(temp)), 00433 cl_uint(viennacl::traits::size(temp)), 00434 cl_uint(1), 00435 viennacl::ocl::local_mem(sizeof(typename viennacl::result_of::cl_type<T>::type) * ksum.local_work_size()), 00436 viennacl::traits::opencl_handle(result) ) 00437 ); 00438 } 00439 00440 namespace detail 00441 { 00442 template <typename ScalarT> 00443 viennacl::ocl::packed_cl_uint make_layout(vector_base<ScalarT> const & vec) 00444 { 00445 viennacl::ocl::packed_cl_uint ret; 00446 ret.start = cl_uint(viennacl::traits::start(vec)); 00447 ret.stride = cl_uint(viennacl::traits::stride(vec)); 00448 ret.size = cl_uint(viennacl::traits::size(vec)); 00449 ret.internal_size = cl_uint(viennacl::traits::internal_size(vec)); 00450 return ret; 00451 } 00452 } 00453 00460 template <typename T> 00461 void inner_prod_impl(vector_base<T> const & x, 00462 vector_tuple<T> const & vec_tuple, 00463 vector_base<T> & result) 00464 { 00465 assert(viennacl::traits::opencl_handle(x).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!")); 00466 00467 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(x).context()); 00468 viennacl::linalg::opencl::kernels::vector<T>::init(ctx); 00469 viennacl::linalg::opencl::kernels::vector_multi_inner_prod<T>::init(ctx); 00470 00471 vcl_size_t work_groups = 128; 00472 00473 viennacl::vector<T> temp(work_groups, viennacl::traits::context(x)); 00474 temp.resize(8 * work_groups, ctx); // bring default-constructed vectors to the correct size: 00475 00476 viennacl::ocl::packed_cl_uint layout_x = detail::make_layout(x); 00477 00478 viennacl::ocl::kernel & ksum = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector_multi_inner_prod<T>::program_name(), "sum_inner_prod"); 00479 viennacl::ocl::kernel & inner_prod_kernel_1 = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector<T>::program_name(), "inner_prod1"); 00480 viennacl::ocl::kernel & inner_prod_kernel_2 = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector_multi_inner_prod<T>::program_name(), "inner_prod2"); 00481 viennacl::ocl::kernel & inner_prod_kernel_3 = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector_multi_inner_prod<T>::program_name(), "inner_prod3"); 00482 viennacl::ocl::kernel & inner_prod_kernel_4 = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector_multi_inner_prod<T>::program_name(), "inner_prod4"); 00483 viennacl::ocl::kernel & inner_prod_kernel_8 = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector_multi_inner_prod<T>::program_name(), "inner_prod8"); 00484 00485 vcl_size_t current_index = 0; 00486 while (current_index < vec_tuple.const_size()) 00487 { 00488 switch (vec_tuple.const_size() - current_index) 00489 { 00490 case 7: 00491 case 6: 00492 case 5: 00493 case 4: 00494 { 00495 vector_base<T> const & y0 = vec_tuple.const_at(current_index ); 00496 vector_base<T> const & y1 = vec_tuple.const_at(current_index + 1); 00497 vector_base<T> const & y2 = vec_tuple.const_at(current_index + 2); 00498 vector_base<T> const & y3 = vec_tuple.const_at(current_index + 3); 00499 viennacl::ocl::enqueue(inner_prod_kernel_4( viennacl::traits::opencl_handle(x), layout_x, 00500 viennacl::traits::opencl_handle(y0), detail::make_layout(y0), 00501 viennacl::traits::opencl_handle(y1), detail::make_layout(y1), 00502 viennacl::traits::opencl_handle(y2), detail::make_layout(y2), 00503 viennacl::traits::opencl_handle(y3), detail::make_layout(y3), 00504 viennacl::ocl::local_mem(sizeof(typename viennacl::result_of::cl_type<T>::type) * 4 * inner_prod_kernel_4.local_work_size()), 00505 viennacl::traits::opencl_handle(temp) 00506 ) ); 00507 00508 ksum.local_work_size(0, work_groups); 00509 ksum.global_work_size(0, 4 * work_groups); 00510 viennacl::ocl::enqueue(ksum(viennacl::traits::opencl_handle(temp), 00511 viennacl::ocl::local_mem(sizeof(typename viennacl::result_of::cl_type<T>::type) * 4 * ksum.local_work_size()), 00512 viennacl::traits::opencl_handle(result), 00513 cl_uint(viennacl::traits::start(result) + current_index * viennacl::traits::stride(result)), 00514 cl_uint(viennacl::traits::stride(result)) 00515 ) 00516 ); 00517 } 00518 current_index += 4; 00519 break; 00520 00521 case 3: 00522 { 00523 vector_base<T> const & y0 = vec_tuple.const_at(current_index ); 00524 vector_base<T> const & y1 = vec_tuple.const_at(current_index + 1); 00525 vector_base<T> const & y2 = vec_tuple.const_at(current_index + 2); 00526 viennacl::ocl::enqueue(inner_prod_kernel_3( viennacl::traits::opencl_handle(x), layout_x, 00527 viennacl::traits::opencl_handle(y0), detail::make_layout(y0), 00528 viennacl::traits::opencl_handle(y1), detail::make_layout(y1), 00529 viennacl::traits::opencl_handle(y2), detail::make_layout(y2), 00530 viennacl::ocl::local_mem(sizeof(typename viennacl::result_of::cl_type<T>::type) * 3 * inner_prod_kernel_3.local_work_size()), 00531 viennacl::traits::opencl_handle(temp) 00532 ) ); 00533 00534 ksum.local_work_size(0, work_groups); 00535 ksum.global_work_size(0, 3 * work_groups); 00536 viennacl::ocl::enqueue(ksum(viennacl::traits::opencl_handle(temp), 00537 viennacl::ocl::local_mem(sizeof(typename viennacl::result_of::cl_type<T>::type) * 3 * ksum.local_work_size()), 00538 viennacl::traits::opencl_handle(result), 00539 cl_uint(viennacl::traits::start(result) + current_index * viennacl::traits::stride(result)), 00540 cl_uint(viennacl::traits::stride(result)) 00541 ) 00542 ); 00543 } 00544 current_index += 3; 00545 break; 00546 00547 case 2: 00548 { 00549 vector_base<T> const & y0 = vec_tuple.const_at(current_index ); 00550 vector_base<T> const & y1 = vec_tuple.const_at(current_index + 1); 00551 viennacl::ocl::enqueue(inner_prod_kernel_2( viennacl::traits::opencl_handle(x), layout_x, 00552 viennacl::traits::opencl_handle(y0), detail::make_layout(y0), 00553 viennacl::traits::opencl_handle(y1), detail::make_layout(y1), 00554 viennacl::ocl::local_mem(sizeof(typename viennacl::result_of::cl_type<T>::type) * 2 * inner_prod_kernel_2.local_work_size()), 00555 viennacl::traits::opencl_handle(temp) 00556 ) ); 00557 00558 ksum.local_work_size(0, work_groups); 00559 ksum.global_work_size(0, 2 * work_groups); 00560 viennacl::ocl::enqueue(ksum(viennacl::traits::opencl_handle(temp), 00561 viennacl::ocl::local_mem(sizeof(typename viennacl::result_of::cl_type<T>::type) * 2 * ksum.local_work_size()), 00562 viennacl::traits::opencl_handle(result), 00563 cl_uint(viennacl::traits::start(result) + current_index * viennacl::traits::stride(result)), 00564 cl_uint(viennacl::traits::stride(result)) 00565 ) 00566 ); 00567 } 00568 current_index += 2; 00569 break; 00570 00571 case 1: 00572 { 00573 vector_base<T> const & y0 = vec_tuple.const_at(current_index ); 00574 viennacl::ocl::enqueue(inner_prod_kernel_1( viennacl::traits::opencl_handle(x), layout_x, 00575 viennacl::traits::opencl_handle(y0), detail::make_layout(y0), 00576 viennacl::ocl::local_mem(sizeof(typename viennacl::result_of::cl_type<T>::type) * 1 * inner_prod_kernel_1.local_work_size()), 00577 viennacl::traits::opencl_handle(temp) 00578 ) ); 00579 00580 ksum.local_work_size(0, work_groups); 00581 ksum.global_work_size(0, 1 * work_groups); 00582 viennacl::ocl::enqueue(ksum(viennacl::traits::opencl_handle(temp), 00583 viennacl::ocl::local_mem(sizeof(typename viennacl::result_of::cl_type<T>::type) * 1 * ksum.local_work_size()), 00584 viennacl::traits::opencl_handle(result), 00585 cl_uint(viennacl::traits::start(result) + current_index * viennacl::traits::stride(result)), 00586 cl_uint(viennacl::traits::stride(result)) 00587 ) 00588 ); 00589 } 00590 current_index += 1; 00591 break; 00592 00593 default: //8 or more vectors 00594 { 00595 vector_base<T> const & y0 = vec_tuple.const_at(current_index ); 00596 vector_base<T> const & y1 = vec_tuple.const_at(current_index + 1); 00597 vector_base<T> const & y2 = vec_tuple.const_at(current_index + 2); 00598 vector_base<T> const & y3 = vec_tuple.const_at(current_index + 3); 00599 vector_base<T> const & y4 = vec_tuple.const_at(current_index + 4); 00600 vector_base<T> const & y5 = vec_tuple.const_at(current_index + 5); 00601 vector_base<T> const & y6 = vec_tuple.const_at(current_index + 6); 00602 vector_base<T> const & y7 = vec_tuple.const_at(current_index + 7); 00603 viennacl::ocl::enqueue(inner_prod_kernel_8( viennacl::traits::opencl_handle(x), layout_x, 00604 viennacl::traits::opencl_handle(y0), detail::make_layout(y0), 00605 viennacl::traits::opencl_handle(y1), detail::make_layout(y1), 00606 viennacl::traits::opencl_handle(y2), detail::make_layout(y2), 00607 viennacl::traits::opencl_handle(y3), detail::make_layout(y3), 00608 viennacl::traits::opencl_handle(y4), detail::make_layout(y4), 00609 viennacl::traits::opencl_handle(y5), detail::make_layout(y5), 00610 viennacl::traits::opencl_handle(y6), detail::make_layout(y6), 00611 viennacl::traits::opencl_handle(y7), detail::make_layout(y7), 00612 viennacl::ocl::local_mem(sizeof(typename viennacl::result_of::cl_type<T>::type) * 8 * inner_prod_kernel_8.local_work_size()), 00613 viennacl::traits::opencl_handle(temp) 00614 ) ); 00615 00616 ksum.local_work_size(0, work_groups); 00617 ksum.global_work_size(0, 8 * work_groups); 00618 viennacl::ocl::enqueue(ksum(viennacl::traits::opencl_handle(temp), 00619 viennacl::ocl::local_mem(sizeof(typename viennacl::result_of::cl_type<T>::type) * 8 * ksum.local_work_size()), 00620 viennacl::traits::opencl_handle(result), 00621 cl_uint(viennacl::traits::start(result) + current_index * viennacl::traits::stride(result)), 00622 cl_uint(viennacl::traits::stride(result)) 00623 ) 00624 ); 00625 } 00626 current_index += 8; 00627 break; 00628 } 00629 } 00630 00631 } 00632 00633 00634 //implementation of inner product: 00635 //namespace { 00642 template <typename T> 00643 void inner_prod_cpu(vector_base<T> const & vec1, 00644 vector_base<T> const & vec2, 00645 T & result) 00646 { 00647 assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Vectors do not reside in the same OpenCL context. Automatic migration not yet supported!")); 00648 00649 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(vec1).context()); 00650 00651 vcl_size_t work_groups = 128; 00652 viennacl::vector<T> temp(work_groups, viennacl::traits::context(vec1)); 00653 temp.resize(work_groups, ctx); // bring default-constructed vectors to the correct size: 00654 00655 // Step 1: Compute partial inner products for each work group: 00656 inner_prod_impl(vec1, vec2, temp); 00657 00658 // Step 2: Sum partial results: 00659 00660 // Now copy partial results from GPU back to CPU and run reduction there: 00661 std::vector<T> temp_cpu(work_groups); 00662 viennacl::fast_copy(temp.begin(), temp.end(), temp_cpu.begin()); 00663 00664 result = 0; 00665 for (typename std::vector<T>::const_iterator it = temp_cpu.begin(); it != temp_cpu.end(); ++it) 00666 result += *it; 00667 } 00668 00669 00671 00678 template <typename T> 00679 void norm_reduction_impl(vector_base<T> const & vec, 00680 vector_base<T> & partial_result, 00681 cl_uint norm_id) 00682 { 00683 assert(viennacl::traits::opencl_handle(vec).context() == viennacl::traits::opencl_handle(partial_result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!")); 00684 00685 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(vec).context()); 00686 viennacl::linalg::opencl::kernels::vector<T>::init(ctx); 00687 00688 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector<T>::program_name(), "norm"); 00689 00690 assert( (k.global_work_size() / k.local_work_size() <= partial_result.size()) && bool("Size mismatch for partial reduction in norm_reduction_impl()") ); 00691 00692 viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(vec), 00693 cl_uint(viennacl::traits::start(vec)), 00694 cl_uint(viennacl::traits::stride(vec)), 00695 cl_uint(viennacl::traits::size(vec)), 00696 cl_uint(norm_id), 00697 viennacl::ocl::local_mem(sizeof(typename viennacl::result_of::cl_type<T>::type) * k.local_work_size()), 00698 viennacl::traits::opencl_handle(partial_result) ) 00699 ); 00700 } 00701 00702 00704 00710 template <typename T> 00711 void norm_1_impl(vector_base<T> const & vec, 00712 scalar<T> & result) 00713 { 00714 assert(viennacl::traits::opencl_handle(vec).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!")); 00715 00716 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(vec).context()); 00717 00718 vcl_size_t work_groups = 128; 00719 viennacl::vector<T> temp(work_groups, viennacl::traits::context(vec)); 00720 00721 // Step 1: Compute the partial work group results 00722 norm_reduction_impl(vec, temp, 1); 00723 00724 // Step 2: Compute the partial reduction using OpenCL 00725 viennacl::ocl::kernel & ksum = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector<T>::program_name(), "sum"); 00726 00727 ksum.local_work_size(0, work_groups); 00728 ksum.global_work_size(0, work_groups); 00729 viennacl::ocl::enqueue(ksum(viennacl::traits::opencl_handle(temp), 00730 cl_uint(viennacl::traits::start(temp)), 00731 cl_uint(viennacl::traits::stride(temp)), 00732 cl_uint(viennacl::traits::size(temp)), 00733 cl_uint(1), 00734 viennacl::ocl::local_mem(sizeof(typename viennacl::result_of::cl_type<T>::type) * ksum.local_work_size()), 00735 result) 00736 ); 00737 } 00738 00744 template <typename T> 00745 void norm_1_cpu(vector_base<T> const & vec, 00746 T & result) 00747 { 00748 vcl_size_t work_groups = 128; 00749 viennacl::vector<T> temp(work_groups, viennacl::traits::context(vec)); 00750 00751 // Step 1: Compute the partial work group results 00752 norm_reduction_impl(vec, temp, 1); 00753 00754 // Step 2: Now copy partial results from GPU back to CPU and run reduction there: 00755 typedef std::vector<typename viennacl::result_of::cl_type<T>::type> CPUVectorType; 00756 00757 CPUVectorType temp_cpu(work_groups); 00758 viennacl::fast_copy(temp.begin(), temp.end(), temp_cpu.begin()); 00759 00760 result = 0; 00761 for (typename CPUVectorType::const_iterator it = temp_cpu.begin(); it != temp_cpu.end(); ++it) 00762 result += static_cast<T>(*it); 00763 } 00764 00765 00766 00768 00769 00775 template <typename T> 00776 void norm_2_impl(vector_base<T> const & vec, 00777 scalar<T> & result) 00778 { 00779 assert(viennacl::traits::opencl_handle(vec).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!")); 00780 00781 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(vec).context()); 00782 00783 vcl_size_t work_groups = 128; 00784 viennacl::vector<T> temp(work_groups, viennacl::traits::context(vec)); 00785 00786 // Step 1: Compute the partial work group results 00787 norm_reduction_impl(vec, temp, 2); 00788 00789 // Step 2: Reduction via OpenCL 00790 viennacl::ocl::kernel & ksum = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector<T>::program_name(), "sum"); 00791 00792 ksum.local_work_size(0, work_groups); 00793 ksum.global_work_size(0, work_groups); 00794 viennacl::ocl::enqueue( ksum(viennacl::traits::opencl_handle(temp), 00795 cl_uint(viennacl::traits::start(temp)), 00796 cl_uint(viennacl::traits::stride(temp)), 00797 cl_uint(viennacl::traits::size(temp)), 00798 cl_uint(2), 00799 viennacl::ocl::local_mem(sizeof(typename viennacl::result_of::cl_type<T>::type) * ksum.local_work_size()), 00800 result) 00801 ); 00802 } 00803 00809 template <typename T> 00810 void norm_2_cpu(vector_base<T> const & vec, 00811 T & result) 00812 { 00813 vcl_size_t work_groups = 128; 00814 viennacl::vector<T> temp(work_groups, viennacl::traits::context(vec)); 00815 00816 // Step 1: Compute the partial work group results 00817 norm_reduction_impl(vec, temp, 2); 00818 00819 // Step 2: Now copy partial results from GPU back to CPU and run reduction there: 00820 typedef std::vector<typename viennacl::result_of::cl_type<T>::type> CPUVectorType; 00821 00822 CPUVectorType temp_cpu(work_groups); 00823 viennacl::fast_copy(temp.begin(), temp.end(), temp_cpu.begin()); 00824 00825 result = 0; 00826 for (typename CPUVectorType::const_iterator it = temp_cpu.begin(); it != temp_cpu.end(); ++it) 00827 result += static_cast<T>(*it); 00828 result = std::sqrt(result); 00829 } 00830 00831 00832 00834 00840 template <typename T> 00841 void norm_inf_impl(vector_base<T> const & vec, 00842 scalar<T> & result) 00843 { 00844 assert(viennacl::traits::opencl_handle(vec).context() == viennacl::traits::opencl_handle(result).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!")); 00845 00846 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(vec).context()); 00847 00848 vcl_size_t work_groups = 128; 00849 viennacl::vector<T> temp(work_groups, viennacl::traits::context(vec)); 00850 00851 // Step 1: Compute the partial work group results 00852 norm_reduction_impl(vec, temp, 0); 00853 00854 //part 2: parallel reduction of reduced kernel: 00855 viennacl::ocl::kernel & ksum = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector<T>::program_name(), "sum"); 00856 ksum.local_work_size(0, work_groups); 00857 ksum.global_work_size(0, work_groups); 00858 00859 viennacl::ocl::enqueue( ksum(viennacl::traits::opencl_handle(temp), 00860 cl_uint(viennacl::traits::start(temp)), 00861 cl_uint(viennacl::traits::stride(temp)), 00862 cl_uint(viennacl::traits::size(temp)), 00863 cl_uint(0), 00864 viennacl::ocl::local_mem(sizeof(typename viennacl::result_of::cl_type<T>::type) * ksum.local_work_size()), 00865 result) 00866 ); 00867 } 00868 00874 template <typename T> 00875 void norm_inf_cpu(vector_base<T> const & vec, 00876 T & result) 00877 { 00878 vcl_size_t work_groups = 128; 00879 viennacl::vector<T> temp(work_groups, viennacl::traits::context(vec)); 00880 00881 // Step 1: Compute the partial work group results 00882 norm_reduction_impl(vec, temp, 0); 00883 00884 // Step 2: Now copy partial results from GPU back to CPU and run reduction there: 00885 typedef std::vector<typename viennacl::result_of::cl_type<T>::type> CPUVectorType; 00886 00887 CPUVectorType temp_cpu(work_groups); 00888 viennacl::fast_copy(temp.begin(), temp.end(), temp_cpu.begin()); 00889 00890 result = 0; 00891 for (typename CPUVectorType::const_iterator it = temp_cpu.begin(); it != temp_cpu.end(); ++it) 00892 result = std::max(result, static_cast<T>(*it)); 00893 } 00894 00895 00897 00898 //This function should return a CPU scalar, otherwise statements like 00899 // vcl_rhs[index_norm_inf(vcl_rhs)] 00900 // are ambiguous 00906 template <typename T> 00907 cl_uint index_norm_inf(vector_base<T> const & vec) 00908 { 00909 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(vec).context()); 00910 viennacl::linalg::opencl::kernels::vector<T>::init(ctx); 00911 00912 viennacl::ocl::handle<cl_mem> h = ctx.create_memory(CL_MEM_READ_WRITE, sizeof(cl_uint)); 00913 00914 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector<T>::program_name(), "index_norm_inf"); 00915 //cl_uint size = static_cast<cl_uint>(vcl_vec.internal_size()); 00916 00917 //TODO: Use multi-group kernel for large vector sizes 00918 00919 k.global_work_size(0, k.local_work_size()); 00920 viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(vec), 00921 cl_uint(viennacl::traits::start(vec)), 00922 cl_uint(viennacl::traits::stride(vec)), 00923 cl_uint(viennacl::traits::size(vec)), 00924 viennacl::ocl::local_mem(sizeof(typename viennacl::result_of::cl_type<T>::type) * k.local_work_size()), 00925 viennacl::ocl::local_mem(sizeof(cl_uint) * k.local_work_size()), h)); 00926 00927 //read value: 00928 cl_uint result; 00929 cl_int err = clEnqueueReadBuffer(ctx.get_queue().handle().get(), h.get(), CL_TRUE, 0, sizeof(cl_uint), &result, 0, NULL, NULL); 00930 VIENNACL_ERR_CHECK(err); 00931 return result; 00932 } 00933 00934 //TODO: Special case vec1 == vec2 allows improvement!! 00944 template <typename T> 00945 void plane_rotation(vector_base<T> & vec1, 00946 vector_base<T> & vec2, 00947 T alpha, T beta) 00948 { 00949 assert(viennacl::traits::opencl_handle(vec1).context() == viennacl::traits::opencl_handle(vec2).context() && bool("Operands do not reside in the same OpenCL context. Automatic migration not yet supported!")); 00950 00951 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(vec1).context()); 00952 viennacl::linalg::opencl::kernels::vector<T>::init(ctx); 00953 00954 assert(viennacl::traits::size(vec1) == viennacl::traits::size(vec2)); 00955 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::vector<T>::program_name(), "plane_rotation"); 00956 00957 viennacl::ocl::enqueue(k(viennacl::traits::opencl_handle(vec1), 00958 cl_uint(viennacl::traits::start(vec1)), 00959 cl_uint(viennacl::traits::stride(vec1)), 00960 cl_uint(viennacl::traits::size(vec1)), 00961 viennacl::traits::opencl_handle(vec2), 00962 cl_uint(viennacl::traits::start(vec2)), 00963 cl_uint(viennacl::traits::stride(vec2)), 00964 cl_uint(viennacl::traits::size(vec2)), 00965 viennacl::traits::opencl_handle(alpha), 00966 viennacl::traits::opencl_handle(beta)) 00967 ); 00968 } 00969 00970 } //namespace opencl 00971 } //namespace linalg 00972 } //namespace viennacl 00973 00974 00975 #endif