ViennaCL - The Vienna Computing Library
1.5.2
|
00001 #ifndef VIENNACL_LINALG_OPENCL_SPARSE_MATRIX_OPERATIONS_HPP_ 00002 #define VIENNACL_LINALG_OPENCL_SPARSE_MATRIX_OPERATIONS_HPP_ 00003 00004 /* ========================================================================= 00005 Copyright (c) 2010-2014, Institute for Microelectronics, 00006 Institute for Analysis and Scientific Computing, 00007 TU Wien. 00008 Portions of this software are copyright by UChicago Argonne, LLC. 00009 00010 ----------------- 00011 ViennaCL - The Vienna Computing Library 00012 ----------------- 00013 00014 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 00015 00016 (A list of authors and contributors can be found in the PDF manual) 00017 00018 License: MIT (X11), see file LICENSE in the base directory 00019 ============================================================================= */ 00020 00025 #include "viennacl/forwards.h" 00026 #include "viennacl/ocl/device.hpp" 00027 #include "viennacl/ocl/handle.hpp" 00028 #include "viennacl/ocl/kernel.hpp" 00029 #include "viennacl/scalar.hpp" 00030 #include "viennacl/vector.hpp" 00031 #include "viennacl/tools/tools.hpp" 00032 #include "viennacl/linalg/opencl/kernels/compressed_matrix.hpp" 00033 #include "viennacl/linalg/opencl/kernels/coordinate_matrix.hpp" 00034 #include "viennacl/linalg/opencl/kernels/ell_matrix.hpp" 00035 #include "viennacl/linalg/opencl/kernels/hyb_matrix.hpp" 00036 #include "viennacl/linalg/opencl/kernels/compressed_compressed_matrix.hpp" 00037 #include "viennacl/linalg/opencl/common.hpp" 00038 00039 namespace viennacl 00040 { 00041 namespace linalg 00042 { 00043 namespace opencl 00044 { 00045 00046 // 00047 // Compressed matrix 00048 // 00049 00050 namespace detail 00051 { 00052 template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT> 00053 void row_info(compressed_matrix<SCALARTYPE, MAT_ALIGNMENT> const & mat, 00054 vector_base<SCALARTYPE> & vec, 00055 viennacl::linalg::detail::row_info_types info_selector) 00056 { 00057 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context()); 00058 viennacl::linalg::opencl::kernels::compressed_matrix<SCALARTYPE>::init(ctx); 00059 viennacl::ocl::kernel & row_info_kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::compressed_matrix<SCALARTYPE>::program_name(), "row_info_extractor"); 00060 00061 viennacl::ocl::enqueue(row_info_kernel(mat.handle1().opencl_handle(), mat.handle2().opencl_handle(), mat.handle().opencl_handle(), 00062 viennacl::traits::opencl_handle(vec), 00063 cl_uint(mat.size1()), 00064 cl_uint(info_selector) 00065 ) 00066 ); 00067 } 00068 } 00069 00078 template<class TYPE, unsigned int ALIGNMENT> 00079 void prod_impl(const viennacl::compressed_matrix<TYPE, ALIGNMENT> & mat, 00080 const viennacl::vector_base<TYPE> & vec, 00081 viennacl::vector_base<TYPE> & result) 00082 { 00083 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context()); 00084 viennacl::linalg::opencl::kernels::compressed_matrix<TYPE>::init(ctx); 00085 std::stringstream ss; 00086 ss << "vec_mul"; 00087 if (ALIGNMENT == 4) 00088 ss << "4"; 00089 if (ALIGNMENT == 8) 00090 ss << "8"; 00091 00092 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::compressed_matrix<TYPE>::program_name(), ss.str()); 00093 00094 viennacl::ocl::packed_cl_uint layout_vec; 00095 layout_vec.start = cl_uint(viennacl::traits::start(vec)); 00096 layout_vec.stride = cl_uint(viennacl::traits::stride(vec)); 00097 layout_vec.size = cl_uint(viennacl::traits::size(vec)); 00098 layout_vec.internal_size = cl_uint(viennacl::traits::internal_size(vec)); 00099 00100 viennacl::ocl::packed_cl_uint layout_result; 00101 layout_result.start = cl_uint(viennacl::traits::start(result)); 00102 layout_result.stride = cl_uint(viennacl::traits::stride(result)); 00103 layout_result.size = cl_uint(viennacl::traits::size(result)); 00104 layout_result.internal_size = cl_uint(viennacl::traits::internal_size(result)); 00105 00106 viennacl::ocl::enqueue(k(mat.handle1().opencl_handle(), mat.handle2().opencl_handle(), mat.handle().opencl_handle(), 00107 vec, layout_vec, 00108 result, layout_result 00109 )); 00110 } 00111 00112 00121 template< typename TYPE, unsigned int ALIGNMENT, typename F1, typename F2> 00122 void prod_impl(const viennacl::compressed_matrix<TYPE, ALIGNMENT> & sp_mat, 00123 const viennacl::matrix_base<TYPE, F1> & d_mat, 00124 viennacl::matrix_base<TYPE, F2> & result) { 00125 00126 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(sp_mat).context()); 00127 viennacl::linalg::opencl::kernels::compressed_matrix<TYPE>::init(ctx); 00128 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::compressed_matrix<TYPE>::program_name(), 00129 detail::sparse_dense_matmult_kernel_name(false, is_row_major<F1>::value, is_row_major<F2>::value)); 00130 00131 viennacl::ocl::enqueue(k(sp_mat.handle1().opencl_handle(), sp_mat.handle2().opencl_handle(), sp_mat.handle().opencl_handle(), 00132 viennacl::traits::opencl_handle(d_mat), 00133 cl_uint(viennacl::traits::start1(d_mat)), cl_uint(viennacl::traits::start2(d_mat)), 00134 cl_uint(viennacl::traits::stride1(d_mat)), cl_uint(viennacl::traits::stride2(d_mat)), 00135 cl_uint(viennacl::traits::size1(d_mat)), cl_uint(viennacl::traits::size2(d_mat)), 00136 cl_uint(viennacl::traits::internal_size1(d_mat)), cl_uint(viennacl::traits::internal_size2(d_mat)), 00137 viennacl::traits::opencl_handle(result), 00138 cl_uint(viennacl::traits::start1(result)), cl_uint(viennacl::traits::start2(result)), 00139 cl_uint(viennacl::traits::stride1(result)), cl_uint(viennacl::traits::stride2(result)), 00140 cl_uint(viennacl::traits::size1(result)), cl_uint(viennacl::traits::size2(result)), 00141 cl_uint(viennacl::traits::internal_size1(result)), cl_uint(viennacl::traits::internal_size2(result)) )); 00142 } 00143 00153 template< typename TYPE, unsigned int ALIGNMENT, typename F1, typename F2> 00154 void prod_impl(const viennacl::compressed_matrix<TYPE, ALIGNMENT> & sp_mat, 00155 const viennacl::matrix_expression< const viennacl::matrix_base<TYPE, F1>, 00156 const viennacl::matrix_base<TYPE, F1>, 00157 viennacl::op_trans > & d_mat, 00158 viennacl::matrix_base<TYPE, F2> & result) { 00159 00160 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(sp_mat).context()); 00161 viennacl::linalg::opencl::kernels::compressed_matrix<TYPE>::init(ctx); 00162 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::compressed_matrix<TYPE>::program_name(), 00163 detail::sparse_dense_matmult_kernel_name(true, is_row_major<F1>::value, is_row_major<F2>::value)); 00164 00165 viennacl::ocl::enqueue(k(sp_mat.handle1().opencl_handle(), sp_mat.handle2().opencl_handle(), sp_mat.handle().opencl_handle(), 00166 viennacl::traits::opencl_handle(d_mat.lhs()), 00167 cl_uint(viennacl::traits::start1(d_mat.lhs())), cl_uint(viennacl::traits::start2(d_mat.lhs())), 00168 cl_uint(viennacl::traits::stride1(d_mat.lhs())), cl_uint(viennacl::traits::stride2(d_mat.lhs())), 00169 cl_uint(viennacl::traits::size1(d_mat.lhs())), cl_uint(viennacl::traits::size2(d_mat.lhs())), 00170 cl_uint(viennacl::traits::internal_size1(d_mat.lhs())), cl_uint(viennacl::traits::internal_size2(d_mat.lhs())), 00171 viennacl::traits::opencl_handle(result), 00172 cl_uint(viennacl::traits::start1(result)), cl_uint(viennacl::traits::start2(result)), 00173 cl_uint(viennacl::traits::stride1(result)), cl_uint(viennacl::traits::stride2(result)), 00174 cl_uint(viennacl::traits::size1(result)), cl_uint(viennacl::traits::size2(result)), 00175 cl_uint(viennacl::traits::internal_size1(result)), cl_uint(viennacl::traits::internal_size2(result)) ) ); 00176 } 00177 00178 00179 00180 // triangular solvers 00181 00187 template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT> 00188 void inplace_solve(compressed_matrix<SCALARTYPE, MAT_ALIGNMENT> const & L, 00189 vector_base<SCALARTYPE> & vec, 00190 viennacl::linalg::unit_lower_tag) 00191 { 00192 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(L).context()); 00193 viennacl::linalg::opencl::kernels::compressed_matrix<SCALARTYPE>::init(ctx); 00194 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::compressed_matrix<SCALARTYPE>::program_name(), "unit_lu_forward"); 00195 00196 k.local_work_size(0, 128); 00197 k.global_work_size(0, k.local_work_size()); 00198 viennacl::ocl::enqueue(k(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), 00199 viennacl::traits::opencl_handle(vec), 00200 cl_uint(L.size1()) 00201 ) 00202 ); 00203 } 00204 00210 template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT> 00211 void inplace_solve(compressed_matrix<SCALARTYPE, MAT_ALIGNMENT> const & L, 00212 vector_base<SCALARTYPE> & vec, 00213 viennacl::linalg::lower_tag) 00214 { 00215 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(L).context()); 00216 viennacl::linalg::opencl::kernels::compressed_matrix<SCALARTYPE>::init(ctx); 00217 00218 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::compressed_matrix<SCALARTYPE>::program_name(), "lu_forward"); 00219 00220 k.local_work_size(0, 128); 00221 k.global_work_size(0, k.local_work_size()); 00222 viennacl::ocl::enqueue(k(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(), 00223 viennacl::traits::opencl_handle(vec), 00224 cl_uint(L.size1()) 00225 ) 00226 ); 00227 } 00228 00229 00235 template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT> 00236 void inplace_solve(compressed_matrix<SCALARTYPE, MAT_ALIGNMENT> const & U, 00237 vector_base<SCALARTYPE> & vec, 00238 viennacl::linalg::unit_upper_tag) 00239 { 00240 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(U).context()); 00241 viennacl::linalg::opencl::kernels::compressed_matrix<SCALARTYPE>::init(ctx); 00242 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::compressed_matrix<SCALARTYPE>::program_name(), "unit_lu_backward"); 00243 00244 k.local_work_size(0, 128); 00245 k.global_work_size(0, k.local_work_size()); 00246 viennacl::ocl::enqueue(k(U.handle1().opencl_handle(), U.handle2().opencl_handle(), U.handle().opencl_handle(), 00247 viennacl::traits::opencl_handle(vec), 00248 cl_uint(U.size1()) 00249 ) 00250 ); 00251 } 00252 00258 template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT> 00259 void inplace_solve(compressed_matrix<SCALARTYPE, MAT_ALIGNMENT> const & U, 00260 vector_base<SCALARTYPE> & vec, 00261 viennacl::linalg::upper_tag) 00262 { 00263 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(U).context()); 00264 viennacl::linalg::opencl::kernels::compressed_matrix<SCALARTYPE>::init(ctx); 00265 00266 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::compressed_matrix<SCALARTYPE>::program_name(), "lu_backward"); 00267 00268 k.local_work_size(0, 128); 00269 k.global_work_size(0, k.local_work_size()); 00270 viennacl::ocl::enqueue(k(U.handle1().opencl_handle(), U.handle2().opencl_handle(), U.handle().opencl_handle(), 00271 viennacl::traits::opencl_handle(vec), 00272 cl_uint(U.size1()) 00273 ) 00274 ); 00275 } 00276 00277 00278 00279 00280 00281 // transposed triangular solvers 00282 00283 namespace detail 00284 { 00285 // 00286 // block solves 00287 // 00288 template<typename ScalarType, unsigned int MAT_ALIGNMENT> 00289 void block_inplace_solve(const matrix_expression<const compressed_matrix<ScalarType, MAT_ALIGNMENT>, 00290 const compressed_matrix<ScalarType, MAT_ALIGNMENT>, 00291 op_trans> & L, 00292 viennacl::backend::mem_handle const & block_indices, vcl_size_t num_blocks, 00293 vector_base<ScalarType> const & /* L_diagonal */, //ignored 00294 vector_base<ScalarType> & vec, 00295 viennacl::linalg::unit_lower_tag) 00296 { 00297 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(L.lhs()).context()); 00298 viennacl::linalg::opencl::kernels::compressed_matrix<ScalarType>::init(ctx); 00299 viennacl::ocl::kernel & block_solve_kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::compressed_matrix<ScalarType>::program_name(), "block_trans_unit_lu_forward"); 00300 block_solve_kernel.global_work_size(0, num_blocks * block_solve_kernel.local_work_size(0)); 00301 00302 viennacl::ocl::enqueue(block_solve_kernel(L.lhs().handle1().opencl_handle(), 00303 L.lhs().handle2().opencl_handle(), 00304 L.lhs().handle().opencl_handle(), 00305 block_indices.opencl_handle(), 00306 vec, 00307 static_cast<cl_uint>(vec.size()))); 00308 } 00309 00310 00311 template<typename ScalarType, unsigned int MAT_ALIGNMENT> 00312 void block_inplace_solve(const matrix_expression<const compressed_matrix<ScalarType, MAT_ALIGNMENT>, 00313 const compressed_matrix<ScalarType, MAT_ALIGNMENT>, 00314 op_trans> & U, 00315 viennacl::backend::mem_handle const & block_indices, vcl_size_t num_blocks, 00316 vector_base<ScalarType> const & U_diagonal, 00317 vector_base<ScalarType> & vec, 00318 viennacl::linalg::upper_tag) 00319 { 00320 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(U.lhs()).context()); 00321 viennacl::linalg::opencl::kernels::compressed_matrix<ScalarType>::init(ctx); 00322 viennacl::ocl::kernel & block_solve_kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::compressed_matrix<ScalarType>::program_name(), "block_trans_lu_backward"); 00323 block_solve_kernel.global_work_size(0, num_blocks * block_solve_kernel.local_work_size(0)); 00324 00325 viennacl::ocl::enqueue(block_solve_kernel(U.lhs().handle1().opencl_handle(), 00326 U.lhs().handle2().opencl_handle(), 00327 U.lhs().handle().opencl_handle(), 00328 U_diagonal, 00329 block_indices.opencl_handle(), 00330 vec, 00331 static_cast<cl_uint>(vec.size()))); 00332 } 00333 00334 00335 } 00336 00337 00343 template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT> 00344 void inplace_solve(matrix_expression< const compressed_matrix<SCALARTYPE, MAT_ALIGNMENT>, 00345 const compressed_matrix<SCALARTYPE, MAT_ALIGNMENT>, 00346 op_trans> const & proxy_L, 00347 vector_base<SCALARTYPE> & vec, 00348 viennacl::linalg::unit_lower_tag) 00349 { 00350 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(proxy_L.lhs()).context()); 00351 viennacl::linalg::opencl::kernels::compressed_matrix<SCALARTYPE>::init(ctx); 00352 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::compressed_matrix<SCALARTYPE>::program_name(), "trans_unit_lu_forward"); 00353 00354 k.local_work_size(0, 128); 00355 k.global_work_size(0, k.local_work_size()); 00356 viennacl::ocl::enqueue(k(proxy_L.lhs().handle1().opencl_handle(), proxy_L.lhs().handle2().opencl_handle(), proxy_L.lhs().handle().opencl_handle(), 00357 viennacl::traits::opencl_handle(vec), 00358 cl_uint(proxy_L.lhs().size1()) 00359 ) 00360 ); 00361 } 00362 00363 00369 template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT> 00370 void inplace_solve(matrix_expression< const compressed_matrix<SCALARTYPE, MAT_ALIGNMENT>, 00371 const compressed_matrix<SCALARTYPE, MAT_ALIGNMENT>, 00372 op_trans> const & proxy_L, 00373 vector_base<SCALARTYPE> & vec, 00374 viennacl::linalg::lower_tag) 00375 { 00376 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(proxy_L.lhs()).context()); 00377 viennacl::linalg::opencl::kernels::compressed_matrix<SCALARTYPE>::init(ctx); 00378 00379 viennacl::vector<SCALARTYPE> diagonal(vec.size()); 00380 detail::row_info(proxy_L.lhs(), diagonal, viennacl::linalg::detail::SPARSE_ROW_DIAGONAL); 00381 00382 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::compressed_matrix<SCALARTYPE>::program_name(), "trans_lu_forward"); 00383 00384 k.local_work_size(0, 128); 00385 k.global_work_size(0, k.local_work_size()); 00386 viennacl::ocl::enqueue(k(proxy_L.lhs().handle1().opencl_handle(), proxy_L.lhs().handle2().opencl_handle(), proxy_L.lhs().handle().opencl_handle(), 00387 viennacl::traits::opencl_handle(diagonal), 00388 viennacl::traits::opencl_handle(vec), 00389 cl_uint(proxy_L.lhs().size1()) 00390 ) 00391 ); 00392 } 00393 00399 template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT> 00400 void inplace_solve(matrix_expression< const compressed_matrix<SCALARTYPE, MAT_ALIGNMENT>, 00401 const compressed_matrix<SCALARTYPE, MAT_ALIGNMENT>, 00402 op_trans> const & proxy_U, 00403 vector_base<SCALARTYPE> & vec, 00404 viennacl::linalg::unit_upper_tag) 00405 { 00406 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(proxy_U.lhs()).context()); 00407 viennacl::linalg::opencl::kernels::compressed_matrix<SCALARTYPE>::init(ctx); 00408 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::compressed_matrix<SCALARTYPE>::program_name(), "trans_unit_lu_backward"); 00409 00410 k.local_work_size(0, 128); 00411 k.global_work_size(0, k.local_work_size()); 00412 viennacl::ocl::enqueue(k(proxy_U.lhs().handle1().opencl_handle(), proxy_U.lhs().handle2().opencl_handle(), proxy_U.lhs().handle().opencl_handle(), 00413 viennacl::traits::opencl_handle(vec), 00414 cl_uint(proxy_U.lhs().size1()) 00415 ) 00416 ); 00417 } 00418 00419 00425 template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT> 00426 void inplace_solve(matrix_expression< const compressed_matrix<SCALARTYPE, MAT_ALIGNMENT>, 00427 const compressed_matrix<SCALARTYPE, MAT_ALIGNMENT>, 00428 op_trans> const & proxy_U, 00429 vector_base<SCALARTYPE> & vec, 00430 viennacl::linalg::upper_tag) 00431 { 00432 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(proxy_U.lhs()).context()); 00433 viennacl::linalg::opencl::kernels::compressed_matrix<SCALARTYPE>::init(ctx); 00434 00435 viennacl::vector<SCALARTYPE> diagonal(vec.size()); 00436 detail::row_info(proxy_U.lhs(), diagonal, viennacl::linalg::detail::SPARSE_ROW_DIAGONAL); 00437 00438 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::compressed_matrix<SCALARTYPE>::program_name(), "trans_lu_backward"); 00439 00440 k.local_work_size(0, 128); 00441 k.global_work_size(0, k.local_work_size()); 00442 viennacl::ocl::enqueue(k(proxy_U.lhs().handle1().opencl_handle(), proxy_U.lhs().handle2().opencl_handle(), proxy_U.lhs().handle().opencl_handle(), 00443 viennacl::traits::opencl_handle(diagonal), 00444 viennacl::traits::opencl_handle(vec), 00445 cl_uint(proxy_U.lhs().size1()) 00446 ) 00447 ); 00448 } 00449 00450 00451 // 00452 // Compressed Compressed matrix 00453 // 00454 00463 template<class TYPE> 00464 void prod_impl(const viennacl::compressed_compressed_matrix<TYPE> & mat, 00465 const viennacl::vector_base<TYPE> & vec, 00466 viennacl::vector_base<TYPE> & result) 00467 { 00468 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context()); 00469 viennacl::linalg::opencl::kernels::compressed_compressed_matrix<TYPE>::init(ctx); 00470 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::compressed_compressed_matrix<TYPE>::program_name(), "vec_mul"); 00471 00472 result.clear(); 00473 00474 viennacl::ocl::packed_cl_uint layout_vec; 00475 layout_vec.start = cl_uint(viennacl::traits::start(vec)); 00476 layout_vec.stride = cl_uint(viennacl::traits::stride(vec)); 00477 layout_vec.size = cl_uint(viennacl::traits::size(vec)); 00478 layout_vec.internal_size = cl_uint(viennacl::traits::internal_size(vec)); 00479 00480 viennacl::ocl::packed_cl_uint layout_result; 00481 layout_result.start = cl_uint(viennacl::traits::start(result)); 00482 layout_result.stride = cl_uint(viennacl::traits::stride(result)); 00483 layout_result.size = cl_uint(viennacl::traits::size(result)); 00484 layout_result.internal_size = cl_uint(viennacl::traits::internal_size(result)); 00485 00486 viennacl::ocl::enqueue(k(mat.handle1().opencl_handle(), mat.handle3().opencl_handle(), mat.handle2().opencl_handle(), mat.handle().opencl_handle(), cl_uint(mat.nnz1()), 00487 vec, layout_vec, 00488 result, layout_result 00489 )); 00490 } 00491 00492 00493 // 00494 // Coordinate matrix 00495 // 00496 00497 namespace detail 00498 { 00499 template<typename SCALARTYPE, unsigned int MAT_ALIGNMENT> 00500 void row_info(coordinate_matrix<SCALARTYPE, MAT_ALIGNMENT> const & mat, 00501 vector_base<SCALARTYPE> & vec, 00502 viennacl::linalg::detail::row_info_types info_selector) 00503 { 00504 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context()); 00505 viennacl::linalg::opencl::kernels::coordinate_matrix<SCALARTYPE>::init(ctx); 00506 viennacl::ocl::kernel & row_info_kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::coordinate_matrix<SCALARTYPE>::program_name(), "row_info_extractor"); 00507 unsigned int thread_num = 256; //k.local_work_size(0); 00508 00509 row_info_kernel.local_work_size(0, thread_num); 00510 00511 row_info_kernel.global_work_size(0, 64 * thread_num); //64 work groups are hard-coded for now. Gives reasonable performance in most cases 00512 viennacl::ocl::enqueue(row_info_kernel(mat.handle12().opencl_handle(), mat.handle().opencl_handle(), mat.handle3().opencl_handle(), 00513 viennacl::traits::opencl_handle(vec), 00514 cl_uint(info_selector), 00515 viennacl::ocl::local_mem(sizeof(cl_uint)*thread_num), 00516 viennacl::ocl::local_mem(sizeof(SCALARTYPE)*thread_num)) ); 00517 } 00518 } 00519 00528 template<class SCALARTYPE, unsigned int ALIGNMENT> 00529 void prod_impl(const viennacl::coordinate_matrix<SCALARTYPE, ALIGNMENT> & mat, 00530 const viennacl::vector_base<SCALARTYPE> & vec, 00531 viennacl::vector_base<SCALARTYPE> & result) 00532 { 00533 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context()); 00534 viennacl::linalg::opencl::kernels::coordinate_matrix<SCALARTYPE>::init(ctx); 00535 00536 result.clear(); 00537 00538 viennacl::ocl::packed_cl_uint layout_vec; 00539 layout_vec.start = cl_uint(viennacl::traits::start(vec)); 00540 layout_vec.stride = cl_uint(viennacl::traits::stride(vec)); 00541 layout_vec.size = cl_uint(viennacl::traits::size(vec)); 00542 layout_vec.internal_size = cl_uint(viennacl::traits::internal_size(vec)); 00543 00544 viennacl::ocl::packed_cl_uint layout_result; 00545 layout_result.start = cl_uint(viennacl::traits::start(result)); 00546 layout_result.stride = cl_uint(viennacl::traits::stride(result)); 00547 layout_result.size = cl_uint(viennacl::traits::size(result)); 00548 layout_result.internal_size = cl_uint(viennacl::traits::internal_size(result)); 00549 00550 //std::cout << "prod(coordinate_matrix" << ALIGNMENT << ", vector) called with internal_nnz=" << mat.internal_nnz() << std::endl; 00551 00552 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::coordinate_matrix<SCALARTYPE>::program_name(), "vec_mul"); 00553 unsigned int thread_num = 256; //k.local_work_size(0); 00554 00555 k.local_work_size(0, thread_num); 00556 00557 k.global_work_size(0, 64 * thread_num); //64 work groups are hard-coded for now. Gives reasonable performance in most cases 00558 //k.global_work_size(0, thread_num); //Only one work group 00559 viennacl::ocl::enqueue(k(mat.handle12().opencl_handle(), mat.handle().opencl_handle(), mat.handle3().opencl_handle(), 00560 viennacl::traits::opencl_handle(vec), 00561 layout_vec, 00562 viennacl::traits::opencl_handle(result), 00563 layout_result, 00564 viennacl::ocl::local_mem(sizeof(cl_uint)*thread_num), 00565 viennacl::ocl::local_mem(sizeof(SCALARTYPE)*thread_num)) ); 00566 00567 } 00568 00569 00578 template<typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2> 00579 void prod_impl(const viennacl::coordinate_matrix<NumericT, ALIGNMENT> & mat, 00580 const viennacl::matrix_base<NumericT, F1> & d_mat, 00581 viennacl::matrix_base<NumericT, F2> & result) 00582 { 00583 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context()); 00584 viennacl::linalg::opencl::kernels::coordinate_matrix<NumericT>::init(ctx); 00585 00586 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::coordinate_matrix<NumericT>::program_name(), 00587 detail::sparse_dense_matmult_kernel_name(false, is_row_major<F1>::value, is_row_major<F2>::value)); 00588 00589 result.clear(); 00590 00591 unsigned int thread_num = 256; //k.local_work_size(0); 00592 k.local_work_size(0, thread_num); 00593 k.global_work_size(0, 64 * thread_num); //64 work groups are hard-coded for now. Gives reasonable performance in most cases 00594 00595 viennacl::ocl::enqueue(k(mat.handle12().opencl_handle(), mat.handle().opencl_handle(), mat.handle3().opencl_handle(), 00596 viennacl::traits::opencl_handle(d_mat), 00597 cl_uint(viennacl::traits::start1(d_mat)), cl_uint(viennacl::traits::start2(d_mat)), 00598 cl_uint(viennacl::traits::stride1(d_mat)), cl_uint(viennacl::traits::stride2(d_mat)), 00599 cl_uint(viennacl::traits::size1(d_mat)), cl_uint(viennacl::traits::size2(d_mat)), 00600 cl_uint(viennacl::traits::internal_size1(d_mat)), cl_uint(viennacl::traits::internal_size2(d_mat)), 00601 viennacl::traits::opencl_handle(result), 00602 cl_uint(viennacl::traits::start1(result)), cl_uint(viennacl::traits::start2(result)), 00603 cl_uint(viennacl::traits::stride1(result)), cl_uint(viennacl::traits::stride2(result)), 00604 cl_uint(viennacl::traits::size1(result)), cl_uint(viennacl::traits::size2(result)), 00605 cl_uint(viennacl::traits::internal_size1(result)), cl_uint(viennacl::traits::internal_size2(result)), 00606 viennacl::ocl::local_mem(sizeof(cl_uint)*k.local_work_size(0)), 00607 viennacl::ocl::local_mem(sizeof(NumericT)*k.local_work_size(0))) ); 00608 00609 } 00610 00619 template<typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2> 00620 void prod_impl(const viennacl::coordinate_matrix<NumericT, ALIGNMENT> & mat, 00621 const viennacl::matrix_expression< const viennacl::matrix_base<NumericT, F1>, 00622 const viennacl::matrix_base<NumericT, F1>, 00623 viennacl::op_trans > & d_mat, 00624 viennacl::matrix_base<NumericT, F2> & result) 00625 { 00626 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context()); 00627 viennacl::linalg::opencl::kernels::coordinate_matrix<NumericT>::init(ctx); 00628 00629 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::coordinate_matrix<NumericT>::program_name(), 00630 detail::sparse_dense_matmult_kernel_name(true, is_row_major<F1>::value, is_row_major<F2>::value)); 00631 00632 result.clear(); 00633 00634 unsigned int thread_num = 256; //k.local_work_size(0); 00635 k.local_work_size(0, thread_num); 00636 k.global_work_size(0, 64 * thread_num); //64 work groups are hard-coded for now. Gives reasonable performance in most cases 00637 00638 viennacl::ocl::enqueue(k(mat.handle12().opencl_handle(), mat.handle().opencl_handle(), mat.handle3().opencl_handle(), 00639 viennacl::traits::opencl_handle(d_mat), 00640 cl_uint(viennacl::traits::start1(d_mat.lhs())), cl_uint(viennacl::traits::start2(d_mat.lhs())), 00641 cl_uint(viennacl::traits::stride1(d_mat.lhs())), cl_uint(viennacl::traits::stride2(d_mat.lhs())), 00642 cl_uint(viennacl::traits::size1(d_mat.lhs())), cl_uint(viennacl::traits::size2(d_mat.lhs())), 00643 cl_uint(viennacl::traits::internal_size1(d_mat.lhs())), cl_uint(viennacl::traits::internal_size2(d_mat.lhs())), 00644 viennacl::traits::opencl_handle(result), 00645 cl_uint(viennacl::traits::start1(result)), cl_uint(viennacl::traits::start2(result)), 00646 cl_uint(viennacl::traits::stride1(result)), cl_uint(viennacl::traits::stride2(result)), 00647 cl_uint(viennacl::traits::size1(result)), cl_uint(viennacl::traits::size2(result)), 00648 cl_uint(viennacl::traits::internal_size1(result)), cl_uint(viennacl::traits::internal_size2(result)), 00649 viennacl::ocl::local_mem(sizeof(cl_uint)*k.local_work_size(0)), 00650 viennacl::ocl::local_mem(sizeof(NumericT)*k.local_work_size(0))) ); 00651 00652 } 00653 00654 00655 // 00656 // ELL Matrix 00657 // 00658 00659 template<class TYPE, unsigned int ALIGNMENT> 00660 void prod_impl( const viennacl::ell_matrix<TYPE, ALIGNMENT> & mat, 00661 const viennacl::vector_base<TYPE> & vec, 00662 viennacl::vector_base<TYPE> & result) 00663 { 00664 assert(mat.size1() == result.size()); 00665 assert(mat.size2() == vec.size()); 00666 00667 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context()); 00668 viennacl::linalg::opencl::kernels::ell_matrix<TYPE>::init(ctx); 00669 result.clear(); 00670 00671 viennacl::ocl::packed_cl_uint layout_vec; 00672 layout_vec.start = cl_uint(viennacl::traits::start(vec)); 00673 layout_vec.stride = cl_uint(viennacl::traits::stride(vec)); 00674 layout_vec.size = cl_uint(viennacl::traits::size(vec)); 00675 layout_vec.internal_size = cl_uint(viennacl::traits::internal_size(vec)); 00676 00677 viennacl::ocl::packed_cl_uint layout_result; 00678 layout_result.start = cl_uint(viennacl::traits::start(result)); 00679 layout_result.stride = cl_uint(viennacl::traits::stride(result)); 00680 layout_result.size = cl_uint(viennacl::traits::size(result)); 00681 layout_result.internal_size = cl_uint(viennacl::traits::internal_size(result)); 00682 00683 std::stringstream ss; 00684 ss << "vec_mul_" << 1;//(ALIGNMENT != 1?4:1); 00685 viennacl::ocl::kernel& k = ctx.get_kernel(viennacl::linalg::opencl::kernels::ell_matrix<TYPE>::program_name(), "vec_mul"); 00686 00687 unsigned int thread_num = 128; 00688 unsigned int group_num = 256; 00689 00690 k.local_work_size(0, thread_num); 00691 k.global_work_size(0, thread_num * group_num); 00692 00693 viennacl::ocl::enqueue(k(mat.handle2().opencl_handle(), 00694 mat.handle().opencl_handle(), 00695 viennacl::traits::opencl_handle(vec), 00696 layout_vec, 00697 viennacl::traits::opencl_handle(result), 00698 layout_result, 00699 cl_uint(mat.size1()), 00700 cl_uint(mat.size2()), 00701 cl_uint(mat.internal_size1()), 00702 cl_uint(mat.maxnnz()), 00703 cl_uint(mat.internal_maxnnz()) 00704 ) 00705 ); 00706 00707 00708 } 00709 00719 template<class ScalarType, unsigned int ALIGNMENT, class NumericT, typename F1, typename F2 > 00720 void prod_impl(const viennacl::ell_matrix<ScalarType, ALIGNMENT> & sp_mat, 00721 const viennacl::matrix_base<NumericT, F1> & d_mat, 00722 viennacl::matrix_base<NumericT, F2> & result) { 00723 00724 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(sp_mat).context()); 00725 viennacl::linalg::opencl::kernels::ell_matrix<ScalarType>::init(ctx); 00726 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::ell_matrix<ScalarType>::program_name(), 00727 detail::sparse_dense_matmult_kernel_name(false, is_row_major<F1>::value, is_row_major<F2>::value)); 00728 00729 //unsigned int thread_num = 128; 00730 //unsigned int group_num = 256; 00731 // 00732 //k.local_work_size(0, thread_num); 00733 //k.global_work_size(0, thread_num * group_num); 00734 00735 viennacl::ocl::enqueue(k(sp_mat.handle2().opencl_handle(), sp_mat.handle().opencl_handle(), 00736 cl_uint(sp_mat.size1()), 00737 cl_uint(sp_mat.size2()), 00738 cl_uint(sp_mat.internal_size1()), 00739 cl_uint(sp_mat.maxnnz()), 00740 cl_uint(sp_mat.internal_maxnnz()), 00741 viennacl::traits::opencl_handle(d_mat), 00742 cl_uint(viennacl::traits::start1(d_mat)), cl_uint(viennacl::traits::start2(d_mat)), 00743 cl_uint(viennacl::traits::stride1(d_mat)), cl_uint(viennacl::traits::stride2(d_mat)), 00744 cl_uint(viennacl::traits::size1(d_mat)), cl_uint(viennacl::traits::size2(d_mat)), 00745 cl_uint(viennacl::traits::internal_size1(d_mat)), cl_uint(viennacl::traits::internal_size2(d_mat)), 00746 viennacl::traits::opencl_handle(result), 00747 cl_uint(viennacl::traits::start1(result)), cl_uint(viennacl::traits::start2(result)), 00748 cl_uint(viennacl::traits::stride1(result)), cl_uint(viennacl::traits::stride2(result)), 00749 cl_uint(viennacl::traits::size1(result)), cl_uint(viennacl::traits::size2(result)), 00750 cl_uint(viennacl::traits::internal_size1(result)), cl_uint(viennacl::traits::internal_size2(result)) 00751 ) 00752 ); 00753 } 00754 00764 template<class ScalarType, unsigned int ALIGNMENT, class NumericT, typename F1, typename F2> 00765 void prod_impl(const viennacl::ell_matrix<ScalarType, ALIGNMENT> & sp_mat, 00766 const viennacl::matrix_expression< const viennacl::matrix_base<NumericT, F1>, 00767 const viennacl::matrix_base<NumericT, F1>, 00768 viennacl::op_trans > & d_mat, 00769 viennacl::matrix_base<NumericT, F2> & result) { 00770 00771 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(sp_mat).context()); 00772 viennacl::linalg::opencl::kernels::ell_matrix<ScalarType>::init(ctx); 00773 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::ell_matrix<ScalarType>::program_name(), 00774 detail::sparse_dense_matmult_kernel_name(true, is_row_major<F1>::value, is_row_major<F2>::value)); 00775 00776 //unsigned int thread_num = 128; 00777 //unsigned int group_num = 256; 00778 // 00779 //k.local_work_size(0, thread_num); 00780 //k.global_work_size(0, thread_num * group_num); 00781 00782 viennacl::ocl::enqueue(k(sp_mat.handle2().opencl_handle(), sp_mat.handle().opencl_handle(), 00783 cl_uint(sp_mat.size1()), 00784 cl_uint(sp_mat.size2()), 00785 cl_uint(sp_mat.internal_size1()), 00786 cl_uint(sp_mat.maxnnz()), 00787 cl_uint(sp_mat.internal_maxnnz()), 00788 viennacl::traits::opencl_handle(d_mat.lhs()), 00789 cl_uint(viennacl::traits::start1(d_mat.lhs())), cl_uint(viennacl::traits::start2(d_mat.lhs())), 00790 cl_uint(viennacl::traits::stride1(d_mat.lhs())), cl_uint(viennacl::traits::stride2(d_mat.lhs())), 00791 cl_uint(viennacl::traits::size1(d_mat.lhs())), cl_uint(viennacl::traits::size2(d_mat.lhs())), 00792 cl_uint(viennacl::traits::internal_size1(d_mat.lhs())), cl_uint(viennacl::traits::internal_size2(d_mat.lhs())), 00793 viennacl::traits::opencl_handle(result), 00794 cl_uint(viennacl::traits::start1(result)), cl_uint(viennacl::traits::start2(result)), 00795 cl_uint(viennacl::traits::stride1(result)), cl_uint(viennacl::traits::stride2(result)), 00796 cl_uint(viennacl::traits::size1(result)), cl_uint(viennacl::traits::size2(result)), 00797 cl_uint(viennacl::traits::internal_size1(result)), cl_uint(viennacl::traits::internal_size2(result)) 00798 ) 00799 ); 00800 } 00801 00802 // 00803 // Hybrid Matrix 00804 // 00805 00806 template<class TYPE, unsigned int ALIGNMENT> 00807 void prod_impl( const viennacl::hyb_matrix<TYPE, ALIGNMENT>& mat, 00808 const viennacl::vector_base<TYPE>& vec, 00809 viennacl::vector_base<TYPE>& result) 00810 { 00811 assert(mat.size1() == result.size()); 00812 assert(mat.size2() == vec.size()); 00813 00814 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context()); 00815 viennacl::linalg::opencl::kernels::hyb_matrix<TYPE>::init(ctx); 00816 00817 viennacl::ocl::packed_cl_uint layout_vec; 00818 layout_vec.start = cl_uint(viennacl::traits::start(vec)); 00819 layout_vec.stride = cl_uint(viennacl::traits::stride(vec)); 00820 layout_vec.size = cl_uint(viennacl::traits::size(vec)); 00821 layout_vec.internal_size = cl_uint(viennacl::traits::internal_size(vec)); 00822 00823 viennacl::ocl::packed_cl_uint layout_result; 00824 layout_result.start = cl_uint(viennacl::traits::start(result)); 00825 layout_result.stride = cl_uint(viennacl::traits::stride(result)); 00826 layout_result.size = cl_uint(viennacl::traits::size(result)); 00827 layout_result.internal_size = cl_uint(viennacl::traits::internal_size(result)); 00828 00829 viennacl::ocl::kernel& k = ctx.get_kernel(viennacl::linalg::opencl::kernels::hyb_matrix<TYPE>::program_name(), "vec_mul"); 00830 00831 unsigned int thread_num = 256; 00832 unsigned int group_num = 32; 00833 00834 k.local_work_size(0, thread_num); 00835 k.global_work_size(0, thread_num * group_num); 00836 00837 viennacl::ocl::enqueue(k(mat.handle2().opencl_handle(), 00838 mat.handle().opencl_handle(), 00839 mat.handle3().opencl_handle(), 00840 mat.handle4().opencl_handle(), 00841 mat.handle5().opencl_handle(), 00842 viennacl::traits::opencl_handle(vec), 00843 layout_vec, 00844 viennacl::traits::opencl_handle(result), 00845 layout_result, 00846 cl_uint(mat.size1()), 00847 cl_uint(mat.internal_size1()), 00848 cl_uint(mat.ell_nnz()), 00849 cl_uint(mat.internal_ellnnz()) 00850 ) 00851 ); 00852 } 00853 00854 template<typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2> 00855 void prod_impl( const viennacl::hyb_matrix<NumericT, ALIGNMENT>& mat, 00856 const viennacl::matrix_base<NumericT, F1> & d_mat, 00857 viennacl::matrix_base<NumericT, F2> & result) 00858 { 00859 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context()); 00860 viennacl::linalg::opencl::kernels::hyb_matrix<NumericT>::init(ctx); 00861 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::hyb_matrix<NumericT>::program_name(), 00862 detail::sparse_dense_matmult_kernel_name(false, is_row_major<F1>::value, is_row_major<F2>::value)); 00863 00864 unsigned int thread_num = 256; 00865 unsigned int group_num = 32; 00866 00867 k.local_work_size(0, thread_num); 00868 k.global_work_size(0, thread_num * group_num); 00869 00870 viennacl::ocl::enqueue(k(mat.handle2().opencl_handle(), 00871 mat.handle().opencl_handle(), 00872 mat.handle3().opencl_handle(), 00873 mat.handle4().opencl_handle(), 00874 mat.handle5().opencl_handle(), 00875 cl_uint(mat.size1()), 00876 cl_uint(mat.internal_size1()), 00877 cl_uint(mat.ell_nnz()), 00878 cl_uint(mat.internal_ellnnz()), 00879 viennacl::traits::opencl_handle(d_mat), 00880 cl_uint(viennacl::traits::start1(d_mat)), cl_uint(viennacl::traits::start2(d_mat)), 00881 cl_uint(viennacl::traits::stride1(d_mat)), cl_uint(viennacl::traits::stride2(d_mat)), 00882 cl_uint(viennacl::traits::size1(d_mat)), cl_uint(viennacl::traits::size2(d_mat)), 00883 cl_uint(viennacl::traits::internal_size1(d_mat)), cl_uint(viennacl::traits::internal_size2(d_mat)), 00884 viennacl::traits::opencl_handle(result), 00885 cl_uint(viennacl::traits::start1(result)), cl_uint(viennacl::traits::start2(result)), 00886 cl_uint(viennacl::traits::stride1(result)), cl_uint(viennacl::traits::stride2(result)), 00887 cl_uint(viennacl::traits::size1(result)), cl_uint(viennacl::traits::size2(result)), 00888 cl_uint(viennacl::traits::internal_size1(result)), cl_uint(viennacl::traits::internal_size2(result)) 00889 ) 00890 ); 00891 } 00892 00893 template<typename NumericT, unsigned int ALIGNMENT, typename F1, typename F2> 00894 void prod_impl( const viennacl::hyb_matrix<NumericT, ALIGNMENT>& mat, 00895 const viennacl::matrix_expression< const viennacl::matrix_base<NumericT, F1>, 00896 const viennacl::matrix_base<NumericT, F1>, 00897 viennacl::op_trans > & d_mat, 00898 viennacl::matrix_base<NumericT, F2> & result) 00899 { 00900 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(mat).context()); 00901 viennacl::linalg::opencl::kernels::hyb_matrix<NumericT>::init(ctx); 00902 viennacl::ocl::kernel & k = ctx.get_kernel(viennacl::linalg::opencl::kernels::hyb_matrix<NumericT>::program_name(), 00903 detail::sparse_dense_matmult_kernel_name(true, is_row_major<F1>::value, is_row_major<F2>::value)); 00904 00905 unsigned int thread_num = 256; 00906 unsigned int group_num = 32; 00907 00908 k.local_work_size(0, thread_num); 00909 k.global_work_size(0, thread_num * group_num); 00910 00911 viennacl::ocl::enqueue(k(mat.handle2().opencl_handle(), 00912 mat.handle().opencl_handle(), 00913 mat.handle3().opencl_handle(), 00914 mat.handle4().opencl_handle(), 00915 mat.handle5().opencl_handle(), 00916 cl_uint(mat.size1()), 00917 cl_uint(mat.internal_size1()), 00918 cl_uint(mat.ell_nnz()), 00919 cl_uint(mat.internal_ellnnz()), 00920 viennacl::traits::opencl_handle(d_mat.lhs()), 00921 cl_uint(viennacl::traits::start1(d_mat.lhs())), cl_uint(viennacl::traits::start2(d_mat.lhs())), 00922 cl_uint(viennacl::traits::stride1(d_mat.lhs())), cl_uint(viennacl::traits::stride2(d_mat.lhs())), 00923 cl_uint(viennacl::traits::size1(d_mat.lhs())), cl_uint(viennacl::traits::size2(d_mat.lhs())), 00924 cl_uint(viennacl::traits::internal_size1(d_mat.lhs())), cl_uint(viennacl::traits::internal_size2(d_mat.lhs())), 00925 viennacl::traits::opencl_handle(result), 00926 cl_uint(viennacl::traits::start1(result)), cl_uint(viennacl::traits::start2(result)), 00927 cl_uint(viennacl::traits::stride1(result)), cl_uint(viennacl::traits::stride2(result)), 00928 cl_uint(viennacl::traits::size1(result)), cl_uint(viennacl::traits::size2(result)), 00929 cl_uint(viennacl::traits::internal_size1(result)), cl_uint(viennacl::traits::internal_size2(result)) 00930 ) 00931 ); 00932 } 00933 00934 00935 } // namespace opencl 00936 } //namespace linalg 00937 } //namespace viennacl 00938 00939 00940 #endif