ViennaCL - The Vienna Computing Library  1.6.2
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
sparse_matrix_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_SPARSE_MATRIX_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_SPARSE_MATRIX_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include "viennacl/forwards.h"
26 #include "viennacl/ocl/device.hpp"
27 #include "viennacl/ocl/handle.hpp"
28 #include "viennacl/ocl/kernel.hpp"
29 #include "viennacl/scalar.hpp"
30 #include "viennacl/vector.hpp"
31 #include "viennacl/tools/tools.hpp"
39 
40 namespace viennacl
41 {
42 namespace linalg
43 {
44 namespace opencl
45 {
46 
47 //
48 // Compressed matrix
49 //
50 
51 namespace detail
52 {
53  template<typename NumericT, unsigned int AlignmentV>
57  {
58  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
61 
62  viennacl::ocl::enqueue(row_info_kernel(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(),
63  viennacl::traits::opencl_handle(x),
64  cl_uint(A.size1()),
65  cl_uint(info_selector)
66  )
67  );
68  }
69 }
70 
79 template<typename NumericT, unsigned int AlignmentV>
83 {
84  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
86  std::stringstream ss;
87  ss << "vec_mul";
88  unsigned int alignment = AlignmentV; //prevent unreachable code warnings below
89  if (alignment == 4)
90  ss << "4";
91  if (alignment == 8)
92  ss << "8";
93 
95 
97  layout_x.start = cl_uint(viennacl::traits::start(x));
98  layout_x.stride = cl_uint(viennacl::traits::stride(x));
99  layout_x.size = cl_uint(viennacl::traits::size(x));
100  layout_x.internal_size = cl_uint(viennacl::traits::internal_size(x));
101 
103  layout_y.start = cl_uint(viennacl::traits::start(y));
104  layout_y.stride = cl_uint(viennacl::traits::stride(y));
105  layout_y.size = cl_uint(viennacl::traits::size(y));
106  layout_y.internal_size = cl_uint(viennacl::traits::internal_size(y));
107 
108  if (alignment == 4 || alignment == 8)
109  {
110  viennacl::ocl::enqueue(k(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(),
111  x, layout_x,
112  y, layout_y
113  ));
114  }
115  else
116  {
117  if (ctx.current_device().max_work_group_size() >= 256)
118  k.local_work_size(0, 256);
119  k.global_work_size(0, A.blocks1() * k.local_work_size(0));
120 
121  viennacl::ocl::enqueue(k(A.handle1().opencl_handle(), A.handle2().opencl_handle(), A.handle3().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.blocks1()),
122  x, layout_x,
123  y, layout_y
124  ));
125  }
126 }
127 
128 
137 template< typename NumericT, unsigned int AlignmentV>
141 
142  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(sp_A).context());
146 
147  viennacl::ocl::enqueue(k(sp_A.handle1().opencl_handle(), sp_A.handle2().opencl_handle(), sp_A.handle().opencl_handle(),
148  viennacl::traits::opencl_handle(d_A),
149  cl_uint(viennacl::traits::start1(d_A)), cl_uint(viennacl::traits::start2(d_A)),
150  cl_uint(viennacl::traits::stride1(d_A)), cl_uint(viennacl::traits::stride2(d_A)),
151  cl_uint(viennacl::traits::size1(d_A)), cl_uint(viennacl::traits::size2(d_A)),
153  viennacl::traits::opencl_handle(y),
154  cl_uint(viennacl::traits::start1(y)), cl_uint(viennacl::traits::start2(y)),
155  cl_uint(viennacl::traits::stride1(y)), cl_uint(viennacl::traits::stride2(y)),
156  cl_uint(viennacl::traits::size1(y)), cl_uint(viennacl::traits::size2(y)),
158 }
159 
169 template<typename NumericT, unsigned int AlignmentV>
173  viennacl::op_trans > const & d_A,
175 
176  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(sp_A).context());
179  detail::sparse_dense_matmult_kernel_name(true, d_A.lhs().row_major(), y.row_major()));
180 
181  viennacl::ocl::enqueue(k(sp_A.handle1().opencl_handle(), sp_A.handle2().opencl_handle(), sp_A.handle().opencl_handle(),
182  viennacl::traits::opencl_handle(d_A.lhs()),
183  cl_uint(viennacl::traits::start1(d_A.lhs())), cl_uint(viennacl::traits::start2(d_A.lhs())),
184  cl_uint(viennacl::traits::stride1(d_A.lhs())), cl_uint(viennacl::traits::stride2(d_A.lhs())),
185  cl_uint(viennacl::traits::size1(d_A.lhs())), cl_uint(viennacl::traits::size2(d_A.lhs())),
186  cl_uint(viennacl::traits::internal_size1(d_A.lhs())), cl_uint(viennacl::traits::internal_size2(d_A.lhs())),
187  viennacl::traits::opencl_handle(y),
188  cl_uint(viennacl::traits::start1(y)), cl_uint(viennacl::traits::start2(y)),
189  cl_uint(viennacl::traits::stride1(y)), cl_uint(viennacl::traits::stride2(y)),
190  cl_uint(viennacl::traits::size1(y)), cl_uint(viennacl::traits::size2(y)),
192 }
193 
194 
195 
196 // triangular solvers
197 
203 template<typename NumericT, unsigned int MAT_AlignmentV>
207 {
208  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(L).context());
211 
212  k.local_work_size(0, 128);
214  viennacl::ocl::enqueue(k(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(),
215  viennacl::traits::opencl_handle(x),
216  cl_uint(L.size1())
217  )
218  );
219 }
220 
226 template<typename NumericT, unsigned int AlignmentV>
230 {
231  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(L).context());
233 
235 
236  k.local_work_size(0, 128);
238  viennacl::ocl::enqueue(k(L.handle1().opencl_handle(), L.handle2().opencl_handle(), L.handle().opencl_handle(),
239  viennacl::traits::opencl_handle(x),
240  cl_uint(L.size1())
241  )
242  );
243 }
244 
245 
251 template<typename NumericT, unsigned int AlignmentV>
255 {
256  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(U).context());
259 
260  k.local_work_size(0, 128);
262  viennacl::ocl::enqueue(k(U.handle1().opencl_handle(), U.handle2().opencl_handle(), U.handle().opencl_handle(),
263  viennacl::traits::opencl_handle(x),
264  cl_uint(U.size1())
265  )
266  );
267 }
268 
274 template<typename NumericT, unsigned int AlignmentV>
278 {
279  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(U).context());
281 
283 
284  k.local_work_size(0, 128);
286  viennacl::ocl::enqueue(k(U.handle1().opencl_handle(), U.handle2().opencl_handle(), U.handle().opencl_handle(),
287  viennacl::traits::opencl_handle(x),
288  cl_uint(U.size1())
289  )
290  );
291 }
292 
293 
294 
295 
296 
297 // transposed triangular solvers
298 
299 namespace detail
300 {
301  //
302  // block solves
303  //
304  template<typename NumericT, unsigned int AlignmentV>
307  op_trans> & L,
308  viennacl::backend::mem_handle const & block_indices, vcl_size_t num_blocks,
309  vector_base<NumericT> const & /* L_diagonal */, //ignored
312  {
313  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(L.lhs()).context());
316  block_solve_kernel.global_work_size(0, num_blocks * block_solve_kernel.local_work_size(0));
317 
318  viennacl::ocl::enqueue(block_solve_kernel(L.lhs().handle1().opencl_handle(),
319  L.lhs().handle2().opencl_handle(),
320  L.lhs().handle().opencl_handle(),
321  block_indices.opencl_handle(),
322  x,
323  static_cast<cl_uint>(x.size())));
324  }
325 
326 
327  template<typename NumericT, unsigned int AlignmentV>
330  op_trans> const & U,
331  viennacl::backend::mem_handle const & block_indices, vcl_size_t num_blocks,
332  vector_base<NumericT> const & U_diagonal,
335  {
336  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(U.lhs()).context());
339  block_solve_kernel.global_work_size(0, num_blocks * block_solve_kernel.local_work_size(0));
340 
341  viennacl::ocl::enqueue(block_solve_kernel(U.lhs().handle1().opencl_handle(),
342  U.lhs().handle2().opencl_handle(),
343  U.lhs().handle().opencl_handle(),
344  U_diagonal,
345  block_indices.opencl_handle(),
346  x,
347  static_cast<cl_uint>(x.size())));
348  }
349 
350 
351 }
352 
353 
359 template<typename NumericT, unsigned int AlignmentV>
362  op_trans> const & proxy_L,
365 {
366  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(proxy_L.lhs()).context());
369 
370  k.local_work_size(0, 128);
372  viennacl::ocl::enqueue(k(proxy_L.lhs().handle1().opencl_handle(), proxy_L.lhs().handle2().opencl_handle(), proxy_L.lhs().handle().opencl_handle(),
373  viennacl::traits::opencl_handle(x),
374  cl_uint(proxy_L.lhs().size1())
375  )
376  );
377 }
378 
379 
385 template<typename NumericT, unsigned int AlignmentV>
388  op_trans> const & proxy_L,
391 {
392  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(proxy_L.lhs()).context());
394 
395  viennacl::vector<NumericT> diagonal(x.size());
397 
399 
400  k.local_work_size(0, 128);
401  k.global_work_size(0, k.local_work_size());
402  viennacl::ocl::enqueue(k(proxy_L.lhs().handle1().opencl_handle(), proxy_L.lhs().handle2().opencl_handle(), proxy_L.lhs().handle().opencl_handle(),
403  viennacl::traits::opencl_handle(diagonal),
404  viennacl::traits::opencl_handle(x),
405  cl_uint(proxy_L.lhs().size1())
406  )
407  );
408 }
409 
415 template<typename NumericT, unsigned int AlignmentV>
418  op_trans> const & proxy_U,
421 {
422  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(proxy_U.lhs()).context());
425 
426  k.local_work_size(0, 128);
428  viennacl::ocl::enqueue(k(proxy_U.lhs().handle1().opencl_handle(), proxy_U.lhs().handle2().opencl_handle(), proxy_U.lhs().handle().opencl_handle(),
429  viennacl::traits::opencl_handle(x),
430  cl_uint(proxy_U.lhs().size1())
431  )
432  );
433 }
434 
435 
441 template<typename NumericT, unsigned int AlignmentV>
444  op_trans> const & proxy_U,
447 {
448  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(proxy_U.lhs()).context());
450 
451  viennacl::vector<NumericT> diagonal(x.size());
453 
455 
456  k.local_work_size(0, 128);
457  k.global_work_size(0, k.local_work_size());
458  viennacl::ocl::enqueue(k(proxy_U.lhs().handle1().opencl_handle(), proxy_U.lhs().handle2().opencl_handle(), proxy_U.lhs().handle().opencl_handle(),
459  viennacl::traits::opencl_handle(diagonal),
460  viennacl::traits::opencl_handle(x),
461  cl_uint(proxy_U.lhs().size1())
462  )
463  );
464 }
465 
466 
467 //
468 // Compressed Compressed matrix
469 //
470 
479 template<typename NumericT>
483 {
484  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
487 
488  y.clear();
489 
491  layout_x.start = cl_uint(viennacl::traits::start(x));
492  layout_x.stride = cl_uint(viennacl::traits::stride(x));
493  layout_x.size = cl_uint(viennacl::traits::size(x));
494  layout_x.internal_size = cl_uint(viennacl::traits::internal_size(x));
495 
497  layout_y.start = cl_uint(viennacl::traits::start(y));
498  layout_y.stride = cl_uint(viennacl::traits::stride(y));
499  layout_y.size = cl_uint(viennacl::traits::size(y));
500  layout_y.internal_size = cl_uint(viennacl::traits::internal_size(y));
501 
502  viennacl::ocl::enqueue(k(A.handle1().opencl_handle(), A.handle3().opencl_handle(), A.handle2().opencl_handle(), A.handle().opencl_handle(), cl_uint(A.nnz1()),
503  x, layout_x,
504  y, layout_y
505  ));
506 }
507 
508 
509 //
510 // Coordinate matrix
511 //
512 
513 namespace detail
514 {
515  template<typename NumericT, unsigned int AlignmentV>
519  {
520  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
523  unsigned int thread_num = 128; //k.local_work_size(0);
524 
525  row_info_kernel.local_work_size(0, thread_num);
526 
527  row_info_kernel.global_work_size(0, 64 * thread_num); //64 work groups are hard-coded for now. Gives reasonable performance in most cases
528  viennacl::ocl::enqueue(row_info_kernel(A.handle12().opencl_handle(), A.handle().opencl_handle(), A.handle3().opencl_handle(),
529  viennacl::traits::opencl_handle(x),
530  cl_uint(info_selector),
531  viennacl::ocl::local_mem(sizeof(cl_uint)*thread_num),
532  viennacl::ocl::local_mem(sizeof(NumericT)*thread_num)) );
533  }
534 }
535 
544 template<typename NumericT, unsigned int AlignmentV>
548 {
549  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
551 
552  y.clear();
553 
555  layout_x.start = cl_uint(viennacl::traits::start(x));
556  layout_x.stride = cl_uint(viennacl::traits::stride(x));
557  layout_x.size = cl_uint(viennacl::traits::size(x));
558  layout_x.internal_size = cl_uint(viennacl::traits::internal_size(x));
559 
561  layout_y.start = cl_uint(viennacl::traits::start(y));
562  layout_y.stride = cl_uint(viennacl::traits::stride(y));
563  layout_y.size = cl_uint(viennacl::traits::size(y));
564  layout_y.internal_size = cl_uint(viennacl::traits::internal_size(y));
565 
566  //std::cout << "prod(coordinate_matrix" << AlignmentV << ", vector) called with internal_nnz=" << A.internal_nnz() << std::endl;
567 
569  unsigned int thread_num = 128; //k.local_work_size(0);
570 
571  k.local_work_size(0, thread_num);
572 
573  k.global_work_size(0, 64 * thread_num); //64 work groups are hard-coded for now. Gives reasonable performance in most cases
574  //k.global_work_size(0, thread_num); //Only one work group
575  viennacl::ocl::enqueue(k(A.handle12().opencl_handle(), A.handle().opencl_handle(), A.handle3().opencl_handle(),
576  viennacl::traits::opencl_handle(x),
577  layout_x,
578  viennacl::traits::opencl_handle(y),
579  layout_y,
580  viennacl::ocl::local_mem(sizeof(cl_uint)*thread_num),
581  viennacl::ocl::local_mem(sizeof(NumericT)*thread_num)) );
582 
583 }
584 
585 
594 template<typename NumericT, unsigned int AlignmentV>
598 {
599  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
601 
604 
605  y.clear();
606 
607  unsigned int thread_num = 128; //k.local_work_size(0);
608  k.local_work_size(0, thread_num);
609  k.global_work_size(0, 64 * thread_num); //64 work groups are hard-coded for now. Gives reasonable performance in most cases
610 
611  viennacl::ocl::enqueue(k(A.handle12().opencl_handle(), A.handle().opencl_handle(), A.handle3().opencl_handle(),
612  viennacl::traits::opencl_handle(d_A),
613  cl_uint(viennacl::traits::start1(d_A)), cl_uint(viennacl::traits::start2(d_A)),
614  cl_uint(viennacl::traits::stride1(d_A)), cl_uint(viennacl::traits::stride2(d_A)),
615  cl_uint(viennacl::traits::size1(d_A)), cl_uint(viennacl::traits::size2(d_A)),
617  viennacl::traits::opencl_handle(y),
618  cl_uint(viennacl::traits::start1(y)), cl_uint(viennacl::traits::start2(y)),
619  cl_uint(viennacl::traits::stride1(y)), cl_uint(viennacl::traits::stride2(y)),
620  cl_uint(viennacl::traits::size1(y)), cl_uint(viennacl::traits::size2(y)),
622  viennacl::ocl::local_mem(sizeof(cl_uint)*k.local_work_size(0)),
623  viennacl::ocl::local_mem(sizeof(NumericT)*k.local_work_size(0))) );
624 
625 }
626 
635 template<typename NumericT, unsigned int AlignmentV>
639  viennacl::op_trans > const & d_A,
641 {
642  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
644 
646  detail::sparse_dense_matmult_kernel_name(true, d_A.lhs().row_major(), y.row_major()));
647 
648  y.clear();
649 
650  unsigned int thread_num = 128; //k.local_work_size(0);
651  k.local_work_size(0, thread_num);
652  k.global_work_size(0, 64 * thread_num); //64 work groups are hard-coded for now. Gives reasonable performance in most cases
653 
654  viennacl::ocl::enqueue(k(A.handle12().opencl_handle(), A.handle().opencl_handle(), A.handle3().opencl_handle(),
655  viennacl::traits::opencl_handle(d_A),
656  cl_uint(viennacl::traits::start1(d_A.lhs())), cl_uint(viennacl::traits::start2(d_A.lhs())),
657  cl_uint(viennacl::traits::stride1(d_A.lhs())), cl_uint(viennacl::traits::stride2(d_A.lhs())),
658  cl_uint(viennacl::traits::size1(d_A.lhs())), cl_uint(viennacl::traits::size2(d_A.lhs())),
659  cl_uint(viennacl::traits::internal_size1(d_A.lhs())), cl_uint(viennacl::traits::internal_size2(d_A.lhs())),
660  viennacl::traits::opencl_handle(y),
661  cl_uint(viennacl::traits::start1(y)), cl_uint(viennacl::traits::start2(y)),
662  cl_uint(viennacl::traits::stride1(y)), cl_uint(viennacl::traits::stride2(y)),
663  cl_uint(viennacl::traits::size1(y)), cl_uint(viennacl::traits::size2(y)),
665  viennacl::ocl::local_mem(sizeof(cl_uint)*k.local_work_size(0)),
666  viennacl::ocl::local_mem(sizeof(NumericT)*k.local_work_size(0))) );
667 
668 }
669 
670 
671 //
672 // ELL Matrix
673 //
674 
675 template<typename NumericT, unsigned int AlignmentV>
679 {
680  assert(A.size1() == y.size());
681  assert(A.size2() == x.size());
682 
683  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
685  y.clear();
686 
688  layout_x.start = cl_uint(viennacl::traits::start(x));
689  layout_x.stride = cl_uint(viennacl::traits::stride(x));
690  layout_x.size = cl_uint(viennacl::traits::size(x));
691  layout_x.internal_size = cl_uint(viennacl::traits::internal_size(x));
692 
694  layout_y.start = cl_uint(viennacl::traits::start(y));
695  layout_y.stride = cl_uint(viennacl::traits::stride(y));
696  layout_y.size = cl_uint(viennacl::traits::size(y));
697  layout_y.internal_size = cl_uint(viennacl::traits::internal_size(y));
698 
699  std::stringstream ss;
700  ss << "vec_mul_" << 1;//(AlignmentV != 1?4:1);
702 
703  unsigned int thread_num = 128;
704  unsigned int group_num = 256;
705 
706  k.local_work_size(0, thread_num);
707  k.global_work_size(0, thread_num * group_num);
708 
709  viennacl::ocl::enqueue(k(A.handle2().opencl_handle(),
710  A.handle().opencl_handle(),
711  viennacl::traits::opencl_handle(x),
712  layout_x,
713  viennacl::traits::opencl_handle(y),
714  layout_y,
715  cl_uint(A.size1()),
716  cl_uint(A.size2()),
717  cl_uint(A.internal_size1()),
718  cl_uint(A.maxnnz()),
719  cl_uint(A.internal_maxnnz())
720  )
721  );
722 
723 
724 }
725 
735 template<typename NumericT, unsigned int AlignmentV>
739 
740  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(sp_A).context());
744 
745  //unsigned int thread_num = 128;
746  //unsigned int group_num = 256;
747  //
748  //k.local_work_size(0, thread_num);
749  //k.global_work_size(0, thread_num * group_num);
750 
751  viennacl::ocl::enqueue(k(sp_A.handle2().opencl_handle(), sp_A.handle().opencl_handle(),
752  cl_uint(sp_A.size1()),
753  cl_uint(sp_A.size2()),
754  cl_uint(sp_A.internal_size1()),
755  cl_uint(sp_A.maxnnz()),
756  cl_uint(sp_A.internal_maxnnz()),
757  viennacl::traits::opencl_handle(d_A),
758  cl_uint(viennacl::traits::start1(d_A)), cl_uint(viennacl::traits::start2(d_A)),
759  cl_uint(viennacl::traits::stride1(d_A)), cl_uint(viennacl::traits::stride2(d_A)),
760  cl_uint(viennacl::traits::size1(d_A)), cl_uint(viennacl::traits::size2(d_A)),
762  viennacl::traits::opencl_handle(y),
763  cl_uint(viennacl::traits::start1(y)), cl_uint(viennacl::traits::start2(y)),
764  cl_uint(viennacl::traits::stride1(y)), cl_uint(viennacl::traits::stride2(y)),
765  cl_uint(viennacl::traits::size1(y)), cl_uint(viennacl::traits::size2(y)),
767  )
768  );
769 }
770 
780 template<typename NumericT, unsigned int AlignmentV>
784  viennacl::op_trans > const & d_A,
786 
787  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(sp_A).context());
790  detail::sparse_dense_matmult_kernel_name(true, d_A.lhs().row_major(), y.row_major()));
791 
792  //unsigned int thread_num = 128;
793  //unsigned int group_num = 256;
794  //
795  //k.local_work_size(0, thread_num);
796  //k.global_work_size(0, thread_num * group_num);
797 
798  viennacl::ocl::enqueue(k(sp_A.handle2().opencl_handle(), sp_A.handle().opencl_handle(),
799  cl_uint(sp_A.size1()),
800  cl_uint(sp_A.size2()),
801  cl_uint(sp_A.internal_size1()),
802  cl_uint(sp_A.maxnnz()),
803  cl_uint(sp_A.internal_maxnnz()),
804  viennacl::traits::opencl_handle(d_A.lhs()),
805  cl_uint(viennacl::traits::start1(d_A.lhs())), cl_uint(viennacl::traits::start2(d_A.lhs())),
806  cl_uint(viennacl::traits::stride1(d_A.lhs())), cl_uint(viennacl::traits::stride2(d_A.lhs())),
807  cl_uint(viennacl::traits::size1(d_A.lhs())), cl_uint(viennacl::traits::size2(d_A.lhs())),
808  cl_uint(viennacl::traits::internal_size1(d_A.lhs())), cl_uint(viennacl::traits::internal_size2(d_A.lhs())),
809  viennacl::traits::opencl_handle(y),
810  cl_uint(viennacl::traits::start1(y)), cl_uint(viennacl::traits::start2(y)),
811  cl_uint(viennacl::traits::stride1(y)), cl_uint(viennacl::traits::stride2(y)),
812  cl_uint(viennacl::traits::size1(y)), cl_uint(viennacl::traits::size2(y)),
814  )
815  );
816 }
817 
818 //
819 // SELL-C-\sigma Matrix
820 //
821 
822 template<typename ScalarT, typename IndexT>
826 {
827  assert(A.size1() == y.size());
828  assert(A.size2() == x.size());
829 
830  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
832  y.clear();
833 
835  layout_x.start = cl_uint(viennacl::traits::start(x));
836  layout_x.stride = cl_uint(viennacl::traits::stride(x));
837  layout_x.size = cl_uint(viennacl::traits::size(x));
838  layout_x.internal_size = cl_uint(viennacl::traits::internal_size(x));
839 
841  layout_y.start = cl_uint(viennacl::traits::start(y));
842  layout_y.stride = cl_uint(viennacl::traits::stride(y));
843  layout_y.size = cl_uint(viennacl::traits::size(y));
844  layout_y.internal_size = cl_uint(viennacl::traits::internal_size(y));
845 
846  std::stringstream ss;
847  ss << "vec_mul_" << 1;//(AlignmentV != 1?4:1);
849 
850  vcl_size_t thread_num = A.rows_per_block();
851  unsigned int group_num = 256;
852 
853  k.local_work_size(0, thread_num);
854  k.global_work_size(0, thread_num * group_num);
855 
856  viennacl::ocl::enqueue(k(A.handle1().opencl_handle(),
857  A.handle2().opencl_handle(),
858  A.handle3().opencl_handle(),
859  A.handle().opencl_handle(),
860  viennacl::traits::opencl_handle(x),
861  layout_x,
862  viennacl::traits::opencl_handle(y),
863  layout_y)
864  );
865 }
866 
867 
868 //
869 // Hybrid Matrix
870 //
871 
872 template<typename NumericT, unsigned int AlignmentV>
876 {
877  assert(A.size1() == y.size());
878  assert(A.size2() == x.size());
879 
880  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
882 
884  layout_x.start = cl_uint(viennacl::traits::start(x));
885  layout_x.stride = cl_uint(viennacl::traits::stride(x));
886  layout_x.size = cl_uint(viennacl::traits::size(x));
887  layout_x.internal_size = cl_uint(viennacl::traits::internal_size(x));
888 
890  layout_y.start = cl_uint(viennacl::traits::start(y));
891  layout_y.stride = cl_uint(viennacl::traits::stride(y));
892  layout_y.size = cl_uint(viennacl::traits::size(y));
893  layout_y.internal_size = cl_uint(viennacl::traits::internal_size(y));
894 
896 
897  viennacl::ocl::enqueue(k(A.handle2().opencl_handle(),
898  A.handle().opencl_handle(),
899  A.handle3().opencl_handle(),
900  A.handle4().opencl_handle(),
901  A.handle5().opencl_handle(),
902  viennacl::traits::opencl_handle(x),
903  layout_x,
904  viennacl::traits::opencl_handle(y),
905  layout_y,
906  cl_uint(A.size1()),
907  cl_uint(A.internal_size1()),
908  cl_uint(A.ell_nnz()),
909  cl_uint(A.internal_ellnnz())
910  )
911  );
912 }
913 
914 template<typename NumericT, unsigned int AlignmentV>
918 {
919  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
923 
924  viennacl::ocl::enqueue(k(A.handle2().opencl_handle(),
925  A.handle().opencl_handle(),
926  A.handle3().opencl_handle(),
927  A.handle4().opencl_handle(),
928  A.handle5().opencl_handle(),
929  cl_uint(A.size1()),
930  cl_uint(A.internal_size1()),
931  cl_uint(A.ell_nnz()),
932  cl_uint(A.internal_ellnnz()),
933  viennacl::traits::opencl_handle(d_A),
934  cl_uint(viennacl::traits::start1(d_A)), cl_uint(viennacl::traits::start2(d_A)),
935  cl_uint(viennacl::traits::stride1(d_A)), cl_uint(viennacl::traits::stride2(d_A)),
936  cl_uint(viennacl::traits::size1(d_A)), cl_uint(viennacl::traits::size2(d_A)),
938  viennacl::traits::opencl_handle(y),
939  cl_uint(viennacl::traits::start1(y)), cl_uint(viennacl::traits::start2(y)),
940  cl_uint(viennacl::traits::stride1(y)), cl_uint(viennacl::traits::stride2(y)),
941  cl_uint(viennacl::traits::size1(y)), cl_uint(viennacl::traits::size2(y)),
943  )
944  );
945 }
946 
947 template<typename NumericT, unsigned int AlignmentV>
951  viennacl::op_trans > const & d_A,
953 {
954  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context());
957  detail::sparse_dense_matmult_kernel_name(true, d_A.lhs().row_major(), y.row_major()));
958 
959  viennacl::ocl::enqueue(k(A.handle2().opencl_handle(),
960  A.handle().opencl_handle(),
961  A.handle3().opencl_handle(),
962  A.handle4().opencl_handle(),
963  A.handle5().opencl_handle(),
964  cl_uint(A.size1()),
965  cl_uint(A.internal_size1()),
966  cl_uint(A.ell_nnz()),
967  cl_uint(A.internal_ellnnz()),
968  viennacl::traits::opencl_handle(d_A.lhs()),
969  cl_uint(viennacl::traits::start1(d_A.lhs())), cl_uint(viennacl::traits::start2(d_A.lhs())),
970  cl_uint(viennacl::traits::stride1(d_A.lhs())), cl_uint(viennacl::traits::stride2(d_A.lhs())),
971  cl_uint(viennacl::traits::size1(d_A.lhs())), cl_uint(viennacl::traits::size2(d_A.lhs())),
972  cl_uint(viennacl::traits::internal_size1(d_A.lhs())), cl_uint(viennacl::traits::internal_size2(d_A.lhs())),
973  viennacl::traits::opencl_handle(y),
974  cl_uint(viennacl::traits::start1(y)), cl_uint(viennacl::traits::start2(y)),
975  cl_uint(viennacl::traits::stride1(y)), cl_uint(viennacl::traits::stride2(y)),
976  cl_uint(viennacl::traits::size1(y)), cl_uint(viennacl::traits::size2(y)),
978  )
979  );
980 }
981 
982 
983 } // namespace opencl
984 } //namespace linalg
985 } //namespace viennacl
986 
987 
988 #endif
vcl_size_t internal_ellnnz() const
Definition: hyb_matrix.hpp:101
Sparse matrix class using a hybrid format composed of the ELL and CSR formats for storing the nonzeros.
Definition: forwards.h:405
cl_uint stride
Increment between integers.
Definition: kernel.hpp:50
static void init(viennacl::ocl::context &ctx)
vcl_size_t size1() const
Definition: ell_matrix.hpp:91
viennacl::ocl::device const & current_device() const
Returns the current device.
Definition: context.hpp:111
Helper class for packing four cl_uint numbers into a uint4 type for access inside an OpenCL kernel.
Definition: kernel.hpp:45
handle_type & handle2()
Definition: ell_matrix.hpp:103
Represents an OpenCL device within ViennaCL.
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
Definition: stride.hpp:55
const handle_type & handle3() const
Definition: hyb_matrix.hpp:107
const vcl_size_t & size1() const
Returns the number of rows.
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58
cl_uint start
Starting value of the integer stride.
Definition: kernel.hpp:48
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
Various little tools used here and there in ViennaCL.
const handle_type & handle() const
Definition: hyb_matrix.hpp:105
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix.
Definition: size.hpp:279
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:742
const handle_type & handle12() const
Returns the OpenCL handle to the (row, column) index array.
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
std::string sparse_dense_matmult_kernel_name(bool B_transposed, bool B_row_major, bool C_row_major)
Returns the OpenCL kernel string for the operation C = A * B, where A is a sparse matrix and B, C are dense matrices.
Definition: common.hpp:70
A tag class representing a lower triangular matrix.
Definition: forwards.h:809
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:54
Main kernel class for generating OpenCL kernels for coordinate_matrix.
vcl_size_t internal_size1() const
Definition: hyb_matrix.hpp:95
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix.
Definition: size.hpp:287
Expression template class for representing a tree of expressions which ultimately result in a matrix...
Definition: forwards.h:340
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:45
This file provides the forward declarations for the main types used within ViennaCL.
vcl_size_t size2() const
Definition: ell_matrix.hpp:92
result_of::size_type< T >::type start1(T const &obj)
Definition: start.hpp:65
const handle_type & handle4() const
Definition: hyb_matrix.hpp:108
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
Definition: size.hpp:268
vcl_size_t rows_per_block() const
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:245
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
cl_uint internal_size
Internal length of the buffer. Might be larger than 'size' due to padding.
Definition: kernel.hpp:54
vcl_size_t internal_size1() const
Definition: ell_matrix.hpp:88
Common implementations shared by OpenCL-based operations.
Main kernel class for generating OpenCL kernels for ell_matrix.
Definition: ell_matrix.hpp:156
const handle_type & handle2() const
Definition: hyb_matrix.hpp:106
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
result_of::size_type< T >::type start2(T const &obj)
Definition: start.hpp:84
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Definition: local_mem.hpp:33
Main kernel class for generating OpenCL kernels for compressed_matrix.
Sparse matrix class using the ELLPACK format for storing the nonzeros.
Definition: ell_matrix.hpp:53
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:607
OpenCL kernel file for compressed_matrix operations.
A tag class representing an upper triangular matrix.
Definition: forwards.h:814
OpenCL kernel file for ell_matrix operations.
Sparse matrix class using the sliced ELLPACK format with parameters C, \sigma.
Definition: forwards.h:402
void clear()
Resets all entries to zero.
Definition: matrix.hpp:597
const handle_type & handle3() const
Returns the OpenCL handle to the row index array.
Implementation of a smart-pointer-like class for handling OpenCL handles.
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:44
A sparse square matrix in compressed sparse rows format optimized for the case that only a few rows carry nonzero entries.
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
vcl_size_t size2() const
Definition: hyb_matrix.hpp:99
std::size_t vcl_size_t
Definition: forwards.h:74
Main kernel class for generating OpenCL kernels for ell_matrix.
OpenCL kernel file for sliced_ell_matrix operations.
vcl_size_t maxnnz() const
Definition: ell_matrix.hpp:95
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
Definition: stride.hpp:65
const handle_type & handle3() const
Returns the OpenCL handle to the group start index array.
OpenCL kernel file for hyb_matrix operations.
handle_type & handle()
Definition: ell_matrix.hpp:100
void inplace_solve(matrix_base< NumericT > const &A, matrix_base< NumericT > &B, SolverTagT)
Direct inplace solver for dense triangular systems. Matlab notation: A \ B.
const handle_type & handle3() const
Returns the OpenCL handle to the row block array.
void clear()
Resets all entries to zero. Does not change the size of the vector.
Definition: vector.hpp:861
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:50
Representation of an OpenCL kernel in ViennaCL.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations such as norms and inner products are located in linalg/vector_operations.hpp.
bool row_major() const
Definition: matrix_def.hpp:239
static void init(viennacl::ocl::context &ctx)
void init()
Definition: Random.hpp:25
OpenCL kernel file for vector operations.
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
const vcl_size_t & nnz1() const
Returns the number of nonzero entries.
vcl_size_t ell_nnz() const
Definition: hyb_matrix.hpp:102
A tag class representing a lower triangular matrix with unit diagonal.
Definition: forwards.h:819
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:751
OpenCL kernel file for coordinate_matrix operations.
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM, an OpenCL context, or a CUDA device.
Definition: mem_handle.hpp:89
A tag class representing transposed matrices.
Definition: forwards.h:219
A sparse square matrix in compressed sparse rows format.
const handle_type & handle5() const
Definition: hyb_matrix.hpp:109
vcl_size_t size1() const
Definition: hyb_matrix.hpp:98
void block_inplace_solve(const matrix_expression< const compressed_matrix< NumericT, AlignmentV >, const compressed_matrix< NumericT, AlignmentV >, op_trans > &L, viennacl::backend::mem_handle const &block_indices, vcl_size_t num_blocks, vector_base< NumericT > const &, vector_base< NumericT > &x, viennacl::linalg::unit_lower_tag)
static void init(viennacl::ocl::context &ctx)
Definition: hyb_matrix.hpp:183
const vcl_size_t & blocks1() const
Returns the internal number of row blocks for an adaptive SpMV.
vcl_size_t internal_maxnnz() const
Definition: ell_matrix.hpp:94
Implementation of the ViennaCL scalar class.
static void init(viennacl::ocl::context &ctx)
Definition: ell_matrix.hpp:163
void prod_impl(const matrix_base< NumericT > &A, bool trans_A, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
size_t max_work_group_size() const
Maximum number of work-items in a work-group executing a kernel using the data parallel execution model.
Definition: device.hpp:483
A tag class representing an upper triangular matrix with unit diagonal.
Definition: forwards.h:824
Main kernel class for generating OpenCL kernels for compressed_compressed_matrix. ...
cl_uint size
Number of values in the stride.
Definition: kernel.hpp:52
Main kernel class for generating OpenCL kernels for hyb_matrix.
Definition: hyb_matrix.hpp:176
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row and column indices and val denotes the entry.
void row_info(compressed_matrix< NumericT, AlignmentV > const &A, vector_base< NumericT > &x, viennacl::linalg::detail::row_info_types info_selector)