1 #ifndef VIENNACL_LINALG_OPENCL_SPARSE_MATRIX_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_SPARSE_MATRIX_OPERATIONS_HPP_
53 template<
typename NumericT,
unsigned int AlignmentV>
63 viennacl::traits::opencl_handle(x),
65 cl_uint(info_selector)
79 template<
typename NumericT,
unsigned int AlignmentV>
88 unsigned int alignment = AlignmentV;
108 if (alignment == 4 || alignment == 8)
137 template<
typename NumericT,
unsigned int AlignmentV>
148 viennacl::traits::opencl_handle(d_A),
153 viennacl::traits::opencl_handle(y),
169 template<
typename NumericT,
unsigned int AlignmentV>
182 viennacl::traits::opencl_handle(d_A.lhs()),
187 viennacl::traits::opencl_handle(y),
203 template<
typename NumericT,
unsigned int MAT_AlignmentV>
215 viennacl::traits::opencl_handle(x),
226 template<
typename NumericT,
unsigned int AlignmentV>
239 viennacl::traits::opencl_handle(x),
251 template<
typename NumericT,
unsigned int AlignmentV>
263 viennacl::traits::opencl_handle(x),
274 template<
typename NumericT,
unsigned int AlignmentV>
287 viennacl::traits::opencl_handle(x),
304 template<
typename NumericT,
unsigned int AlignmentV>
319 L.lhs().handle2().opencl_handle(),
320 L.lhs().handle().opencl_handle(),
321 block_indices.opencl_handle(),
323 static_cast<cl_uint
>(x.
size())));
327 template<
typename NumericT,
unsigned int AlignmentV>
342 U.lhs().handle2().opencl_handle(),
343 U.lhs().handle().opencl_handle(),
345 block_indices.opencl_handle(),
347 static_cast<cl_uint
>(x.
size())));
359 template<
typename NumericT,
unsigned int AlignmentV>
372 viennacl::ocl::enqueue(k(proxy_L.lhs().handle1().opencl_handle(), proxy_L.lhs().handle2().opencl_handle(), proxy_L.lhs().handle().opencl_handle(),
373 viennacl::traits::opencl_handle(x),
374 cl_uint(proxy_L.lhs().size1())
385 template<
typename NumericT,
unsigned int AlignmentV>
400 k.local_work_size(0, 128);
401 k.global_work_size(0, k.local_work_size());
402 viennacl::ocl::enqueue(k(proxy_L.lhs().handle1().opencl_handle(), proxy_L.lhs().handle2().opencl_handle(), proxy_L.lhs().handle().opencl_handle(),
403 viennacl::traits::opencl_handle(diagonal),
404 viennacl::traits::opencl_handle(x),
405 cl_uint(proxy_L.lhs().size1())
415 template<
typename NumericT,
unsigned int AlignmentV>
428 viennacl::ocl::enqueue(k(proxy_U.lhs().handle1().opencl_handle(), proxy_U.lhs().handle2().opencl_handle(), proxy_U.lhs().handle().opencl_handle(),
429 viennacl::traits::opencl_handle(x),
430 cl_uint(proxy_U.lhs().size1())
441 template<
typename NumericT,
unsigned int AlignmentV>
456 k.local_work_size(0, 128);
457 k.global_work_size(0, k.local_work_size());
458 viennacl::ocl::enqueue(k(proxy_U.lhs().handle1().opencl_handle(), proxy_U.lhs().handle2().opencl_handle(), proxy_U.lhs().handle().opencl_handle(),
459 viennacl::traits::opencl_handle(diagonal),
460 viennacl::traits::opencl_handle(x),
461 cl_uint(proxy_U.lhs().size1())
479 template<
typename NumericT>
515 template<
typename NumericT,
unsigned int AlignmentV>
523 unsigned int thread_num = 128;
529 viennacl::traits::opencl_handle(x),
530 cl_uint(info_selector),
544 template<
typename NumericT,
unsigned int AlignmentV>
569 unsigned int thread_num = 128;
576 viennacl::traits::opencl_handle(x),
578 viennacl::traits::opencl_handle(y),
594 template<
typename NumericT,
unsigned int AlignmentV>
607 unsigned int thread_num = 128;
612 viennacl::traits::opencl_handle(d_A),
617 viennacl::traits::opencl_handle(y),
635 template<
typename NumericT,
unsigned int AlignmentV>
650 unsigned int thread_num = 128;
655 viennacl::traits::opencl_handle(d_A),
660 viennacl::traits::opencl_handle(y),
675 template<
typename NumericT,
unsigned int AlignmentV>
699 std::stringstream ss;
700 ss <<
"vec_mul_" << 1;
703 unsigned int thread_num = 128;
704 unsigned int group_num = 256;
710 A.
handle().opencl_handle(),
711 viennacl::traits::opencl_handle(x),
713 viennacl::traits::opencl_handle(y),
735 template<
typename NumericT,
unsigned int AlignmentV>
752 cl_uint(sp_A.
size1()),
753 cl_uint(sp_A.
size2()),
757 viennacl::traits::opencl_handle(d_A),
762 viennacl::traits::opencl_handle(y),
780 template<
typename NumericT,
unsigned int AlignmentV>
799 cl_uint(sp_A.
size1()),
800 cl_uint(sp_A.
size2()),
804 viennacl::traits::opencl_handle(d_A.lhs()),
809 viennacl::traits::opencl_handle(y),
822 template<
typename ScalarT,
typename IndexT>
846 std::stringstream ss;
847 ss <<
"vec_mul_" << 1;
851 unsigned int group_num = 256;
859 A.
handle().opencl_handle(),
860 viennacl::traits::opencl_handle(x),
862 viennacl::traits::opencl_handle(y),
872 template<
typename NumericT,
unsigned int AlignmentV>
898 A.
handle().opencl_handle(),
902 viennacl::traits::opencl_handle(x),
904 viennacl::traits::opencl_handle(y),
914 template<
typename NumericT,
unsigned int AlignmentV>
925 A.
handle().opencl_handle(),
933 viennacl::traits::opencl_handle(d_A),
938 viennacl::traits::opencl_handle(y),
947 template<
typename NumericT,
unsigned int AlignmentV>
960 A.
handle().opencl_handle(),
968 viennacl::traits::opencl_handle(d_A.lhs()),
973 viennacl::traits::opencl_handle(y),
vcl_size_t internal_ellnnz() const
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
cl_uint stride
Increment between integers.
static void init(viennacl::ocl::context &ctx)
viennacl::ocl::device const & current_device() const
Returns the current device.
Helper class for packing four cl_uint numbers into a uint4 type for access inside an OpenCL kernel...
Represents an OpenCL device within ViennaCL.
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
const handle_type & handle3() const
const vcl_size_t & size1() const
Returns the number of rows.
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
Represents an OpenCL kernel within ViennaCL.
cl_uint start
Starting value of the integer stride.
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
const handle_type & handle() const
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
const handle_type & handle12() const
Returns the OpenCL handle to the (row, column) index array.
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
std::string sparse_dense_matmult_kernel_name(bool B_transposed, bool B_row_major, bool C_row_major)
Returns the OpenCL kernel string for the operation C = A * B with A sparse, B, C dense matrices...
A tag class representing a lower triangular matrix.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Main kernel class for generating OpenCL kernels for coordinate_matrix.
vcl_size_t internal_size1() const
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Expression template class for representing a tree of expressions which ultimately result in a matrix...
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
This file provides the forward declarations for the main types used within ViennaCL.
result_of::size_type< T >::type start1(T const &obj)
const handle_type & handle4() const
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
vcl_size_t rows_per_block() const
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
cl_uint internal_size
Internal length of the buffer. Might be larger than 'size' due to padding.
vcl_size_t internal_size1() const
Common implementations shared by OpenCL-based operations.
Main kernel class for generating OpenCL kernels for ell_matrix.
const handle_type & handle2() const
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
result_of::size_type< T >::type start2(T const &obj)
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Main kernel class for generating OpenCL kernels for compressed_matrix.
Sparse matrix class using the ELLPACK format for storing the nonzeros.
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
static void init(viennacl::ocl::context &ctx)
OpenCL kernel file for compressed_matrix operations.
A tag class representing an upper triangular matrix.
OpenCL kernel file for ell_matrix operations.
Sparse matrix class using the sliced ELLPACK format with parameters C, σ.
void clear()
Resets all entries to zero.
const handle_type & handle3() const
Returns the OpenCL handle to the row index array.
Implementation of a smart-pointer-like class for handling OpenCL handles.
result_of::size_type< T >::type start(T const &obj)
A sparse square matrix in compressed sparse rows format optimized for the case that only a few rows c...
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
Main kernel class for generating OpenCL kernels for ell_matrix.
OpenCL kernel file for sliced_ell_matrix operations.
vcl_size_t maxnnz() const
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
const handle_type & handle3() const
Returns the OpenCL handle to the group start index array.
OpenCL kernel file for hyb_matrix operations.
void inplace_solve(matrix_base< NumericT > const &A, matrix_base< NumericT > &B, SolverTagT)
Direct inplace solver for dense triangular systems. Matlab notation: A \ B.
const handle_type & handle3() const
Returns the OpenCL handle to the row block array.
void clear()
Resets all entries to zero. Does not change the size of the vector.
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Representation of an OpenCL kernel in ViennaCL.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
static void init(viennacl::ocl::context &ctx)
OpenCL kernel file for vector operations.
size_type size() const
Returns the length of the vector (cf. std::vector)
const vcl_size_t & nnz1() const
Returns the number of nonzero entries.
vcl_size_t ell_nnz() const
A tag class representing a lower triangular matrix with unit diagonal.
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
OpenCL kernel file for coordinate_matrix operations.
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
A tag class representing transposed matrices.
A sparse square matrix in compressed sparse rows format.
const handle_type & handle5() const
void block_inplace_solve(const matrix_expression< const compressed_matrix< NumericT, AlignmentV >, const compressed_matrix< NumericT, AlignmentV >, op_trans > &L, viennacl::backend::mem_handle const &block_indices, vcl_size_t num_blocks, vector_base< NumericT > const &, vector_base< NumericT > &x, viennacl::linalg::unit_lower_tag)
static void init(viennacl::ocl::context &ctx)
const vcl_size_t & blocks1() const
Returns the internal number of row blocks for an adaptive SpMV.
vcl_size_t internal_maxnnz() const
Implementation of the ViennaCL scalar class.
static void init(viennacl::ocl::context &ctx)
void prod_impl(const matrix_base< NumericT > &A, bool trans_A, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
size_t max_work_group_size() const
Maximum number of work-items in a work-group executing a kernel using the data parallel execution mod...
A tag class representing an upper triangular matrix with unit diagonal.
Main kernel class for generating OpenCL kernels for compressed_compressed_matrix. ...
cl_uint size
Number of values in the stride.
Main kernel class for generating OpenCL kernels for hyb_matrix.
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...
void row_info(compressed_matrix< NumericT, AlignmentV > const &A, vector_base< NumericT > &x, viennacl::linalg::detail::row_info_types info_selector)