1 #ifndef VIENNACL_LINALG_OPENCL_ITERATIVE_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_OPENCL_ITERATIVE_OPERATIONS_HPP_
51 template<
typename NumericT>
78 template<
typename NumericT>
90 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
106 buffer_size_per_vector,
114 template<
typename NumericT>
124 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
129 unsigned int thread_num = 256;
142 buffer_size_per_vector,
148 template<
typename NumericT>
158 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
162 unsigned int thread_num = 128;
163 unsigned int group_num = 256;
175 A.
handle().opencl_handle(),
179 viennacl::traits::opencl_handle(p),
180 viennacl::traits::opencl_handle(Ap),
183 buffer_size_per_vector,
190 template<
typename NumericT>
200 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
204 unsigned int thread_num =
static_cast<unsigned int>(A.
rows_per_block());
205 unsigned int group_num = 256;
213 A.
handle().opencl_handle(),
214 viennacl::traits::opencl_handle(p),
215 viennacl::traits::opencl_handle(Ap),
218 buffer_size_per_vector,
226 template<
typename NumericT>
236 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
240 unsigned int thread_num = 128;
241 unsigned int group_num = 128;
253 A.
handle().opencl_handle(),
260 viennacl::traits::opencl_handle(p),
261 viennacl::traits::opencl_handle(Ap),
264 buffer_size_per_vector,
274 template<
typename NumericT>
297 cl_uint chunk_size = cl_uint(buffer_chunk_size);
298 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
300 inner_prod_buffer, chunk_size, chunk_offset, vec_size,
305 template<
typename NumericT>
312 (void)buffer_chunk_size;
339 template<
typename NumericT>
354 cl_uint chunk_size = cl_uint(buffer_chunk_size);
355 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
371 inner_prod_buffer, chunk_size, chunk_offset,
380 template<
typename NumericT>
393 cl_uint chunk_size = cl_uint(buffer_chunk_size);
394 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
399 unsigned int thread_num = 256;
412 inner_prod_buffer, chunk_size, chunk_offset,
419 template<
typename NumericT>
432 cl_uint chunk_size = cl_uint(buffer_chunk_size);
433 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
437 unsigned int thread_num = 128;
438 unsigned int group_num = 128;
450 A.
handle().opencl_handle(),
454 viennacl::traits::opencl_handle(p),
455 viennacl::traits::opencl_handle(Ap),
458 inner_prod_buffer, chunk_size, chunk_offset,
466 template<
typename NumericT>
479 cl_uint chunk_size = cl_uint(buffer_chunk_size);
480 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
484 unsigned int thread_num =
static_cast<unsigned int>(A.
rows_per_block());
493 A.
handle().opencl_handle(),
494 viennacl::traits::opencl_handle(p),
495 viennacl::traits::opencl_handle(Ap),
498 inner_prod_buffer, chunk_size, chunk_offset,
507 template<
typename NumericT>
520 cl_uint chunk_size = cl_uint(buffer_chunk_size);
521 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
525 unsigned int thread_num = 256;
526 unsigned int group_num = 128;
538 A.
handle().opencl_handle(),
545 viennacl::traits::opencl_handle(p),
546 viennacl::traits::opencl_handle(Ap),
549 inner_prod_buffer, chunk_size, chunk_offset,
566 template <
typename T>
584 cl_uint size_vk = cl_uint(v_k.
size());
586 cl_uint R_offset = cl_uint(offset_in_R);
587 cl_uint chunk_size = cl_uint(buffer_chunk_size);
588 cl_uint chunk_offset = cl_uint(buffer_chunk_offset);
592 inner_prod_buffer, chunk_size,
593 r_dot_vk_buffer, chunk_offset,
599 template <
typename T>
615 cl_uint size_vk = cl_uint(v_k_size);
616 cl_uint internal_size_vk = cl_uint(v_k_internal_size);
617 cl_uint ocl_k = cl_uint(param_k);
618 cl_uint chunk_size = cl_uint(buffer_chunk_size);
620 vi_in_vk_buffer, chunk_size
624 template <
typename T>
643 cl_uint size_vk = cl_uint(v_k_size);
644 cl_uint internal_size_vk = cl_uint(v_k_internal_size);
645 cl_uint ocl_k = cl_uint(param_k);
646 cl_uint chunk_size = cl_uint(buffer_chunk_size);
647 cl_uint ocl_krylov_dim = cl_uint(krylov_dim);
649 vi_in_vk_buffer, chunk_size,
650 R_buffer, ocl_krylov_dim,
656 template <
typename T>
673 cl_uint size_vk = cl_uint(v_k_size);
674 cl_uint internal_size_vk = cl_uint(v_k_internal_size);
675 cl_uint ocl_k = cl_uint(param_k);
678 krylov_basis, size_vk, internal_size_vk,
684 template <
typename T>
696 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
714 buffer_size_per_vector,
722 template <
typename T>
732 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
737 inner_prod_buffer.
clear();
740 unsigned int thread_num = 128;
753 buffer_size_per_vector,
759 template <
typename T>
769 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
776 unsigned int group_num = 128;
782 A.
handle().opencl_handle(),
786 viennacl::traits::opencl_handle(p), start_p,
787 viennacl::traits::opencl_handle(Ap), start_Ap,
790 buffer_size_per_vector,
797 template <
typename T>
807 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
813 unsigned int thread_num =
static_cast<unsigned int>(A.
rows_per_block());
814 unsigned int group_num = 128;
822 A.
handle().opencl_handle(),
823 viennacl::traits::opencl_handle(p), start_p,
824 viennacl::traits::opencl_handle(Ap), start_Ap,
827 buffer_size_per_vector,
835 template <
typename T>
845 cl_uint buffer_size_per_vector = cl_uint(inner_prod_buffer.
size()) / cl_uint(3);
852 unsigned int group_num = 128;
859 A.
handle().opencl_handle(),
866 viennacl::traits::opencl_handle(p), start_p,
867 viennacl::traits::opencl_handle(Ap), start_Ap,
870 buffer_size_per_vector,
vcl_size_t internal_ellnnz() const
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
viennacl::ocl::device const & current_device() const
Returns the current device.
Main kernel class for generating specialized OpenCL kernels for fast iterative solvers.
Represents an OpenCL device within ViennaCL.
void pipelined_bicgstab_prod(compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
Generic size and resize functionality for different vector and matrix types.
const handle_type & handle3() const
Represents an OpenCL kernel within ViennaCL.
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
static void init(viennacl::ocl::context &ctx)
const handle_type & handle() const
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
const handle_type & handle12() const
Returns the OpenCL handle to the (row, column) index array.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
vcl_size_t internal_size1() const
void pipelined_gmres_gram_schmidt_stage2(vector_base< T > &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t param_k, vector_base< T > const &vi_in_vk_buffer, vector_base< T > &R_buffer, vcl_size_t krylov_dim, vector_base< T > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
This file provides the forward declarations for the main types used within ViennaCL.
Determines row and column increments for matrices and matrix proxies.
const handle_type & handle4() const
cl_uint vendor_id() const
A unique device vendor identifier. An example of a unique device identifier could be the PCIe ID...
vcl_size_t rows_per_block() const
void pipelined_gmres_normalize_vk(vector_base< T > &v_k, vector_base< T > const &residual, vector_base< T > &R_buffer, vcl_size_t offset_in_R, vector_base< T > const &inner_prod_buffer, vector_base< T > &r_dot_vk_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
Performs a vector normalization needed for an efficient pipelined GMRES algorithm.
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
vcl_size_t internal_size1() const
Common implementations shared by OpenCL-based operations.
const handle_type & handle2() const
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
OpenCL kernel file for specialized iterative solver kernels.
Sparse matrix class using the ELLPACK format for storing the nonzeros.
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Sparse matrix class using the sliced ELLPACK with parameters C, .
Implementation of a smart-pointer-like class for handling OpenCL handles.
void pipelined_cg_vector_update(vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, NumericT beta, vector_base< NumericT > &inner_prod_buffer)
result_of::size_type< T >::type start(T const &obj)
void pipelined_bicgstab_vector_update(vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, NumericT omega, vector_base< NumericT > const &s, vector_base< NumericT > &residual, vector_base< NumericT > const &As, NumericT beta, vector_base< NumericT > const &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
vcl_size_t maxnnz() const
const handle_type & handle3() const
Returns the OpenCL handle to the group start index array.
void pipelined_gmres_gram_schmidt_stage1(vector_base< T > const &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t param_k, vector_base< T > &vi_in_vk_buffer, vcl_size_t buffer_chunk_size)
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
void pipelined_bicgstab_update_s(vector_base< NumericT > &s, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
const handle_type & handle3() const
Returns the OpenCL handle to the row block array.
void clear()
Resets all entries to zero. Does not change the size of the vector.
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Representation of an OpenCL kernel in ViennaCL.
size_type size() const
Returns the length of the vector (cf. std::vector)
vcl_size_t ell_nnz() const
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
void pipelined_cg_prod(compressed_matrix< NumericT > const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > &inner_prod_buffer)
Forward declarations of the implicit_vector_base, vector_base class.
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
const handle_type & handle5() const
void pipelined_gmres_update_result(vector_base< T > &result, vector_base< T > const &residual, vector_base< T > const &krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vector_base< T > const &coefficients, vcl_size_t param_k)
const vcl_size_t & blocks1() const
Returns the internal number of row blocks for an adaptive SpMV.
vcl_size_t internal_maxnnz() const
Implementation of the ViennaCL scalar class.
void pipelined_gmres_prod(compressed_matrix< T > const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
Simple enable-if variant that uses the SFINAE pattern.
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...