ViennaCL - The Vienna Computing Library
1.5.2
|
Holds all CUDA compute kernels used by ViennaCL. More...
Namespaces | |
namespace | detail |
Helper functions for the CUDA linear algebra backend. | |
Data Structures | |
struct | mat_mult_matrix_index |
Helper struct for accessing an element of a row- or column-major matrix. More... | |
Functions | |
template<typename T > | |
__global__ void | matrix_matrix_upper_solve_kernel (const T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, bool row_major_A, bool transpose_A, T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_size1, unsigned int B_size2, unsigned int B_internal_size1, unsigned int B_internal_size2, bool row_major_B, bool transpose_B, bool unit_diagonal) |
template<typename T > | |
__global__ void | matrix_matrix_lower_solve_kernel (const T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, bool row_major_A, bool transpose_A, T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_size1, unsigned int B_size2, unsigned int B_internal_size1, unsigned int B_internal_size2, bool row_major_B, bool transpose_B, bool unit_diagonal) |
template<typename NumericT , typename F1 , typename F2 , typename SOLVERTAG > | |
void | inplace_solve (const matrix_base< NumericT, F1 > &A, matrix_base< NumericT, F2 > &B, SOLVERTAG tag) |
Direct inplace solver for triangular systems with multiple right-hand sides, i.e. A \ B (MATLAB notation) | |
template<typename NumericT , typename F1 , typename F2 , typename SOLVERTAG > | |
void | inplace_solve (const matrix_base< NumericT, F1 > &A, matrix_expression< const matrix_base< NumericT, F2 >, const matrix_base< NumericT, F2 >, op_trans > proxy_B, SOLVERTAG tag) |
Direct inplace solver for triangular systems with multiple transposed right-hand sides, i.e. A \ B^T (MATLAB notation) | |
template<typename NumericT , typename F1 , typename F2 , typename SOLVERTAG > | |
void | inplace_solve (const matrix_expression< const matrix_base< NumericT, F1 >, const matrix_base< NumericT, F1 >, op_trans > &proxy_A, matrix_base< NumericT, F2 > &B, SOLVERTAG tag) |
Direct inplace solver for transposed triangular systems with multiple right-hand sides, i.e. A^T \ B (MATLAB notation) | |
template<typename NumericT , typename F1 , typename F2 , typename SOLVERTAG > | |
void | inplace_solve (const matrix_expression< const matrix_base< NumericT, F1 >, const matrix_base< NumericT, F1 >, op_trans > &proxy_A, matrix_expression< const matrix_base< NumericT, F2 >, const matrix_base< NumericT, F2 >, op_trans > proxy_B, SOLVERTAG tag) |
Direct inplace solver for transposed triangular systems with multiple transposed right-hand sides, i.e. A^T \ B^T (MATLAB notation) | |
template<typename T > | |
__global__ void | triangular_substitute_inplace_row_kernel (T const *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, unsigned int options) |
template<typename T > | |
__global__ void | triangular_substitute_inplace_col_kernel (T const *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, unsigned int options) |
template<typename NumericT , typename F , typename SOLVERTAG > | |
void | inplace_solve (const matrix_base< NumericT, F > &mat, vector_base< NumericT > &vec, SOLVERTAG) |
Direct inplace solver for dense triangular systems (non-transposed version) | |
template<typename NumericT , typename F , typename SOLVERTAG > | |
void | inplace_solve (const matrix_expression< const matrix_base< NumericT, F >, const matrix_base< NumericT, F >, op_trans > &proxy, vector_base< NumericT > &vec, SOLVERTAG) |
Direct inplace solver for dense triangular systems (transposed version) | |
template<typename NumericT , typename F , typename ScalarType1 > | |
void | am (matrix_base< NumericT, F > &mat1, matrix_base< NumericT, F > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) |
template<typename NumericT , typename F , typename ScalarType1 , typename ScalarType2 > | |
void | ambm (matrix_base< NumericT, F > &mat1, matrix_base< NumericT, F > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT, F > const &mat3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
template<typename NumericT , typename F , typename ScalarType1 , typename ScalarType2 > | |
void | ambm_m (matrix_base< NumericT, F > &mat1, matrix_base< NumericT, F > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT, F > const &mat3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
template<typename NumericT , typename F > | |
void | matrix_assign (matrix_base< NumericT, F > &mat, NumericT s, bool clear=false) |
template<typename NumericT , typename F > | |
void | matrix_diagonal_assign (matrix_base< NumericT, F > &mat, NumericT s) |
template<typename NumericT , typename F > | |
void | matrix_diag_from_vector (const vector_base< NumericT > &vec, int k, matrix_base< NumericT, F > &mat) |
template<typename NumericT , typename F > | |
void | matrix_diag_to_vector (const matrix_base< NumericT, F > &mat, int k, vector_base< NumericT > &vec) |
template<typename NumericT , typename F > | |
void | matrix_row (const matrix_base< NumericT, F > &mat, unsigned int i, vector_base< NumericT > &vec) |
template<typename NumericT , typename F > | |
void | matrix_column (const matrix_base< NumericT, F > &mat, unsigned int j, vector_base< NumericT > &vec) |
template<typename T , typename F , typename OP > | |
void | element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_binary< OP > > const &proxy) |
template<typename F , typename OP > | |
void | element_op (matrix_base< float, F > &A, matrix_expression< const matrix_base< float, F >, const matrix_base< float, F >, op_element_binary< OP > > const &proxy) |
template<typename F , typename OP > | |
void | element_op (matrix_base< double, F > &A, matrix_expression< const matrix_base< double, F >, const matrix_base< double, F >, op_element_binary< OP > > const &proxy) |
template<typename T , typename F > | |
void | element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_abs > > const &proxy) |
template<typename T , typename F > | |
void | element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_acos > > const &proxy) |
template<typename T , typename F > | |
void | element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_asin > > const &proxy) |
template<typename T , typename F > | |
void | element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_atan > > const &proxy) |
template<typename T , typename F > | |
void | element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_ceil > > const &proxy) |
template<typename T , typename F > | |
void | element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_cos > > const &proxy) |
template<typename T , typename F > | |
void | element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_cosh > > const &proxy) |
template<typename T , typename F > | |
void | element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_exp > > const &proxy) |
template<typename T , typename F > | |
void | element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_fabs > > const &proxy) |
template<typename T , typename F > | |
void | element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_floor > > const &proxy) |
template<typename T , typename F > | |
void | element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_log > > const &proxy) |
template<typename T , typename F > | |
void | element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_log10 > > const &proxy) |
template<typename T , typename F > | |
void | element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_sin > > const &proxy) |
template<typename T , typename F > | |
void | element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_sinh > > const &proxy) |
template<typename T , typename F > | |
void | element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_sqrt > > const &proxy) |
template<typename T , typename F > | |
void | element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_tan > > const &proxy) |
template<typename T , typename F > | |
void | element_op (matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_tanh > > const &proxy) |
template<typename NumericT , typename F > | |
void | prod_impl (const matrix_base< NumericT, F > &mat, const vector_base< NumericT > &vec, vector_base< NumericT > &result) |
Carries out matrix-vector multiplication. | |
template<typename NumericT , typename F > | |
void | prod_impl (const viennacl::matrix_expression< const matrix_base< NumericT, F >, const matrix_base< NumericT, F >, op_trans > &mat_trans, const vector_base< NumericT > &vec, vector_base< NumericT > &result) |
Carries out matrix-vector multiplication with a transposed matrix. | |
template<typename NumericT , typename F1 , typename F2 , typename F3 , typename ScalarType > | |
void | prod_impl (const matrix_base< NumericT, F1 > &A, const matrix_base< NumericT, F2 > &B, matrix_base< NumericT, F3 > &C, ScalarType alpha, ScalarType beta) |
Carries out matrix-matrix multiplication of two non-transposed matrices, i.e. the product A * B. | |
template<typename NumericT , typename F1 , typename F2 , typename F3 , typename ScalarType > | |
void | prod_impl (const viennacl::matrix_expression< const matrix_base< NumericT, F1 >, const matrix_base< NumericT, F1 >, op_trans > &A, const matrix_base< NumericT, F2 > &B, matrix_base< NumericT, F3 > &C, ScalarType alpha, ScalarType beta) |
Carries out matrix-matrix multiplication with a transposed first factor, i.e. the product A^T * B. | |
template<typename NumericT , typename F1 , typename F2 , typename F3 , typename ScalarType > | |
void | prod_impl (const matrix_base< NumericT, F1 > &A, const viennacl::matrix_expression< const matrix_base< NumericT, F2 >, const matrix_base< NumericT, F2 >, op_trans > &B, matrix_base< NumericT, F3 > &C, ScalarType alpha, ScalarType beta) |
Carries out matrix-matrix multiplication with a transposed second factor, i.e. the product A * B^T. | |
template<typename NumericT , typename F1 , typename F2 , typename F3 , typename ScalarType > | |
void | prod_impl (const viennacl::matrix_expression< const matrix_base< NumericT, F1 >, const matrix_base< NumericT, F1 >, op_trans > &A, const viennacl::matrix_expression< const matrix_base< NumericT, F2 >, const matrix_base< NumericT, F2 >, op_trans > &B, matrix_base< NumericT, F3 > &C, ScalarType alpha, ScalarType beta) |
Carries out matrix-matrix multiplication with both factors transposed, i.e. the product A^T * B^T. | |
template<typename NumericT , typename F , typename S1 > | |
void | scaled_rank_1_update (matrix_base< NumericT, F > &mat1, S1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2) |
The implementation of the operation mat1 += alpha * vec1 * vec2^T, i.e. a scaled rank-1 update. | |
template<typename T > | |
__global__ void | am_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | am_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | ambm_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, T fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename T > | |
__global__ void | ambm_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename T > | |
__global__ void | ambm_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, T fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename T > | |
__global__ void | ambm_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename T > | |
__global__ void | ambm_m_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, T fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename T > | |
__global__ void | ambm_m_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename T > | |
__global__ void | ambm_m_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, T fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename T > | |
__global__ void | ambm_m_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename T > | |
__global__ void | matrix_col_assign_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T alpha) |
template<typename T > | |
__global__ void | matrix_col_diagonal_assign_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T alpha) |
template<typename T > | |
__global__ void | element_op_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type) |
template<typename T > | |
__global__ void | element_op_int_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type) |
template<typename T > | |
__global__ void | matrix_col_element_abs_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_col_element_acos_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_col_element_asin_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_col_element_atan_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_col_element_ceil_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_col_element_cos_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_col_element_cosh_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_col_element_exp_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_col_element_fabs_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_col_element_floor_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_col_element_log_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_col_element_log10_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_col_element_sin_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_col_element_sinh_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_col_element_sqrt_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_col_element_tan_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_col_element_tanh_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | vec_mul_col_kernel (const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, T *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size) |
template<typename T > | |
__global__ void | trans_vec_mul_col_kernel (const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, T *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size) |
template<typename T > | |
__global__ void | scaled_rank1_update_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T val, unsigned int options2, const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *vec2, unsigned int start2, unsigned int inc2, unsigned int size2) |
template<typename T > | |
__global__ void | scaled_rank1_update_col_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *val, unsigned int options2, const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *vec2, unsigned int start2, unsigned int inc2, unsigned int size2) |
template<typename T > | |
__global__ void | matrix_matrix_col_col_col_prod_AA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_col_col_col_prod_AT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_col_col_col_prod_TA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_col_col_col_prod_TT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_row_col_col_prod_AA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_row_col_col_prod_AT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_row_col_col_prod_TA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_row_col_col_prod_TT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_col_col_row_prod_AA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_col_col_row_prod_AT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_col_col_row_prod_TA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_col_col_row_prod_TT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_row_col_row_prod_AA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_row_col_row_prod_AT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_row_col_row_prod_TA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_row_col_row_prod_TT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_col_row_col_prod_AA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_col_row_col_prod_AT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_col_row_col_prod_TA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_col_row_col_prod_TT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_row_row_col_prod_AA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_row_row_col_prod_AT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_row_row_col_prod_TA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_row_row_col_prod_TT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_col_row_row_prod_AA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_col_row_row_prod_AT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_col_row_row_prod_TA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_col_row_row_prod_TT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_row_row_row_prod_AA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_row_row_row_prod_AT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_row_row_row_prod_TA_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | matrix_matrix_row_row_row_prod_TT_kernel (T alpha, const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *B, unsigned int B_row_start, unsigned int B_col_start, unsigned int B_row_inc, unsigned int B_col_inc, unsigned int B_row_size, unsigned int B_col_size, unsigned int B_internal_rows, unsigned int B_internal_cols, T beta, T *C, unsigned int C_row_start, unsigned int C_col_start, unsigned int C_row_inc, unsigned int C_col_inc, unsigned int C_row_size, unsigned int C_col_size, unsigned int C_internal_rows, unsigned int C_internal_cols) |
template<typename T > | |
__global__ void | am_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | am_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | ambm_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, T fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename T > | |
__global__ void | ambm_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename T > | |
__global__ void | ambm_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, T fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename T > | |
__global__ void | ambm_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename T > | |
__global__ void | ambm_m_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, T fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename T > | |
__global__ void | ambm_m_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename T > | |
__global__ void | ambm_m_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, T fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename T > | |
__global__ void | ambm_m_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *fac2, unsigned int options2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *fac3, unsigned int options3, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2) |
template<typename T > | |
__global__ void | matrix_row_assign_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T alpha) |
template<typename T > | |
__global__ void | matrix_row_diagonal_assign_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T alpha) |
template<typename T > | |
__global__ void | element_op_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type) |
template<typename T > | |
__global__ void | element_op_int_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2, const T *C, unsigned int C_start1, unsigned int C_start2, unsigned int C_inc1, unsigned int C_inc2, unsigned int C_internal_size1, unsigned int C_internal_size2, unsigned int op_type) |
template<typename T > | |
__global__ void | matrix_row_element_abs_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_row_element_acos_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_row_element_asin_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_row_element_atan_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_row_element_ceil_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_row_element_cos_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_row_element_cosh_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_row_element_exp_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_row_element_fabs_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_row_element_floor_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_row_element_log_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_row_element_log10_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_row_element_sin_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_row_element_sinh_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_row_element_sqrt_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_row_element_tan_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | matrix_row_element_tanh_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *B, unsigned int B_start1, unsigned int B_start2, unsigned int B_inc1, unsigned int B_inc2, unsigned int B_internal_size1, unsigned int B_internal_size2) |
template<typename T > | |
__global__ void | vec_mul_row_kernel (const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, T *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size) |
template<typename T > | |
__global__ void | trans_vec_mul_row_kernel (const T *A, unsigned int A_row_start, unsigned int A_col_start, unsigned int A_row_inc, unsigned int A_col_inc, unsigned int A_row_size, unsigned int A_col_size, unsigned int A_internal_rows, unsigned int A_internal_cols, const T *v, unsigned int v_start, unsigned int v_inc, unsigned int v_size, T *result, unsigned int result_start, unsigned int result_inc, unsigned int result_size) |
template<typename T > | |
__global__ void | scaled_rank1_update_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, T val, unsigned int options2, const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *vec2, unsigned int start2, unsigned int inc2, unsigned int size2) |
template<typename T > | |
__global__ void | scaled_rank1_update_row_kernel (T *A, unsigned int A_start1, unsigned int A_start2, unsigned int A_inc1, unsigned int A_inc2, unsigned int A_size1, unsigned int A_size2, unsigned int A_internal_size1, unsigned int A_internal_size2, const T *val, unsigned int options2, const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *vec2, unsigned int start2, unsigned int inc2, unsigned int size2) |
template<typename T > | |
__global__ void | as_kernel (T *s1, const T *fac2, unsigned int options2, const T *s2) |
template<typename T > | |
__global__ void | as_kernel (T *s1, T fac2, unsigned int options2, const T *s2) |
template<typename S1 , typename S2 , typename ScalarType1 > | |
viennacl::enable_if < viennacl::is_scalar< S1 > ::value &&viennacl::is_scalar < S2 >::value &&viennacl::is_any_scalar < ScalarType1 >::value >::type | as (S1 &s1, S2 const &s2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) |
template<typename T > | |
__global__ void | asbs_kernel (T *s1, const T *fac2, unsigned int options2, const T *s2, const T *fac3, unsigned int options3, const T *s3) |
template<typename T > | |
__global__ void | asbs_kernel (T *s1, T fac2, unsigned int options2, const T *s2, const T *fac3, unsigned int options3, const T *s3) |
template<typename T > | |
__global__ void | asbs_kernel (T *s1, const T *fac2, unsigned int options2, const T *s2, T fac3, unsigned int options3, const T *s3) |
template<typename T > | |
__global__ void | asbs_kernel (T *s1, T fac2, unsigned int options2, const T *s2, T fac3, unsigned int options3, const T *s3) |
template<typename S1 , typename S2 , typename ScalarType1 , typename S3 , typename ScalarType2 > | |
viennacl::enable_if < viennacl::is_scalar< S1 > ::value &&viennacl::is_scalar < S2 >::value &&viennacl::is_scalar< S3 > ::value &&viennacl::is_any_scalar < ScalarType1 >::value &&viennacl::is_any_scalar < ScalarType2 >::value >::type | asbs (S1 &s1, S2 const &s2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, S3 const &s3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
template<typename T > | |
__global__ void | asbs_s_kernel (T *s1, const T *fac2, unsigned int options2, const T *s2, const T *fac3, unsigned int options3, const T *s3) |
template<typename T > | |
__global__ void | asbs_s_kernel (T *s1, T fac2, unsigned int options2, const T *s2, const T *fac3, unsigned int options3, const T *s3) |
template<typename T > | |
__global__ void | asbs_s_kernel (T *s1, const T *fac2, unsigned int options2, const T *s2, T fac3, unsigned int options3, const T *s3) |
template<typename T > | |
__global__ void | asbs_s_kernel (T *s1, T fac2, unsigned int options2, const T *s2, T fac3, unsigned int options3, const T *s3) |
template<typename S1 , typename S2 , typename ScalarType1 , typename S3 , typename ScalarType2 > | |
viennacl::enable_if < viennacl::is_scalar< S1 > ::value &&viennacl::is_scalar < S2 >::value &&viennacl::is_scalar< S3 > ::value &&viennacl::is_any_scalar < ScalarType1 >::value &&viennacl::is_any_scalar < ScalarType2 >::value >::type | asbs_s (S1 &s1, S2 const &s2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, S3 const &s3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
template<typename T > | |
__global__ void | scalar_swap_kernel (T *s1, T *s2) |
template<typename S1 , typename S2 > | |
viennacl::enable_if < viennacl::is_scalar< S1 > ::value &&viennacl::is_scalar < S2 >::value >::type | swap (S1 &s1, S2 &s2) |
Swaps the contents of two scalars; the data is copied. | |
template<typename T > | |
__global__ void | compressed_matrix_vec_mul_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, const T *x, unsigned int start_x, unsigned int inc_x, T *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result) |
template<class ScalarType , unsigned int ALIGNMENT> | |
void | prod_impl (const viennacl::compressed_matrix< ScalarType, ALIGNMENT > &mat, const viennacl::vector_base< ScalarType > &vec, viennacl::vector_base< ScalarType > &result) |
Carries out matrix-vector multiplication with a compressed_matrix. | |
template<typename DMatIndexT , typename ResultIndexT , typename T > | |
__global__ void | compressed_matrix_d_mat_mul_kernel (const unsigned int *sp_mat_row_indices, const unsigned int *sp_mat_col_indices, const T *sp_mat_elements, const T *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, T *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<typename TYPE , unsigned int ALIGNMENT, typename F1 , typename F2 > | |
void | prod_impl (const viennacl::compressed_matrix< TYPE, ALIGNMENT > &sp_mat, const viennacl::matrix_base< TYPE, F1 > &d_mat, viennacl::matrix_base< TYPE, F2 > &result) |
Carries out sparse_matrix-dense_matrix multiplication, with the first matrix being a compressed_matrix. | |
template<typename DMatIndexT , typename ResultIndexT , typename T > | |
__global__ void | compressed_matrix_d_tr_mat_mul_kernel (const unsigned int *sp_mat_row_indices, const unsigned int *sp_mat_col_indices, const T *sp_mat_elements, const T *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, T *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<typename TYPE , unsigned int ALIGNMENT, typename F1 , typename F2 > | |
void | prod_impl (const viennacl::compressed_matrix< TYPE, ALIGNMENT > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< TYPE, F1 >, const viennacl::matrix_base< TYPE, F1 >, viennacl::op_trans > &d_mat, viennacl::matrix_base< TYPE, F2 > &result) |
Carries out matrix-trans(matrix) multiplication, with the first matrix being a compressed_matrix and the second a transposed dense matrix. | |
template<typename T > | |
__global__ void | compressed_matrix_diagonal_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, T *result, unsigned int size) |
template<typename SparseMatrixType , class ScalarType > | |
viennacl::enable_if < viennacl::is_any_sparse_matrix < SparseMatrixType >::value > ::type | inplace_solve (const SparseMatrixType &mat, viennacl::vector_base< ScalarType > &vec, viennacl::linalg::unit_lower_tag) |
Carries out triangular inplace solves. | |
template<typename SparseMatrixType , class ScalarType > | |
viennacl::enable_if < viennacl::is_any_sparse_matrix < SparseMatrixType >::value > ::type | inplace_solve (const SparseMatrixType &mat, viennacl::vector_base< ScalarType > &vec, viennacl::linalg::lower_tag) |
Carries out triangular inplace solves. | |
template<typename SparseMatrixType , class ScalarType > | |
viennacl::enable_if < viennacl::is_any_sparse_matrix < SparseMatrixType >::value > ::type | inplace_solve (const SparseMatrixType &mat, viennacl::vector_base< ScalarType > &vec, viennacl::linalg::unit_upper_tag) |
Carries out triangular inplace solves. | |
template<typename SparseMatrixType , class ScalarType > | |
viennacl::enable_if < viennacl::is_any_sparse_matrix < SparseMatrixType >::value > ::type | inplace_solve (const SparseMatrixType &mat, viennacl::vector_base< ScalarType > &vec, viennacl::linalg::upper_tag) |
Carries out triangular inplace solves. | |
template<typename SparseMatrixType , class ScalarType > | |
viennacl::enable_if < viennacl::is_any_sparse_matrix < SparseMatrixType >::value > ::type | inplace_solve (const matrix_expression< const SparseMatrixType, const SparseMatrixType, op_trans > &mat, viennacl::vector_base< ScalarType > &vec, viennacl::linalg::unit_lower_tag) |
Carries out triangular inplace solves. | |
template<typename SparseMatrixType , class ScalarType > | |
viennacl::enable_if < viennacl::is_any_sparse_matrix < SparseMatrixType >::value > ::type | inplace_solve (const matrix_expression< const SparseMatrixType, const SparseMatrixType, op_trans > &mat, viennacl::vector_base< ScalarType > &vec, viennacl::linalg::lower_tag) |
Carries out triangular inplace solves. | |
template<typename SparseMatrixType , class ScalarType > | |
viennacl::enable_if < viennacl::is_any_sparse_matrix < SparseMatrixType >::value > ::type | inplace_solve (const matrix_expression< const SparseMatrixType, const SparseMatrixType, op_trans > &mat, viennacl::vector_base< ScalarType > &vec, viennacl::linalg::unit_upper_tag) |
Carries out triangular inplace solves. | |
template<typename SparseMatrixType , class ScalarType > | |
viennacl::enable_if < viennacl::is_any_sparse_matrix < SparseMatrixType >::value > ::type | inplace_solve (const matrix_expression< const SparseMatrixType, const SparseMatrixType, op_trans > &mat, viennacl::vector_base< ScalarType > &vec, viennacl::linalg::upper_tag) |
Carries out triangular inplace solves. | |
template<typename T > | |
__global__ void | compressed_compressed_matrix_vec_mul_kernel (const unsigned int *row_jumper, const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, unsigned int nonzero_rows, const T *x, unsigned int start_x, unsigned int inc_x, T *result, unsigned int start_result, unsigned int inc_result, unsigned int size_result) |
template<class ScalarType > | |
void | prod_impl (const viennacl::compressed_compressed_matrix< ScalarType > &mat, const viennacl::vector_base< ScalarType > &vec, viennacl::vector_base< ScalarType > &result) |
Carries out matrix-vector multiplication with a compressed_compressed_matrix. | |
template<typename T > | |
__global__ void | coordinate_matrix_vec_mul_kernel (const unsigned int *coords, const T *elements, const unsigned int *group_boundaries, const T *x, unsigned int start_x, unsigned int inc_x, T *result, unsigned int start_result, unsigned int inc_result) |
template<class ScalarType , unsigned int ALIGNMENT> | |
void | prod_impl (const viennacl::coordinate_matrix< ScalarType, ALIGNMENT > &mat, const viennacl::vector_base< ScalarType > &vec, viennacl::vector_base< ScalarType > &result) |
Carries out matrix-vector multiplication with a coordinate_matrix. | |
template<typename DMatIndexT , typename ResultIndexT , typename ScalarType , typename NumericT > | |
__global__ void | coordinate_matrix_d_mat_mul_kernel (const unsigned int *coords, const ScalarType *elements, const unsigned int *group_boundaries, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<typename NumericT , unsigned int ALIGNMENT, typename F1 , typename F2 > | |
void | prod_impl (const viennacl::coordinate_matrix< NumericT, ALIGNMENT > &sp_mat, const viennacl::matrix_base< NumericT, F1 > &d_mat, viennacl::matrix_base< NumericT, F2 > &result) |
Carries out Sparse Matrix(COO)-Dense Matrix multiplication. | |
template<typename DMatIndexT , typename ResultIndexT , typename ScalarType , typename NumericT > | |
__global__ void | coordinate_matrix_d_tr_mat_mul_kernel (const unsigned int *coords, const ScalarType *elements, const unsigned int *group_boundaries, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<class ScalarType , unsigned int ALIGNMENT, class NumericT , typename F1 , typename F2 > | |
void | prod_impl (const viennacl::coordinate_matrix< ScalarType, ALIGNMENT > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT, F1 >, const viennacl::matrix_base< NumericT, F1 >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT, F2 > &result) |
Carries out Sparse Matrix(COO)-Dense Transposed Matrix multiplication. | |
template<typename T > | |
__global__ void | ell_matrix_vec_mul_kernel (const unsigned int *coords, const T *elements, const T *x, unsigned int start_x, unsigned int inc_x, T *result, unsigned int start_result, unsigned int inc_result, unsigned int row_num, unsigned int col_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row) |
template<class ScalarType , unsigned int ALIGNMENT> | |
void | prod_impl (const viennacl::ell_matrix< ScalarType, ALIGNMENT > &mat, const viennacl::vector_base< ScalarType > &vec, viennacl::vector_base< ScalarType > &result) |
Carries out matrix-vector multiplication with an ell_matrix. | |
template<typename DMatIndexT , typename ResultIndexT , typename ScalarType , typename NumericT > | |
__global__ void | ell_matrix_d_mat_mul_kernel (const unsigned int *sp_mat_coords, const ScalarType *sp_mat_elements, unsigned int sp_mat_row_num, unsigned int sp_mat_col_num, unsigned int sp_mat_internal_row_num, unsigned int sp_mat_items_per_row, unsigned int sp_mat_aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<class ScalarType , unsigned int ALIGNMENT, class NumericT , typename F1 , typename F2 > | |
void | prod_impl (const viennacl::ell_matrix< ScalarType, ALIGNMENT > &sp_mat, const viennacl::matrix_base< NumericT, F1 > &d_mat, viennacl::matrix_base< NumericT, F2 > &result) |
Carries out Sparse Matrix(ELL)-Dense Matrix multiplication. | |
template<typename DMatIndexT , typename ResultIndexT , typename ScalarType , typename NumericT > | |
__global__ void | ell_matrix_d_tr_mat_mul_kernel (const unsigned int *sp_mat_coords, const ScalarType *sp_mat_elements, unsigned int sp_mat_row_num, unsigned int sp_mat_col_num, unsigned int sp_mat_internal_row_num, unsigned int sp_mat_items_per_row, unsigned int sp_mat_aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<class ScalarType , unsigned int ALIGNMENT, class NumericT , typename F1 , typename F2 > | |
void | prod_impl (const viennacl::ell_matrix< ScalarType, ALIGNMENT > &sp_mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT, F1 >, const viennacl::matrix_base< NumericT, F1 >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT, F2 > &result) |
Carries out Sparse Matrix(ELL)-Dense Transposed Matrix multiplication. | |
template<typename T > | |
__global__ void | hyb_matrix_vec_mul_kernel (const unsigned int *ell_coords, const T *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const T *csr_elements, const T *x, unsigned int start_x, unsigned int inc_x, T *result, unsigned int start_result, unsigned int inc_result, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row) |
template<class ScalarType , unsigned int ALIGNMENT> | |
void | prod_impl (const viennacl::hyb_matrix< ScalarType, ALIGNMENT > &mat, const viennacl::vector_base< ScalarType > &vec, viennacl::vector_base< ScalarType > &result) |
Carries out matrix-vector multiplication with a hyb_matrix. | |
template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
__global__ void | hyb_matrix_d_mat_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<typename NumericT , unsigned int ALIGNMENT, typename F1 , typename F2 > | |
void | prod_impl (const viennacl::hyb_matrix< NumericT, ALIGNMENT > &mat, const viennacl::matrix_base< NumericT, F1 > &d_mat, viennacl::matrix_base< NumericT, F2 > &result) |
Carries out Sparse Matrix(HYB)-Dense Matrix multiplication. | |
template<typename DMatIndexT , typename ResultIndexT , typename NumericT > | |
__global__ void | hyb_matrix_d_tr_mat_mul_kernel (const unsigned int *ell_coords, const NumericT *ell_elements, const unsigned int *csr_rows, const unsigned int *csr_cols, const NumericT *csr_elements, unsigned int row_num, unsigned int internal_row_num, unsigned int items_per_row, unsigned int aligned_items_per_row, const NumericT *d_mat, unsigned int d_mat_row_start, unsigned int d_mat_col_start, unsigned int d_mat_row_inc, unsigned int d_mat_col_inc, unsigned int d_mat_row_size, unsigned int d_mat_col_size, unsigned int d_mat_internal_rows, unsigned int d_mat_internal_cols, NumericT *result, unsigned int result_row_start, unsigned int result_col_start, unsigned int result_row_inc, unsigned int result_col_inc, unsigned int result_row_size, unsigned int result_col_size, unsigned int result_internal_rows, unsigned int result_internal_cols) |
template<typename NumericT , unsigned int ALIGNMENT, typename F1 , typename F2 > | |
void | prod_impl (const viennacl::hyb_matrix< NumericT, ALIGNMENT > &mat, const viennacl::matrix_expression< const viennacl::matrix_base< NumericT, F1 >, const viennacl::matrix_base< NumericT, F1 >, viennacl::op_trans > &d_mat, viennacl::matrix_base< NumericT, F2 > &result) |
Carries out Sparse Matrix(HYB)-Dense Transposed Matrix multiplication. | |
template<typename T > | |
__global__ void | csr_unit_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, T *vector, unsigned int size) |
template<typename T > | |
__global__ void | csr_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, T *vector, unsigned int size) |
template<typename T > | |
__global__ void | csr_unit_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, T *vector, unsigned int size) |
template<typename T > | |
__global__ void | csr_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, T *vector, unsigned int size) |
template<typename T > | |
__global__ void | csr_trans_lu_forward_kernel2 (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, T *vector, unsigned int size) |
template<typename T > | |
__global__ void | csr_trans_unit_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, T *vector, unsigned int size) |
template<typename T > | |
__global__ void | csr_trans_lu_forward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, const T *diagonal_entries, T *vector, unsigned int size) |
template<typename T > | |
__global__ void | csr_trans_unit_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, T *vector, unsigned int size) |
template<typename T > | |
__global__ void | csr_trans_lu_backward_kernel2 (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, const T *diagonal_entries, T *vector, unsigned int size) |
template<typename T > | |
__global__ void | csr_trans_lu_backward_kernel (const unsigned int *row_indices, const unsigned int *column_indices, const T *elements, const T *diagonal_entries, T *vector, unsigned int size) |
template<typename T > | |
__global__ void | csr_block_trans_unit_lu_forward (const unsigned int *row_jumper_L, const unsigned int *column_indices_L, const T *elements_L, const unsigned int *block_offsets, T *result, unsigned int size) |
template<typename T > | |
__global__ void | csr_block_trans_lu_backward (const unsigned int *row_jumper_U, const unsigned int *column_indices_U, const T *elements_U, const T *diagonal_U, const unsigned int *block_offsets, T *result, unsigned int size) |
template<typename T > | |
__global__ void | av_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
__global__ void | av_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2) |
template<typename T , typename ScalarType1 > | |
void | av (vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha) |
template<typename T > | |
__global__ void | avbv_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, const T *fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3) |
template<typename T > | |
__global__ void | avbv_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, const T *fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3) |
template<typename T > | |
__global__ void | avbv_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, T fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3) |
template<typename T > | |
__global__ void | avbv_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, T fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3) |
template<typename T , typename ScalarType1 , typename ScalarType2 > | |
void | avbv (vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
template<typename T > | |
__global__ void | avbv_v_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, const T *fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3) |
template<typename T > | |
__global__ void | avbv_v_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, const T *fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3) |
template<typename T > | |
__global__ void | avbv_v_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, T fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3) |
template<typename T > | |
__global__ void | avbv_v_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T fac2, unsigned int options2, const T *vec2, unsigned int start2, unsigned int inc2, T fac3, unsigned int options3, const T *vec3, unsigned int start3, unsigned int inc3) |
template<typename T , typename ScalarType1 , typename ScalarType2 > | |
void | avbv_v (vector_base< T > &vec1, vector_base< T > const &vec2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, vector_base< T > const &vec3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta) |
template<typename T > | |
__global__ void | vector_assign_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int internal_size1, T alpha) |
template<typename T , typename S1 > | |
void | vector_assign (vector_base< T > &vec1, const S1 &alpha, bool up_to_internal_size=false) |
Assigns a constant value to a vector (or to a vector range/slice) | |
template<typename T > | |
__global__ void | vector_swap_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | vector_swap (vector_base< T > &vec1, vector_base< T > &vec2) |
Swaps the contents of two vectors; the data is copied. | |
template<typename T > | |
__global__ void | element_op_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2, T const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type) |
template<typename T > | |
__global__ void | element_op_int_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2, T const *vec3, unsigned int start3, unsigned int inc3, unsigned int op_type) |
template<typename T , typename OP > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< OP > > const &proxy) |
Implementation of the element-wise operations v1 = v2 .* v3 and v1 = v2 ./ v3 (using MATLAB syntax) | |
template<typename OP > | |
void | element_op (vector_base< float > &vec1, vector_expression< const vector_base< float >, const vector_base< float >, op_element_binary< OP > > const &proxy) |
template<typename OP > | |
void | element_op (vector_base< double > &vec1, vector_expression< const vector_base< double >, const vector_base< double >, op_element_binary< OP > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_acos_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_acos > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_asin_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_asin > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_atan_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_atan > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_ceil_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_ceil > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_cos_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_cos > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_cosh_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_cosh > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_exp_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_exp > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_fabs_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_fabs > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_abs_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_abs > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_floor_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_floor > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_log_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_log > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_log10_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_log10 > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_sin_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_sin > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_sinh_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_sinh > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_sqrt_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_sqrt > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_tan_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_tan > > const &proxy) |
template<typename T > | |
__global__ void | vec_element_tanh_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T const *vec2, unsigned int start2, unsigned int inc2) |
template<typename T > | |
void | element_op (vector_base< T > &vec1, vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_tanh > > const &proxy) |
template<typename T > | |
__global__ void | inner_prod_kernel (const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, const T *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, T *group_buffer) |
template<typename T > | |
__global__ void | vector_sum_kernel_floats (const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, T *result) |
template<typename T > | |
__global__ void | vector_sum_kernel_integers (const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, T *result) |
template<typename T > | |
__global__ void | vector_sum_kernel_unsigned_integers (const T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int option, T *result) |
template<typename T , typename S3 > | |
void | inner_prod_impl (vector_base< T > const &vec1, vector_base< T > const &vec2, S3 &result) |
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2). | |
template<typename T > | |
void | inner_prod_cpu (vector_base< T > const &vec1, vector_base< T > const &vec2, T &result) |
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2). | |
template<typename NumericT > | |
__global__ void | inner_prod_2_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, NumericT *group_results) |
template<typename NumericT > | |
__global__ void | inner_prod_3_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, NumericT *group_results) |
template<typename NumericT > | |
__global__ void | inner_prod_4_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, NumericT *group_results) |
template<typename NumericT > | |
__global__ void | inner_prod_8_kernel (const NumericT *x, unsigned int startx, unsigned int stridex, unsigned int sizex, const NumericT *y0, unsigned int start0, unsigned int stride0, const NumericT *y1, unsigned int start1, unsigned int stride1, const NumericT *y2, unsigned int start2, unsigned int stride2, const NumericT *y3, unsigned int start3, unsigned int stride3, const NumericT *y4, unsigned int start4, unsigned int stride4, const NumericT *y5, unsigned int start5, unsigned int stride5, const NumericT *y6, unsigned int start6, unsigned int stride6, const NumericT *y7, unsigned int start7, unsigned int stride7, NumericT *group_results) |
template<typename T > | |
__global__ void | vector_multi_sum_kernel (T const *vec1, T *result, unsigned int start_result, unsigned int inc_result) |
template<typename T > | |
void | inner_prod_impl (vector_base< T > const &x, vector_tuple< T > const &vec_tuple, vector_base< T > &result) |
template<typename T > | |
__global__ void | norm_kernel_floats (const T *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, T *group_buffer) |
template<typename T > | |
__global__ void | norm_kernel_integers (const T *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, T *group_buffer) |
template<typename T > | |
__global__ void | norm_kernel_unsigned_integers (const T *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int norm_selector, T *group_buffer) |
template<typename T > | |
void | norm_1_impl (vector_base< T > const &vec1, scalar< T > &result) |
Computes the l^1-norm of a vector. | |
template<typename T > | |
void | norm_1_cpu (vector_base< T > const &vec1, T &result) |
Computes the l^1-norm of a vector. | |
template<typename T > | |
void | norm_2_impl (vector_base< T > const &vec1, scalar< T > &result) |
Computes the l^2-norm of a vector - implementation. | |
template<typename T > | |
void | norm_2_cpu (vector_base< T > const &vec1, T &result) |
Computes the l^2-norm of a vector - implementation. | |
template<typename T > | |
void | norm_inf_impl (vector_base< T > const &vec1, scalar< T > &result) |
Computes the supremum-norm of a vector. | |
template<typename T > | |
void | norm_inf_cpu (vector_base< T > const &vec1, T &result) |
Computes the supremum-norm of a vector. | |
template<typename T > | |
__device__ T | cuda_abs (T val) |
__device__ unsigned long | cuda_abs (unsigned long val) |
__device__ unsigned int | cuda_abs (unsigned int val) |
__device__ unsigned short | cuda_abs (unsigned short val) |
__device__ unsigned char | cuda_abs (unsigned char val) |
template<typename T > | |
__global__ void | index_norm_inf_kernel (const T *vec, unsigned int start1, unsigned int inc1, unsigned int size1, unsigned int *result) |
template<typename T > | |
vcl_size_t | index_norm_inf (vector_base< T > const &vec1) |
Computes the index of the first entry whose modulus equals the supremum norm of the vector. | |
template<typename T > | |
__global__ void | plane_rotation_kernel (T *vec1, unsigned int start1, unsigned int inc1, unsigned int size1, T *vec2, unsigned int start2, unsigned int inc2, unsigned int size2, T alpha, T beta) |
template<typename T > | |
void | plane_rotation (vector_base< T > &vec1, vector_base< T > &vec2, T alpha, T beta) |
Computes a plane rotation of two vectors. |
Holds all CUDA compute kernels used by ViennaCL.
void viennacl::linalg::cuda::am | ( | matrix_base< NumericT, F > & | mat1, |
matrix_base< NumericT, F > const & | mat2, | ||
ScalarType1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha | ||
) |
__global__ void viennacl::linalg::cuda::am_col_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
T | fac2, | ||
unsigned int | options2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::am_col_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | fac2, | ||
unsigned int | options2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::am_row_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
T | fac2, | ||
unsigned int | options2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::am_row_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | fac2, | ||
unsigned int | options2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
void viennacl::linalg::cuda::ambm | ( | matrix_base< NumericT, F > & | mat1, |
matrix_base< NumericT, F > const & | mat2, | ||
ScalarType1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha, | ||
matrix_base< NumericT, F > const & | mat3, | ||
ScalarType2 const & | beta, | ||
vcl_size_t | len_beta, | ||
bool | reciprocal_beta, | ||
bool | flip_sign_beta | ||
) |
__global__ void viennacl::linalg::cuda::ambm_col_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
T | fac2, | ||
unsigned int | options2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
T | fac3, | ||
unsigned int | options3, | ||
const T * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::ambm_col_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
T | fac2, | ||
unsigned int | options2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const T * | fac3, | ||
unsigned int | options3, | ||
const T * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::ambm_col_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | fac2, | ||
unsigned int | options2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
T | fac3, | ||
unsigned int | options3, | ||
const T * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::ambm_col_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | fac2, | ||
unsigned int | options2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const T * | fac3, | ||
unsigned int | options3, | ||
const T * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
void viennacl::linalg::cuda::ambm_m | ( | matrix_base< NumericT, F > & | mat1, |
matrix_base< NumericT, F > const & | mat2, | ||
ScalarType1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha, | ||
matrix_base< NumericT, F > const & | mat3, | ||
ScalarType2 const & | beta, | ||
vcl_size_t | len_beta, | ||
bool | reciprocal_beta, | ||
bool | flip_sign_beta | ||
) |
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
T | fac2, | ||
unsigned int | options2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
T | fac3, | ||
unsigned int | options3, | ||
const T * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
T | fac2, | ||
unsigned int | options2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const T * | fac3, | ||
unsigned int | options3, | ||
const T * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | fac2, | ||
unsigned int | options2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
T | fac3, | ||
unsigned int | options3, | ||
const T * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::ambm_m_col_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | fac2, | ||
unsigned int | options2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const T * | fac3, | ||
unsigned int | options3, | ||
const T * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
T | fac2, | ||
unsigned int | options2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
T | fac3, | ||
unsigned int | options3, | ||
const T * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
T | fac2, | ||
unsigned int | options2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const T * | fac3, | ||
unsigned int | options3, | ||
const T * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | fac2, | ||
unsigned int | options2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
T | fac3, | ||
unsigned int | options3, | ||
const T * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::ambm_m_row_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | fac2, | ||
unsigned int | options2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const T * | fac3, | ||
unsigned int | options3, | ||
const T * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::ambm_row_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
T | fac2, | ||
unsigned int | options2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
T | fac3, | ||
unsigned int | options3, | ||
const T * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::ambm_row_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
T | fac2, | ||
unsigned int | options2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const T * | fac3, | ||
unsigned int | options3, | ||
const T * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::ambm_row_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | fac2, | ||
unsigned int | options2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
T | fac3, | ||
unsigned int | options3, | ||
const T * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::ambm_row_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | fac2, | ||
unsigned int | options2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const T * | fac3, | ||
unsigned int | options3, | ||
const T * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2 | ||
) |
viennacl::enable_if< viennacl::is_scalar<S1>::value && viennacl::is_scalar<S2>::value && viennacl::is_any_scalar<ScalarType1>::value >::type viennacl::linalg::cuda::as | ( | S1 & | s1, |
S2 const & | s2, | ||
ScalarType1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha | ||
) |
__global__ void viennacl::linalg::cuda::as_kernel | ( | T * | s1, |
const T * | fac2, | ||
unsigned int | options2, | ||
const T * | s2 | ||
) |
__global__ void viennacl::linalg::cuda::as_kernel | ( | T * | s1, |
T | fac2, | ||
unsigned int | options2, | ||
const T * | s2 | ||
) |
viennacl::enable_if< viennacl::is_scalar<S1>::value && viennacl::is_scalar<S2>::value && viennacl::is_scalar<S3>::value && viennacl::is_any_scalar<ScalarType1>::value && viennacl::is_any_scalar<ScalarType2>::value >::type viennacl::linalg::cuda::asbs | ( | S1 & | s1, |
S2 const & | s2, | ||
ScalarType1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha, | ||
S3 const & | s3, | ||
ScalarType2 const & | beta, | ||
vcl_size_t | len_beta, | ||
bool | reciprocal_beta, | ||
bool | flip_sign_beta | ||
) |
__global__ void viennacl::linalg::cuda::asbs_kernel | ( | T * | s1, |
const T * | fac2, | ||
unsigned int | options2, | ||
const T * | s2, | ||
const T * | fac3, | ||
unsigned int | options3, | ||
const T * | s3 | ||
) |
__global__ void viennacl::linalg::cuda::asbs_kernel | ( | T * | s1, |
T | fac2, | ||
unsigned int | options2, | ||
const T * | s2, | ||
const T * | fac3, | ||
unsigned int | options3, | ||
const T * | s3 | ||
) |
__global__ void viennacl::linalg::cuda::asbs_kernel | ( | T * | s1, |
const T * | fac2, | ||
unsigned int | options2, | ||
const T * | s2, | ||
T | fac3, | ||
unsigned int | options3, | ||
const T * | s3 | ||
) |
__global__ void viennacl::linalg::cuda::asbs_kernel | ( | T * | s1, |
T | fac2, | ||
unsigned int | options2, | ||
const T * | s2, | ||
T | fac3, | ||
unsigned int | options3, | ||
const T * | s3 | ||
) |
viennacl::enable_if< viennacl::is_scalar<S1>::value && viennacl::is_scalar<S2>::value && viennacl::is_scalar<S3>::value && viennacl::is_any_scalar<ScalarType1>::value && viennacl::is_any_scalar<ScalarType2>::value >::type viennacl::linalg::cuda::asbs_s | ( | S1 & | s1, |
S2 const & | s2, | ||
ScalarType1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha, | ||
S3 const & | s3, | ||
ScalarType2 const & | beta, | ||
vcl_size_t | len_beta, | ||
bool | reciprocal_beta, | ||
bool | flip_sign_beta | ||
) |
__global__ void viennacl::linalg::cuda::asbs_s_kernel | ( | T * | s1, |
const T * | fac2, | ||
unsigned int | options2, | ||
const T * | s2, | ||
const T * | fac3, | ||
unsigned int | options3, | ||
const T * | s3 | ||
) |
__global__ void viennacl::linalg::cuda::asbs_s_kernel | ( | T * | s1, |
T | fac2, | ||
unsigned int | options2, | ||
const T * | s2, | ||
const T * | fac3, | ||
unsigned int | options3, | ||
const T * | s3 | ||
) |
__global__ void viennacl::linalg::cuda::asbs_s_kernel | ( | T * | s1, |
const T * | fac2, | ||
unsigned int | options2, | ||
const T * | s2, | ||
T | fac3, | ||
unsigned int | options3, | ||
const T * | s3 | ||
) |
__global__ void viennacl::linalg::cuda::asbs_s_kernel | ( | T * | s1, |
T | fac2, | ||
unsigned int | options2, | ||
const T * | s2, | ||
T | fac3, | ||
unsigned int | options3, | ||
const T * | s3 | ||
) |
void viennacl::linalg::cuda::av | ( | vector_base< T > & | vec1, |
vector_base< T > const & | vec2, | ||
ScalarType1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha | ||
) |
__global__ void viennacl::linalg::cuda::av_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const T * | fac2, | ||
unsigned int | options2, | ||
const T * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
__global__ void viennacl::linalg::cuda::av_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T | fac2, | ||
unsigned int | options2, | ||
const T * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
void viennacl::linalg::cuda::avbv | ( | vector_base< T > & | vec1, |
vector_base< T > const & | vec2, | ||
ScalarType1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha, | ||
vector_base< T > const & | vec3, | ||
ScalarType2 const & | beta, | ||
vcl_size_t | len_beta, | ||
bool | reciprocal_beta, | ||
bool | flip_sign_beta | ||
) |
__global__ void viennacl::linalg::cuda::avbv_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const T * | fac2, | ||
unsigned int | options2, | ||
const T * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
const T * | fac3, | ||
unsigned int | options3, | ||
const T * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
__global__ void viennacl::linalg::cuda::avbv_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T | fac2, | ||
unsigned int | options2, | ||
const T * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
const T * | fac3, | ||
unsigned int | options3, | ||
const T * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
__global__ void viennacl::linalg::cuda::avbv_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const T * | fac2, | ||
unsigned int | options2, | ||
const T * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
T | fac3, | ||
unsigned int | options3, | ||
const T * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
__global__ void viennacl::linalg::cuda::avbv_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T | fac2, | ||
unsigned int | options2, | ||
const T * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
T | fac3, | ||
unsigned int | options3, | ||
const T * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
void viennacl::linalg::cuda::avbv_v | ( | vector_base< T > & | vec1, |
vector_base< T > const & | vec2, | ||
ScalarType1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha, | ||
vector_base< T > const & | vec3, | ||
ScalarType2 const & | beta, | ||
vcl_size_t | len_beta, | ||
bool | reciprocal_beta, | ||
bool | flip_sign_beta | ||
) |
__global__ void viennacl::linalg::cuda::avbv_v_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const T * | fac2, | ||
unsigned int | options2, | ||
const T * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
const T * | fac3, | ||
unsigned int | options3, | ||
const T * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
__global__ void viennacl::linalg::cuda::avbv_v_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T | fac2, | ||
unsigned int | options2, | ||
const T * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
const T * | fac3, | ||
unsigned int | options3, | ||
const T * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
__global__ void viennacl::linalg::cuda::avbv_v_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const T * | fac2, | ||
unsigned int | options2, | ||
const T * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
T | fac3, | ||
unsigned int | options3, | ||
const T * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
__global__ void viennacl::linalg::cuda::avbv_v_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T | fac2, | ||
unsigned int | options2, | ||
const T * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
T | fac3, | ||
unsigned int | options3, | ||
const T * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3 | ||
) |
__global__ void viennacl::linalg::cuda::compressed_compressed_matrix_vec_mul_kernel | ( | const unsigned int * | row_jumper, |
const unsigned int * | row_indices, | ||
const unsigned int * | column_indices, | ||
const T * | elements, | ||
unsigned int | nonzero_rows, | ||
const T * | x, | ||
unsigned int | start_x, | ||
unsigned int | inc_x, | ||
T * | result, | ||
unsigned int | start_result, | ||
unsigned int | inc_result, | ||
unsigned int | size_result | ||
) |
__global__ void viennacl::linalg::cuda::compressed_matrix_d_mat_mul_kernel | ( | const unsigned int * | sp_mat_row_indices, |
const unsigned int * | sp_mat_col_indices, | ||
const T * | sp_mat_elements, | ||
const T * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
T * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::compressed_matrix_d_tr_mat_mul_kernel | ( | const unsigned int * | sp_mat_row_indices, |
const unsigned int * | sp_mat_col_indices, | ||
const T * | sp_mat_elements, | ||
const T * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
T * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::compressed_matrix_diagonal_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const T * | elements, | ||
T * | result, | ||
unsigned int | size | ||
) |
__global__ void viennacl::linalg::cuda::compressed_matrix_vec_mul_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const T * | elements, | ||
const T * | x, | ||
unsigned int | start_x, | ||
unsigned int | inc_x, | ||
T * | result, | ||
unsigned int | start_result, | ||
unsigned int | inc_result, | ||
unsigned int | size_result | ||
) |
__global__ void viennacl::linalg::cuda::coordinate_matrix_d_mat_mul_kernel | ( | const unsigned int * | coords, |
const ScalarType * | elements, | ||
const unsigned int * | group_boundaries, | ||
const NumericT * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
NumericT * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::coordinate_matrix_d_tr_mat_mul_kernel | ( | const unsigned int * | coords, |
const ScalarType * | elements, | ||
const unsigned int * | group_boundaries, | ||
const NumericT * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
NumericT * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::coordinate_matrix_vec_mul_kernel | ( | const unsigned int * | coords, |
const T * | elements, | ||
const unsigned int * | group_boundaries, | ||
const T * | x, | ||
unsigned int | start_x, | ||
unsigned int | inc_x, | ||
T * | result, | ||
unsigned int | start_result, | ||
unsigned int | inc_result | ||
) |
__global__ void viennacl::linalg::cuda::csr_block_trans_lu_backward | ( | const unsigned int * | row_jumper_U, |
const unsigned int * | column_indices_U, | ||
const T * | elements_U, | ||
const T * | diagonal_U, | ||
const unsigned int * | block_offsets, | ||
T * | result, | ||
unsigned int | size | ||
) |
__global__ void viennacl::linalg::cuda::csr_block_trans_unit_lu_forward | ( | const unsigned int * | row_jumper_L, |
const unsigned int * | column_indices_L, | ||
const T * | elements_L, | ||
const unsigned int * | block_offsets, | ||
T * | result, | ||
unsigned int | size | ||
) |
__global__ void viennacl::linalg::cuda::csr_lu_backward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const T * | elements, | ||
T * | vector, | ||
unsigned int | size | ||
) |
__global__ void viennacl::linalg::cuda::csr_lu_forward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const T * | elements, | ||
T * | vector, | ||
unsigned int | size | ||
) |
__global__ void viennacl::linalg::cuda::csr_trans_lu_backward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const T * | elements, | ||
const T * | diagonal_entries, | ||
T * | vector, | ||
unsigned int | size | ||
) |
__global__ void viennacl::linalg::cuda::csr_trans_lu_backward_kernel2 | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const T * | elements, | ||
const T * | diagonal_entries, | ||
T * | vector, | ||
unsigned int | size | ||
) |
__global__ void viennacl::linalg::cuda::csr_trans_lu_forward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const T * | elements, | ||
const T * | diagonal_entries, | ||
T * | vector, | ||
unsigned int | size | ||
) |
__global__ void viennacl::linalg::cuda::csr_trans_lu_forward_kernel2 | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const T * | elements, | ||
T * | vector, | ||
unsigned int | size | ||
) |
__global__ void viennacl::linalg::cuda::csr_trans_unit_lu_backward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const T * | elements, | ||
T * | vector, | ||
unsigned int | size | ||
) |
__global__ void viennacl::linalg::cuda::csr_trans_unit_lu_forward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const T * | elements, | ||
T * | vector, | ||
unsigned int | size | ||
) |
__global__ void viennacl::linalg::cuda::csr_unit_lu_backward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const T * | elements, | ||
T * | vector, | ||
unsigned int | size | ||
) |
__global__ void viennacl::linalg::cuda::csr_unit_lu_forward_kernel | ( | const unsigned int * | row_indices, |
const unsigned int * | column_indices, | ||
const T * | elements, | ||
T * | vector, | ||
unsigned int | size | ||
) |
__device__ T viennacl::linalg::cuda::cuda_abs | ( | T | val | ) |
__device__ unsigned long viennacl::linalg::cuda::cuda_abs | ( | unsigned long | val | ) | [inline] |
__device__ unsigned int viennacl::linalg::cuda::cuda_abs | ( | unsigned int | val | ) | [inline] |
__device__ unsigned short viennacl::linalg::cuda::cuda_abs | ( | unsigned short | val | ) | [inline] |
__device__ unsigned char viennacl::linalg::cuda::cuda_abs | ( | unsigned char | val | ) | [inline] |
void viennacl::linalg::cuda::element_op | ( | matrix_base< T, F > & | A, |
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_binary< OP > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | matrix_base< float, F > & | A, |
matrix_expression< const matrix_base< float, F >, const matrix_base< float, F >, op_element_binary< OP > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | matrix_base< double, F > & | A, |
matrix_expression< const matrix_base< double, F >, const matrix_base< double, F >, op_element_binary< OP > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | matrix_base< T, F > & | A, |
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_abs > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | matrix_base< T, F > & | A, |
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_acos > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | matrix_base< T, F > & | A, |
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_asin > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | matrix_base< T, F > & | A, |
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_atan > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | matrix_base< T, F > & | A, |
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_ceil > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | matrix_base< T, F > & | A, |
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_cos > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | matrix_base< T, F > & | A, |
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_cosh > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | vector_base< T > & | vec1, |
vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< OP > > const & | proxy | ||
) |
Implementation of the element-wise operations v1 = v2 .* v3 and v1 = v2 ./ v3 (using MATLAB syntax).
vec1 | The result vector (or vector range, or vector slice) |
proxy | The proxy object holding v2, v3 and the operation |
void viennacl::linalg::cuda::element_op | ( | matrix_base< T, F > & | A, |
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_exp > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | vector_base< float > & | vec1, |
vector_expression< const vector_base< float >, const vector_base< float >, op_element_binary< OP > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | matrix_base< T, F > & | A, |
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_fabs > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | vector_base< double > & | vec1, |
vector_expression< const vector_base< double >, const vector_base< double >, op_element_binary< OP > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | matrix_base< T, F > & | A, |
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_floor > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | vector_base< T > & | vec1, |
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_acos > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | vector_base< T > & | vec1, |
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_asin > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | matrix_base< T, F > & | A, |
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_log > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | vector_base< T > & | vec1, |
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_atan > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | matrix_base< T, F > & | A, |
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_log10 > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | vector_base< T > & | vec1, |
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_ceil > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | vector_base< T > & | vec1, |
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_cos > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | matrix_base< T, F > & | A, |
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_sin > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | vector_base< T > & | vec1, |
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_cosh > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | matrix_base< T, F > & | A, |
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_sinh > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | vector_base< T > & | vec1, |
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_exp > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | vector_base< T > & | vec1, |
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_fabs > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | matrix_base< T, F > & | A, |
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_sqrt > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | vector_base< T > & | vec1, |
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_abs > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | matrix_base< T, F > & | A, |
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_tan > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | vector_base< T > & | vec1, |
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_floor > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | vector_base< T > & | vec1, |
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_log > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | matrix_base< T, F > & | A, |
matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, op_element_unary< op_tanh > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | vector_base< T > & | vec1, |
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_log10 > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | vector_base< T > & | vec1, |
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_sin > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | vector_base< T > & | vec1, |
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_sinh > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | vector_base< T > & | vec1, |
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_sqrt > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | vector_base< T > & | vec1, |
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_tan > > const & | proxy | ||
) |
void viennacl::linalg::cuda::element_op | ( | vector_base< T > & | vec1, |
vector_expression< const vector_base< T >, const vector_base< T >, op_element_unary< op_tanh > > const & | proxy | ||
) |
__global__ void viennacl::linalg::cuda::element_op_col_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const T * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2, | ||
unsigned int | op_type | ||
) |
__global__ void viennacl::linalg::cuda::element_op_int_col_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const T * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2, | ||
unsigned int | op_type | ||
) |
__global__ void viennacl::linalg::cuda::element_op_int_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
T const * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3, | ||
unsigned int | op_type | ||
) |
__global__ void viennacl::linalg::cuda::element_op_int_row_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const T * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2, | ||
unsigned int | op_type | ||
) |
__global__ void viennacl::linalg::cuda::element_op_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
T const * | vec3, | ||
unsigned int | start3, | ||
unsigned int | inc3, | ||
unsigned int | op_type | ||
) |
__global__ void viennacl::linalg::cuda::element_op_row_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
const T * | C, | ||
unsigned int | C_start1, | ||
unsigned int | C_start2, | ||
unsigned int | C_inc1, | ||
unsigned int | C_inc2, | ||
unsigned int | C_internal_size1, | ||
unsigned int | C_internal_size2, | ||
unsigned int | op_type | ||
) |
__global__ void viennacl::linalg::cuda::ell_matrix_d_mat_mul_kernel | ( | const unsigned int * | sp_mat_coords, |
const ScalarType * | sp_mat_elements, | ||
unsigned int | sp_mat_row_num, | ||
unsigned int | sp_mat_col_num, | ||
unsigned int | sp_mat_internal_row_num, | ||
unsigned int | sp_mat_items_per_row, | ||
unsigned int | sp_mat_aligned_items_per_row, | ||
const NumericT * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
NumericT * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::ell_matrix_d_tr_mat_mul_kernel | ( | const unsigned int * | sp_mat_coords, |
const ScalarType * | sp_mat_elements, | ||
unsigned int | sp_mat_row_num, | ||
unsigned int | sp_mat_col_num, | ||
unsigned int | sp_mat_internal_row_num, | ||
unsigned int | sp_mat_items_per_row, | ||
unsigned int | sp_mat_aligned_items_per_row, | ||
const NumericT * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
NumericT * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::ell_matrix_vec_mul_kernel | ( | const unsigned int * | coords, |
const T * | elements, | ||
const T * | x, | ||
unsigned int | start_x, | ||
unsigned int | inc_x, | ||
T * | result, | ||
unsigned int | start_result, | ||
unsigned int | inc_result, | ||
unsigned int | row_num, | ||
unsigned int | col_num, | ||
unsigned int | internal_row_num, | ||
unsigned int | items_per_row, | ||
unsigned int | aligned_items_per_row | ||
) |
__global__ void viennacl::linalg::cuda::hyb_matrix_d_mat_mul_kernel | ( | const unsigned int * | ell_coords, |
const NumericT * | ell_elements, | ||
const unsigned int * | csr_rows, | ||
const unsigned int * | csr_cols, | ||
const NumericT * | csr_elements, | ||
unsigned int | row_num, | ||
unsigned int | internal_row_num, | ||
unsigned int | items_per_row, | ||
unsigned int | aligned_items_per_row, | ||
const NumericT * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
NumericT * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::hyb_matrix_d_tr_mat_mul_kernel | ( | const unsigned int * | ell_coords, |
const NumericT * | ell_elements, | ||
const unsigned int * | csr_rows, | ||
const unsigned int * | csr_cols, | ||
const NumericT * | csr_elements, | ||
unsigned int | row_num, | ||
unsigned int | internal_row_num, | ||
unsigned int | items_per_row, | ||
unsigned int | aligned_items_per_row, | ||
const NumericT * | d_mat, | ||
unsigned int | d_mat_row_start, | ||
unsigned int | d_mat_col_start, | ||
unsigned int | d_mat_row_inc, | ||
unsigned int | d_mat_col_inc, | ||
unsigned int | d_mat_row_size, | ||
unsigned int | d_mat_col_size, | ||
unsigned int | d_mat_internal_rows, | ||
unsigned int | d_mat_internal_cols, | ||
NumericT * | result, | ||
unsigned int | result_row_start, | ||
unsigned int | result_col_start, | ||
unsigned int | result_row_inc, | ||
unsigned int | result_col_inc, | ||
unsigned int | result_row_size, | ||
unsigned int | result_col_size, | ||
unsigned int | result_internal_rows, | ||
unsigned int | result_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::hyb_matrix_vec_mul_kernel | ( | const unsigned int * | ell_coords, |
const T * | ell_elements, | ||
const unsigned int * | csr_rows, | ||
const unsigned int * | csr_cols, | ||
const T * | csr_elements, | ||
const T * | x, | ||
unsigned int | start_x, | ||
unsigned int | inc_x, | ||
T * | result, | ||
unsigned int | start_result, | ||
unsigned int | inc_result, | ||
unsigned int | row_num, | ||
unsigned int | internal_row_num, | ||
unsigned int | items_per_row, | ||
unsigned int | aligned_items_per_row | ||
) |
vcl_size_t viennacl::linalg::cuda::index_norm_inf | ( | vector_base< T > const & | vec1 | ) |
Computes the index of the first entry that is equal to the supremum-norm in modulus.
vec1 | The vector |
__global__ void viennacl::linalg::cuda::index_norm_inf_kernel | ( | const T * | vec, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int * | result | ||
) |
__global__ void viennacl::linalg::cuda::inner_prod_2_kernel | ( | const NumericT * | x, |
unsigned int | startx, | ||
unsigned int | stridex, | ||
unsigned int | sizex, | ||
const NumericT * | y0, | ||
unsigned int | start0, | ||
unsigned int | stride0, | ||
const NumericT * | y1, | ||
unsigned int | start1, | ||
unsigned int | stride1, | ||
NumericT * | group_results | ||
) |
__global__ void viennacl::linalg::cuda::inner_prod_3_kernel | ( | const NumericT * | x, |
unsigned int | startx, | ||
unsigned int | stridex, | ||
unsigned int | sizex, | ||
const NumericT * | y0, | ||
unsigned int | start0, | ||
unsigned int | stride0, | ||
const NumericT * | y1, | ||
unsigned int | start1, | ||
unsigned int | stride1, | ||
const NumericT * | y2, | ||
unsigned int | start2, | ||
unsigned int | stride2, | ||
NumericT * | group_results | ||
) |
__global__ void viennacl::linalg::cuda::inner_prod_4_kernel | ( | const NumericT * | x, |
unsigned int | startx, | ||
unsigned int | stridex, | ||
unsigned int | sizex, | ||
const NumericT * | y0, | ||
unsigned int | start0, | ||
unsigned int | stride0, | ||
const NumericT * | y1, | ||
unsigned int | start1, | ||
unsigned int | stride1, | ||
const NumericT * | y2, | ||
unsigned int | start2, | ||
unsigned int | stride2, | ||
const NumericT * | y3, | ||
unsigned int | start3, | ||
unsigned int | stride3, | ||
NumericT * | group_results | ||
) |
__global__ void viennacl::linalg::cuda::inner_prod_8_kernel | ( | const NumericT * | x, |
unsigned int | startx, | ||
unsigned int | stridex, | ||
unsigned int | sizex, | ||
const NumericT * | y0, | ||
unsigned int | start0, | ||
unsigned int | stride0, | ||
const NumericT * | y1, | ||
unsigned int | start1, | ||
unsigned int | stride1, | ||
const NumericT * | y2, | ||
unsigned int | start2, | ||
unsigned int | stride2, | ||
const NumericT * | y3, | ||
unsigned int | start3, | ||
unsigned int | stride3, | ||
const NumericT * | y4, | ||
unsigned int | start4, | ||
unsigned int | stride4, | ||
const NumericT * | y5, | ||
unsigned int | start5, | ||
unsigned int | stride5, | ||
const NumericT * | y6, | ||
unsigned int | start6, | ||
unsigned int | stride6, | ||
const NumericT * | y7, | ||
unsigned int | start7, | ||
unsigned int | stride7, | ||
NumericT * | group_results | ||
) |
void viennacl::linalg::cuda::inner_prod_cpu | ( | vector_base< T > const & | vec1, |
vector_base< T > const & | vec2, | ||
T & | result | ||
) |
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2).
vec1 | The first vector |
vec2 | The second vector |
result | The result scalar (on the host) |
void viennacl::linalg::cuda::inner_prod_impl | ( | vector_base< T > const & | vec1, |
vector_base< T > const & | vec2, | ||
S3 & | result | ||
) |
Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2).
vec1 | The first vector |
vec2 | The second vector |
result | The result scalar (on the gpu) |
void viennacl::linalg::cuda::inner_prod_impl | ( | vector_base< T > const & | x, |
vector_tuple< T > const & | vec_tuple, | ||
vector_base< T > & | result | ||
) |
__global__ void viennacl::linalg::cuda::inner_prod_kernel | ( | const T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const T * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
unsigned int | size2, | ||
T * | group_buffer | ||
) |
void viennacl::linalg::cuda::inplace_solve | ( | const matrix_base< NumericT, F1 > & | A, |
matrix_base< NumericT, F2 > & | B, | ||
SOLVERTAG | tag | ||
) |
Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notation)
A | The system matrix |
B | The matrix of row vectors, to which the solution is directly written |
tag | Solver tag for identifying the respective triangular solver |
void viennacl::linalg::cuda::inplace_solve | ( | const matrix_base< NumericT, F1 > & | A, |
matrix_expression< const matrix_base< NumericT, F2 >, const matrix_base< NumericT, F2 >, op_trans > | proxy_B, | ||
SOLVERTAG | tag | ||
) |
Direct inplace solver for triangular systems with multiple transposed right hand sides, i.e. A \ B^T (MATLAB notation)
A | The system matrix |
proxy_B | The proxy for the transposed matrix of row vectors, to which the solution is directly written |
tag | Solver tag for identifying the respective triangular solver |
void viennacl::linalg::cuda::inplace_solve | ( | const matrix_expression< const matrix_base< NumericT, F1 >, const matrix_base< NumericT, F1 >, op_trans > & | proxy_A, |
matrix_base< NumericT, F2 > & | B, | ||
SOLVERTAG | tag | ||
) |
Direct inplace solver for transposed triangular systems with multiple right hand sides, i.e. A^T \ B (MATLAB notation)
proxy_A | The transposed system matrix proxy |
B | The matrix holding the load vectors, to which the solution is directly written |
tag | Solver tag for identifying the respective triangular solver |
void viennacl::linalg::cuda::inplace_solve | ( | const matrix_expression< const matrix_base< NumericT, F1 >, const matrix_base< NumericT, F1 >, op_trans > & | proxy_A, |
matrix_expression< const matrix_base< NumericT, F2 >, const matrix_base< NumericT, F2 >, op_trans > | proxy_B, | ||
SOLVERTAG | tag | ||
) |
Direct inplace solver for transposed triangular systems with multiple transposed right hand sides, i.e. A^T \ B^T (MATLAB notation)
proxy_A | The transposed system matrix proxy |
proxy_B | The proxy for the transposed matrix holding the load vectors, to which the solution is directly written |
tag | Solver tag for identifying the respective triangular solver |
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixType>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const SparseMatrixType & | mat, |
viennacl::vector_base< ScalarType > & | vec, | ||
viennacl::linalg::unit_lower_tag | |||
) |
Carries out a triangular inplace solve for a sparse matrix. This overload is selected by unit_lower_tag.
mat | The matrix |
vec | The vector holding the right hand side. Is overwritten by the solution. |
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixType>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const SparseMatrixType & | mat, |
viennacl::vector_base< ScalarType > & | vec, | ||
viennacl::linalg::lower_tag | |||
) |
Carries out a triangular inplace solve for a sparse matrix. This overload is selected by lower_tag.
mat | The matrix |
vec | The vector holding the right hand side. Is overwritten by the solution. |
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixType>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const SparseMatrixType & | mat, |
viennacl::vector_base< ScalarType > & | vec, | ||
viennacl::linalg::unit_upper_tag | |||
) |
Carries out a triangular inplace solve for a sparse matrix. This overload is selected by unit_upper_tag.
mat | The matrix |
vec | The vector holding the right hand side. Is overwritten by the solution. |
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixType>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const SparseMatrixType & | mat, |
viennacl::vector_base< ScalarType > & | vec, | ||
viennacl::linalg::upper_tag | |||
) |
Carries out a triangular inplace solve for a sparse matrix. This overload is selected by upper_tag.
mat | The matrix |
vec | The vector holding the right hand side. Is overwritten by the solution. |
void viennacl::linalg::cuda::inplace_solve | ( | const matrix_base< NumericT, F > & | mat, |
vector_base< NumericT > & | vec, | ||
SOLVERTAG | |||
) |
Direct inplace solver for dense triangular systems (non-transposed version)
mat | The system matrix |
vec | The load vector, to which the solution is directly written |
void viennacl::linalg::cuda::inplace_solve | ( | const matrix_expression< const matrix_base< NumericT, F >, const matrix_base< NumericT, F >, op_trans > & | proxy, |
vector_base< NumericT > & | vec, | ||
SOLVERTAG | |||
) |
Direct inplace solver for dense triangular systems (transposed version)
proxy | The system matrix proxy |
vec | The load vector, to which the solution is directly written |
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixType>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const matrix_expression< const SparseMatrixType, const SparseMatrixType, op_trans > & | mat, |
viennacl::vector_base< ScalarType > & | vec, | ||
viennacl::linalg::unit_lower_tag | |||
) |
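Carries out a triangular inplace solve for a transposed sparse matrix. This overload is selected by unit_lower_tag.
mat | The proxy for the transposed sparse matrix |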
vec | The vector holding the right hand side. Is overwritten by the solution. |
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixType>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const matrix_expression< const SparseMatrixType, const SparseMatrixType, op_trans > & | mat, |
viennacl::vector_base< ScalarType > & | vec, | ||
viennacl::linalg::lower_tag | |||
) |
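Carries out a triangular inplace solve for a transposed sparse matrix. This overload is selected by lower_tag.
mat | The proxy for the transposed sparse matrix |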
vec | The vector holding the right hand side. Is overwritten by the solution. |
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixType>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const matrix_expression< const SparseMatrixType, const SparseMatrixType, op_trans > & | mat, |
viennacl::vector_base< ScalarType > & | vec, | ||
viennacl::linalg::unit_upper_tag | |||
) |
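Carries out a triangular inplace solve for a transposed sparse matrix. This overload is selected by unit_upper_tag.
mat | The proxy for the transposed sparse matrix |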
vec | The vector holding the right hand side. Is overwritten by the solution. |
viennacl::enable_if< viennacl::is_any_sparse_matrix<SparseMatrixType>::value>::type viennacl::linalg::cuda::inplace_solve | ( | const matrix_expression< const SparseMatrixType, const SparseMatrixType, op_trans > & | mat, |
viennacl::vector_base< ScalarType > & | vec, | ||
viennacl::linalg::upper_tag | |||
) |
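Carries out a triangular inplace solve for a transposed sparse matrix. This overload is selected by upper_tag.
mat | The proxy for the transposed sparse matrix |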
vec | The vector holding the right hand side. Is overwritten by the solution. |
void viennacl::linalg::cuda::matrix_assign | ( | matrix_base< NumericT, F > & | mat, |
NumericT | s, | ||
bool | clear = false |
||
) |
__global__ void viennacl::linalg::cuda::matrix_col_assign_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
T | alpha | ||
) |
__global__ void viennacl::linalg::cuda::matrix_col_diagonal_assign_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
T | alpha | ||
) |
__global__ void viennacl::linalg::cuda::matrix_col_element_abs_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_col_element_acos_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_col_element_asin_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_col_element_atan_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_col_element_ceil_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_col_element_cos_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_col_element_cosh_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_col_element_exp_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_col_element_fabs_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_col_element_floor_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_col_element_log10_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_col_element_log_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_col_element_sin_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_col_element_sinh_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_col_element_sqrt_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_col_element_tan_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_col_element_tanh_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
void viennacl::linalg::cuda::matrix_column | ( | const matrix_base< NumericT, F > & | mat, |
unsigned int | j, | ||
vector_base< NumericT > & | vec | ||
) |
void viennacl::linalg::cuda::matrix_diag_from_vector | ( | const vector_base< NumericT > & | vec, |
int | k, | ||
matrix_base< NumericT, F > & | mat | ||
) |
void viennacl::linalg::cuda::matrix_diag_to_vector | ( | const matrix_base< NumericT, F > & | mat, |
int | k, | ||
vector_base< NumericT > & | vec | ||
) |
void viennacl::linalg::cuda::matrix_diagonal_assign | ( | matrix_base< NumericT, F > & | mat, |
NumericT | s | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_AA_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_AT_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_TA_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_col_prod_TT_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_AA_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_AT_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_TA_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_col_col_row_prod_TT_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_AA_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_AT_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_TA_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_col_prod_TT_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_AA_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_AT_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_TA_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_col_row_row_prod_TT_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_lower_solve_kernel | ( | const T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
bool | row_major_A, | ||
bool | transpose_A, | ||
T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_size1, | ||
unsigned int | B_size2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
bool | row_major_B, | ||
bool | transpose_B, | ||
bool | unit_diagonal | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_AA_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_AT_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_TA_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_col_prod_TT_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_AA_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_AT_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_TA_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_row_col_row_prod_TT_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_AA_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_AT_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_TA_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_col_prod_TT_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_AA_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_AT_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_TA_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_row_row_row_prod_TT_kernel | ( | T | alpha, |
const T * | A, | ||
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | B, | ||
unsigned int | B_row_start, | ||
unsigned int | B_col_start, | ||
unsigned int | B_row_inc, | ||
unsigned int | B_col_inc, | ||
unsigned int | B_row_size, | ||
unsigned int | B_col_size, | ||
unsigned int | B_internal_rows, | ||
unsigned int | B_internal_cols, | ||
T | beta, | ||
T * | C, | ||
unsigned int | C_row_start, | ||
unsigned int | C_col_start, | ||
unsigned int | C_row_inc, | ||
unsigned int | C_col_inc, | ||
unsigned int | C_row_size, | ||
unsigned int | C_col_size, | ||
unsigned int | C_internal_rows, | ||
unsigned int | C_internal_cols | ||
) |
__global__ void viennacl::linalg::cuda::matrix_matrix_upper_solve_kernel | ( | const T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
bool | row_major_A, | ||
bool | transpose_A, | ||
T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_size1, | ||
unsigned int | B_size2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2, | ||
bool | row_major_B, | ||
bool | transpose_B, | ||
bool | unit_diagonal | ||
) |
void viennacl::linalg::cuda::matrix_row | ( | const matrix_base< NumericT, F > & | mat, |
unsigned int | i, | ||
vector_base< NumericT > & | vec | ||
) |
__global__ void viennacl::linalg::cuda::matrix_row_assign_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
T | alpha | ||
) |
__global__ void viennacl::linalg::cuda::matrix_row_diagonal_assign_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
T | alpha | ||
) |
__global__ void viennacl::linalg::cuda::matrix_row_element_abs_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_row_element_acos_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_row_element_asin_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_row_element_atan_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_row_element_ceil_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_row_element_cos_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_row_element_cosh_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_row_element_exp_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_row_element_fabs_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_row_element_floor_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_row_element_log10_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_row_element_log_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_row_element_sin_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_row_element_sinh_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_row_element_sqrt_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_row_element_tan_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
__global__ void viennacl::linalg::cuda::matrix_row_element_tanh_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | B, | ||
unsigned int | B_start1, | ||
unsigned int | B_start2, | ||
unsigned int | B_inc1, | ||
unsigned int | B_inc2, | ||
unsigned int | B_internal_size1, | ||
unsigned int | B_internal_size2 | ||
) |
void viennacl::linalg::cuda::norm_1_cpu | ( | vector_base< T > const & | vec1, |
T & | result | ||
) |
Computes the l^1-norm of a vector.
vec1 | The vector |
result | The result scalar |
void viennacl::linalg::cuda::norm_1_impl | ( | vector_base< T > const & | vec1, |
scalar< T > & | result | ||
) |
Computes the l^1-norm of a vector.
vec1 | The vector |
result | The result scalar |
void viennacl::linalg::cuda::norm_2_cpu | ( | vector_base< T > const & | vec1, |
T & | result | ||
) |
Computes the l^2-norm of a vector - implementation.
vec1 | The vector |
result | The result scalar |
void viennacl::linalg::cuda::norm_2_impl | ( | vector_base< T > const & | vec1, |
scalar< T > & | result | ||
) |
Computes the l^2-norm of a vector - implementation.
vec1 | The vector |
result | The result scalar |
void viennacl::linalg::cuda::norm_inf_cpu | ( | vector_base< T > const & | vec1, |
T & | result | ||
) |
Computes the supremum-norm of a vector.
vec1 | The vector |
result | The result scalar |
void viennacl::linalg::cuda::norm_inf_impl | ( | vector_base< T > const & | vec1, |
scalar< T > & | result | ||
) |
Computes the supremum-norm of a vector.
vec1 | The vector |
result | The result scalar |
__global__ void viennacl::linalg::cuda::norm_kernel_floats | ( | const T * | vec, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | norm_selector, | ||
T * | group_buffer | ||
) |
__global__ void viennacl::linalg::cuda::norm_kernel_integers | ( | const T * | vec, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | norm_selector, | ||
T * | group_buffer | ||
) |
__global__ void viennacl::linalg::cuda::norm_kernel_unsigned_integers | ( | const T * | vec, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | norm_selector, | ||
T * | group_buffer | ||
) |
void viennacl::linalg::cuda::plane_rotation | ( | vector_base< T > & | vec1, |
vector_base< T > & | vec2, | ||
T | alpha, | ||
T | beta | ||
) |
Computes a plane rotation of two vectors.
Computes (x,y) <- (alpha * x + beta * y, -beta * x + alpha * y)
vec1 | The first vector |
vec2 | The second vector |
alpha | The first transformation coefficient |
beta | The second transformation coefficient |
__global__ void viennacl::linalg::cuda::plane_rotation_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
unsigned int | size2, | ||
T | alpha, | ||
T | beta | ||
) |
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::compressed_matrix< ScalarType, ALIGNMENT > & | mat, |
const viennacl::vector_base< ScalarType > & | vec, | ||
viennacl::vector_base< ScalarType > & | result | ||
) |
Carries out matrix-vector multiplication with a compressed_matrix.
Implementation of the convenience expression result = prod(mat, vec);
mat | The matrix |
vec | The vector |
result | The result vector |
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::compressed_matrix< TYPE, ALIGNMENT > & | sp_mat, |
const viennacl::matrix_base< TYPE, F1 > & | d_mat, | ||
viennacl::matrix_base< TYPE, F2 > & | result | ||
) |
Carries out sparse matrix-dense matrix multiplication, with the first matrix being a compressed_matrix.
Implementation of the convenience expression result = prod(sp_mat, d_mat);
sp_mat | The sparse matrix |
d_mat | The dense matrix |
result | The result matrix |
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::compressed_matrix< TYPE, ALIGNMENT > & | sp_mat, |
const viennacl::matrix_expression< const viennacl::matrix_base< TYPE, F1 >, const viennacl::matrix_base< TYPE, F1 >, viennacl::op_trans > & | d_mat, | ||
viennacl::matrix_base< TYPE, F2 > & | result | ||
) |
Carries out sparse matrix-trans(dense matrix) multiplication, with the first matrix being a compressed_matrix and the second being transposed.
Implementation of the convenience expression result = prod(sp_mat, trans(d_mat));
sp_mat | The sparse matrix |
d_mat | The transposed dense matrix proxy |
result | The result matrix |
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::compressed_compressed_matrix< ScalarType > & | mat, |
const viennacl::vector_base< ScalarType > & | vec, | ||
viennacl::vector_base< ScalarType > & | result | ||
) |
Carries out matrix-vector multiplication with a compressed_compressed_matrix.
Implementation of the convenience expression result = prod(mat, vec);
mat | The matrix |
vec | The vector |
result | The result vector |
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::coordinate_matrix< ScalarType, ALIGNMENT > & | mat, |
const viennacl::vector_base< ScalarType > & | vec, | ||
viennacl::vector_base< ScalarType > & | result | ||
) |
Carries out matrix-vector multiplication with a coordinate_matrix.
Implementation of the convenience expression result = prod(mat, vec);
mat | The matrix |
vec | The vector |
result | The result vector |
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::coordinate_matrix< NumericT, ALIGNMENT > & | sp_mat, |
const viennacl::matrix_base< NumericT, F1 > & | d_mat, | ||
viennacl::matrix_base< NumericT, F2 > & | result | ||
) |
Carries out Sparse Matrix(COO)-Dense Matrix multiplication.
Implementation of the convenience expression result = prod(sp_mat, d_mat);
sp_mat | The Sparse Matrix (Coordinate format) |
d_mat | The Dense Matrix |
result | The Result Matrix |
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::coordinate_matrix< ScalarType, ALIGNMENT > & | sp_mat, |
const viennacl::matrix_expression< const viennacl::matrix_base< NumericT, F1 >, const viennacl::matrix_base< NumericT, F1 >, viennacl::op_trans > & | d_mat, | ||
viennacl::matrix_base< NumericT, F2 > & | result | ||
) |
Carries out Sparse Matrix(COO)-Dense Transposed Matrix multiplication.
Implementation of the convenience expression result = prod(sp_mat, trans(d_mat));
sp_mat | The Sparse Matrix (Coordinate format) |
d_mat | The Dense Transposed Matrix |
result | The Result Matrix |
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::ell_matrix< ScalarType, ALIGNMENT > & | mat, |
const viennacl::vector_base< ScalarType > & | vec, | ||
viennacl::vector_base< ScalarType > & | result | ||
) |
Carries out matrix-vector multiplication with an ell_matrix.
Implementation of the convenience expression result = prod(mat, vec);
mat | The matrix |
vec | The vector |
result | The result vector |
void viennacl::linalg::cuda::prod_impl | ( | const matrix_base< NumericT, F > & | mat, |
const vector_base< NumericT > & | vec, | ||
vector_base< NumericT > & | result | ||
) |
Carries out matrix-vector multiplication.
Implementation of the convenience expression result = prod(mat, vec);
mat | The matrix |
vec | The vector |
result | The result vector |
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::ell_matrix< ScalarType, ALIGNMENT > & | sp_mat, |
const viennacl::matrix_base< NumericT, F1 > & | d_mat, | ||
viennacl::matrix_base< NumericT, F2 > & | result | ||
) |
Carries out Sparse Matrix(ELL)-Dense Matrix multiplication.
Implementation of the convenience expression result = prod(sp_mat, d_mat); sp_mat being in ELL format
sp_mat | The sparse matrix (ELL) |
d_mat | The dense matrix |
result | The result matrix |
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::matrix_expression< const matrix_base< NumericT, F >, const matrix_base< NumericT, F >, op_trans > & | mat_trans, |
const vector_base< NumericT > & | vec, | ||
vector_base< NumericT > & | result | ||
) |
Carries out matrix-vector multiplication with a transposed matrix.
Implementation of the convenience expression result = trans(mat) * vec;
mat_trans | The transposed matrix proxy |
vec | The vector |
result | The result vector |
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::ell_matrix< ScalarType, ALIGNMENT > & | sp_mat, |
const viennacl::matrix_expression< const viennacl::matrix_base< NumericT, F1 >, const viennacl::matrix_base< NumericT, F1 >, viennacl::op_trans > & | d_mat, | ||
viennacl::matrix_base< NumericT, F2 > & | result | ||
) |
Carries out Sparse Matrix(ELL)-Dense Transposed Matrix multiplication.
Implementation of the convenience expression result = prod(sp_mat, trans(d_mat)); sp_mat being in ELL format
sp_mat | The sparse matrix (ELL) |
d_mat | The transposed dense matrix |
result | The result matrix |
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::hyb_matrix< ScalarType, ALIGNMENT > & | mat, |
const viennacl::vector_base< ScalarType > & | vec, | ||
viennacl::vector_base< ScalarType > & | result | ||
) |
Carries out matrix-vector multiplication with a hyb_matrix.
Implementation of the convenience expression result = prod(mat, vec);
mat | The matrix |
vec | The vector |
result | The result vector |
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::hyb_matrix< NumericT, ALIGNMENT > & | mat, |
const viennacl::matrix_base< NumericT, F1 > & | d_mat, | ||
viennacl::matrix_base< NumericT, F2 > & | result | ||
) |
Carries out sparse matrix-dense matrix multiplication with a hyb_matrix.
Implementation of the convenience expression result = prod(mat, d_mat);
mat | The sparse matrix |
d_mat | The dense matrix (row- or column-major) |
result | The dense result matrix (row- or column-major) |
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::hyb_matrix< NumericT, ALIGNMENT > & | mat, |
const viennacl::matrix_expression< const viennacl::matrix_base< NumericT, F1 >, const viennacl::matrix_base< NumericT, F1 >, viennacl::op_trans > & | d_mat, | ||
viennacl::matrix_base< NumericT, F2 > & | result | ||
) |
Carries out sparse matrix-transposed dense matrix multiplication with a hyb_matrix.
Implementation of the convenience expression result = prod(mat, trans(d_mat));
mat | The sparse matrix |
d_mat | Transposed matrix proxy object for the rhs dense matrix (row- or column-major) |
result | The dense result matrix (row- or column-major) |
void viennacl::linalg::cuda::prod_impl | ( | const matrix_base< NumericT, F1 > & | A, |
const matrix_base< NumericT, F2 > & | B, | ||
matrix_base< NumericT, F3 > & | C, | ||
ScalarType | alpha, | ||
ScalarType | beta | ||
) |
Carries out matrix-matrix multiplication.
Implementation of C = prod(A, B);
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::matrix_expression< const matrix_base< NumericT, F1 >, const matrix_base< NumericT, F1 >, op_trans > & | A, |
const matrix_base< NumericT, F2 > & | B, | ||
matrix_base< NumericT, F3 > & | C, | ||
ScalarType | alpha, | ||
ScalarType | beta | ||
) |
Carries out matrix-matrix multiplication.
Implementation of C = prod(trans(A), B);
void viennacl::linalg::cuda::prod_impl | ( | const matrix_base< NumericT, F1 > & | A, |
const viennacl::matrix_expression< const matrix_base< NumericT, F2 >, const matrix_base< NumericT, F2 >, op_trans > & | B, | ||
matrix_base< NumericT, F3 > & | C, | ||
ScalarType | alpha, | ||
ScalarType | beta | ||
) |
Carries out matrix-matrix multiplication.
Implementation of C = prod(A, trans(B));
void viennacl::linalg::cuda::prod_impl | ( | const viennacl::matrix_expression< const matrix_base< NumericT, F1 >, const matrix_base< NumericT, F1 >, op_trans > & | A, |
const viennacl::matrix_expression< const matrix_base< NumericT, F2 >, const matrix_base< NumericT, F2 >, op_trans > & | B, | ||
matrix_base< NumericT, F3 > & | C, | ||
ScalarType | alpha, | ||
ScalarType | beta | ||
) |
Carries out matrix-matrix multiplication.
Implementation of C = prod(trans(A), trans(B));
__global__ void viennacl::linalg::cuda::scalar_swap_kernel | ( | T * | s1, |
T * | s2 | ||
) |
__global__ void viennacl::linalg::cuda::scaled_rank1_update_col_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
T | val, | ||
unsigned int | options2, | ||
const T * | vec1, | ||
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const T * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
unsigned int | size2 | ||
) |
__global__ void viennacl::linalg::cuda::scaled_rank1_update_col_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | val, | ||
unsigned int | options2, | ||
const T * | vec1, | ||
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const T * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
unsigned int | size2 | ||
) |
__global__ void viennacl::linalg::cuda::scaled_rank1_update_row_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
T | val, | ||
unsigned int | options2, | ||
const T * | vec1, | ||
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const T * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
unsigned int | size2 | ||
) |
__global__ void viennacl::linalg::cuda::scaled_rank1_update_row_kernel | ( | T * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
const T * | val, | ||
unsigned int | options2, | ||
const T * | vec1, | ||
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
const T * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2, | ||
unsigned int | size2 | ||
) |
void viennacl::linalg::cuda::scaled_rank_1_update | ( | matrix_base< NumericT, F > & | mat1, |
S1 const & | alpha, | ||
vcl_size_t | len_alpha, | ||
bool | reciprocal_alpha, | ||
bool | flip_sign_alpha, | ||
const vector_base< NumericT > & | vec1, | ||
const vector_base< NumericT > & | vec2 | ||
) |
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update.
Implementation of the convenience expression result += alpha * outer_prod(vec1, vec2);
mat1 | The matrix to be updated |
alpha | The scaling factor (either a viennacl::scalar<>, float, or double) |
len_alpha | Length of the buffer for a possible final reduction step (currently always '1') |
reciprocal_alpha | Use 1/alpha instead of alpha |
flip_sign_alpha | Use -alpha instead of alpha |
vec1 | The first vector |
vec2 | The second vector |
viennacl::enable_if< viennacl::is_scalar<S1>::value && viennacl::is_scalar<S2>::value >::type viennacl::linalg::cuda::swap | ( | S1 & | s1, |
S2 & | s2 | ||
) |
Swaps the contents of two scalars; the data is copied.
s1 | The first scalar |
s2 | The second scalar |
__global__ void viennacl::linalg::cuda::trans_vec_mul_col_kernel | ( | const T * | A, |
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | v, | ||
unsigned int | v_start, | ||
unsigned int | v_inc, | ||
unsigned int | v_size, | ||
T * | result, | ||
unsigned int | result_start, | ||
unsigned int | result_inc, | ||
unsigned int | result_size | ||
) |
__global__ void viennacl::linalg::cuda::trans_vec_mul_row_kernel | ( | const T * | A, |
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | v, | ||
unsigned int | v_start, | ||
unsigned int | v_inc, | ||
unsigned int | v_size, | ||
T * | result, | ||
unsigned int | result_start, | ||
unsigned int | result_inc, | ||
unsigned int | result_size | ||
) |
__global__ void viennacl::linalg::cuda::triangular_substitute_inplace_col_kernel | ( | T const * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
T * | v, | ||
unsigned int | v_start, | ||
unsigned int | v_inc, | ||
unsigned int | v_size, | ||
unsigned int | options | ||
) |
__global__ void viennacl::linalg::cuda::triangular_substitute_inplace_row_kernel | ( | T const * | A, |
unsigned int | A_start1, | ||
unsigned int | A_start2, | ||
unsigned int | A_inc1, | ||
unsigned int | A_inc2, | ||
unsigned int | A_size1, | ||
unsigned int | A_size2, | ||
unsigned int | A_internal_size1, | ||
unsigned int | A_internal_size2, | ||
T * | v, | ||
unsigned int | v_start, | ||
unsigned int | v_inc, | ||
unsigned int | v_size, | ||
unsigned int | options | ||
) |
__global__ void viennacl::linalg::cuda::vec_element_abs_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
__global__ void viennacl::linalg::cuda::vec_element_acos_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
__global__ void viennacl::linalg::cuda::vec_element_asin_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
__global__ void viennacl::linalg::cuda::vec_element_atan_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
__global__ void viennacl::linalg::cuda::vec_element_ceil_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
__global__ void viennacl::linalg::cuda::vec_element_cos_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
__global__ void viennacl::linalg::cuda::vec_element_cosh_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
__global__ void viennacl::linalg::cuda::vec_element_exp_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
__global__ void viennacl::linalg::cuda::vec_element_fabs_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
__global__ void viennacl::linalg::cuda::vec_element_floor_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
__global__ void viennacl::linalg::cuda::vec_element_log10_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
__global__ void viennacl::linalg::cuda::vec_element_log_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
__global__ void viennacl::linalg::cuda::vec_element_sin_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
__global__ void viennacl::linalg::cuda::vec_element_sinh_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
__global__ void viennacl::linalg::cuda::vec_element_sqrt_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
__global__ void viennacl::linalg::cuda::vec_element_tan_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
__global__ void viennacl::linalg::cuda::vec_element_tanh_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T const * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |
__global__ void viennacl::linalg::cuda::vec_mul_col_kernel | ( | const T * | A, |
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | v, | ||
unsigned int | v_start, | ||
unsigned int | v_inc, | ||
unsigned int | v_size, | ||
T * | result, | ||
unsigned int | result_start, | ||
unsigned int | result_inc, | ||
unsigned int | result_size | ||
) |
__global__ void viennacl::linalg::cuda::vec_mul_row_kernel | ( | const T * | A, |
unsigned int | A_row_start, | ||
unsigned int | A_col_start, | ||
unsigned int | A_row_inc, | ||
unsigned int | A_col_inc, | ||
unsigned int | A_row_size, | ||
unsigned int | A_col_size, | ||
unsigned int | A_internal_rows, | ||
unsigned int | A_internal_cols, | ||
const T * | v, | ||
unsigned int | v_start, | ||
unsigned int | v_inc, | ||
unsigned int | v_size, | ||
T * | result, | ||
unsigned int | result_start, | ||
unsigned int | result_inc, | ||
unsigned int | result_size | ||
) |
void viennacl::linalg::cuda::vector_assign | ( | vector_base< T > & | vec1, |
const S1 & | alpha, | ||
bool | up_to_internal_size = false | ||
) |
Assigns a constant value to a vector (or a vector range/slice).
vec1 | The vector to which the value should be assigned |
alpha | The value to be assigned |
up_to_internal_size | Specifies whether alpha should also be written to padded memory (mostly used for clearing the whole buffer). |
__global__ void viennacl::linalg::cuda::vector_assign_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | internal_size1, | ||
T | alpha | ||
) |
__global__ void viennacl::linalg::cuda::vector_multi_sum_kernel | ( | T const * | vec1, |
T * | result, | ||
unsigned int | start_result, | ||
unsigned int | inc_result | ||
) |
__global__ void viennacl::linalg::cuda::vector_sum_kernel_floats | ( | const T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | option, | ||
T * | result | ||
) |
__global__ void viennacl::linalg::cuda::vector_sum_kernel_integers | ( | const T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | option, | ||
T * | result | ||
) |
__global__ void viennacl::linalg::cuda::vector_sum_kernel_unsigned_integers | ( | const T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
unsigned int | option, | ||
T * | result | ||
) |
void viennacl::linalg::cuda::vector_swap | ( | vector_base< T > & | vec1, |
vector_base< T > & | vec2 | ||
) |
Swaps the contents of two vectors; the data is copied.
vec1 | The first vector (or -range, or -slice) |
vec2 | The second vector (or -range, or -slice) |
__global__ void viennacl::linalg::cuda::vector_swap_kernel | ( | T * | vec1, |
unsigned int | start1, | ||
unsigned int | inc1, | ||
unsigned int | size1, | ||
T * | vec2, | ||
unsigned int | start2, | ||
unsigned int | inc2 | ||
) |