29 #define VIENNACL_WITH_UBLAS 1
31 #include <boost/numeric/ublas/triangular.hpp>
32 #include <boost/numeric/ublas/vector.hpp>
33 #include <boost/numeric/ublas/vector_proxy.hpp>
34 #include <boost/numeric/ublas/matrix_sparse.hpp>
35 #include <boost/numeric/ublas/operation_sparse.hpp>
36 #include <boost/numeric/ublas/lu.hpp>
57 #define BENCHMARK_RUNS 10
60 template<
typename ScalarType>
73 boost::numeric::ublas::vector<ScalarType> ublas_vec1;
74 boost::numeric::ublas::vector<ScalarType> ublas_vec2;
76 boost::numeric::ublas::compressed_matrix<ScalarType> ublas_matrix;
79 std::cout <<
"Error reading Matrix file" << std::endl;
83 std::cout <<
"done reading matrix" << std::endl;
85 ublas_vec1 = boost::numeric::ublas::scalar_vector<ScalarType>(ublas_matrix.size1(),
ScalarType(1.0));
86 ublas_vec2 = ublas_vec1;
103 #ifndef VIENNACL_EXPERIMENTAL_DOUBLE_PRECISION_WITH_STREAM_SDK_ON_GPU
117 std::cout <<
"------- Matrix-Vector product on CPU ----------" << std::endl;
122 boost::numeric::ublas::axpy_prod(ublas_matrix, ublas_vec2, ublas_vec1,
true);
124 exec_time = timer.
get();
125 std::cout <<
"CPU time: " << exec_time << std::endl;
126 std::cout <<
"CPU ";
printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) /
static_cast<double>(
BENCHMARK_RUNS));
127 std::cout << ublas_vec1[0] << std::endl;
130 std::cout <<
"------- Matrix-Vector product with compressed_matrix ----------" << std::endl;
145 exec_time = timer.
get();
146 std::cout <<
"GPU time align1: " << exec_time << std::endl;
147 std::cout <<
"GPU align1 ";
printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) /
static_cast<double>(
BENCHMARK_RUNS));
148 std::cout << vcl_vec1[0] << std::endl;
150 std::cout <<
"Testing triangular solves: compressed_matrix" << std::endl;
155 std::cout <<
"ublas..." << std::endl;
158 std::cout <<
"Time elapsed: " << timer.
get() << std::endl;
159 std::cout <<
"ViennaCL..." << std::endl;
164 std::cout <<
"Time elapsed: " << timer.
get() << std::endl;
175 exec_time = timer.
get();
176 std::cout <<
"GPU time align4: " << exec_time << std::endl;
177 std::cout <<
"GPU align4 ";
printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) /
static_cast<double>(
BENCHMARK_RUNS));
178 std::cout << vcl_vec1[0] << std::endl;
187 exec_time = timer.
get();
188 std::cout <<
"GPU time align8: " << exec_time << std::endl;
189 std::cout <<
"GPU align8 ";
printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) /
static_cast<double>(
BENCHMARK_RUNS));
190 std::cout << vcl_vec1[0] << std::endl;
193 std::cout <<
"------- Matrix-Vector product with coordinate_matrix ----------" << std::endl;
199 for (std::size_t i=0; i<ublas_vec1.size(); ++i)
201 if ( fabs(ublas_vec1[i] - ublas_vec2[i]) /
std::max(fabs(ublas_vec1[i]), fabs(ublas_vec2[i])) > 1e-2)
203 std::cout <<
"Error at index " << i <<
": Should: " << ublas_vec1[i] <<
", Is: " << ublas_vec2[i] << std::endl;
217 exec_time = timer.
get();
218 std::cout <<
"GPU time: " << exec_time << std::endl;
219 std::cout <<
"GPU ";
printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) /
static_cast<double>(
BENCHMARK_RUNS));
220 std::cout << vcl_vec1[0] << std::endl;
223 std::cout <<
"------- Matrix-Vector product with ell_matrix ----------" << std::endl;
229 for (std::size_t i=0; i<ublas_vec1.size(); ++i)
231 if ( fabs(ublas_vec1[i] - ublas_vec2[i]) /
std::max(fabs(ublas_vec1[i]), fabs(ublas_vec2[i])) > 1e-2)
233 std::cout <<
"Error at index " << i <<
": Should: " << ublas_vec1[i] <<
", Is: " << ublas_vec2[i] << std::endl;
247 exec_time = timer.
get();
248 std::cout <<
"GPU time: " << exec_time << std::endl;
249 std::cout <<
"GPU ";
printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) /
static_cast<double>(
BENCHMARK_RUNS));
250 std::cout << vcl_vec1[0] << std::endl;
253 std::cout <<
"------- Matrix-Vector product with hyb_matrix ----------" << std::endl;
259 for (std::size_t i=0; i<ublas_vec1.size(); ++i)
261 if ( fabs(ublas_vec1[i] - ublas_vec2[i]) /
std::max(fabs(ublas_vec1[i]), fabs(ublas_vec2[i])) > 1e-2)
263 std::cout <<
"Error at index " << i <<
": Should: " << ublas_vec1[i] <<
", Is: " << ublas_vec2[i] << std::endl;
277 exec_time = timer.
get();
278 std::cout <<
"GPU time: " << exec_time << std::endl;
279 std::cout <<
"GPU ";
printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) /
static_cast<double>(
BENCHMARK_RUNS));
280 std::cout << vcl_vec1[0] << std::endl;
283 std::cout <<
"------- Matrix-Vector product with sliced_ell_matrix ----------" << std::endl;
289 for (std::size_t i=0; i<ublas_vec1.size(); ++i)
291 if ( fabs(ublas_vec1[i] - ublas_vec2[i]) /
std::max(fabs(ublas_vec1[i]), fabs(ublas_vec2[i])) > 1e-2)
293 std::cout <<
"Error at index " << i <<
": Should: " << ublas_vec1[i] <<
", Is: " << ublas_vec2[i] << std::endl;
307 exec_time = timer.
get();
308 std::cout <<
"GPU time: " << exec_time << std::endl;
309 std::cout <<
"GPU ";
printOps(2.0 * static_cast<double>(ublas_matrix.nnz()), static_cast<double>(exec_time) /
static_cast<double>(
BENCHMARK_RUNS));
310 std::cout << vcl_vec1[0] << std::endl;
318 std::cout << std::endl;
319 std::cout <<
"----------------------------------------------" << std::endl;
320 std::cout <<
" Device Info" << std::endl;
321 std::cout <<
"----------------------------------------------" << std::endl;
323 #ifdef VIENNACL_WITH_OPENCL
326 std::cout << std::endl;
327 std::cout <<
"----------------------------------------------" << std::endl;
328 std::cout <<
"----------------------------------------------" << std::endl;
329 std::cout <<
"## Benchmark :: Sparse" << std::endl;
330 std::cout <<
"----------------------------------------------" << std::endl;
331 std::cout << std::endl;
332 std::cout <<
" -------------------------------" << std::endl;
333 std::cout <<
" # benchmarking single-precision" << std::endl;
334 std::cout <<
" -------------------------------" << std::endl;
335 run_benchmark<float>();
336 #ifdef VIENNACL_WITH_OPENCL
340 std::cout << std::endl;
341 std::cout <<
" -------------------------------" << std::endl;
342 std::cout <<
" # benchmarking double-precision" << std::endl;
343 std::cout <<
" -------------------------------" << std::endl;
344 run_benchmark<double>();
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
void inplace_solve(matrix_base< NumericT > const &A, matrix_base< NumericT > &B, SolverTagT tag)
Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notat...
void inplace_solve(const matrix_base< NumericT > &A, matrix_base< NumericT > &B, SolverTagT)
Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notat...
A reader and writer for the matrix market format is implemented here.
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
void trans(matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > const &proxy, matrix_base< NumericT > &temp_trans)
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
T max(const T &lhs, const T &rhs)
Maximum.
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
void printOps(double num_ops, double exec_time)
Implementation of the coordinate_matrix class.
std::string info(vcl_size_t indent=0, char indent_char= ' ') const
Returns an info string with a few properties of the device. Use full_info() to get all details...
Implementation of the hyb_matrix class.
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Sparse matrix class using the ELLPACK format for storing the nonzeros.
Implementations of incomplete factorization preconditioners. Convenience header file.
Sparse matrix class using the sliced ELLPACK with parameters C, $\sigma$.
Implementation of the compressed_matrix class.
Implementation of the sliced_ell_matrix class.
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
Implementation of the ell_matrix class.
void prod(const MatrixT1 &A, bool transposed_A, const MatrixT2 &B, bool transposed_B, MatrixT3 &C, ScalarT alpha, ScalarT beta)
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
A tag class representing a lower triangular matrix with unit diagonal.
A sparse square matrix in compressed sparse rows format.
long read_matrix_market_file(MatrixT &mat, const char *file, long index_base=1)
Reads a sparse matrix from a file (MatrixMarket format)
Implementation of the ViennaCL scalar class.
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...