ViennaCL - The Vienna Computing Library  1.6.2
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
solver.cpp
Go to the documentation of this file.
1 /* =========================================================================
2  Copyright (c) 2010-2014, Institute for Microelectronics,
3  Institute for Analysis and Scientific Computing,
4  TU Wien.
5  Portions of this software are copyright by UChicago Argonne, LLC.
6 
7  -----------------
8  ViennaCL - The Vienna Computing Library
9  -----------------
10 
11  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
12 
13  (A list of authors and contributors can be found in the PDF manual)
14 
15  License: MIT (X11), see file LICENSE in the base directory
16 ============================================================================= */
17 
18 /*
19 *
20 * Benchmark: Iterative solver tests (solver.cpp and solver.cu are identical, the latter being required for compilation using CUDA nvcc)
21 *
22 */
23 
24 
25 #ifndef NDEBUG
26  #define NDEBUG
27 #endif
28 
29 #include <boost/numeric/ublas/matrix_sparse.hpp>
30 #include <boost/numeric/ublas/io.hpp>
31 #include <boost/numeric/ublas/operation_sparse.hpp>
32 
33 #define VIENNACL_WITH_UBLAS 1
34 
35 #include "viennacl/scalar.hpp"
36 #include "viennacl/vector.hpp"
39 #include "viennacl/ell_matrix.hpp"
41 #include "viennacl/hyb_matrix.hpp"
42 #include "viennacl/context.hpp"
43 
44 #include "viennacl/linalg/cg.hpp"
47 
48 #include "viennacl/linalg/ilu.hpp"
52 
53 #ifdef VIENNACL_WITH_OPENCL
55 #endif
56 
58 
59 
60 #include <iostream>
61 #include <vector>
62 #include "benchmark-utils.hpp"
63 
64 
65 using namespace boost::numeric;
66 
67 #define BENCHMARK_RUNS 1
68 
69 
70 template<typename ScalarType>
71 ScalarType diff_inf(ublas::vector<ScalarType> & v1, viennacl::vector<ScalarType> & v2)
72 {
73  ublas::vector<ScalarType> v2_cpu(v2.size());
74  viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin());
75 
76  for (unsigned int i=0;i<v1.size(); ++i)
77  {
78  if ( std::max( fabs(v2_cpu[i]), fabs(v1[i]) ) > 0 )
79  v2_cpu[i] = fabs(v2_cpu[i] - v1[i]) / std::max( fabs(v2_cpu[i]), fabs(v1[i]) );
80  else
81  v2_cpu[i] = 0.0;
82  }
83 
84  return norm_inf(v2_cpu);
85 }
86 
87 template<typename ScalarType>
88 ScalarType diff_2(ublas::vector<ScalarType> & v1, viennacl::vector<ScalarType> & v2)
89 {
90  ublas::vector<ScalarType> v2_cpu(v2.size());
91  viennacl::copy(v2.begin(), v2.end(), v2_cpu.begin());
92 
93  return norm_2(v1 - v2_cpu) / norm_2(v1);
94 }
95 
96 
97 template<typename MatrixType, typename VectorType, typename SolverTag, typename PrecondTag>
98 void run_solver(MatrixType const & matrix, VectorType const & rhs, VectorType const & ref_result, SolverTag const & solver, PrecondTag const & precond, long ops)
99 {
100  Timer timer;
101  VectorType result(rhs);
102  VectorType residual(rhs);
104 
105  timer.start();
106  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
107  {
108  result = viennacl::linalg::solve(matrix, rhs, solver, precond);
109  }
111  double exec_time = timer.get();
112  std::cout << "Exec. time: " << exec_time << std::endl;
113  std::cout << "Est. "; printOps(static_cast<double>(ops), exec_time / BENCHMARK_RUNS);
114  residual -= viennacl::linalg::prod(matrix, result);
115  std::cout << "Relative residual: " << viennacl::linalg::norm_2(residual) / viennacl::linalg::norm_2(rhs) << std::endl;
116  std::cout << "Estimated rel. residual: " << solver.error() << std::endl;
117  std::cout << "Iterations: " << solver.iters() << std::endl;
118  result -= ref_result;
119  std::cout << "Relative deviation from result: " << viennacl::linalg::norm_2(result) / viennacl::linalg::norm_2(ref_result) << std::endl;
120 }
121 
122 
123 template<typename ScalarType>
125 {
126  Timer timer;
127  double exec_time;
128 
129  ScalarType std_factor1 = static_cast<ScalarType>(3.1415);
130  ScalarType std_factor2 = static_cast<ScalarType>(42.0);
131  viennacl::scalar<ScalarType> vcl_factor1(std_factor1, ctx);
132  viennacl::scalar<ScalarType> vcl_factor2(std_factor2, ctx);
133 
134  ublas::vector<ScalarType> ublas_vec1;
135  ublas::vector<ScalarType> ublas_vec2;
136  ublas::vector<ScalarType> ublas_result;
137  unsigned int solver_iters = 100;
138  unsigned int solver_krylov_dim = 20;
139  double solver_tolerance = 1e-6;
140 
141  ublas::compressed_matrix<ScalarType> ublas_matrix;
142  if (!viennacl::io::read_matrix_market_file(ublas_matrix, "../examples/testdata/mat65k.mtx"))
143  {
144  std::cout << "Error reading Matrix file" << std::endl;
145  return EXIT_FAILURE;
146  }
147  //unsigned int cg_mat_size = cg_mat.size();
148  std::cout << "done reading matrix" << std::endl;
149 
150  ublas_result = ublas::scalar_vector<ScalarType>(ublas_matrix.size1(), ScalarType(1.0));
151  ublas_vec1 = ublas::prod(ublas_matrix, ublas_result);
152  ublas_vec2 = ublas_vec1;
153 
154  viennacl::compressed_matrix<ScalarType> vcl_compressed_matrix(ublas_vec1.size(), ublas_vec1.size(), ctx);
155  viennacl::coordinate_matrix<ScalarType> vcl_coordinate_matrix(ublas_vec1.size(), ublas_vec1.size(), ctx);
156  viennacl::ell_matrix<ScalarType> vcl_ell_matrix(ctx);
157  viennacl::sliced_ell_matrix<ScalarType> vcl_sliced_ell_matrix(ctx);
158  viennacl::hyb_matrix<ScalarType> vcl_hyb_matrix(ctx);
159 
160  viennacl::vector<ScalarType> vcl_vec1(ublas_vec1.size(), ctx);
161  viennacl::vector<ScalarType> vcl_vec2(ublas_vec1.size(), ctx);
162  viennacl::vector<ScalarType> vcl_result(ublas_vec1.size(), ctx);
163 
164 
165  //cpu to gpu:
166  viennacl::copy(ublas_matrix, vcl_compressed_matrix);
167  viennacl::copy(ublas_matrix, vcl_coordinate_matrix);
168  viennacl::copy(ublas_matrix, vcl_ell_matrix);
169  viennacl::copy(ublas_matrix, vcl_sliced_ell_matrix);
170  viennacl::copy(ublas_matrix, vcl_hyb_matrix);
171  viennacl::copy(ublas_vec1, vcl_vec1);
172  viennacl::copy(ublas_vec2, vcl_vec2);
173  viennacl::copy(ublas_result, vcl_result);
174 
175 
176  std::cout << "------- Jacobi preconditioner ----------" << std::endl;
180 
181  std::cout << "------- Row-Scaling preconditioner ----------" << std::endl;
185 
189  std::cout << "------- ICHOL0 on CPU (ublas) ----------" << std::endl;
190 
191  timer.start();
193  exec_time = timer.get();
194  std::cout << "Setup time: " << exec_time << std::endl;
195 
196  timer.start();
197  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
198  ublas_ichol0.apply(ublas_vec1);
199  exec_time = timer.get();
200  std::cout << "ublas time: " << exec_time << std::endl;
201 
202  std::cout << "------- ICHOL0 with ViennaCL ----------" << std::endl;
203 
204  timer.start();
206  exec_time = timer.get();
207  std::cout << "Setup time: " << exec_time << std::endl;
208 
210  timer.start();
211  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
212  vcl_ichol0.apply(vcl_vec1);
214  exec_time = timer.get();
215  std::cout << "ViennaCL time: " << exec_time << std::endl;
216 
217 
221  std::cout << "------- ILU0 on with ublas ----------" << std::endl;
222 
223  timer.start();
225  exec_time = timer.get();
226  std::cout << "Setup time (no level scheduling): " << exec_time << std::endl;
227  timer.start();
228  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
229  ublas_ilu0.apply(ublas_vec1);
230  exec_time = timer.get();
231  std::cout << "ublas ILU0 substitution time (no level scheduling): " << exec_time << std::endl;
232 
233 
234  std::cout << "------- ILU0 with ViennaCL ----------" << std::endl;
235 
236  timer.start();
238  exec_time = timer.get();
239  std::cout << "Setup time (no level scheduling): " << exec_time << std::endl;
240 
242  timer.start();
243  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
244  vcl_ilu0.apply(vcl_vec1);
246  exec_time = timer.get();
247  std::cout << "ViennaCL ILU0 substitution time (no level scheduling): " << exec_time << std::endl;
248 
249  timer.start();
250  viennacl::linalg::ilu0_tag ilu0_with_level_scheduling; ilu0_with_level_scheduling.use_level_scheduling(true);
251  viennacl::linalg::ilu0_precond< viennacl::compressed_matrix<ScalarType> > vcl_ilu0_level_scheduling(vcl_compressed_matrix, ilu0_with_level_scheduling);
252  exec_time = timer.get();
253  std::cout << "Setup time (with level scheduling): " << exec_time << std::endl;
254 
256  timer.start();
257  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
258  vcl_ilu0_level_scheduling.apply(vcl_vec1);
260  exec_time = timer.get();
261  std::cout << "ViennaCL ILU0 substitution time (with level scheduling): " << exec_time << std::endl;
262 
263 
264 
266 
267  std::cout << "------- Block-ILU0 with ublas ----------" << std::endl;
268 
269  ublas_vec1 = ublas_vec2;
270  viennacl::copy(ublas_vec1, vcl_vec1);
271 
272  timer.start();
274  viennacl::linalg::ilu0_tag> ublas_block_ilu0(ublas_matrix, viennacl::linalg::ilu0_tag());
275  exec_time = timer.get();
276  std::cout << "Setup time: " << exec_time << std::endl;
277 
278  timer.start();
279  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
280  ublas_block_ilu0.apply(ublas_vec1);
281  exec_time = timer.get();
282  std::cout << "ublas time: " << exec_time << std::endl;
283 
284  std::cout << "------- Block-ILU0 with ViennaCL ----------" << std::endl;
285 
286  timer.start();
288  viennacl::linalg::ilu0_tag> vcl_block_ilu0(vcl_compressed_matrix, viennacl::linalg::ilu0_tag());
289  exec_time = timer.get();
290  std::cout << "Setup time: " << exec_time << std::endl;
291 
292  //vcl_block_ilu0.apply(vcl_vec1); //warm-up
294  timer.start();
295  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
296  vcl_block_ilu0.apply(vcl_vec1);
298  exec_time = timer.get();
299  std::cout << "ViennaCL time: " << exec_time << std::endl;
300 
302 
303  std::cout << "------- ILUT with ublas ----------" << std::endl;
304 
305  ublas_vec1 = ublas_vec2;
306  viennacl::copy(ublas_vec1, vcl_vec1);
307 
308  timer.start();
310  exec_time = timer.get();
311  std::cout << "Setup time (no level scheduling): " << exec_time << std::endl;
312  timer.start();
313  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
314  ublas_ilut.apply(ublas_vec1);
315  exec_time = timer.get();
316  std::cout << "ublas ILUT substitution time (no level scheduling): " << exec_time << std::endl;
317 
318 
319  std::cout << "------- ILUT with ViennaCL ----------" << std::endl;
320 
321  timer.start();
323  exec_time = timer.get();
324  std::cout << "Setup time (no level scheduling): " << exec_time << std::endl;
325 
327  timer.start();
328  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
329  vcl_ilut.apply(vcl_vec1);
331  exec_time = timer.get();
332  std::cout << "ViennaCL ILUT substitution time (no level scheduling): " << exec_time << std::endl;
333 
334  timer.start();
335  viennacl::linalg::ilut_tag ilut_with_level_scheduling; ilut_with_level_scheduling.use_level_scheduling(true);
336  viennacl::linalg::ilut_precond< viennacl::compressed_matrix<ScalarType> > vcl_ilut_level_scheduling(vcl_compressed_matrix, ilut_with_level_scheduling);
337  exec_time = timer.get();
338  std::cout << "Setup time (with level scheduling): " << exec_time << std::endl;
339 
341  timer.start();
342  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
343  vcl_ilut_level_scheduling.apply(vcl_vec1);
345  exec_time = timer.get();
346  std::cout << "ViennaCL ILUT substitution time (with level scheduling): " << exec_time << std::endl;
347 
348 
350 
351  std::cout << "------- Block-ILUT with ublas ----------" << std::endl;
352 
353  ublas_vec1 = ublas_vec2;
354  viennacl::copy(ublas_vec1, vcl_vec1);
355 
356  timer.start();
358  viennacl::linalg::ilut_tag> ublas_block_ilut(ublas_matrix, viennacl::linalg::ilut_tag());
359  exec_time = timer.get();
360  std::cout << "Setup time: " << exec_time << std::endl;
361 
362  //ublas_block_ilut.apply(ublas_vec1);
363  timer.start();
364  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
365  ublas_block_ilut.apply(ublas_vec1);
366  exec_time = timer.get();
367  std::cout << "ublas time: " << exec_time << std::endl;
368 
369  std::cout << "------- Block-ILUT with ViennaCL ----------" << std::endl;
370 
371  timer.start();
373  viennacl::linalg::ilut_tag> vcl_block_ilut(vcl_compressed_matrix, viennacl::linalg::ilut_tag());
374  exec_time = timer.get();
375  std::cout << "Setup time: " << exec_time << std::endl;
376 
377  //vcl_block_ilut.apply(vcl_vec1); //warm-up
379  timer.start();
380  for (int runs=0; runs<BENCHMARK_RUNS; ++runs)
381  vcl_block_ilut.apply(vcl_vec1);
383  exec_time = timer.get();
384  std::cout << "ViennaCL time: " << exec_time << std::endl;
385 
386 
390  long cg_ops = static_cast<long>(solver_iters * (ublas_matrix.nnz() + 6 * ublas_vec2.size()));
391 
392  viennacl::linalg::cg_tag cg_solver(solver_tolerance, solver_iters);
393 
394  std::cout << "------- CG solver (no preconditioner) using ublas ----------" << std::endl;
395  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, viennacl::linalg::no_precond(), cg_ops);
396 
397  std::cout << "------- CG solver (no preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
398  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, viennacl::linalg::no_precond(), cg_ops);
399 
400 #ifdef VIENNACL_WITH_OPENCL
401  bool is_double = (sizeof(ScalarType) == sizeof(double));
402  if (is_double)
403  {
404  std::cout << "------- CG solver, mixed precision (no preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
405  viennacl::linalg::mixed_precision_cg_tag mixed_precision_cg_solver(solver_tolerance, solver_iters);
406 
407  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, mixed_precision_cg_solver, viennacl::linalg::no_precond(), cg_ops);
408  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, mixed_precision_cg_solver, viennacl::linalg::no_precond(), cg_ops);
409  }
410 #endif
411 
412  std::cout << "------- CG solver (no preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
413  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, cg_solver, viennacl::linalg::no_precond(), cg_ops);
414 
415  std::cout << "------- CG solver (no preconditioner) via ViennaCL, ell_matrix ----------" << std::endl;
416  run_solver(vcl_ell_matrix, vcl_vec2, vcl_result, cg_solver, viennacl::linalg::no_precond(), cg_ops);
417 
418  std::cout << "------- CG solver (no preconditioner) via ViennaCL, sliced_ell_matrix ----------" << std::endl;
419  run_solver(vcl_sliced_ell_matrix, vcl_vec2, vcl_result, cg_solver, viennacl::linalg::no_precond(), cg_ops);
420 
421  std::cout << "------- CG solver (no preconditioner) via ViennaCL, hyb_matrix ----------" << std::endl;
422  run_solver(vcl_hyb_matrix, vcl_vec2, vcl_result, cg_solver, viennacl::linalg::no_precond(), cg_ops);
423 
424  std::cout << "------- CG solver (ICHOL0 preconditioner) using ublas ----------" << std::endl;
425  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, ublas_ichol0, cg_ops);
426 
427  std::cout << "------- CG solver (ICHOL0 preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
428  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, vcl_ichol0, cg_ops);
429 
430 
431  std::cout << "------- CG solver (ILU0 preconditioner) using ublas ----------" << std::endl;
432  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, ublas_ilu0, cg_ops);
433 
434  std::cout << "------- CG solver (ILU0 preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
435  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, vcl_ilu0, cg_ops);
436 
437 
438  std::cout << "------- CG solver (Block-ILU0 preconditioner) using ublas ----------" << std::endl;
439  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, ublas_block_ilu0, cg_ops);
440 
441  std::cout << "------- CG solver (Block-ILU0 preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
442  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, vcl_block_ilu0, cg_ops);
443 
444  std::cout << "------- CG solver (ILUT preconditioner) using ublas ----------" << std::endl;
445  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, ublas_ilut, cg_ops);
446 
447  std::cout << "------- CG solver (ILUT preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
448  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, vcl_ilut, cg_ops);
449 
450  std::cout << "------- CG solver (ILUT preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
451  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, cg_solver, vcl_ilut, cg_ops);
452 
453  std::cout << "------- CG solver (Block-ILUT preconditioner) using ublas ----------" << std::endl;
454  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, ublas_block_ilut, cg_ops);
455 
456  std::cout << "------- CG solver (Block-ILUT preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
457  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, vcl_block_ilut, cg_ops);
458 
459  std::cout << "------- CG solver (Jacobi preconditioner) using ublas ----------" << std::endl;
460  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, ublas_jacobi, cg_ops);
461 
462  std::cout << "------- CG solver (Jacobi preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
463  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, vcl_jacobi_csr, cg_ops);
464 
465  std::cout << "------- CG solver (Jacobi preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
466  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, cg_solver, vcl_jacobi_coo, cg_ops);
467 
468 
469  std::cout << "------- CG solver (row scaling preconditioner) using ublas ----------" << std::endl;
470  run_solver(ublas_matrix, ublas_vec2, ublas_result, cg_solver, ublas_row_scaling, cg_ops);
471 
472  std::cout << "------- CG solver (row scaling preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
473  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, cg_solver, vcl_row_scaling_csr, cg_ops);
474 
475  std::cout << "------- CG solver (row scaling preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
476  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, cg_solver, vcl_row_scaling_coo, cg_ops);
477 
478 
482 
483  long bicgstab_ops = static_cast<long>(solver_iters * (2 * ublas_matrix.nnz() + 13 * ublas_vec2.size()));
484 
485  viennacl::linalg::bicgstab_tag bicgstab_solver(solver_tolerance, solver_iters);
486 
487  std::cout << "------- BiCGStab solver (no preconditioner) using ublas ----------" << std::endl;
488  run_solver(ublas_matrix, ublas_vec2, ublas_result, bicgstab_solver, viennacl::linalg::no_precond(), bicgstab_ops);
489 
490  std::cout << "------- BiCGStab solver (no preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
491  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, bicgstab_solver, viennacl::linalg::no_precond(), bicgstab_ops);
492 
493  std::cout << "------- BiCGStab solver (no preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
494  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, bicgstab_solver, viennacl::linalg::no_precond(), bicgstab_ops);
495 
496  std::cout << "------- BiCGStab solver (no preconditioner) via ViennaCL, ell_matrix ----------" << std::endl;
497  run_solver(vcl_ell_matrix, vcl_vec2, vcl_result, bicgstab_solver, viennacl::linalg::no_precond(), bicgstab_ops);
498 
499  std::cout << "------- BiCGStab solver (no preconditioner) via ViennaCL, sliced_ell_matrix ----------" << std::endl;
500  run_solver(vcl_sliced_ell_matrix, vcl_vec2, vcl_result, bicgstab_solver, viennacl::linalg::no_precond(), bicgstab_ops);
501 
502  std::cout << "------- BiCGStab solver (no preconditioner) via ViennaCL, hyb_matrix ----------" << std::endl;
503  run_solver(vcl_hyb_matrix, vcl_vec2, vcl_result, bicgstab_solver, viennacl::linalg::no_precond(), bicgstab_ops);
504 
505  std::cout << "------- BiCGStab solver (ILUT preconditioner) using ublas ----------" << std::endl;
506  run_solver(ublas_matrix, ublas_vec2, ublas_result, bicgstab_solver, ublas_ilut, bicgstab_ops);
507 
508  std::cout << "------- BiCGStab solver (ILUT preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
509  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_ilut, bicgstab_ops);
510 
511  std::cout << "------- BiCGStab solver (Block-ILUT preconditioner) using ublas ----------" << std::endl;
512  run_solver(ublas_matrix, ublas_vec2, ublas_result, bicgstab_solver, ublas_block_ilut, bicgstab_ops);
513 
514 #ifdef VIENNACL_WITH_OPENCL
515  std::cout << "------- BiCGStab solver (Block-ILUT preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
516  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_block_ilut, bicgstab_ops);
517 #endif
518 
519 // std::cout << "------- BiCGStab solver (ILUT preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
520 // run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_ilut, bicgstab_ops);
521 
522  std::cout << "------- BiCGStab solver (Jacobi preconditioner) using ublas ----------" << std::endl;
523  run_solver(ublas_matrix, ublas_vec2, ublas_result, bicgstab_solver, ublas_jacobi, bicgstab_ops);
524 
525  std::cout << "------- BiCGStab solver (Jacobi preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
526  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_jacobi_csr, bicgstab_ops);
527 
528  std::cout << "------- BiCGStab solver (Jacobi preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
529  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_jacobi_coo, bicgstab_ops);
530 
531 
532  std::cout << "------- BiCGStab solver (row scaling preconditioner) using ublas ----------" << std::endl;
533  run_solver(ublas_matrix, ublas_vec2, ublas_result, bicgstab_solver, ublas_row_scaling, bicgstab_ops);
534 
535  std::cout << "------- BiCGStab solver (row scaling preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
536  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_row_scaling_csr, bicgstab_ops);
537 
538  std::cout << "------- BiCGStab solver (row scaling preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
539  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, bicgstab_solver, vcl_row_scaling_coo, bicgstab_ops);
540 
541 
545 
546  long gmres_ops = static_cast<long>(solver_iters * (ublas_matrix.nnz() + (solver_iters * 2 + 7) * ublas_vec2.size()));
547 
548  viennacl::linalg::gmres_tag gmres_solver(solver_tolerance, solver_iters, solver_krylov_dim);
549 
550  std::cout << "------- GMRES solver (no preconditioner) using ublas ----------" << std::endl;
551  run_solver(ublas_matrix, ublas_vec2, ublas_result, gmres_solver, viennacl::linalg::no_precond(), gmres_ops);
552 
553  std::cout << "------- GMRES solver (no preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
554  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, gmres_solver, viennacl::linalg::no_precond(), gmres_ops);
555 
556  std::cout << "------- GMRES solver (no preconditioner) on GPU, coordinate_matrix ----------" << std::endl;
557  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, gmres_solver, viennacl::linalg::no_precond(), gmres_ops);
558 
559  std::cout << "------- GMRES solver (no preconditioner) on GPU, ell_matrix ----------" << std::endl;
560  run_solver(vcl_ell_matrix, vcl_vec2, vcl_result, gmres_solver, viennacl::linalg::no_precond(), gmres_ops);
561 
562  std::cout << "------- GMRES solver (no preconditioner) on GPU, sliced_ell_matrix ----------" << std::endl;
563  run_solver(vcl_sliced_ell_matrix, vcl_vec2, vcl_result, gmres_solver, viennacl::linalg::no_precond(), gmres_ops);
564 
565  std::cout << "------- GMRES solver (no preconditioner) on GPU, hyb_matrix ----------" << std::endl;
566  run_solver(vcl_hyb_matrix, vcl_vec2, vcl_result, gmres_solver, viennacl::linalg::no_precond(), gmres_ops);
567 
568  std::cout << "------- GMRES solver (ILUT preconditioner) using ublas ----------" << std::endl;
569  run_solver(ublas_matrix, ublas_vec2, ublas_result, gmres_solver, ublas_ilut, gmres_ops);
570 
571  std::cout << "------- GMRES solver (ILUT preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
572  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, gmres_solver, vcl_ilut, gmres_ops);
573 
574  std::cout << "------- GMRES solver (ILUT preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
575  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, gmres_solver, vcl_ilut, gmres_ops);
576 
577 
578  std::cout << "------- GMRES solver (Jacobi preconditioner) using ublas ----------" << std::endl;
579  run_solver(ublas_matrix, ublas_vec2, ublas_result, gmres_solver, ublas_jacobi, gmres_ops);
580 
581  std::cout << "------- GMRES solver (Jacobi preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
582  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, gmres_solver, vcl_jacobi_csr, gmres_ops);
583 
584  std::cout << "------- GMRES solver (Jacobi preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
585  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, gmres_solver, vcl_jacobi_coo, gmres_ops);
586 
587 
588  std::cout << "------- GMRES solver (row scaling preconditioner) using ublas ----------" << std::endl;
589  run_solver(ublas_matrix, ublas_vec2, ublas_result, gmres_solver, ublas_row_scaling, gmres_ops);
590 
591  std::cout << "------- GMRES solver (row scaling preconditioner) via ViennaCL, compressed_matrix ----------" << std::endl;
592  run_solver(vcl_compressed_matrix, vcl_vec2, vcl_result, gmres_solver, vcl_row_scaling_csr, gmres_ops);
593 
594  std::cout << "------- GMRES solver (row scaling preconditioner) via ViennaCL, coordinate_matrix ----------" << std::endl;
595  run_solver(vcl_coordinate_matrix, vcl_vec2, vcl_result, gmres_solver, vcl_row_scaling_coo, gmres_ops);
596 
597  return EXIT_SUCCESS;
598 }
599 
600 int main()
601 {
602  std::cout << std::endl;
603  std::cout << "----------------------------------------------" << std::endl;
604  std::cout << " Device Info" << std::endl;
605  std::cout << "----------------------------------------------" << std::endl;
606 
607 #ifdef VIENNACL_WITH_OPENCL
609  std::vector<viennacl::ocl::device> const & devices = pf.devices();
610 
611  // Set first device to first context:
612  viennacl::ocl::setup_context(0, devices[0]);
613 
614  // Set second device for second context (use the same device for the second context if only one device available):
615  if (devices.size() > 1)
616  viennacl::ocl::setup_context(1, devices[1]);
617  else
618  viennacl::ocl::setup_context(1, devices[0]);
619 
620  std::cout << viennacl::ocl::current_device().info() << std::endl;
622 #else
623  viennacl::context ctx;
624 #endif
625 
626  std::cout << "---------------------------------------------------------------------------" << std::endl;
627  std::cout << "---------------------------------------------------------------------------" << std::endl;
628  std::cout << " Benchmark for Execution Times of Iterative Solvers provided with ViennaCL " << std::endl;
629  std::cout << "---------------------------------------------------------------------------" << std::endl;
630  std::cout << " Note that the purpose of this benchmark is not to run solvers until" << std::endl;
631  std::cout << " convergence. Instead, only the execution times of a few iterations are" << std::endl;
632  std::cout << " recorded. Residual errors are only printed for information." << std::endl << std::endl;
633 
634 
635  std::cout << std::endl;
636  std::cout << "----------------------------------------------" << std::endl;
637  std::cout << "----------------------------------------------" << std::endl;
638  std::cout << "## Benchmark :: Solver" << std::endl;
639  std::cout << "----------------------------------------------" << std::endl;
640  std::cout << std::endl;
641  std::cout << " -------------------------------" << std::endl;
642  std::cout << " # benchmarking single-precision" << std::endl;
643  std::cout << " -------------------------------" << std::endl;
644  run_benchmark<float>(ctx);
645 #ifdef VIENNACL_WITH_OPENCL
647 #endif
648  {
649  std::cout << std::endl;
650  std::cout << " -------------------------------" << std::endl;
651  std::cout << " # benchmarking double-precision" << std::endl;
652  std::cout << " -------------------------------" << std::endl;
653  run_benchmark<double>(ctx);
654  }
655  return 0;
656 }
657 
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
Definition: forwards.h:405
T norm_2(std::vector< T, A > const &v1)
Definition: norm_2.hpp:86
A reader and writer for the matrix market format is implemented here.
std::vector< platform > get_platforms()
Definition: platform.hpp:124
ILU0 preconditioner class, can be supplied to solve()-routines.
Definition: ilu0.hpp:146
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Definition: forwards.h:226
Incomplete Cholesky preconditioner class with static pattern (ICHOL0), can be supplied to solve()-rou...
Definition: ichol.hpp:127
int run_benchmark(viennacl::context ctx)
Definition: solver.cpp:124
A tag for incomplete Cholesky factorization with static pattern (ILU0)
Definition: ichol.hpp:43
Jacobi preconditioner class, can be supplied to solve()-routines. Generic version for non-ViennaCL ma...
Wrapper class for an OpenCL platform.
Definition: platform.hpp:45
A tag for incomplete LU factorization with static pattern (ILU0)
Definition: ilu0.hpp:58
std::vector< device > devices(cl_device_type dtype=CL_DEVICE_TYPE_DEFAULT)
Returns the available devices of the supplied device type.
Definition: platform.hpp:91
The stabilized bi-conjugate gradient method is implemented here.
ScalarType diff_2(ublas::vector< ScalarType > &v1, viennacl::vector< ScalarType > &v2)
Definition: solver.cpp:88
Implementations of incomplete Cholesky factorization preconditioners with static nonzero pattern...
ScalarType diff_inf(ublas::vector< ScalarType > &v1, viennacl::vector< ScalarType > &v2)
Definition: solver.cpp:71
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
Definition: memory.hpp:54
bool use_level_scheduling() const
Definition: ilut.hpp:76
void start()
A tag for a jacobi preconditioner.
A block ILU preconditioner class, can be supplied to solve()-routines.
Definition: block_ilu.hpp:132
double get() const
T max(const T &lhs, const T &rhs)
Maximum.
Definition: util.hpp:59
Implementation of a simple Jacobi preconditioner.
Jacobi-type preconditioner class, can be supplied to solve()-routines. This is a diagonal preconditio...
Definition: row_scaling.hpp:87
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
Definition: backend.hpp:351
void printOps(double num_ops, double exec_time)
A tag for the solver GMRES. Used for supplying solver parameters and for dispatching the solve() func...
Definition: gmres.hpp:49
Implementation of the coordinate_matrix class.
Implementation of a OpenCL-like context, which serves as a unification of {OpenMP, CUDA, OpenCL} at the user API.
std::string info(vcl_size_t indent=0, char indent_char= ' ') const
Returns an info string with a few properties of the device. Use full_info() to get all details...
Definition: device.hpp:995
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Definition: context.hpp:39
viennacl::vector< NumericT > solve(viennacl::compressed_matrix< NumericT > const &A, viennacl::vector_base< NumericT > const &rhs, bicgstab_tag const &tag, viennacl::linalg::no_precond)
Overload for the pipelined BiCGStab implementation for the ViennaCL sparse matrix types...
Definition: bicgstab.hpp:209
void apply(VectorT &vec) const
Definition: ilut.hpp:266
viennacl::vector< float > v1
Implementation of the hyb_matrix class.
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:91
Implementations of the generalized minimum residual method are in this file.
#define BENCHMARK_RUNS
Definition: solver.cpp:67
Sparse matrix class using the ELLPACK format for storing the nonzeros.
Definition: ell_matrix.hpp:53
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
Definition: vector.hpp:827
A tag class representing the use of no preconditioner.
Definition: forwards.h:833
Implementations of incomplete factorization preconditioners. Convenience header file.
A tag for incomplete LU factorization with threshold (ILUT)
Definition: ilut.hpp:45
Sparse matrix class using the sliced ELLPACK with parameters C, .
Definition: forwards.h:402
Implementation of the compressed_matrix class.
Implementation of the sliced_ell_matrix class.
A tag for a row scaling preconditioner which merely normalizes the equation system such that each row...
Definition: row_scaling.hpp:40
void run_solver(MatrixType const &matrix, VectorType const &rhs, VectorType const &ref_result, SolverTag const &solver, PrecondTag const &precond, long ops)
Definition: solver.cpp:98
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
Definition: device.hpp:956
ILUT preconditioner class, can be supplied to solve()-routines.
Definition: ilut.hpp:252
int main()
Definition: solver.cpp:600
The conjugate gradient method is implemented here.
void apply(VectorT &vec) const
Definition: ilu0.hpp:160
Implementation of the ell_matrix class.
void prod(const MatrixT1 &A, bool transposed_A, const MatrixT2 &B, bool transposed_B, MatrixT3 &C, ScalarT alpha, ScalarT beta)
void apply(VectorT &vec) const
Definition: block_ilu.hpp:174
A row normalization preconditioner is implemented here.
void apply(VectorT &vec) const
Definition: ichol.hpp:141
viennacl::vector< int > v2
bool use_level_scheduling() const
Definition: ilu0.hpp:63
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
T norm_inf(std::vector< T, A > const &v1)
Definition: norm_inf.hpp:60
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
float ScalarType
Definition: fft_1d.cpp:42
A tag for the conjugate gradient Used for supplying solver parameters and for dispatching the solve()...
Definition: cg.hpp:48
A sparse square matrix in compressed sparse rows format.
A tag for the stabilized Bi-conjugate gradient solver. Used for supplying solver parameters and for d...
Definition: bicgstab.hpp:47
long read_matrix_market_file(MatrixT &mat, const char *file, long index_base=1)
Reads a sparse matrix from a file (MatrixMarket format)
iterator end()
Returns an iterator pointing to the end of the vector (STL like)
Definition: vector.hpp:834
viennacl::ocl::context & get_context(long i)
Convenience function for returning the current context.
Definition: backend.hpp:225
A tag for the conjugate gradient Used for supplying solver parameters and for dispatching the solve()...
Implementation of the ViennaCL scalar class.
void setup_context(long i, std::vector< cl_device_id > const &devices)
Convenience function for setting devices for a context.
Definition: backend.hpp:231
The conjugate gradient method using mixed precision is implemented here. Experimental.
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...