ViennaCL - The Vienna Computing Library  1.6.2
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
bicgstab.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_BICGSTAB_HPP_
2 #define VIENNACL_LINALG_BICGSTAB_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include <vector>
26 #include <cmath>
27 #include <numeric>
28 
29 #include "viennacl/forwards.h"
30 #include "viennacl/tools/tools.hpp"
31 #include "viennacl/linalg/prod.hpp"
35 #include "viennacl/traits/size.hpp"
39 
40 namespace viennacl
41 {
42 namespace linalg
43 {
44 
48 {
49 public:
56  bicgstab_tag(double tol = 1e-8, vcl_size_t max_iters = 400, vcl_size_t max_iters_before_restart = 200)
57  : tol_(tol), iterations_(max_iters), iterations_before_restart_(max_iters_before_restart) {}
58 
60  double tolerance() const { return tol_; }
62  vcl_size_t max_iterations() const { return iterations_; }
64  vcl_size_t max_iterations_before_restart() const { return iterations_before_restart_; }
65 
67  vcl_size_t iters() const { return iters_taken_; }
68  void iters(vcl_size_t i) const { iters_taken_ = i; }
69 
71  double error() const { return last_error_; }
73  void error(double e) const { last_error_ = e; }
74 
75 private:
76  double tol_;
77  vcl_size_t iterations_;
78  vcl_size_t iterations_before_restart_;
79 
80  //return values from solver
81  mutable vcl_size_t iters_taken_;
82  mutable double last_error_;
83 };
84 
85 
86 
87 namespace detail
88 {
90  template<typename MatrixT, typename NumericT>
91  viennacl::vector<NumericT> pipelined_solve(MatrixT const & A, //MatrixType const & A,
93  bicgstab_tag const & tag,
95  {
97 
98  viennacl::vector<NumericT> residual = rhs;
100  viennacl::vector<NumericT> r0star = rhs;
104 
105  // Layout of temporary buffer:
106  // chunk 0: <residual, r_0^*>
107  // chunk 1: <As, As>
108  // chunk 2: <As, s>
109  // chunk 3: <Ap, r_0^*>
110  // chunk 4: <As, r_0^*>
111  // chunk 5: <s, s>
112  vcl_size_t buffer_size_per_vector = 256;
113  vcl_size_t num_buffer_chunks = 6;
114  viennacl::vector<NumericT> inner_prod_buffer = viennacl::zero_vector<NumericT>(num_buffer_chunks*buffer_size_per_vector, viennacl::traits::context(rhs)); // temporary buffer
115  std::vector<NumericT> host_inner_prod_buffer(inner_prod_buffer.size());
116 
117  NumericT norm_rhs_host = viennacl::linalg::norm_2(residual);
118  NumericT beta;
119  NumericT alpha;
120  NumericT omega;
121  NumericT residual_norm = norm_rhs_host;
122  inner_prod_buffer[0] = norm_rhs_host * norm_rhs_host;
123 
124  NumericT r_dot_r0 = 0;
125  NumericT As_dot_As = 0;
126  NumericT As_dot_s = 0;
127  NumericT Ap_dot_r0 = 0;
128  NumericT As_dot_r0 = 0;
129  NumericT s_dot_s = 0;
130 
131  if (norm_rhs_host <= 0) //solution is zero if RHS norm is zero
132  return result;
133 
134  for (vcl_size_t i = 0; i < tag.max_iterations(); ++i)
135  {
136  tag.iters(i+1);
137  // Ap = A*p_j
138  // Ap_dot_r0 = <Ap, r_0^*>
140  inner_prod_buffer, buffer_size_per_vector, 3*buffer_size_per_vector);
141 
143 
145  //
147  //viennacl::fast_copy(inner_prod_buffer.begin(), inner_prod_buffer.end(), host_inner_prod_buffer.begin());
148  //Ap_dot_r0 = std::accumulate(host_inner_prod_buffer.begin() + buffer_size_per_vector, host_inner_prod_buffer.begin() + 2 * buffer_size_per_vector, ScalarType(0));
149 
150  //alpha = residual_dot_r0 / Ap_dot_r0;
151 
153  //s = residual - alpha * Ap;
154 
156  // s = r - alpha * Ap
157  // <s, s> first stage
158  // dump alpha at end of inner_prod_buffer
160  inner_prod_buffer, buffer_size_per_vector, 5*buffer_size_per_vector);
161 
162  // As = A*s_j
163  // As_dot_As = <As, As>
164  // As_dot_s = <As, s>
165  // As_dot_r0 = <As, r_0^*>
167  inner_prod_buffer, buffer_size_per_vector, 4*buffer_size_per_vector);
168 
170 
171  viennacl::fast_copy(inner_prod_buffer.begin(), inner_prod_buffer.end(), host_inner_prod_buffer.begin());
172 
173  r_dot_r0 = std::accumulate(host_inner_prod_buffer.begin(), host_inner_prod_buffer.begin() + buffer_size_per_vector, NumericT(0));
174  As_dot_As = std::accumulate(host_inner_prod_buffer.begin() + buffer_size_per_vector, host_inner_prod_buffer.begin() + 2 * buffer_size_per_vector, NumericT(0));
175  As_dot_s = std::accumulate(host_inner_prod_buffer.begin() + 2 * buffer_size_per_vector, host_inner_prod_buffer.begin() + 3 * buffer_size_per_vector, NumericT(0));
176  Ap_dot_r0 = std::accumulate(host_inner_prod_buffer.begin() + 3 * buffer_size_per_vector, host_inner_prod_buffer.begin() + 4 * buffer_size_per_vector, NumericT(0));
177  As_dot_r0 = std::accumulate(host_inner_prod_buffer.begin() + 4 * buffer_size_per_vector, host_inner_prod_buffer.begin() + 5 * buffer_size_per_vector, NumericT(0));
178  s_dot_s = std::accumulate(host_inner_prod_buffer.begin() + 5 * buffer_size_per_vector, host_inner_prod_buffer.begin() + 6 * buffer_size_per_vector, NumericT(0));
179 
180  alpha = r_dot_r0 / Ap_dot_r0;
181  beta = - As_dot_r0 / Ap_dot_r0;
182  omega = As_dot_s / As_dot_As;
183 
184  residual_norm = std::sqrt(s_dot_s - NumericT(2.0) * omega * As_dot_s + omega * omega * As_dot_As);
185  if (std::fabs(residual_norm / norm_rhs_host) < tag.tolerance())
186  break;
187 
188  // x_{j+1} = x_j + alpha * p_j + omega * s_j
189  // r_{j+1} = s_j - omega * t_j
190  // p_{j+1} = r_{j+1} + beta * (p_j - omega * q_j)
191  // and compute first stage of r_dot_r0 = <r_{j+1}, r_o^*> for use in next iteration
192  viennacl::linalg::pipelined_bicgstab_vector_update(result, alpha, p, omega, s,
193  residual, As,
194  beta, Ap,
195  r0star, inner_prod_buffer, buffer_size_per_vector);
196  }
197 
198  //store last error estimate:
199  tag.error(residual_norm / norm_rhs_host);
200 
201  return result;
202  }
203 }
204 
205 // compressed_matrix
206 
208 template<typename NumericT>
211  bicgstab_tag const & tag,
213 {
215 }
216 
218 template<typename NumericT>
220  viennacl::vector<NumericT> const & rhs,
221  bicgstab_tag const & tag,
223 {
225 }
226 
228 template<typename NumericT>
231  bicgstab_tag const & tag,
233 {
235 }
236 
238 template<typename NumericT>
241  bicgstab_tag const & tag,
243 {
245 }
246 
247 
248 // coordinate_matrix
249 
251 template<typename NumericT>
254  bicgstab_tag const & tag,
256 {
258 }
259 
261 template<typename NumericT>
263  viennacl::vector<NumericT> const & rhs,
264  bicgstab_tag const & tag,
266 {
268 }
269 
271 template<typename NumericT>
274  bicgstab_tag const & tag,
276 {
278 }
279 
281 template<typename NumericT>
284  bicgstab_tag const & tag,
286 {
288 }
289 
290 // ell_matrix
291 
293 template<typename NumericT>
296  bicgstab_tag const & tag,
298 {
300 }
301 
303 template<typename NumericT>
305  viennacl::vector<NumericT> const & rhs,
306  bicgstab_tag const & tag,
308 {
310 }
311 
313 template<typename NumericT>
316  bicgstab_tag const & tag,
318 {
320 }
321 
323 template<typename NumericT>
326  bicgstab_tag const & tag,
328 {
330 }
331 
332 // sliced_ell_matrix
333 
335 template<typename NumericT>
338  bicgstab_tag const & tag,
340 {
342 }
343 
345 template<typename NumericT>
347  viennacl::vector<NumericT> const & rhs,
348  bicgstab_tag const & tag,
350 {
352 }
353 
355 template<typename NumericT>
358  bicgstab_tag const & tag,
360 {
362 }
363 
365 template<typename NumericT>
368  bicgstab_tag const & tag,
370 {
372 }
373 
374 
375 // hyb_matrix
376 
378 template<typename NumericT>
381  bicgstab_tag const & tag,
383 {
385 }
386 
388 template<typename NumericT>
390  viennacl::vector<NumericT> const & rhs,
391  bicgstab_tag const & tag,
393 {
395 }
396 
398 template<typename NumericT>
401  bicgstab_tag const & tag,
403 {
405 }
406 
408 template<typename NumericT>
411  bicgstab_tag const & tag,
413 {
415 }
416 
417 
418 
419 
420 
430 template<typename MatrixT, typename VectorT>
431 VectorT solve(MatrixT const & matrix, VectorT const & rhs, bicgstab_tag const & tag)
432 {
433  typedef typename viennacl::result_of::value_type<VectorT>::type NumericType;
434  typedef typename viennacl::result_of::cpu_value_type<NumericType>::type CPU_NumericType;
435  VectorT result = rhs;
436  viennacl::traits::clear(result);
437 
438  VectorT residual = rhs;
439  VectorT p = rhs;
440  VectorT r0star = rhs;
441  VectorT tmp0 = rhs;
442  VectorT tmp1 = rhs;
443  VectorT s = rhs;
444 
445  CPU_NumericType norm_rhs_host = viennacl::linalg::norm_2(residual);
446  CPU_NumericType ip_rr0star = norm_rhs_host * norm_rhs_host;
447  CPU_NumericType beta;
448  CPU_NumericType alpha;
449  CPU_NumericType omega;
450  //ScalarType inner_prod_temp; //temporary variable for inner product computation
451  CPU_NumericType new_ip_rr0star = 0;
452  CPU_NumericType residual_norm = norm_rhs_host;
453 
454  if (norm_rhs_host <= 0) //solution is zero if RHS norm is zero
455  return result;
456 
457  bool restart_flag = true;
458  vcl_size_t last_restart = 0;
459  for (vcl_size_t i = 0; i < tag.max_iterations(); ++i)
460  {
461  if (restart_flag)
462  {
463  residual = rhs;
464  residual -= viennacl::linalg::prod(matrix, result);
465  p = residual;
466  r0star = residual;
467  ip_rr0star = viennacl::linalg::norm_2(residual);
468  ip_rr0star *= ip_rr0star;
469  restart_flag = false;
470  last_restart = i;
471  }
472 
473  tag.iters(i+1);
474  tmp0 = viennacl::linalg::prod(matrix, p);
475  alpha = ip_rr0star / viennacl::linalg::inner_prod(tmp0, r0star);
476 
477  s = residual - alpha*tmp0;
478 
479  tmp1 = viennacl::linalg::prod(matrix, s);
480  CPU_NumericType norm_tmp1 = viennacl::linalg::norm_2(tmp1);
481  omega = viennacl::linalg::inner_prod(tmp1, s) / (norm_tmp1 * norm_tmp1);
482 
483  result += alpha * p + omega * s;
484  residual = s - omega * tmp1;
485 
486  new_ip_rr0star = viennacl::linalg::inner_prod(residual, r0star);
487  residual_norm = viennacl::linalg::norm_2(residual);
488  if (std::fabs(residual_norm / norm_rhs_host) < tag.tolerance())
489  break;
490 
491  beta = new_ip_rr0star / ip_rr0star * alpha/omega;
492  ip_rr0star = new_ip_rr0star;
493 
494  if ( (ip_rr0star <= 0 && ip_rr0star >= 0)
495  || (omega <= 0 && omega >= 0)
496  || (i - last_restart > tag.max_iterations_before_restart())
497  ) //search direction degenerate. A restart might help
498  restart_flag = true;
499 
500  // Execution of
501  // p = residual + beta * (p - omega*tmp0);
502  // without introducing temporary vectors:
503  p -= omega * tmp0;
504  p = residual + beta * p;
505  }
506 
507  //store last error estimate:
508  tag.error(residual_norm / norm_rhs_host);
509 
510  return result;
511 }
512 
513 template<typename MatrixT, typename VectorT>
514 VectorT solve(MatrixT const & matrix, VectorT const & rhs, bicgstab_tag const & tag, viennacl::linalg::no_precond)
515 {
516  return solve(matrix, rhs, tag);
517 }
518 
529 template<typename MatrixT, typename VectorT, typename PreconditionerT>
530 VectorT solve(MatrixT const & matrix, VectorT const & rhs, bicgstab_tag const & tag, PreconditionerT const & precond)
531 {
532  typedef typename viennacl::result_of::value_type<VectorT>::type NumericType;
533  typedef typename viennacl::result_of::cpu_value_type<NumericType>::type CPU_NumericType;
534  VectorT result = rhs;
535  viennacl::traits::clear(result);
536 
537  VectorT residual = rhs;
538  VectorT r0star = residual; //can be chosen arbitrarily in fact
539  VectorT tmp0 = rhs;
540  VectorT tmp1 = rhs;
541  VectorT s = rhs;
542 
543  VectorT p = residual;
544 
545  CPU_NumericType ip_rr0star = viennacl::linalg::norm_2(residual);
546  CPU_NumericType norm_rhs_host = viennacl::linalg::norm_2(residual);
547  CPU_NumericType beta;
548  CPU_NumericType alpha;
549  CPU_NumericType omega;
550  CPU_NumericType new_ip_rr0star = 0;
551  CPU_NumericType residual_norm = norm_rhs_host;
552 
553  if (!norm_rhs_host) //solution is zero if RHS norm is zero
554  return result;
555 
556  bool restart_flag = true;
557  vcl_size_t last_restart = 0;
558  for (unsigned int i = 0; i < tag.max_iterations(); ++i)
559  {
560  if (restart_flag)
561  {
562  residual = rhs;
563  residual -= viennacl::linalg::prod(matrix, result);
564  precond.apply(residual);
565  p = residual;
566  r0star = residual;
567  ip_rr0star = viennacl::linalg::norm_2(residual);
568  ip_rr0star *= ip_rr0star;
569  restart_flag = false;
570  last_restart = i;
571  }
572 
573  tag.iters(i+1);
574  tmp0 = viennacl::linalg::prod(matrix, p);
575  precond.apply(tmp0);
576  alpha = ip_rr0star / viennacl::linalg::inner_prod(tmp0, r0star);
577 
578  s = residual - alpha*tmp0;
579 
580  tmp1 = viennacl::linalg::prod(matrix, s);
581  precond.apply(tmp1);
582  CPU_NumericType norm_tmp1 = viennacl::linalg::norm_2(tmp1);
583  omega = viennacl::linalg::inner_prod(tmp1, s) / (norm_tmp1 * norm_tmp1);
584 
585  result += alpha * p + omega * s;
586  residual = s - omega * tmp1;
587 
588  residual_norm = viennacl::linalg::norm_2(residual);
589  if (residual_norm / norm_rhs_host < tag.tolerance())
590  break;
591 
592  new_ip_rr0star = viennacl::linalg::inner_prod(residual, r0star);
593 
594  beta = new_ip_rr0star / ip_rr0star * alpha/omega;
595  ip_rr0star = new_ip_rr0star;
596 
597  if (!ip_rr0star || !omega || i - last_restart > tag.max_iterations_before_restart()) //search direction degenerate. A restart might help
598  restart_flag = true;
599 
600  // Execution of
601  // p = residual + beta * (p - omega*tmp0);
602  // without introducing temporary vectors:
603  p -= omega * tmp0;
604  p = residual + beta * p;
605 
606  //std::cout << "Rel. Residual in current step: " << std::sqrt(std::fabs(viennacl::linalg::inner_prod(residual, residual) / norm_rhs_host)) << std::endl;
607  }
608 
609  //store last error estimate:
610  tag.error(residual_norm / norm_rhs_host);
611 
612  return result;
613 }
614 
615 }
616 }
617 
618 #endif
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
Definition: forwards.h:405
vcl_size_t max_iterations_before_restart() const
Returns the maximum number of iterations before a restart.
Definition: bicgstab.hpp:64
T norm_2(std::vector< T, A > const &v1)
Definition: norm_2.hpp:86
viennacl::vector< NumericT > pipelined_solve(MatrixT const &A, viennacl::vector_base< NumericT > const &rhs, bicgstab_tag const &tag, viennacl::linalg::no_precond)
Implementation of a pipelined stabilized Bi-conjugate gradient solver.
Definition: bicgstab.hpp:91
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
bicgstab_tag(double tol=1e-8, vcl_size_t max_iters=400, vcl_size_t max_iters_before_restart=200)
The constructor.
Definition: bicgstab.hpp:56
Generic size and resize functionality for different vector and matrix types.
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Various little tools used here and there in ViennaCL.
double error() const
Returns the estimated relative error at the end of the solver run.
Definition: bicgstab.hpp:71
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
Definition: clear.hpp:57
This file provides the forward declarations for the main types used within ViennaCL.
A dense matrix class.
Definition: forwards.h:374
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
Definition: inner_prod.hpp:89
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
double tolerance() const
Returns the relative tolerance.
Definition: bicgstab.hpp:60
viennacl::vector< NumericT > solve(viennacl::compressed_matrix< NumericT > const &A, viennacl::vector_base< NumericT > const &rhs, bicgstab_tag const &tag, viennacl::linalg::no_precond)
Overload for the pipelined BiCGStab implementation for the ViennaCL sparse matrix types...
Definition: bicgstab.hpp:209
void pipelined_bicgstab_vector_update(vector_base< NumericT > &result, NumericT alpha, vector_base< NumericT > &p, NumericT omega, vector_base< NumericT > const &s, vector_base< NumericT > &residual, vector_base< NumericT > const &As, NumericT beta, vector_base< NumericT > const &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
Performs a joint vector update operation needed for an efficient pipelined BiCGStab algorithm...
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:91
Class for representing non-strided subvectors of a bigger vector x.
Definition: forwards.h:433
Sparse matrix class using the ELLPACK format for storing the nonzeros.
Definition: ell_matrix.hpp:53
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
Definition: vector.hpp:827
A tag class representing the use of no preconditioner.
Definition: forwards.h:833
Sparse matrix class using the sliced ELLPACK with parameters C, .
Definition: forwards.h:402
Extracts the underlying context from objects.
Class for representing strided subvectors of a bigger vector x.
Definition: forwards.h:436
vcl_size_t max_iterations() const
Returns the maximum number of iterations.
Definition: bicgstab.hpp:62
std::size_t vcl_size_t
Definition: forwards.h:74
Generic clear functionality for different vector and matrix types.
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Definition: result_of.hpp:238
void pipelined_bicgstab_prod(MatrixT const &A, vector_base< NumericT > const &p, vector_base< NumericT > &Ap, vector_base< NumericT > const &r0star, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
Performs a joint vector update operation needed for an efficient pipelined CG algorithm.
Implementations of specialized routines for the iterative solvers.
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
void error(double e) const
Sets the estimated relative error at the end of the solver run.
Definition: bicgstab.hpp:73
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
vcl_size_t iters() const
Return the number of solver iterations:
Definition: bicgstab.hpp:67
void pipelined_bicgstab_update_s(vector_base< NumericT > &s, vector_base< NumericT > &r, vector_base< NumericT > const &Ap, vector_base< NumericT > &inner_prod_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
Performs a joint vector update operation needed for an efficient pipelined CG algorithm.
void iters(vcl_size_t i) const
Definition: bicgstab.hpp:68
A tag for the stabilized Bi-conjugate gradient solver. Used for supplying solver parameters and for d...
Definition: bicgstab.hpp:47
iterator end()
Returns an iterator pointing to the end of the vector (STL like)
Definition: vector.hpp:834
A collection of compile time type deductions.
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)