ViennaCL - The Vienna Computing Library  1.6.2
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
gmres.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_GMRES_HPP_
2 #define VIENNACL_GMRES_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include <vector>
26 #include <cmath>
27 #include <limits>
28 #include "viennacl/forwards.h"
29 #include "viennacl/tools/tools.hpp"
31 #include "viennacl/linalg/prod.hpp"
34 #include "viennacl/traits/size.hpp"
37 
40 
41 
42 namespace viennacl
43 {
44 namespace linalg
45 {
46 
49 class gmres_tag //generalized minimum residual
50 {
51 public:
58  gmres_tag(double tol = 1e-10, unsigned int max_iterations = 300, unsigned int krylov_dim = 20)
59  : tol_(tol), iterations_(max_iterations), krylov_dim_(krylov_dim), iters_taken_(0) {}
60 
62  double tolerance() const { return tol_; }
64  unsigned int max_iterations() const { return iterations_; }
66  unsigned int krylov_dim() const { return krylov_dim_; }
68  unsigned int max_restarts() const
69  {
70  unsigned int ret = iterations_ / krylov_dim_;
71  if (ret > 0 && (ret * krylov_dim_ == iterations_) )
72  return ret - 1;
73  return ret;
74  }
75 
77  unsigned int iters() const { return iters_taken_; }
79  void iters(unsigned int i) const { iters_taken_ = i; }
80 
82  double error() const { return last_error_; }
84  void error(double e) const { last_error_ = e; }
85 
86 private:
87  double tol_;
88  unsigned int iterations_;
89  unsigned int krylov_dim_;
90 
91  //return values from solver
92  mutable unsigned int iters_taken_;
93  mutable double last_error_;
94 };
95 
96 namespace detail
97 {
98 
99  template<typename SrcVectorT, typename DestVectorT>
100  void gmres_copy_helper(SrcVectorT const & src, DestVectorT & dest, vcl_size_t len, vcl_size_t start = 0)
101  {
102  for (vcl_size_t i=0; i<len; ++i)
103  dest[start+i] = src[start+i];
104  }
105 
106  template<typename NumericT, typename DestVectorT>
107  void gmres_copy_helper(viennacl::vector<NumericT> const & src, DestVectorT & dest, vcl_size_t len, vcl_size_t start = 0)
108  {
109  typedef typename viennacl::vector<NumericT>::difference_type difference_type;
110  viennacl::copy( src.begin() + static_cast<difference_type>(start),
111  src.begin() + static_cast<difference_type>(start + len),
112  dest.begin() + static_cast<difference_type>(start));
113  }
114 
123  template<typename VectorT, typename NumericT>
124  void gmres_setup_householder_vector(VectorT const & input_vec, VectorT & hh_vec, NumericT & beta, NumericT & mu, vcl_size_t j)
125  {
126  NumericT input_j = input_vec(j);
127 
128  // copy entries from input vector to householder vector:
129  detail::gmres_copy_helper(input_vec, hh_vec, viennacl::traits::size(hh_vec) - (j+1), j+1);
130 
131  NumericT sigma = viennacl::linalg::norm_2(hh_vec);
132  sigma *= sigma;
133 
134  if (sigma <= 0)
135  {
136  beta = 0;
137  mu = input_j;
138  }
139  else
140  {
141  mu = std::sqrt(sigma + input_j*input_j);
142 
143  NumericT hh_vec_0 = (input_j <= 0) ? (input_j - mu) : (-sigma / (input_j + mu));
144 
145  beta = NumericT(2) * hh_vec_0 * hh_vec_0 / (sigma + hh_vec_0 * hh_vec_0);
146 
147  //divide hh_vec by its diagonal element hh_vec_0
148  hh_vec /= hh_vec_0;
149  hh_vec[j] = NumericT(1);
150  }
151  }
152 
153  // Apply (I - beta h h^T) to x (Householder reflection with Householder vector h)
154  template<typename VectorT, typename NumericT>
155  void gmres_householder_reflect(VectorT & x, VectorT const & h, NumericT beta)
156  {
157  NumericT hT_in_x = viennacl::linalg::inner_prod(h, x);
158  x -= (beta * hT_in_x) * h;
159  }
160 
161 
172  template <typename MatrixType, typename ScalarType>
174  viennacl::vector<ScalarType> const & rhs,
175  gmres_tag const & tag,
177  {
178  viennacl::vector<ScalarType> residual(rhs);
180 
181  viennacl::vector<ScalarType> device_krylov_basis(rhs.internal_size() * tag.krylov_dim(), viennacl::traits::context(rhs)); // not using viennacl::matrix here because of spurious padding in column number
183  std::vector<ScalarType> host_buffer_R(device_buffer_R.size());
184 
185  vcl_size_t buffer_size_per_vector = 128;
186  vcl_size_t num_buffer_chunks = 3;
187  viennacl::vector<ScalarType> device_inner_prod_buffer = viennacl::zero_vector<ScalarType>(num_buffer_chunks*buffer_size_per_vector, viennacl::traits::context(rhs)); // temporary buffer
188  viennacl::vector<ScalarType> device_r_dot_vk_buffer = viennacl::zero_vector<ScalarType>(buffer_size_per_vector * tag.krylov_dim(), viennacl::traits::context(rhs)); // holds result of first reduction stage for <r, v_k> on device
189  viennacl::vector<ScalarType> device_vi_in_vk_buffer = viennacl::zero_vector<ScalarType>(buffer_size_per_vector * tag.krylov_dim(), viennacl::traits::context(rhs)); // holds <v_i, v_k> for i=0..k-1 on device
190  viennacl::vector<ScalarType> device_values_xi_k = viennacl::zero_vector<ScalarType>(tag.krylov_dim(), viennacl::traits::context(rhs)); // holds values \xi_k = <r, v_k> on device
191  std::vector<ScalarType> host_r_dot_vk_buffer(device_r_dot_vk_buffer.size());
192  std::vector<ScalarType> host_values_xi_k(tag.krylov_dim());
193  std::vector<ScalarType> host_values_eta_k_buffer(tag.krylov_dim());
194  std::vector<ScalarType> host_update_coefficients(tag.krylov_dim());
195 
196  ScalarType norm_rhs = viennacl::linalg::norm_2(residual);
197  ScalarType rho_0 = norm_rhs;
198  ScalarType rho = ScalarType(1);
199 
200  tag.iters(0);
201 
202  for (unsigned int restart_count = 0; restart_count <= tag.max_restarts(); ++restart_count)
203  {
204  //
205  // prepare restart:
206  //
207  if (restart_count > 0)
208  {
209  // compute new residual without introducing a temporary for A*x:
210  residual = viennacl::linalg::prod(A, result);
211  residual = rhs - residual;
212 
213  rho_0 = viennacl::linalg::norm_2(residual);
214  }
215 
216  if (rho_0 <= ScalarType(0)) // trivial right hand side?
217  break;
218 
219  residual /= rho_0;
220  rho = ScalarType(1);
221 
222  // check for convergence:
223  if (rho_0 / norm_rhs < tag.tolerance())
224  break;
225 
226  //
227  // minimize in Krylov basis:
228  //
229  vcl_size_t k = 0;
230  for (k = 0; k < static_cast<vcl_size_t>(tag.krylov_dim()); ++k)
231  {
232  if (k == 0)
233  {
234  // compute v0 = A*r and perform first reduction stage for ||v0||
235  viennacl::vector_range<viennacl::vector<ScalarType> > v0(device_krylov_basis, viennacl::range(0, rhs.size()));
236  viennacl::linalg::pipelined_gmres_prod(A, residual, v0, device_inner_prod_buffer);
237 
238  // Normalize v_1 and compute first reduction stage for <r, v_0> in device_r_dot_vk_buffer:
240  device_buffer_R, k*tag.krylov_dim() + k,
241  device_inner_prod_buffer, device_r_dot_vk_buffer,
242  buffer_size_per_vector, k*buffer_size_per_vector);
243  }
244  else
245  {
246  // compute v0 = A*r and perform first reduction stage for ||v0||
247  viennacl::vector_range<viennacl::vector<ScalarType> > vk (device_krylov_basis, viennacl::range( k *rhs.internal_size(), k *rhs.internal_size() + rhs.size()));
248  viennacl::vector_range<viennacl::vector<ScalarType> > vk_minus_1(device_krylov_basis, viennacl::range((k-1)*rhs.internal_size(), (k-1)*rhs.internal_size() + rhs.size()));
249  viennacl::linalg::pipelined_gmres_prod(A, vk_minus_1, vk, device_inner_prod_buffer);
250 
251  //
252  // Gram-Schmidt, stage 1: compute first reduction stage of <v_i, v_k>
253  //
254  viennacl::linalg::pipelined_gmres_gram_schmidt_stage1(device_krylov_basis, rhs.size(), rhs.internal_size(), k, device_vi_in_vk_buffer, buffer_size_per_vector);
255 
256  //
257  // Gram-Schmidt, stage 2: compute second reduction stage of <v_i, v_k> and use that to compute v_k -= sum_i <v_i, v_k> v_i.
258  // Store <v_i, v_k> in R-matrix and compute first reduction stage for ||v_k||
259  //
260  viennacl::linalg::pipelined_gmres_gram_schmidt_stage2(device_krylov_basis, rhs.size(), rhs.internal_size(), k,
261  device_vi_in_vk_buffer,
262  device_buffer_R, tag.krylov_dim(),
263  device_inner_prod_buffer, buffer_size_per_vector);
264 
265  //
266  // Normalize v_k and compute first reduction stage for <r, v_k> in device_r_dot_vk_buffer:
267  //
269  device_buffer_R, k*tag.krylov_dim() + k,
270  device_inner_prod_buffer, device_r_dot_vk_buffer,
271  buffer_size_per_vector, k*buffer_size_per_vector);
272  }
273  }
274 
275  //
276  // Run reduction to obtain the values \xi_k = <r, v_k>.
277  // Note that unlike Algorithm 2.1 in Walker: "A Simpler GMRES", we do not update the residual
278  //
279  viennacl::fast_copy(device_r_dot_vk_buffer.begin(), device_r_dot_vk_buffer.end(), host_r_dot_vk_buffer.begin());
280  for (std::size_t i=0; i<k; ++i)
281  {
282  host_values_xi_k[i] = ScalarType(0);
283  for (std::size_t j=0; j<buffer_size_per_vector; ++j)
284  host_values_xi_k[i] += host_r_dot_vk_buffer[i*buffer_size_per_vector + j];
285  }
286 
287  //
288  // Bring values in R back to host:
289  //
290  viennacl::fast_copy(device_buffer_R.begin(), device_buffer_R.end(), host_buffer_R.begin());
291 
292  //
293  // Check for premature convergence: If the diagonal element drops too far below the first norm, we're done and restrict the Krylov size accordingly.
294  //
295  vcl_size_t full_krylov_dim = k; //needed for proper access to R
296  for (std::size_t i=0; i<k; ++i)
297  {
298  if (std::fabs(host_buffer_R[i + i*k]) < tag.tolerance() * host_buffer_R[0])
299  {
300  k = i;
301  break;
302  }
303  }
304 
305 
306  // Compute error estimator:
307  for (std::size_t i=0; i<k; ++i)
308  {
309  tag.iters( tag.iters() + 1 ); //increase iteration counter
310 
311  // check for accumulation of round-off errors for poorly conditioned systems
312  if (host_values_xi_k[i] >= rho || host_values_xi_k[i] <= -rho)
313  {
314  k = i;
315  break; // restrict Krylov space at this point. No gain from using additional basis vectors, since orthogonality is lost.
316  }
317 
318  // update error estimator
319  rho *= std::sin( std::acos(host_values_xi_k[i] / rho) );
320  }
321 
322  //
323  // Solve minimization problem:
324  //
325  host_values_eta_k_buffer = host_values_xi_k;
326 
327  for (int i2=static_cast<int>(k)-1; i2>-1; --i2)
328  {
329  vcl_size_t i = static_cast<vcl_size_t>(i2);
330  for (vcl_size_t j=static_cast<vcl_size_t>(i)+1; j<k; ++j)
331  host_values_eta_k_buffer[i] -= host_buffer_R[i + j*full_krylov_dim] * host_values_eta_k_buffer[j];
332 
333  host_values_eta_k_buffer[i] /= host_buffer_R[i + i*full_krylov_dim];
334  }
335 
336  //
337  // Update x += rho * z with z = \eta_0 * residual + sum_{i=0}^{k-1} \eta_{i+1} v_i
338  // Note that we have not updated the residual yet, hence this slightly modified as compared to the form given in Algorithm 2.1 in Walker: "A Simpler GMRES"
339  //
340  for (vcl_size_t i=0; i<k; ++i)
341  host_update_coefficients[i] = rho_0 * host_values_eta_k_buffer[i];
342 
343  viennacl::fast_copy(host_update_coefficients.begin(), host_update_coefficients.end(), device_values_xi_k.begin()); //reuse device_values_xi_k_buffer here for simplicity
344 
346  device_krylov_basis, rhs.size(), rhs.internal_size(),
347  device_values_xi_k, k);
348 
349  tag.error( std::fabs(rho*rho_0 / norm_rhs) );
350  }
351 
352  return result;
353  }
354 
355 }
356 
357 // compressed_matrix
358 
360 template<typename NumericT>
363  gmres_tag const & tag,
365 {
367 }
368 
370 template<typename NumericT>
372  viennacl::vector<NumericT> const & rhs,
373  gmres_tag const & tag,
375 {
377 }
378 
380 template<typename NumericT>
383  gmres_tag const & tag,
385 {
387 }
388 
390 template<typename NumericT>
393  gmres_tag const & tag,
395 {
397 }
398 
399 
400 // coordinate_matrix
401 
403 template<typename NumericT>
406  gmres_tag const & tag,
408 {
410 }
411 
413 template<typename NumericT>
415  viennacl::vector<NumericT> const & rhs,
416  gmres_tag const & tag,
418 {
420 }
421 
423 template<typename NumericT>
426  gmres_tag const & tag,
428 {
430 }
431 
433 template<typename NumericT>
436  gmres_tag const & tag,
438 {
440 }
441 
442 
443 // ell_matrix
444 
446 template<typename NumericT>
449  gmres_tag const & tag,
451 {
453 }
454 
456 template<typename NumericT>
458  viennacl::vector<NumericT> const & rhs,
459  gmres_tag const & tag,
461 {
463 }
464 
466 template<typename NumericT>
469  gmres_tag const & tag,
471 {
473 }
474 
476 template<typename NumericT>
479  gmres_tag const & tag,
481 {
483 }
484 
485 
486 // sliced_ell_matrix
487 
489 template<typename NumericT>
492  gmres_tag const & tag,
494 {
496 }
497 
499 template<typename NumericT>
501  viennacl::vector<NumericT> const & rhs,
502  gmres_tag const & tag,
504 {
506 }
507 
509 template<typename NumericT>
512  gmres_tag const & tag,
514 {
516 }
517 
519 template<typename NumericT>
522  gmres_tag const & tag,
524 {
526 }
527 
528 
529 // hyb_matrix
530 
532 template<typename NumericT>
535  gmres_tag const & tag,
537 {
539 }
540 
542 template<typename NumericT>
544  viennacl::vector<NumericT> const & rhs,
545  gmres_tag const & tag,
547 {
549 }
550 
552 template<typename NumericT>
555  gmres_tag const & tag,
557 {
559 }
560 
562 template<typename NumericT>
565  gmres_tag const & tag,
567 {
569 }
570 
571 
572 
583 template<typename MatrixT, typename VectorT, typename PreconditionerT>
584 VectorT solve(MatrixT const & matrix, VectorT const & rhs, gmres_tag const & tag, PreconditionerT const & precond)
585 {
586  typedef typename viennacl::result_of::value_type<VectorT>::type NumericType;
587  typedef typename viennacl::result_of::cpu_value_type<NumericType>::type CPU_NumericType;
588  unsigned int problem_size = static_cast<unsigned int>(viennacl::traits::size(rhs));
589  VectorT result = rhs;
590  viennacl::traits::clear(result);
591 
592  vcl_size_t krylov_dim = static_cast<vcl_size_t>(tag.krylov_dim());
593  if (problem_size < krylov_dim)
594  krylov_dim = problem_size; //A Krylov space larger than the matrix would lead to seg-faults (mathematically, error is certain to be zero already)
595 
596  VectorT res = rhs;
597  VectorT v_k_tilde = rhs;
598  VectorT v_k_tilde_temp = rhs;
599 
600  std::vector< std::vector<CPU_NumericType> > R(krylov_dim, std::vector<CPU_NumericType>(tag.krylov_dim()));
601  std::vector<CPU_NumericType> projection_rhs(krylov_dim);
602 
603  std::vector<VectorT> householder_reflectors(krylov_dim, rhs);
604  std::vector<CPU_NumericType> betas(krylov_dim);
605 
606  CPU_NumericType norm_rhs = viennacl::linalg::norm_2(rhs);
607 
608  if (norm_rhs <= 0) //solution is zero if RHS norm is zero
609  return result;
610 
611  tag.iters(0);
612 
613  for (unsigned int it = 0; it <= tag.max_restarts(); ++it)
614  {
615  //
616  // (Re-)Initialize residual: r = b - A*x (without temporary for the result of A*x)
617  //
618  res = rhs;
619  res -= viennacl::linalg::prod(matrix, result); //initial guess zero
620  precond.apply(res);
621 
622  CPU_NumericType rho_0 = viennacl::linalg::norm_2(res);
623 
624  //
625  // Check for premature convergence
626  //
627  if (rho_0 / norm_rhs < tag.tolerance() ) // norm_rhs is known to be nonzero here
628  {
629  tag.error(rho_0 / norm_rhs);
630  return result;
631  }
632 
633  //
634  // Normalize residual and set 'rho' to 1 as requested in 'A Simpler GMRES' by Walker and Zhou.
635  //
636  res /= rho_0;
637  CPU_NumericType rho = static_cast<CPU_NumericType>(1.0);
638 
639 
640  //
641  // Iterate up until maximal Krylove space dimension is reached:
642  //
643  vcl_size_t k = 0;
644  for (k = 0; k < krylov_dim; ++k)
645  {
646  tag.iters( tag.iters() + 1 ); //increase iteration counter
647 
648  // prepare storage:
650  viennacl::traits::clear(householder_reflectors[k]);
651 
652  //compute v_k = A * v_{k-1} via Householder matrices
653  if (k == 0)
654  {
655  v_k_tilde = viennacl::linalg::prod(matrix, res);
656  precond.apply(v_k_tilde);
657  }
658  else
659  {
660  viennacl::traits::clear(v_k_tilde);
661  v_k_tilde[k-1] = CPU_NumericType(1);
662 
663  //Householder rotations, part 1: Compute P_1 * P_2 * ... * P_{k-1} * e_{k-1}
664  for (int i = static_cast<int>(k)-1; i > -1; --i)
665  detail::gmres_householder_reflect(v_k_tilde, householder_reflectors[vcl_size_t(i)], betas[vcl_size_t(i)]);
666 
667  v_k_tilde_temp = viennacl::linalg::prod(matrix, v_k_tilde);
668  precond.apply(v_k_tilde_temp);
669  v_k_tilde = v_k_tilde_temp;
670 
671  //Householder rotations, part 2: Compute P_{k-1} * ... * P_{1} * v_k_tilde
672  for (vcl_size_t i = 0; i < k; ++i)
673  detail::gmres_householder_reflect(v_k_tilde, householder_reflectors[i], betas[i]);
674  }
675 
676  //
677  // Compute Householder reflection for v_k_tilde such that all entries below k-th entry are zero:
678  //
679  CPU_NumericType rho_k_k = 0;
680  detail::gmres_setup_householder_vector(v_k_tilde, householder_reflectors[k], betas[k], rho_k_k, k);
681 
682  //
683  // copy first k entries from v_k_tilde to R[k] in order to fill k-th column with result of
684  // P_k * v_k_tilde = (v[0], ... , v[k-1], norm(v), 0, 0, ...) =: (rho_{1,k}, rho_{2,k}, ..., rho_{k,k}, 0, ..., 0);
685  //
686  detail::gmres_copy_helper(v_k_tilde, R[k], k);
687  R[k][k] = rho_k_k;
688 
689  //
690  // Update residual: r = P_k r
691  // Set zeta_k = r[k] including machine precision considerations: mathematically we have |r[k]| <= rho
692  // Set rho *= sin(acos(r[k] / rho))
693  //
694  detail::gmres_householder_reflect(res, householder_reflectors[k], betas[k]);
695 
696  if (res[k] > rho) //machine precision reached
697  res[k] = rho;
698  if (res[k] < -rho) //machine precision reached
699  res[k] = -rho;
700  projection_rhs[k] = res[k];
701 
702  rho *= std::sin( std::acos(projection_rhs[k] / rho) );
703 
704  if (std::fabs(rho * rho_0 / norm_rhs) < tag.tolerance()) // Residual is sufficiently reduced, stop here
705  {
706  tag.error( std::fabs(rho*rho_0 / norm_rhs) );
707  ++k;
708  break;
709  }
710  } // for k
711 
712  //
713  // Triangular solver stage:
714  //
715 
716  for (int i2=static_cast<int>(k)-1; i2>-1; --i2)
717  {
718  vcl_size_t i = static_cast<vcl_size_t>(i2);
719  for (vcl_size_t j=i+1; j<k; ++j)
720  projection_rhs[i] -= R[j][i] * projection_rhs[j]; //R is transposed
721 
722  projection_rhs[i] /= R[i][i];
723  }
724 
725  //
726  // Note: 'projection_rhs' now holds the solution (eta_1, ..., eta_k)
727  //
728 
729  res *= projection_rhs[0];
730 
731  if (k > 0)
732  {
733  for (unsigned int i = 0; i < k-1; ++i)
734  res[i] += projection_rhs[i+1];
735  }
736 
737  //
738  // Form z inplace in 'res' by applying P_1 * ... * P_{k}
739  //
740  for (int i=static_cast<int>(k)-1; i>=0; --i)
741  detail::gmres_householder_reflect(res, householder_reflectors[vcl_size_t(i)], betas[vcl_size_t(i)]);
742 
743  res *= rho_0;
744  result += res; // x += rho_0 * z in the paper
745 
746  //
747  // Check for convergence:
748  //
749  tag.error(std::fabs(rho*rho_0 / norm_rhs));
750  if ( tag.error() < tag.tolerance() )
751  return result;
752  }
753 
754  return result;
755 }
756 
759 template<typename MatrixT, typename VectorT>
760 VectorT solve(MatrixT const & A, VectorT const & rhs, gmres_tag const & tag)
761 {
762  return solve(A, rhs, tag, no_precond());
763 }
764 
765 
766 }
767 }
768 
769 #endif
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
Definition: forwards.h:405
unsigned int max_restarts() const
Returns the maximum number of GMRES restarts.
Definition: gmres.hpp:68
T norm_2(std::vector< T, A > const &v1)
Definition: norm_2.hpp:86
viennacl::vector< NumericT > pipelined_solve(MatrixT const &A, viennacl::vector_base< NumericT > const &rhs, bicgstab_tag const &tag, viennacl::linalg::no_precond)
Implementation of a pipelined stabilized Bi-conjugate gradient solver.
Definition: bicgstab.hpp:91
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
unsigned int max_iterations() const
Returns the maximum number of iterations.
Definition: gmres.hpp:64
Generic size and resize functionality for different vector and matrix types.
unsigned int iters() const
Return the number of solver iterations:
Definition: gmres.hpp:77
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Various little tools used here and there in ViennaCL.
void pipelined_gmres_gram_schmidt_stage1(vector_base< T > const &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t k, vector_base< T > &vi_in_vk_buffer, vcl_size_t buffer_chunk_size)
Computes the first reduction stage for multiple inner products , i=0..k-1.
void pipelined_gmres_update_result(vector_base< T > &result, vector_base< T > const &residual, vector_base< T > const &krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vector_base< T > const &coefficients, vcl_size_t k)
Computes x += eta_0 r + sum_{i=1}^{k-1} eta_i v_{i-1}.
void pipelined_gmres_gram_schmidt_stage2(vector_base< T > &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t k, vector_base< T > const &vi_in_vk_buffer, vector_base< T > &R_buffer, vcl_size_t krylov_dim, vector_base< T > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
Computes the second reduction stage for multiple inner products , i=0..k-1, then updates v_k -= v_i and computes the first reduction stage for ||v_k||.
void iters(unsigned int i) const
Set the number of solver iterations (should only be modified by the solver)
Definition: gmres.hpp:79
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
Definition: clear.hpp:57
This file provides the forward declarations for the main types used within ViennaCL.
A dense matrix class.
Definition: forwards.h:374
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
Definition: inner_prod.hpp:89
void error(double e) const
Sets the estimated relative error at the end of the solver run.
Definition: gmres.hpp:84
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
A tag for the solver GMRES. Used for supplying solver parameters and for dispatching the solve() func...
Definition: gmres.hpp:49
gmres_tag(double tol=1e-10, unsigned int max_iterations=300, unsigned int krylov_dim=20)
The constructor.
Definition: gmres.hpp:58
viennacl::vector< NumericT > solve(viennacl::compressed_matrix< NumericT > const &A, viennacl::vector_base< NumericT > const &rhs, bicgstab_tag const &tag, viennacl::linalg::no_precond)
Overload for the pipelined BiCGStab implementation for the ViennaCL sparse matrix types...
Definition: bicgstab.hpp:209
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:91
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
Class for representing non-strided subvectors of a bigger vector x.
Definition: forwards.h:433
Sparse matrix class using the ELLPACK format for storing the nonzeros.
Definition: ell_matrix.hpp:53
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
Definition: vector.hpp:827
A tag class representing the use of no preconditioner.
Definition: forwards.h:833
void pipelined_gmres_prod(MatrixType const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
Performs a joint vector update operation needed for an efficient pipelined GMRES algorithm.
base_type::difference_type difference_type
Definition: vector.hpp:943
vcl_size_t size() const
Definition: vector_def.hpp:47
Sparse matrix class using the sliced ELLPACK with parameters C, .
Definition: forwards.h:402
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:44
Extracts the underlying context from objects.
void pipelined_gmres_normalize_vk(vector_base< T > &v_k, vector_base< T > const &residual, vector_base< T > &R_buffer, vcl_size_t offset_in_R, vector_base< T > const &inner_prod_buffer, vector_base< T > &r_dot_vk_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
Performs a vector normalization needed for an efficient pipelined GMRES algorithm.
Class for representing strided subvectors of a bigger vector x.
Definition: forwards.h:436
void gmres_setup_householder_vector(VectorT const &input_vec, VectorT &hh_vec, NumericT &beta, NumericT &mu, vcl_size_t j)
Computes the householder vector 'hh_vec' which rotates 'input_vec' such that all entries below the j-...
Definition: gmres.hpp:124
void gmres_copy_helper(SrcVectorT const &src, DestVectorT &dest, vcl_size_t len, vcl_size_t start=0)
Definition: gmres.hpp:100
std::size_t vcl_size_t
Definition: forwards.h:74
Generic clear functionality for different vector and matrix types.
double tolerance() const
Returns the relative tolerance.
Definition: gmres.hpp:62
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Definition: result_of.hpp:238
Proxy classes for vectors.
Implementations of specialized routines for the iterative solvers.
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
Definition: forwards.h:423
float ScalarType
Definition: fft_1d.cpp:42
double error() const
Returns the estimated relative error at the end of the solver run.
Definition: gmres.hpp:82
size_type internal_size() const
Returns the internal length of the vector, which is given by size() plus the extra memory due to padd...
Definition: vector_def.hpp:120
A collection of compile time type deductions.
unsigned int krylov_dim() const
Returns the maximum dimension of the Krylov space before restart.
Definition: gmres.hpp:66
void gmres_householder_reflect(VectorT &x, VectorT const &h, NumericT beta)
Definition: gmres.hpp:155
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)