1 #ifndef VIENNACL_GMRES_HPP_
2 #define VIENNACL_GMRES_HPP_
70 unsigned int ret = iterations_ / krylov_dim_;
71 if (ret > 0 && (ret * krylov_dim_ == iterations_) )
77 unsigned int iters()
const {
return iters_taken_; }
79 void iters(
unsigned int i)
const { iters_taken_ = i; }
82 double error()
const {
return last_error_; }
84 void error(
double e)
const { last_error_ = e; }
88 unsigned int iterations_;
89 unsigned int krylov_dim_;
92 mutable unsigned int iters_taken_;
93 mutable double last_error_;
99 template<
typename SrcVectorT,
typename DestVectorT>
106 template<
typename NumericT,
typename DestVectorT>
111 src.
begin() +
static_cast<difference_type
>(
start + len),
112 dest.begin() +
static_cast<difference_type
>(
start));
123 template<
typename VectorT,
typename NumericT>
126 NumericT input_j = input_vec(j);
141 mu = std::sqrt(sigma + input_j*input_j);
143 NumericT hh_vec_0 = (input_j <= 0) ? (input_j - mu) : (-sigma / (input_j + mu));
145 beta = NumericT(2) * hh_vec_0 * hh_vec_0 / (sigma + hh_vec_0 * hh_vec_0);
149 hh_vec[j] = NumericT(1);
154 template<
typename VectorT,
typename NumericT>
158 x -= (beta * hT_in_x) * h;
172 template <
typename MatrixType,
typename ScalarType>
183 std::vector<ScalarType> host_buffer_R(device_buffer_R.size());
191 std::vector<ScalarType> host_r_dot_vk_buffer(device_r_dot_vk_buffer.
size());
192 std::vector<ScalarType> host_values_xi_k(tag.
krylov_dim());
193 std::vector<ScalarType> host_values_eta_k_buffer(tag.
krylov_dim());
194 std::vector<ScalarType> host_update_coefficients(tag.
krylov_dim());
202 for (
unsigned int restart_count = 0; restart_count <= tag.
max_restarts(); ++restart_count)
207 if (restart_count > 0)
211 residual = rhs - residual;
230 for (k = 0; k < static_cast<vcl_size_t>(tag.
krylov_dim()); ++k)
241 device_inner_prod_buffer, device_r_dot_vk_buffer,
242 buffer_size_per_vector, k*buffer_size_per_vector);
261 device_vi_in_vk_buffer,
263 device_inner_prod_buffer, buffer_size_per_vector);
270 device_inner_prod_buffer, device_r_dot_vk_buffer,
271 buffer_size_per_vector, k*buffer_size_per_vector);
279 viennacl::fast_copy(device_r_dot_vk_buffer.begin(), device_r_dot_vk_buffer.end(), host_r_dot_vk_buffer.begin());
280 for (std::size_t i=0; i<k; ++i)
283 for (std::size_t j=0; j<buffer_size_per_vector; ++j)
284 host_values_xi_k[i] += host_r_dot_vk_buffer[i*buffer_size_per_vector + j];
290 viennacl::fast_copy(device_buffer_R.begin(), device_buffer_R.end(), host_buffer_R.begin());
296 for (std::size_t i=0; i<k; ++i)
298 if (std::fabs(host_buffer_R[i + i*k]) < tag.
tolerance() * host_buffer_R[0])
307 for (std::size_t i=0; i<k; ++i)
312 if (host_values_xi_k[i] >= rho || host_values_xi_k[i] <= -rho)
319 rho *= std::sin( std::acos(host_values_xi_k[i] / rho) );
325 host_values_eta_k_buffer = host_values_xi_k;
327 for (
int i2=static_cast<int>(k)-1; i2>-1; --i2)
330 for (
vcl_size_t j=static_cast<vcl_size_t>(i)+1; j<k; ++j)
331 host_values_eta_k_buffer[i] -= host_buffer_R[i + j*full_krylov_dim] * host_values_eta_k_buffer[j];
333 host_values_eta_k_buffer[i] /= host_buffer_R[i + i*full_krylov_dim];
341 host_update_coefficients[i] = rho_0 * host_values_eta_k_buffer[i];
343 viennacl::fast_copy(host_update_coefficients.begin(), host_update_coefficients.end(), device_values_xi_k.begin());
347 device_values_xi_k, k);
349 tag.
error( std::fabs(rho*rho_0 / norm_rhs) );
360 template<
typename NumericT>
370 template<
typename NumericT>
380 template<
typename NumericT>
390 template<
typename NumericT>
403 template<
typename NumericT>
413 template<
typename NumericT>
423 template<
typename NumericT>
433 template<
typename NumericT>
446 template<
typename NumericT>
456 template<
typename NumericT>
466 template<
typename NumericT>
476 template<
typename NumericT>
489 template<
typename NumericT>
499 template<
typename NumericT>
509 template<
typename NumericT>
519 template<
typename NumericT>
532 template<
typename NumericT>
542 template<
typename NumericT>
552 template<
typename NumericT>
562 template<
typename NumericT>
583 template<
typename MatrixT,
typename VectorT,
typename PreconditionerT>
584 VectorT
solve(MatrixT
const &
matrix, VectorT
const & rhs,
gmres_tag const & tag, PreconditionerT
const & precond)
589 VectorT result = rhs;
593 if (problem_size < krylov_dim)
594 krylov_dim = problem_size;
597 VectorT v_k_tilde = rhs;
598 VectorT v_k_tilde_temp = rhs;
600 std::vector< std::vector<CPU_NumericType> > R(krylov_dim, std::vector<CPU_NumericType>(tag.
krylov_dim()));
601 std::vector<CPU_NumericType> projection_rhs(krylov_dim);
603 std::vector<VectorT> householder_reflectors(krylov_dim, rhs);
604 std::vector<CPU_NumericType> betas(krylov_dim);
613 for (
unsigned int it = 0; it <= tag.
max_restarts(); ++it)
629 tag.
error(rho_0 / norm_rhs);
637 CPU_NumericType rho =
static_cast<CPU_NumericType
>(1.0);
644 for (k = 0; k < krylov_dim; ++k)
656 precond.apply(v_k_tilde);
661 v_k_tilde[k-1] = CPU_NumericType(1);
664 for (
int i = static_cast<int>(k)-1; i > -1; --i)
668 precond.apply(v_k_tilde_temp);
669 v_k_tilde = v_k_tilde_temp;
679 CPU_NumericType rho_k_k = 0;
700 projection_rhs[k] = res[k];
702 rho *= std::sin( std::acos(projection_rhs[k] / rho) );
704 if (std::fabs(rho * rho_0 / norm_rhs) < tag.
tolerance())
706 tag.
error( std::fabs(rho*rho_0 / norm_rhs) );
716 for (
int i2=static_cast<int>(k)-1; i2>-1; --i2)
720 projection_rhs[i] -= R[j][i] * projection_rhs[j];
722 projection_rhs[i] /= R[i][i];
729 res *= projection_rhs[0];
733 for (
unsigned int i = 0; i < k-1; ++i)
734 res[i] += projection_rhs[i+1];
740 for (
int i=static_cast<int>(k)-1; i>=0; --i)
749 tag.
error(std::fabs(rho*rho_0 / norm_rhs));
759 template<
typename MatrixT,
typename VectorT>
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
unsigned int max_restarts() const
Returns the maximum number of GMRES restarts.
T norm_2(std::vector< T, A > const &v1)
viennacl::vector< NumericT > pipelined_solve(MatrixT const &A, viennacl::vector_base< NumericT > const &rhs, bicgstab_tag const &tag, viennacl::linalg::no_precond)
Implementation of a pipelined stabilized Bi-conjugate gradient solver.
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
unsigned int max_iterations() const
Returns the maximum number of iterations.
Generic size and resize functionality for different vector and matrix types.
unsigned int iters() const
Returns the number of solver iterations.
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
void pipelined_gmres_gram_schmidt_stage1(vector_base< T > const &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t k, vector_base< T > &vi_in_vk_buffer, vcl_size_t buffer_chunk_size)
Computes the first reduction stage for multiple inner products <v_i, v_k>, i=0..k-1.
void pipelined_gmres_update_result(vector_base< T > &result, vector_base< T > const &residual, vector_base< T > const &krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vector_base< T > const &coefficients, vcl_size_t k)
Computes x += eta_0 r + sum_{i=1}^{k-1} eta_i v_{i-1}.
void pipelined_gmres_gram_schmidt_stage2(vector_base< T > &device_krylov_basis, vcl_size_t v_k_size, vcl_size_t v_k_internal_size, vcl_size_t k, vector_base< T > const &vi_in_vk_buffer, vector_base< T > &R_buffer, vcl_size_t krylov_dim, vector_base< T > &inner_prod_buffer, vcl_size_t buffer_chunk_size)
Computes the second reduction stage for multiple inner products <v_i, v_k>, i=0..k-1, then updates v_k -= <v_i, v_k> v_i and computes the first reduction stage for ||v_k||.
void iters(unsigned int i) const
Set the number of solver iterations (should only be modified by the solver)
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
This file provides the forward declarations for the main types used within ViennaCL.
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
void error(double e) const
Sets the estimated relative error at the end of the solver run.
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
A tag for the solver GMRES. Used for supplying solver parameters and for dispatching the solve() func...
gmres_tag(double tol=1e-10, unsigned int max_iterations=300, unsigned int krylov_dim=20)
The constructor.
viennacl::vector< NumericT > solve(viennacl::compressed_matrix< NumericT > const &A, viennacl::vector_base< NumericT > const &rhs, bicgstab_tag const &tag, viennacl::linalg::no_precond)
Overload for the pipelined BiCGStab implementation for the ViennaCL sparse matrix types...
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Class for representing non-strided subvectors of a bigger vector x.
Sparse matrix class using the ELLPACK format for storing the nonzeros.
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
A tag class representing the use of no preconditioner.
void pipelined_gmres_prod(MatrixType const &A, vector_base< T > const &p, vector_base< T > &Ap, vector_base< T > &inner_prod_buffer)
Performs a joint vector update operation needed for an efficient pipelined GMRES algorithm.
base_type::difference_type difference_type
Sparse matrix class using the sliced ELLPACK format with parameters C, sigma.
result_of::size_type< T >::type start(T const &obj)
Extracts the underlying context from objects.
void pipelined_gmres_normalize_vk(vector_base< T > &v_k, vector_base< T > const &residual, vector_base< T > &R_buffer, vcl_size_t offset_in_R, vector_base< T > const &inner_prod_buffer, vector_base< T > &r_dot_vk_buffer, vcl_size_t buffer_chunk_size, vcl_size_t buffer_chunk_offset)
Performs a vector normalization needed for an efficient pipelined GMRES algorithm.
Class for representing strided subvectors of a bigger vector x.
void gmres_setup_householder_vector(VectorT const &input_vec, VectorT &hh_vec, NumericT &beta, NumericT &mu, vcl_size_t j)
Computes the householder vector 'hh_vec' which rotates 'input_vec' such that all entries below the j-...
void gmres_copy_helper(SrcVectorT const &src, DestVectorT &dest, vcl_size_t len, vcl_size_t start=0)
Generic clear functionality for different vector and matrix types.
double tolerance() const
Returns the relative tolerance.
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Proxy classes for vectors.
Implementations of specialized routines for the iterative solvers.
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
size_type size() const
Returns the length of the vector (cf. std::vector)
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
double error() const
Returns the estimated relative error at the end of the solver run.
size_type internal_size() const
Returns the internal length of the vector, which is given by size() plus the extra memory due to padd...
A collection of compile time type deductions.
unsigned int krylov_dim() const
Returns the maximum dimension of the Krylov space before restart.
void gmres_householder_reflect(VectorT &x, VectorT const &h, NumericT beta)
A sparse square matrix, where entries are stored as triplets (i,j, val), where i and j are the row an...
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)