ViennaCL - The Vienna Computing Library
1.5.2
|
00001 #ifndef VIENNACL_GMRES_HPP_ 00002 #define VIENNACL_GMRES_HPP_ 00003 00004 /* ========================================================================= 00005 Copyright (c) 2010-2014, Institute for Microelectronics, 00006 Institute for Analysis and Scientific Computing, 00007 TU Wien. 00008 Portions of this software are copyright by UChicago Argonne, LLC. 00009 00010 ----------------- 00011 ViennaCL - The Vienna Computing Library 00012 ----------------- 00013 00014 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 00015 00016 (A list of authors and contributors can be found in the PDF manual) 00017 00018 License: MIT (X11), see file LICENSE in the base directory 00019 ============================================================================= */ 00020 00025 #include <vector> 00026 #include <cmath> 00027 #include <limits> 00028 #include "viennacl/forwards.h" 00029 #include "viennacl/tools/tools.hpp" 00030 #include "viennacl/linalg/norm_2.hpp" 00031 #include "viennacl/linalg/prod.hpp" 00032 #include "viennacl/linalg/inner_prod.hpp" 00033 #include "viennacl/traits/clear.hpp" 00034 #include "viennacl/traits/size.hpp" 00035 #include "viennacl/meta/result_of.hpp" 00036 00037 namespace viennacl 00038 { 00039 namespace linalg 00040 { 00041 00044 class gmres_tag //generalized minimum residual 00045 { 00046 public: 00053 gmres_tag(double tol = 1e-10, unsigned int max_iterations = 300, unsigned int krylov_dim = 20) 00054 : tol_(tol), iterations_(max_iterations), krylov_dim_(krylov_dim), iters_taken_(0) {} 00055 00057 double tolerance() const { return tol_; } 00059 unsigned int max_iterations() const { return iterations_; } 00061 unsigned int krylov_dim() const { return krylov_dim_; } 00063 unsigned int max_restarts() const 00064 { 00065 unsigned int ret = iterations_ / krylov_dim_; 00066 if (ret > 0 && (ret * krylov_dim_ == iterations_) ) 00067 return ret - 1; 00068 return ret; 00069 } 00070 00072 unsigned int iters() const { return iters_taken_; } 00074 void iters(unsigned int i) const { iters_taken_ = i; } 00075 00077 double error() const { return last_error_; } 00079 void error(double e) const { last_error_ = e; } 00080 00081 private: 00082 double tol_; 00083 unsigned int iterations_; 00084 unsigned int krylov_dim_; 00085 00086 //return values from solver 00087 mutable unsigned int iters_taken_; 00088 mutable double last_error_; 00089 }; 00090 00091 namespace detail 00092 { 00093 00094 template <typename SRC_VECTOR, typename DEST_VECTOR> 00095 void gmres_copy_helper(SRC_VECTOR const & src, DEST_VECTOR & dest, vcl_size_t len, vcl_size_t start = 0) 00096 { 00097 for (vcl_size_t i=0; i<len; ++i) 00098 dest[start+i] = src[start+i]; 00099 } 00100 00101 template <typename ScalarType, typename DEST_VECTOR> 00102 void gmres_copy_helper(viennacl::vector<ScalarType> const & src, DEST_VECTOR & dest, vcl_size_t len, vcl_size_t start = 0) 00103 { 00104 typedef typename viennacl::vector<ScalarType>::difference_type difference_type; 00105 viennacl::copy( src.begin() + static_cast<difference_type>(start), 00106 src.begin() + static_cast<difference_type>(start + len), 00107 dest.begin() + static_cast<difference_type>(start)); 00108 } 00109 00118 template <typename VectorType, typename ScalarType> 00119 void gmres_setup_householder_vector(VectorType const & input_vec, VectorType & hh_vec, ScalarType & beta, ScalarType & mu, vcl_size_t j) 00120 { 00121 ScalarType input_j = input_vec(j); 00122 00123 // copy entries from input vector to householder vector: 00124 detail::gmres_copy_helper(input_vec, hh_vec, viennacl::traits::size(hh_vec) - (j+1), j+1); 00125 00126 ScalarType sigma = viennacl::linalg::norm_2(hh_vec); 00127 sigma *= sigma; 00128 00129 if (sigma == 0) 00130 { 00131 beta = 0; 00132 mu = input_j; 00133 } 00134 else 00135 { 00136 mu = std::sqrt(sigma + input_j*input_j); 00137 00138 ScalarType hh_vec_0 = (input_j <= 0) ? (input_j - mu) : (-sigma / (input_j + mu)); 00139 00140 beta = ScalarType(2) * hh_vec_0 * hh_vec_0 / (sigma + hh_vec_0 * hh_vec_0); 00141 00142 //divide hh_vec by its diagonal element hh_vec_0 00143 hh_vec /= hh_vec_0; 00144 hh_vec[j] = ScalarType(1); 00145 } 00146 } 00147 00148 // Apply (I - beta h h^T) to x (Householder reflection with Householder vector h) 00149 template <typename VectorType, typename ScalarType> 00150 void gmres_householder_reflect(VectorType & x, VectorType const & h, ScalarType beta) 00151 { 00152 ScalarType hT_in_x = viennacl::linalg::inner_prod(h, x); 00153 x -= (beta * hT_in_x) * h; 00154 } 00155 00156 } 00157 00168 template <typename MatrixType, typename VectorType, typename PreconditionerType> 00169 VectorType solve(const MatrixType & matrix, VectorType const & rhs, gmres_tag const & tag, PreconditionerType const & precond) 00170 { 00171 typedef typename viennacl::result_of::value_type<VectorType>::type ScalarType; 00172 typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type CPU_ScalarType; 00173 unsigned int problem_size = static_cast<unsigned int>(viennacl::traits::size(rhs)); 00174 VectorType result = rhs; 00175 viennacl::traits::clear(result); 00176 00177 unsigned int krylov_dim = tag.krylov_dim(); 00178 if (problem_size < tag.krylov_dim()) 00179 krylov_dim = problem_size; //A Krylov space larger than the matrix would lead to seg-faults (mathematically, error is certain to be zero already) 00180 00181 VectorType res = rhs; 00182 VectorType v_k_tilde = rhs; 00183 VectorType v_k_tilde_temp = rhs; 00184 00185 std::vector< std::vector<CPU_ScalarType> > R(krylov_dim, std::vector<CPU_ScalarType>(tag.krylov_dim())); 00186 std::vector<CPU_ScalarType> projection_rhs(krylov_dim); 00187 00188 std::vector<VectorType> householder_reflectors(krylov_dim, rhs); 00189 std::vector<CPU_ScalarType> betas(krylov_dim); 00190 00191 CPU_ScalarType norm_rhs = viennacl::linalg::norm_2(rhs); 00192 00193 if (norm_rhs == 0) //solution is zero if RHS norm is zero 00194 return result; 00195 00196 tag.iters(0); 00197 00198 for (unsigned int it = 0; it <= tag.max_restarts(); ++it) 00199 { 00200 // 00201 // (Re-)Initialize residual: r = b - A*x (without temporary for the result of A*x) 00202 // 00203 res = rhs; 00204 res -= viennacl::linalg::prod(matrix, result); //initial guess zero 00205 precond.apply(res); 00206 00207 CPU_ScalarType rho_0 = viennacl::linalg::norm_2(res); 00208 00209 // 00210 // Check for premature convergence 00211 // 00212 if (rho_0 / norm_rhs < tag.tolerance() ) // norm_rhs is known to be nonzero here 00213 { 00214 tag.error(rho_0 / norm_rhs); 00215 return result; 00216 } 00217 00218 // 00219 // Normalize residual and set 'rho' to 1 as requested in 'A Simpler GMRES' by Walker and Zhou. 00220 // 00221 res /= rho_0; 00222 CPU_ScalarType rho = static_cast<CPU_ScalarType>(1.0); 00223 00224 00225 // 00226 // Iterate up until maximal Krylove space dimension is reached: 00227 // 00228 unsigned int k = 0; 00229 for (k = 0; k < krylov_dim; ++k) 00230 { 00231 tag.iters( tag.iters() + 1 ); //increase iteration counter 00232 00233 // prepare storage: 00234 viennacl::traits::clear(R[k]); 00235 viennacl::traits::clear(householder_reflectors[k]); 00236 00237 //compute v_k = A * v_{k-1} via Householder matrices 00238 if (k == 0) 00239 { 00240 v_k_tilde = viennacl::linalg::prod(matrix, res); 00241 precond.apply(v_k_tilde); 00242 } 00243 else 00244 { 00245 viennacl::traits::clear(v_k_tilde); 00246 v_k_tilde[k-1] = CPU_ScalarType(1); 00247 00248 //Householder rotations, part 1: Compute P_1 * P_2 * ... * P_{k-1} * e_{k-1} 00249 for (int i = k-1; i > -1; --i) 00250 detail::gmres_householder_reflect(v_k_tilde, householder_reflectors[i], betas[i]); 00251 00252 v_k_tilde_temp = viennacl::linalg::prod(matrix, v_k_tilde); 00253 precond.apply(v_k_tilde_temp); 00254 v_k_tilde = v_k_tilde_temp; 00255 00256 //Householder rotations, part 2: Compute P_{k-1} * ... * P_{1} * v_k_tilde 00257 for (unsigned int i = 0; i < k; ++i) 00258 detail::gmres_householder_reflect(v_k_tilde, householder_reflectors[i], betas[i]); 00259 } 00260 00261 // 00262 // Compute Householder reflection for v_k_tilde such that all entries below k-th entry are zero: 00263 // 00264 CPU_ScalarType rho_k_k = 0; 00265 detail::gmres_setup_householder_vector(v_k_tilde, householder_reflectors[k], betas[k], rho_k_k, k); 00266 00267 // 00268 // copy first k entries from v_k_tilde to R[k] in order to fill k-th column with result of 00269 // P_k * v_k_tilde = (v[0], ... , v[k-1], norm(v), 0, 0, ...) =: (rho_{1,k}, rho_{2,k}, ..., rho_{k,k}, 0, ..., 0); 00270 // 00271 detail::gmres_copy_helper(v_k_tilde, R[k], k); 00272 R[k][k] = rho_k_k; 00273 00274 // 00275 // Update residual: r = P_k r 00276 // Set zeta_k = r[k] including machine precision considerations: mathematically we have |r[k]| <= rho 00277 // Set rho *= sin(acos(r[k] / rho)) 00278 // 00279 detail::gmres_householder_reflect(res, householder_reflectors[k], betas[k]); 00280 00281 if (res[k] > rho) //machine precision reached 00282 res[k] = rho; 00283 if (res[k] < -rho) //machine precision reached 00284 res[k] = -rho; 00285 projection_rhs[k] = res[k]; 00286 00287 rho *= std::sin( std::acos(projection_rhs[k] / rho) ); 00288 00289 if (std::fabs(rho * rho_0 / norm_rhs) < tag.tolerance()) // Residual is sufficiently reduced, stop here 00290 { 00291 tag.error( std::fabs(rho*rho_0 / norm_rhs) ); 00292 ++k; 00293 break; 00294 } 00295 } // for k 00296 00297 // 00298 // Triangular solver stage: 00299 // 00300 00301 for (int i=k-1; i>-1; --i) 00302 { 00303 for (unsigned int j=i+1; j<k; ++j) 00304 projection_rhs[i] -= R[j][i] * projection_rhs[j]; //R is transposed 00305 00306 projection_rhs[i] /= R[i][i]; 00307 } 00308 00309 // 00310 // Note: 'projection_rhs' now holds the solution (eta_1, ..., eta_k) 00311 // 00312 00313 res *= projection_rhs[0]; 00314 00315 if (k > 0) 00316 { 00317 for (unsigned int i = 0; i < k-1; ++i) 00318 res[i] += projection_rhs[i+1]; 00319 } 00320 00321 // 00322 // Form z inplace in 'res' by applying P_1 * ... * P_{k} 00323 // 00324 for (int i=k-1; i>=0; --i) 00325 detail::gmres_householder_reflect(res, householder_reflectors[i], betas[i]); 00326 00327 res *= rho_0; 00328 result += res; // x += rho_0 * z in the paper 00329 00330 // 00331 // Check for convergence: 00332 // 00333 tag.error(std::fabs(rho*rho_0 / norm_rhs)); 00334 if ( tag.error() < tag.tolerance() ) 00335 return result; 00336 } 00337 00338 return result; 00339 } 00340 00343 template <typename MatrixType, typename VectorType> 00344 VectorType solve(const MatrixType & matrix, VectorType const & rhs, gmres_tag const & tag) 00345 { 00346 return solve(matrix, rhs, tag, no_precond()); 00347 } 00348 00349 00350 } 00351 } 00352 00353 #endif