ViennaCL - The Vienna Computing Library
1.5.2
|
00001 #ifndef VIENNACL_LINALG_DETAIL_SPAI_FSPAI_HPP 00002 #define VIENNACL_LINALG_DETAIL_SPAI_FSPAI_HPP 00003 00004 /* ========================================================================= 00005 Copyright (c) 2010-2014, Institute for Microelectronics, 00006 Institute for Analysis and Scientific Computing, 00007 TU Wien. 00008 Portions of this software are copyright by UChicago Argonne, LLC. 00009 00010 ----------------- 00011 ViennaCL - The Vienna Computing Library 00012 ----------------- 00013 00014 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 00015 00016 (A list of authors and contributors can be found in the PDF manual) 00017 00018 License: MIT (X11), see file LICENSE in the base directory 00019 ============================================================================= */ 00020 00021 #include <utility> 00022 #include <iostream> 00023 #include <fstream> 00024 #include <string> 00025 #include <algorithm> 00026 #include <vector> 00027 #include <math.h> 00028 #include <map> 00029 00030 //boost includes 00031 #include "boost/numeric/ublas/vector.hpp" 00032 #include "boost/numeric/ublas/matrix.hpp" 00033 #include "boost/numeric/ublas/matrix_proxy.hpp" 00034 #include "boost/numeric/ublas/vector_proxy.hpp" 00035 #include "boost/numeric/ublas/storage.hpp" 00036 #include "boost/numeric/ublas/io.hpp" 00037 #include "boost/numeric/ublas/lu.hpp" 00038 #include "boost/numeric/ublas/triangular.hpp" 00039 #include "boost/numeric/ublas/matrix_expression.hpp" 00040 00041 // ViennaCL includes 00042 #include "viennacl/linalg/prod.hpp" 00043 #include "viennacl/matrix.hpp" 00044 #include "viennacl/compressed_matrix.hpp" 00045 #include "viennacl/linalg/sparse_matrix_operations.hpp" 00046 #include "viennacl/linalg/matrix_operations.hpp" 00047 #include "viennacl/scalar.hpp" 00048 #include "viennacl/linalg/cg.hpp" 00049 #include "viennacl/linalg/inner_prod.hpp" 00050 #include "viennacl/linalg/ilu.hpp" 00051 //#include <omp.h> 00052 00057 namespace viennacl 00058 { 00059 namespace linalg 00060 { 00061 namespace detail 00062 { 00063 namespace spai 00064 { 00065 00070 class fspai_tag{ 00077 public: 00078 fspai_tag( 00079 double residual_norm_threshold = 1e-3, 00080 unsigned int iteration_limit = 5, 00081 bool is_static = false, 00082 bool is_right = false) : 00083 residual_norm_threshold_(residual_norm_threshold), 00084 iteration_limit_(iteration_limit), 00085 is_static_(is_static), 00086 is_right_(is_right) {} 00087 00088 inline double getResidualNormThreshold() const 00089 { return residual_norm_threshold_; } 00090 inline unsigned long getIterationLimit () const 00091 { return iteration_limit_; } 00092 inline bool getIsStatic() const 00093 { return is_static_; } 00094 inline bool getIsRight() const 00095 { return is_right_; } 00096 inline void setResidualNormThreshold(double residual_norm_threshold){ 00097 if(residual_norm_threshold > 0) 00098 residual_norm_threshold_ = residual_norm_threshold; 00099 } 00100 inline void setIterationLimit(unsigned long iteration_limit){ 00101 if(iteration_limit > 0) 00102 iteration_limit_ = iteration_limit; 00103 } 00104 inline void setIsRight(bool is_right){ 00105 is_right_ = is_right; 00106 } 00107 inline void setIsStatic(bool is_static){ 00108 is_static_ = is_static; 00109 } 00110 00111 private: 00112 double residual_norm_threshold_; 00113 unsigned long iteration_limit_; 00114 bool is_static_; 00115 bool is_right_; 00116 }; 00117 00118 00119 // 00120 // Helper: Store A in an STL container of type, exploiting symmetry 00121 // Reason: ublas interface does not allow to iterate over nonzeros of a particular row without starting an iterator1 from the very beginning of the matrix... 00122 // 00123 template <typename MatrixType, typename ScalarType> 00124 void sym_sparse_matrix_to_stl(MatrixType const & A, std::vector<std::map<unsigned int, ScalarType> > & STL_A) 00125 { 00126 STL_A.resize(A.size1()); 00127 for (typename MatrixType::const_iterator1 row_it = A.begin1(); 00128 row_it != A.end1(); 00129 ++row_it) 00130 { 00131 for (typename MatrixType::const_iterator2 col_it = row_it.begin(); 00132 col_it != row_it.end(); 00133 ++col_it) 00134 { 00135 if (col_it.index1() >= col_it.index2()) 00136 STL_A[col_it.index1()][static_cast<unsigned int>(col_it.index2())] = *col_it; 00137 else 00138 break; //go to next row 00139 } 00140 } 00141 } 00142 00143 00144 // 00145 // Generate index sets J_k, k=0,...,N-1 00146 // 00147 template <typename MatrixType> 00148 void generateJ(MatrixType const & A, std::vector<std::vector<vcl_size_t> > & J) 00149 { 00150 for (typename MatrixType::const_iterator1 row_it = A.begin1(); 00151 row_it != A.end1(); 00152 ++row_it) 00153 { 00154 for (typename MatrixType::const_iterator2 col_it = row_it.begin(); 00155 col_it != row_it.end(); 00156 ++col_it) 00157 { 00158 if (col_it.index1() > col_it.index2()) //Matrix is symmetric, thus only work on lower triangular part 00159 { 00160 J[col_it.index2()].push_back(col_it.index1()); 00161 J[col_it.index1()].push_back(col_it.index2()); 00162 } 00163 else 00164 break; //go to next row 00165 } 00166 } 00167 } 00168 00169 00170 // 00171 // Extracts the blocks A(\tilde{J}_k, \tilde{J}_k) from A 00172 // Sets up y_k = A(\tilde{J}_k, k) for the inplace-solution after Cholesky-factoriation 00173 // 00174 template <typename ScalarType, typename MatrixType, typename VectorType> 00175 void fill_blocks(std::vector< std::map<unsigned int, ScalarType> > & A, 00176 std::vector<MatrixType> & blocks, 00177 std::vector<std::vector<vcl_size_t> > const & J, 00178 std::vector<VectorType> & Y) 00179 { 00180 for (vcl_size_t k=0; k<A.size(); ++k) 00181 { 00182 std::vector<vcl_size_t> const & Jk = J[k]; 00183 VectorType & yk = Y[k]; 00184 MatrixType & block_k = blocks[k]; 00185 00186 yk.resize(Jk.size()); 00187 block_k.resize(Jk.size(), Jk.size()); 00188 block_k.clear(); 00189 00190 for (vcl_size_t i=0; i<Jk.size(); ++i) 00191 { 00192 vcl_size_t row_index = Jk[i]; 00193 std::map<unsigned int, ScalarType> & A_row = A[row_index]; 00194 00195 //fill y_k: 00196 yk[i] = A_row[static_cast<unsigned int>(k)]; 00197 00198 for (vcl_size_t j=0; j<Jk.size(); ++j) 00199 { 00200 vcl_size_t col_index = Jk[j]; 00201 if (col_index <= row_index && A_row.find(static_cast<unsigned int>(col_index)) != A_row.end()) //block is symmetric, thus store only lower triangular part 00202 block_k(i, j) = A_row[static_cast<unsigned int>(col_index)]; 00203 } 00204 } 00205 } 00206 } 00207 00208 00209 // 00210 // Perform Cholesky factorization of A inplace. Cf. Schwarz: Numerische Mathematik, vol 5, p. 58 00211 // 00212 template <typename MatrixType> 00213 void cholesky_decompose(MatrixType & A) 00214 { 00215 for (vcl_size_t k=0; k<A.size2(); ++k) 00216 { 00217 if (A(k,k) <= 0) 00218 { 00219 std::cout << "k: " << k << std::endl; 00220 std::cout << "A(k,k): " << A(k,k) << std::endl; 00221 } 00222 00223 assert(A(k,k) > 0); 00224 00225 A(k,k) = std::sqrt(A(k,k)); 00226 00227 for (vcl_size_t i=k+1; i<A.size1(); ++i) 00228 { 00229 A(i,k) /= A(k,k); 00230 for (vcl_size_t j=k+1; j<=i; ++j) 00231 A(i,j) -= A(i,k) * A(j,k); 00232 } 00233 } 00234 } 00235 00236 00237 // 00238 // Compute x in Ax = b, where A is already Cholesky factored (A = L L^T) 00239 // 00240 template <typename MatrixType, typename VectorType> 00241 void cholesky_solve(MatrixType const & L, VectorType & b) 00242 { 00243 // inplace forward solve L x = b 00244 for (vcl_size_t i=0; i<L.size1(); ++i) 00245 { 00246 for (vcl_size_t j=0; j<i; ++j) 00247 b[i] -= L(i,j) * b[j]; 00248 b[i] /= L(i,i); 00249 } 00250 00251 // inplace backward solve L^T x = b: 00252 for (vcl_size_t i=L.size1()-1; ; --i) 00253 { 00254 for (vcl_size_t k=i+1; k<L.size1(); ++k) 00255 b[i] -= L(k,i) * b[k]; 00256 b[i] /= L(i,i); 00257 00258 if (i==0) //vcl_size_t might be unsigned, therefore manual check for equality with zero here 00259 break; 00260 } 00261 } 00262 00263 00264 00265 // 00266 // Compute the Cholesky factor L from the sparse vectors y_k 00267 // 00268 template <typename MatrixType, typename VectorType1> 00269 void computeL(MatrixType const & A, 00270 MatrixType & L, 00271 MatrixType & L_trans, 00272 std::vector<VectorType1> & Y, 00273 std::vector<std::vector<vcl_size_t> > & J) 00274 { 00275 typedef typename VectorType1::value_type ScalarType; 00276 typedef std::vector<std::map<unsigned int, ScalarType> > STLSparseMatrixType; 00277 00278 STLSparseMatrixType L_temp(A.size1()); 00279 00280 for (vcl_size_t k=0; k<A.size1(); ++k) 00281 { 00282 std::vector<vcl_size_t> const & Jk = J[k]; 00283 VectorType1 const & yk = Y[k]; 00284 00285 //compute L(k,k): 00286 ScalarType Lkk = A(k,k); 00287 for (vcl_size_t i=0; i<Jk.size(); ++i) 00288 Lkk -= A(Jk[i],k) * yk[i]; 00289 00290 Lkk = ScalarType(1) / std::sqrt(Lkk); 00291 L_temp[k][static_cast<unsigned int>(k)] = Lkk; 00292 L_trans(k,k) = Lkk; 00293 00294 //write lower diagonal entries: 00295 for (vcl_size_t i=0; i<Jk.size(); ++i) 00296 { 00297 L_temp[Jk[i]][static_cast<unsigned int>(k)] = -Lkk * yk[i]; 00298 L_trans(k, Jk[i]) = -Lkk * yk[i]; 00299 } 00300 } //for k 00301 00302 00303 //build L from L_temp 00304 for (vcl_size_t i=0; i<L_temp.size(); ++i) 00305 for (typename std::map<unsigned int, ScalarType>::const_iterator it = L_temp[i].begin(); 00306 it != L_temp[i].end(); 00307 ++it) 00308 L(i, it->first) = it->second; 00309 } 00310 00311 00312 // 00313 // Top level FSPAI function 00314 // 00315 template <typename MatrixType> 00316 void computeFSPAI(MatrixType const & A, 00317 MatrixType const & PatternA, 00318 MatrixType & L, 00319 MatrixType & L_trans, 00320 fspai_tag) 00321 { 00322 typedef typename MatrixType::value_type ScalarType; 00323 typedef boost::numeric::ublas::matrix<ScalarType> DenseMatrixType; 00324 typedef std::vector<std::map<unsigned int, ScalarType> > SparseMatrixType; 00325 00326 // 00327 // preprocessing: Store A in a STL container: 00328 // 00329 //std::cout << "Transferring to STL container:" << std::endl; 00330 std::vector<std::vector<ScalarType> > y_k(A.size1()); 00331 SparseMatrixType STL_A(A.size1()); 00332 sym_sparse_matrix_to_stl(A, STL_A); 00333 00334 00335 // 00336 // Step 1: Generate pattern indices 00337 // 00338 //std::cout << "computeFSPAI(): Generating pattern..." << std::endl; 00339 std::vector<std::vector<vcl_size_t> > J(A.size1()); 00340 generateJ(PatternA, J); 00341 00342 // 00343 // Step 2: Set up matrix blocks 00344 // 00345 //std::cout << "computeFSPAI(): Setting up matrix blocks..." << std::endl; 00346 std::vector<DenseMatrixType> subblocks_A(A.size1()); 00347 fill_blocks(STL_A, subblocks_A, J, y_k); 00348 STL_A.clear(); //not needed anymore 00349 00350 // 00351 // Step 3: Cholesky-factor blocks 00352 // 00353 //std::cout << "computeFSPAI(): Cholesky-factorization..." << std::endl; 00354 for (vcl_size_t i=0; i<subblocks_A.size(); ++i) 00355 { 00356 //std::cout << "Block before: " << subblocks_A[i] << std::endl; 00357 cholesky_decompose(subblocks_A[i]); 00358 //std::cout << "Block after: " << subblocks_A[i] << std::endl; 00359 } 00360 00361 00362 /*vcl_size_t num_bytes = 0; 00363 for (vcl_size_t i=0; i<subblocks_A.size(); ++i) 00364 num_bytes += 8*subblocks_A[i].size1()*subblocks_A[i].size2();*/ 00365 //std::cout << "Memory for FSPAI matrix: " << num_bytes / (1024.0 * 1024.0) << " MB" << std::endl; 00366 00367 // 00368 // Step 4: Solve for y_k 00369 // 00370 //std::cout << "computeFSPAI(): Cholesky-solve..." << std::endl; 00371 for (vcl_size_t i=0; i<y_k.size(); ++i) 00372 { 00373 if (subblocks_A[i].size1() > 0) //block might be empty... 00374 { 00375 //y_k[i].resize(subblocks_A[i].size1()); 00376 //std::cout << "y_k[" << i << "]: "; 00377 //for (vcl_size_t j=0; j<y_k[i].size(); ++j) 00378 // std::cout << y_k[i][j] << " "; 00379 //std::cout << std::endl; 00380 cholesky_solve(subblocks_A[i], y_k[i]); 00381 } 00382 } 00383 00384 00385 // 00386 // Step 5: Set up Cholesky factors L and L_trans 00387 // 00388 //std::cout << "computeFSPAI(): Computing L..." << std::endl; 00389 L.resize(A.size1(), A.size2(), false); 00390 L.reserve(A.nnz(), false); 00391 L_trans.resize(A.size1(), A.size2(), false); 00392 L_trans.reserve(A.nnz(), false); 00393 computeL(A, L, L_trans, y_k, J); 00394 00395 //std::cout << "L: " << L << std::endl; 00396 } 00397 00398 00399 00400 } 00401 } 00402 } 00403 } 00404 00405 #endif