ViennaCL - The Vienna Computing Library  1.6.2
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
amg.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_AMG_HPP_
2 #define VIENNACL_LINALG_AMG_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
27 #include <boost/numeric/ublas/matrix.hpp>
28 #include <boost/numeric/ublas/lu.hpp>
29 #include <boost/numeric/ublas/operation.hpp>
30 #include <boost/numeric/ublas/vector_proxy.hpp>
31 #include <boost/numeric/ublas/matrix_proxy.hpp>
32 #include <boost/numeric/ublas/vector.hpp>
33 #include <boost/numeric/ublas/triangular.hpp>
34 #include <vector>
35 #include <cmath>
36 #include "viennacl/forwards.h"
37 #include "viennacl/tools/tools.hpp"
38 #include "viennacl/linalg/prod.hpp"
40 
44 
45 #include <map>
46 
47 #ifdef VIENNACL_WITH_OPENMP
48  #include <omp.h>
49 #endif
50 
52 
53 #define VIENNACL_AMG_COARSE_LIMIT 50
54 #define VIENNACL_AMG_MAX_LEVELS 100
55 
56 namespace viennacl
57 {
58 namespace linalg
59 {
60 
62 
63 
71 template<typename InternalT1, typename InternalT2>
72 void amg_setup(InternalT1 & A, InternalT1 & P, InternalT2 & pointvector, amg_tag & tag)
73 {
74  typedef typename InternalT2::value_type PointVectorType;
75 
76  unsigned int i, iterations, c_points, f_points;
78 
79  // Set number of iterations. If automatic coarse grid construction is chosen (0), then set a maximum size and stop during the process.
80  iterations = tag.get_coarselevels();
81  if (iterations == 0)
82  iterations = VIENNACL_AMG_MAX_LEVELS;
83 
84  // For parallel coarsenings build data structures (number of threads set automatically).
86  slicing.init(iterations);
87 
88  for (i=0; i<iterations; ++i)
89  {
90  // Initialize Pointvector on level i and construct points.
91  pointvector[i] = PointVectorType(static_cast<unsigned int>(A[i].size1()));
92  pointvector[i].init_points();
93 
94  // Construct C and F points on coarse level (i is fine level, i+1 coarse level).
95  detail::amg::amg_coarse (i, A, pointvector, slicing, tag);
96 
97  // Calculate number of C and F points on level i.
98  c_points = pointvector[i].get_cpoints();
99  f_points = pointvector[i].get_fpoints();
100 
101  #if defined (VIENNACL_AMG_DEBUG) //or defined(VIENNACL_AMG_DEBUGBENCH)
102  std::cout << "Level " << i << ": ";
103  std::cout << "No of C points = " << c_points << ", ";
104  std::cout << "No of F points = " << f_points << std::endl;
105  #endif
106 
107  // Stop routine when the maximal coarse level is found (no C or F point). Coarsest level is level i.
108  if (c_points == 0 || f_points == 0)
109  break;
110 
111  // Construct interpolation matrix for level i.
112  detail::amg::amg_interpol (i, A, P, pointvector, tag);
113 
114  // Compute coarse grid operator (A[i+1] = R * A[i] * P) with R = trans(P).
115  detail::amg::amg_galerkin_prod(A[i], P[i], A[i+1]);
116 
117  // Test triple matrix product. Very slow for large matrix sizes (ublas).
118  // test_triplematprod(A[i],P[i],A[i+1]);
119 
120  pointvector[i].delete_points();
121 
122  #ifdef VIENNACL_AMG_DEBUG
123  std::cout << "Coarse Grid Operator Matrix:" << std::endl;
124  printmatrix (A[i+1]);
125  #endif
126 
127  // If Limit of coarse points is reached then stop. Coarsest level is level i+1.
128  if (tag.get_coarselevels() == 0 && c_points <= VIENNACL_AMG_COARSE_LIMIT)
129  {
130  tag.set_coarselevels(i+1);
131  return;
132  }
133  }
134  tag.set_coarselevels(i);
135 }
136 
145 template<typename MatrixT, typename InternalT1, typename InternalT2>
146 void amg_init(MatrixT const & mat, InternalT1 & A, InternalT1 & P, InternalT2 & pointvector, amg_tag & tag)
147 {
148  //typedef typename MatrixType::value_type ScalarType;
149  typedef typename InternalT1::value_type SparseMatrixType;
150 
151  if (tag.get_coarselevels() > 0)
152  {
153  A.resize(tag.get_coarselevels()+1);
154  P.resize(tag.get_coarselevels());
155  pointvector.resize(tag.get_coarselevels());
156  }
157  else
158  {
159  A.resize(VIENNACL_AMG_MAX_LEVELS+1);
160  P.resize(VIENNACL_AMG_MAX_LEVELS);
161  pointvector.resize(VIENNACL_AMG_MAX_LEVELS);
162  }
163 
164  // Insert operator matrix as operator for finest level.
165  SparseMatrixType A0(mat);
166  A.insert_element(0, A0);
167 }
168 
178 template<typename InternalT1, typename InternalT2>
179 void amg_transform_cpu(InternalT1 & A, InternalT1 & P, InternalT1 & R, InternalT2 & A_setup, InternalT2 & P_setup, amg_tag & tag)
180 {
181  //typedef typename InternalType1::value_type MatrixType;
182 
183  // Resize internal data structures to actual size.
184  A.resize(tag.get_coarselevels()+1);
185  P.resize(tag.get_coarselevels());
186  R.resize(tag.get_coarselevels());
187 
188  // Transform into matrix type.
189  for (unsigned int i=0; i<tag.get_coarselevels()+1; ++i)
190  {
191  A[i].resize(A_setup[i].size1(),A_setup[i].size2(),false);
192  A[i] = A_setup[i];
193  }
194  for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
195  {
196  P[i].resize(P_setup[i].size1(),P_setup[i].size2(),false);
197  P[i] = P_setup[i];
198  }
199  for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
200  {
201  R[i].resize(P_setup[i].size2(),P_setup[i].size1(),false);
202  P_setup[i].set_trans(true);
203  R[i] = P_setup[i];
204  P_setup[i].set_trans(false);
205  }
206 }
207 
218 template<typename InternalT1, typename InternalT2>
219 void amg_transform_gpu(InternalT1 & A, InternalT1 & P, InternalT1 & R, InternalT2 & A_setup, InternalT2 & P_setup, amg_tag & tag, viennacl::context ctx)
220 {
221  typedef typename InternalT2::value_type::value_type NumericType;
222 
223  // Resize internal data structures to actual size.
224  A.resize(tag.get_coarselevels()+1);
225  P.resize(tag.get_coarselevels());
226  R.resize(tag.get_coarselevels());
227 
228  // Copy to GPU using the internal sparse matrix structure: std::vector<std::map>.
229  for (unsigned int i=0; i<tag.get_coarselevels()+1; ++i)
230  {
232  //A[i].resize(A_setup[i].size1(),A_setup[i].size2(),false);
233  viennacl::copy(*(A_setup[i].get_internal_pointer()),A[i]);
234  }
235  for (unsigned int i=0; i<tag.get_coarselevels(); ++i)
236  {
237  // find number of nonzeros in P:
238  vcl_size_t nonzeros = 0;
239  for (vcl_size_t j=0; j<P_setup[i].get_internal_pointer()->size(); ++j)
240  nonzeros += (*P_setup[i].get_internal_pointer())[j].size();
241 
243  //P[i].resize(P_setup[i].size1(),P_setup[i].size2(),false);
244  viennacl::detail::copy_impl(tools::const_sparse_matrix_adapter<NumericType>(*(P_setup[i].get_internal_pointer()), P_setup[i].size1(), P_setup[i].size2()), P[i], nonzeros);
245  //viennacl::copy((boost::numeric::ublas::compressed_matrix<ScalarType>)P_setup[i],P[i]);
246 
248  //R[i].resize(P_setup[i].size2(),P_setup[i].size1(),false);
249  P_setup[i].set_trans(true);
250  viennacl::detail::copy_impl(tools::const_sparse_matrix_adapter<NumericType>(*(P_setup[i].get_internal_pointer()), P_setup[i].size1(), P_setup[i].size2()), R[i], nonzeros);
251  P_setup[i].set_trans(false);
252  }
253 }
254 
263 template<typename InternalVectorT, typename SparseMatrixT>
264 void amg_setup_apply(InternalVectorT & result, InternalVectorT & rhs, InternalVectorT & residual, SparseMatrixT const & A, amg_tag const & tag)
265 {
266  typedef typename InternalVectorT::value_type VectorType;
267 
268  result.resize(tag.get_coarselevels()+1);
269  rhs.resize(tag.get_coarselevels()+1);
270  residual.resize(tag.get_coarselevels());
271 
272  for (unsigned int level=0; level < tag.get_coarselevels()+1; ++level)
273  {
274  result[level] = VectorType(A[level].size1());
275  result[level].clear();
276  rhs[level] = VectorType(A[level].size1());
277  rhs[level].clear();
278  }
279  for (unsigned int level=0; level < tag.get_coarselevels(); ++level)
280  {
281  residual[level] = VectorType(A[level].size1());
282  residual[level].clear();
283  }
284 }
285 
286 
296 template<typename InternalVectorT, typename SparseMatrixT>
297 void amg_setup_apply(InternalVectorT & result, InternalVectorT & rhs, InternalVectorT & residual, SparseMatrixT const & A, amg_tag const & tag, viennacl::context ctx)
298 {
299  typedef typename InternalVectorT::value_type VectorType;
300 
301  result.resize(tag.get_coarselevels()+1);
302  rhs.resize(tag.get_coarselevels()+1);
303  residual.resize(tag.get_coarselevels());
304 
305  for (unsigned int level=0; level < tag.get_coarselevels()+1; ++level)
306  {
307  result[level] = VectorType(A[level].size1(), ctx);
308  rhs[level] = VectorType(A[level].size1(), ctx);
309  }
310  for (unsigned int level=0; level < tag.get_coarselevels(); ++level)
311  {
312  residual[level] = VectorType(A[level].size1(), ctx);
313  }
314 }
315 
316 
324 template<typename NumericT, typename SparseMatrixT>
325 void amg_lu(boost::numeric::ublas::compressed_matrix<NumericT> & op, boost::numeric::ublas::permutation_matrix<> & permutation, SparseMatrixT const & A)
326 {
327  typedef typename SparseMatrixT::const_iterator1 ConstRowIterator;
328  typedef typename SparseMatrixT::const_iterator2 ConstColIterator;
329 
330  // Copy to operator matrix. Needed
331  op.resize(A.size1(),A.size2(),false);
332  for (ConstRowIterator row_iter = A.begin1(); row_iter != A.end1(); ++row_iter)
333  for (ConstColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
334  op (col_iter.index1(), col_iter.index2()) = *col_iter;
335 
336  // Permutation matrix has to be reinitialized with actual size. Do not clear() or resize()!
337  permutation = boost::numeric::ublas::permutation_matrix<> (op.size1());
338  boost::numeric::ublas::lu_factorize(op, permutation);
339 }
340 
343 template<typename MatrixT>
345 {
346  typedef typename MatrixT::value_type NumericType;
347  typedef boost::numeric::ublas::vector<NumericType> VectorType;
350 
355 
356  boost::numeric::ublas::vector<SparseMatrixType> A_setup_;
357  boost::numeric::ublas::vector<SparseMatrixType> P_setup_;
358  boost::numeric::ublas::vector<MatrixT> A_;
359  boost::numeric::ublas::vector<MatrixT> P_;
360  boost::numeric::ublas::vector<MatrixT> R_;
361  boost::numeric::ublas::vector<PointVectorType> pointvector_;
362 
363  mutable boost::numeric::ublas::compressed_matrix<NumericType> op_;
364  mutable boost::numeric::ublas::permutation_matrix<> permutation_;
365 
366  mutable boost::numeric::ublas::vector<VectorType> result_;
367  mutable boost::numeric::ublas::vector<VectorType> rhs_;
368  mutable boost::numeric::ublas::vector<VectorType> residual_;
369 
370  mutable bool done_init_apply_;
371 
372  amg_tag tag_;
373 public:
374 
375  amg_precond(): permutation_(0) {}
381  amg_precond(MatrixT const & mat, amg_tag const & tag): permutation_(0)
382  {
383  tag_ = tag;
384  // Initialize data structures.
385  amg_init (mat, A_setup_, P_setup_, pointvector_, tag_);
386 
387  done_init_apply_ = false;
388  }
389 
392  void setup()
393  {
394  // Start setup phase.
395  amg_setup(A_setup_, P_setup_, pointvector_, tag_);
396  // Transform to CPU-Matrixtype for precondition phase.
397  amg_transform_cpu(A_, P_, R_, A_setup_, P_setup_, tag_);
398 
399  done_init_apply_ = false;
400  }
401 
406  void init_apply() const
407  {
408  // Setup precondition phase (Data structures).
409  amg_setup_apply(result_, rhs_, residual_, A_setup_, tag_);
410  // Do LU factorization for direct solve.
411  amg_lu(op_, permutation_, A_setup_[tag_.get_coarselevels()]);
412 
413  done_init_apply_ = true;
414  }
415 
421  template<typename VectorT>
422  NumericType calc_complexity(VectorT & avgstencil)
423  {
424  avgstencil = VectorT(tag_.get_coarselevels()+1);
425  unsigned int nonzero=0, systemmat_nonzero=0, level_coefficients=0;
426 
427  for (unsigned int level=0; level < tag_.get_coarselevels()+1; ++level)
428  {
429  level_coefficients = 0;
430  for (InternalRowIterator row_iter = A_setup_[level].begin1(); row_iter != A_setup_[level].end1(); ++row_iter)
431  {
432  for (InternalColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
433  {
434  if (level == 0)
435  systemmat_nonzero++;
436  nonzero++;
437  level_coefficients++;
438  }
439  }
440  avgstencil[level] = static_cast<NumericType>(level_coefficients)/static_cast<NumericType>(A_setup_[level].size1());
441  }
442  return static_cast<NumericType>(nonzero) / static_cast<NumericType>(systemmat_nonzero);
443  }
444 
449  template<typename VectorT>
450  void apply(VectorT & vec) const
451  {
452  // Build data structures and do lu factorization before first iteration step.
453  if (!done_init_apply_)
454  init_apply();
455 
456  int level;
457 
458  // Precondition operation (Yang, p.3)
459  rhs_[0] = vec;
460  for (level=0; level<static_cast<int>(tag_.get_coarselevels()); level++)
461  {
462  result_[level].clear();
463 
464  // Apply Smoother presmooth_ times.
465  smooth_jacobi (level, tag_.get_presmooth(), result_[level], rhs_[level]);
466 
467  #ifdef VIENNACL_AMG_DEBUG
468  std::cout << "After presmooth:" << std::endl;
469  printvector(result_[level]);
470  #endif
471 
472  // Compute residual.
473  residual_[level] = rhs_[level] - boost::numeric::ublas::prod(A_[level], result_[level]);
474 
475  #ifdef VIENNACL_AMG_DEBUG
476  std::cout << "Residual:" << std::endl;
477  printvector(residual_[level]);
478  #endif
479 
480  // Restrict to coarse level. Restricted residual is RHS of coarse level.
481  rhs_[level+1] = boost::numeric::ublas::prod(R_[level], residual_[level]);
482 
483  #ifdef VIENNACL_AMG_DEBUG
484  std::cout << "Restricted Residual: " << std::endl;
485  printvector(rhs_[level+1]);
486  #endif
487  }
488 
489  // On highest level use direct solve to solve equation.
490  result_[level] = rhs_[level];
491  boost::numeric::ublas::lu_substitute(op_, permutation_, result_[level]);
492 
493  #ifdef VIENNACL_AMG_DEBUG
494  std::cout << "After direct solve: " << std::endl;
495  printvector(result_[level]);
496  #endif
497 
498  for (level=tag_.get_coarselevels()-1; level >= 0; level--)
499  {
500  #ifdef VIENNACL_AMG_DEBUG
501  std::cout << "Coarse Error: " << std::endl;
502  printvector(result_[level+1]);
503  #endif
504 
505  // Interpolate error to fine level. Correct solution by adding error.
506  result_[level] += boost::numeric::ublas::prod(P_[level], result_[level+1]);
507 
508  #ifdef VIENNACL_AMG_DEBUG
509  std::cout << "Corrected Result: " << std::endl;
510  printvector(result_[level]);
511  #endif
512 
513  // Apply Smoother postsmooth_ times.
514  smooth_jacobi(level, tag_.get_postsmooth(), result_[level], rhs_[level]);
515 
516  #ifdef VIENNACL_AMG_DEBUG
517  std::cout << "After postsmooth: " << std::endl;
518  printvector(result_[level]);
519  #endif
520  }
521  vec = result_[0];
522  }
523 
530  template<typename VectorT>
531  void smooth_jacobi(int level, int const iterations, VectorT & x, VectorT const & rhs_smooth) const
532  {
533  VectorT old_result(x.size());
534  long index;
535 
536  for (int i=0; i<iterations; ++i)
537  {
538  old_result = x;
539  x.clear();
540 #ifdef VIENNACL_WITH_OPENMP
541  #pragma omp parallel for
542 #endif
543  for (index=0; index < static_cast<long>(A_setup_[level].size1()); ++index)
544  {
545  InternalConstRowIterator row_iter = A_setup_[level].begin1();
546  row_iter += index;
547  NumericType sum = 0;
548  NumericType diag = 1;
549  for (InternalConstColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
550  {
551  if (col_iter.index1() == col_iter.index2())
552  diag = *col_iter;
553  else
554  sum += *col_iter * old_result[col_iter.index2()];
555  }
556  x[index]= static_cast<NumericType>(tag_.get_jacobiweight()) * (rhs_smooth[index] - sum) / diag + (1-static_cast<NumericType>(tag_.get_jacobiweight())) * old_result[index];
557  }
558  }
559  }
560 
561  amg_tag & tag() { return tag_; }
562 };
563 
568 template<typename NumericT, unsigned int AlignmentV>
569 class amg_precond< compressed_matrix<NumericT, AlignmentV> >
570 {
575 
580 
581  boost::numeric::ublas::vector<SparseMatrixType> A_setup_;
582  boost::numeric::ublas::vector<SparseMatrixType> P_setup_;
583  boost::numeric::ublas::vector<MatrixType> A_;
584  boost::numeric::ublas::vector<MatrixType> P_;
585  boost::numeric::ublas::vector<MatrixType> R_;
586  boost::numeric::ublas::vector<PointVectorType> pointvector_;
587 
588  mutable boost::numeric::ublas::compressed_matrix<NumericT> op_;
589  mutable boost::numeric::ublas::permutation_matrix<> permutation_;
590 
591  mutable boost::numeric::ublas::vector<VectorType> result_;
592  mutable boost::numeric::ublas::vector<VectorType> rhs_;
593  mutable boost::numeric::ublas::vector<VectorType> residual_;
594 
595  viennacl::context ctx_;
596 
597  mutable bool done_init_apply_;
598 
599  amg_tag tag_;
600 
601 public:
602 
603  amg_precond(): permutation_(0) {}
604 
610  amg_precond(compressed_matrix<NumericT, AlignmentV> const & mat, amg_tag const & tag): permutation_(0), ctx_(viennacl::traits::context(mat))
611  {
612  tag_ = tag;
613 
614  // Copy to CPU. Internal structure of sparse matrix is used for copy operation.
615  std::vector<std::map<unsigned int, NumericT> > mat2 = std::vector<std::map<unsigned int, NumericT> >(mat.size1());
616  viennacl::copy(mat, mat2);
617 
618  // Initialize data structures.
619  amg_init (mat2, A_setup_, P_setup_, pointvector_, tag_);
620 
621  done_init_apply_ = false;
622  }
623 
626  void setup()
627  {
628  // Start setup phase.
629  amg_setup(A_setup_, P_setup_, pointvector_, tag_);
630  // Transform to GPU-Matrixtype for precondition phase.
631  amg_transform_gpu(A_, P_, R_, A_setup_, P_setup_, tag_, ctx_);
632 
633  done_init_apply_ = false;
634  }
635 
640  void init_apply() const
641  {
642  // Setup precondition phase (Data structures).
643  amg_setup_apply(result_, rhs_, residual_, A_setup_, tag_, ctx_);
644  // Do LU factorization for direct solve.
645  amg_lu(op_, permutation_, A_setup_[tag_.get_coarselevels()]);
646 
647  done_init_apply_ = true;
648  }
649 
655  template<typename VectorT>
656  NumericT calc_complexity(VectorT & avgstencil)
657  {
658  avgstencil = VectorT(tag_.get_coarselevels()+1);
659  unsigned int nonzero=0, systemmat_nonzero=0, level_coefficients=0;
660 
661  for (unsigned int level=0; level < tag_.get_coarselevels()+1; ++level)
662  {
663  level_coefficients = 0;
664  for (InternalRowIterator row_iter = A_setup_[level].begin1(); row_iter != A_setup_[level].end1(); ++row_iter)
665  {
666  for (InternalColIterator col_iter = row_iter.begin(); col_iter != row_iter.end(); ++col_iter)
667  {
668  if (level == 0)
669  systemmat_nonzero++;
670  nonzero++;
671  level_coefficients++;
672  }
673  }
674  avgstencil[level] = level_coefficients/static_cast<double>(A_[level].size1());
675  }
676  return nonzero/static_cast<double>(systemmat_nonzero);
677  }
678 
683  template<typename VectorT>
684  void apply(VectorT & vec) const
685  {
686  if (!done_init_apply_)
687  init_apply();
688 
689  vcl_size_t level;
690 
691  // Precondition operation (Yang, p.3).
692  rhs_[0] = vec;
693  for (level=0; level < tag_.get_coarselevels(); level++)
694  {
695  result_[level].clear();
696 
697  // Apply Smoother presmooth_ times.
698  smooth_jacobi(level, tag_.get_presmooth(), result_[level], rhs_[level]);
699 
700  #ifdef VIENNACL_AMG_DEBUG
701  std::cout << "After presmooth: " << std::endl;
702  printvector(result_[level]);
703  #endif
704 
705  // Compute residual.
706  //residual[level] = rhs_[level] - viennacl::linalg::prod(A_[level], result_[level]);
707  residual_[level] = viennacl::linalg::prod(A_[level], result_[level]);
708  residual_[level] = rhs_[level] - residual_[level];
709 
710  #ifdef VIENNACL_AMG_DEBUG
711  std::cout << "Residual: " << std::endl;
712  printvector(residual_[level]);
713  #endif
714 
715  // Restrict to coarse level. Result is RHS of coarse level equation.
716  //residual_coarse[level] = viennacl::linalg::prod(R[level],residual[level]);
717  rhs_[level+1] = viennacl::linalg::prod(R_[level], residual_[level]);
718 
719  #ifdef VIENNACL_AMG_DEBUG
720  std::cout << "Restricted Residual: " << std::endl;
721  printvector(rhs_[level+1]);
722  #endif
723  }
724 
725  // On highest level use direct solve to solve equation (on the CPU)
726  //TODO: Use GPU direct solve!
727  result_[level] = rhs_[level];
728  boost::numeric::ublas::vector<NumericT> result_cpu(result_[level].size());
729 
730  viennacl::copy(result_[level], result_cpu);
731  boost::numeric::ublas::lu_substitute(op_, permutation_, result_cpu);
732  viennacl::copy(result_cpu, result_[level]);
733 
734  #ifdef VIENNACL_AMG_DEBUG
735  std::cout << "After direct solve: " << std::endl;
736  printvector (result[level]);
737  #endif
738 
739  for (int level2 = static_cast<int>(tag_.get_coarselevels()-1); level2 >= 0; level2--)
740  {
741  level = static_cast<vcl_size_t>(level2);
742 
743  #ifdef VIENNACL_AMG_DEBUG
744  std::cout << "Coarse Error: " << std::endl;
745  printvector(result[level+1]);
746  #endif
747 
748  // Interpolate error to fine level and correct solution.
749  result_[level] += viennacl::linalg::prod(P_[level], result_[level+1]);
750 
751  #ifdef VIENNACL_AMG_DEBUG
752  std::cout << "Corrected Result: " << std::endl;
753  printvector(result_[level]);
754  #endif
755 
756  // Apply Smoother postsmooth_ times.
757  smooth_jacobi(level, tag_.get_postsmooth(), result_[level], rhs_[level]);
758 
759  #ifdef VIENNACL_AMG_DEBUG
760  std::cout << "After postsmooth: " << std::endl;
761  printvector(result_[level]);
762  #endif
763  }
764  vec = result_[0];
765  }
766 
773  template<typename VectorT>
774  void smooth_jacobi(vcl_size_t level, unsigned int iterations, VectorT & x, VectorT const & rhs_smooth) const
775  {
776  VectorType old_result = x;
777 
778  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(x).context());
781 
782  for (unsigned int i=0; i<iterations; ++i)
783  {
784  if (i > 0)
785  old_result = x;
786  x.clear();
787  viennacl::ocl::enqueue(k(A_[level].handle1().opencl_handle(), A_[level].handle2().opencl_handle(), A_[level].handle().opencl_handle(),
788  static_cast<NumericT>(tag_.get_jacobiweight()),
789  viennacl::traits::opencl_handle(old_result),
790  viennacl::traits::opencl_handle(x),
791  viennacl::traits::opencl_handle(rhs_smooth),
792  static_cast<cl_uint>(rhs_smooth.size())));
793 
794  }
795  }
796 
797  amg_tag & tag() { return tag_; }
798 };
799 
800 }
801 }
802 
803 
804 
805 #endif
806 
void apply(VectorT &vec) const
Precondition Operation.
Definition: amg.hpp:684
Debug functionality for AMG. To be removed.
unsigned int get_coarselevels() const
Definition: amg_base.hpp:111
void amg_lu(boost::numeric::ublas::compressed_matrix< NumericT > &op, boost::numeric::ublas::permutation_matrix<> &permutation, SparseMatrixT const &A)
Pre-compute LU factorization for direct solve (ublas library).
Definition: amg.hpp:325
const vcl_size_t & size1() const
Returns the number of rows.
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58
Various little tools used here and there in ViennaCL.
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
void lu_substitute(matrix< NumericT, F1, AlignmentV1 > const &A, matrix< NumericT, F2, AlignmentV2 > &B)
LU substitution for the system LU = rhs.
Definition: lu.hpp:201
void init_apply() const
Prepare data structures for preconditioning: Build data structures for precondition phase...
Definition: amg.hpp:640
#define VIENNACL_AMG_COARSE_RS3
Definition: amg_base.hpp:44
NumericT calc_complexity(VectorT &avgstencil)
Returns complexity measures.
Definition: amg.hpp:656
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:54
This file provides the forward declarations for the main types used within ViennaCL.
unsigned int get_postsmooth() const
Definition: amg_base.hpp:108
#define VIENNACL_AMG_COARSE_LIMIT
Definition: amg.hpp:53
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:245
void amg_setup(InternalT1 &A, InternalT1 &P, InternalT2 &pointvector, amg_tag &tag)
Setup AMG preconditioner.
Definition: amg.hpp:72
statement sum(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Definition: preset.hpp:246
#define VIENNACL_AMG_COARSE_RS0
Definition: amg_base.hpp:43
void smooth_jacobi(int level, int const iterations, VectorT &x, VectorT const &rhs_smooth) const
(Weighted) Jacobi Smoother (CPU version)
Definition: amg.hpp:531
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Definition: context.hpp:39
A const iterator for sparse matrices of type std::vector > ...
Definition: adapter.hpp:48
void init_apply() const
Prepare data structures for preconditioning: Build data structures for precondition phase...
Definition: amg.hpp:406
const_sparse_matrix_adapted_iterator< NumericT, SizeT,!is_iterator1, true > end() const
Definition: adapter.hpp:163
#define VIENNACL_AMG_MAX_LEVELS
Definition: amg.hpp:54
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:91
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
A tag for algebraic multigrid (AMG). Used to transport information from the user to the implementatio...
Definition: amg_base.hpp:61
Main kernel class for generating OpenCL kernels for compressed_matrix.
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:607
Implementations of several variants of the AMG coarsening procedure (setup phase). Experimental.
void init(unsigned int levels, unsigned int threads=0)
Definition: amg_base.hpp:1191
void amg_interpol(unsigned int level, InternalT1 &A, InternalT1 &P, InternalT2 &pointvector, amg_tag &tag)
Calls the right function to build interpolation matrix.
unsigned int get_presmooth() const
Definition: amg_base.hpp:105
Adapts a constant sparse matrix type made up from std::vector > to basic ub...
Definition: adapter.hpp:183
A non-const iterator for sparse matrices of type std::vector > ...
Definition: adapter.hpp:237
AMG preconditioner class, can be supplied to solve()-routines.
Definition: amg.hpp:344
std::size_t vcl_size_t
Definition: forwards.h:74
void amg_galerkin_prod(SparseMatrixT &A, SparseMatrixT &P, SparseMatrixT &RES)
Sparse Galerkin product: Calculates RES = trans(P)*A*P.
Definition: amg_base.hpp:1368
void prod(const MatrixT1 &A, bool transposed_A, const MatrixT2 &B, bool transposed_B, MatrixT3 &C, ScalarT alpha, ScalarT beta)
vector_expression< const matrix_base< NumericT >, const int, op_matrix_diag > diag(const matrix_base< NumericT > &A, int k=0)
Definition: matrix.hpp:838
void setup()
Start setup phase for this class and copy data structures.
Definition: amg.hpp:392
void printvector(VectorT const &)
Definition: amg_debug.hpp:80
A class for the sparse matrix type. Uses vector of maps as data structure for higher performance and ...
Definition: amg_base.hpp:385
Implementations of dense direct solvers are found here.
void amg_init(MatrixT const &mat, InternalT1 &A, InternalT1 &P, InternalT2 &pointvector, amg_tag &tag)
Initialize AMG preconditioner.
Definition: amg.hpp:146
const_sparse_matrix_adapted_iterator< NumericT, SizeT,!is_iterator1, true > begin() const
Definition: adapter.hpp:159
void copy_impl(const CPUMatrixT &cpu_matrix, compressed_compressed_matrix< NumericT > &gpu_matrix, vcl_size_t nonzero_rows, vcl_size_t nonzeros)
amg_precond(compressed_matrix< NumericT, AlignmentV > const &mat, amg_tag const &tag)
The constructor. Builds data structures.
Definition: amg.hpp:610
void smooth_jacobi(vcl_size_t level, unsigned int iterations, VectorT &x, VectorT const &rhs_smooth) const
Jacobi Smoother (GPU version)
Definition: amg.hpp:774
NumericType calc_complexity(VectorT &avgstencil)
Returns complexity measures.
Definition: amg.hpp:422
void amg_coarse(unsigned int level, InternalT1 &A, InternalT2 &pointvector, InternalT3 &slicing, amg_tag &tag)
Calls the right coarsening procedure.
Definition: amg_coarse.hpp:52
unsigned int get_coarse() const
Definition: amg_base.hpp:90
sparse_matrix_adapted_iterator< NumericT, SizeT,!is_iterator1 > begin() const
Definition: adapter.hpp:331
void clear()
Resets all entries to zero. Does not change the size of the vector.
Definition: vector.hpp:861
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:50
static void init(viennacl::ocl::context &ctx)
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
void amg_transform_cpu(InternalT1 &A, InternalT1 &P, InternalT1 &R, InternalT2 &A_setup, InternalT2 &P_setup, amg_tag &tag)
Save operators after setup phase for CPU computation.
Definition: amg.hpp:179
void amg_transform_gpu(InternalT1 &A, InternalT1 &P, InternalT1 &R, InternalT2 &A_setup, InternalT2 &P_setup, amg_tag &tag, viennacl::context ctx)
Save operators after setup phase for GPU computation.
Definition: amg.hpp:219
void setup()
Start setup phase for this class and copy data structures.
Definition: amg.hpp:626
A sparse square matrix in compressed sparse rows format.
Helper classes and functions for the AMG preconditioner. Experimental.
amg_precond(MatrixT const &mat, amg_tag const &tag)
The constructor. Saves system matrix, tag and builds data structures for setup.
Definition: amg.hpp:381
void printmatrix(MatrixT &, int)
Definition: amg_debug.hpp:77
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version.
Definition: handle.hpp:41
detail::amg::amg_tag amg_tag
Definition: amg.hpp:61
void apply(VectorT &vec) const
Precondition Operation.
Definition: amg.hpp:450
void set_coarselevels(unsigned int coarselevels)
Definition: amg_base.hpp:110
A class for the matrix slicing for parallel coarsening schemes (RS0/RS3).
Definition: amg_base.hpp:1176
void lu_factorize(matrix< NumericT, viennacl::row_major > &A)
LU factorization of a row-major dense matrix.
Definition: lu.hpp:42
A class for the AMG points. Holds pointers of type amg_point in a vector that can be accessed using [...
Definition: amg_base.hpp:946
void amg_setup_apply(InternalVectorT &result, InternalVectorT &rhs, InternalVectorT &residual, SparseMatrixT const &A, amg_tag const &tag)
Setup data structures for precondition phase.
Definition: amg.hpp:264
Implementations of several variants of the AMG interpolation operators (setup phase). Experimental.
void switch_memory_context(T &obj, viennacl::context new_ctx)
Generic convenience routine for migrating data of an object to a new memory domain.
Definition: memory.hpp:622