ViennaCL - The Vienna Computing Library  1.6.2
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ilut.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_DETAIL_ILUT_HPP_
2 #define VIENNACL_LINALG_DETAIL_ILUT_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include <vector>
26 #include <cmath>
27 #include <iostream>
28 #include "viennacl/forwards.h"
29 #include "viennacl/tools/tools.hpp"
30 
33 
35 
36 #include <map>
37 
38 namespace viennacl
39 {
40 namespace linalg
41 {
42 
45 class ilut_tag
46 {
47  public:
54  ilut_tag(unsigned int entries_per_row = 20,
55  double drop_tolerance = 1e-4,
56  bool with_level_scheduling = false)
57  : entries_per_row_(entries_per_row),
58  drop_tolerance_(drop_tolerance),
59  use_level_scheduling_(with_level_scheduling) {}
60 
61  void set_drop_tolerance(double tol)
62  {
63  if (tol > 0)
64  drop_tolerance_ = tol;
65  }
66  double get_drop_tolerance() const { return drop_tolerance_; }
67 
68  void set_entries_per_row(unsigned int e)
69  {
70  if (e > 0)
71  entries_per_row_ = e;
72  }
73 
74  unsigned int get_entries_per_row() const { return entries_per_row_; }
75 
76  bool use_level_scheduling() const { return use_level_scheduling_; }
77  void use_level_scheduling(bool b) { use_level_scheduling_ = b; }
78 
79  private:
80  unsigned int entries_per_row_;
81  double drop_tolerance_;
82  bool use_level_scheduling_;
83 };
84 
85 
87 template<typename NumericT, typename SizeT, typename SparseVectorT>
89  SizeT row,
90  SparseVectorT & w)
91 {
92  assert( (A.handle1().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILUT") );
93  assert( (A.handle2().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILUT") );
94  assert( (A.handle().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILUT") );
95 
96  NumericT const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericT>(A.handle());
97  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1());
98  unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2());
99 
100  SizeT row_i_begin = static_cast<SizeT>(row_buffer[row]);
101  SizeT row_i_end = static_cast<SizeT>(row_buffer[row+1]);
102  NumericT row_norm = 0;
103  for (SizeT buf_index_i = row_i_begin; buf_index_i < row_i_end; ++buf_index_i) //Note: We do not assume that the column indices within a row are sorted
104  {
105  NumericT entry = elements[buf_index_i];
106  w[col_buffer[buf_index_i]] = entry;
107  row_norm += entry * entry;
108  }
109  return std::sqrt(row_norm);
110 }
111 
113 template<typename NumericT, typename SizeT, typename SparseVectorT>
114 NumericT setup_w(std::vector< std::map<SizeT, NumericT> > const & A,
115  SizeT row,
116  SparseVectorT & w)
117 {
118  NumericT row_norm = 0;
119  w = A[row];
120  for (typename std::map<SizeT, NumericT>::const_iterator iter_w = w.begin(); iter_w != w.end(); ++iter_w)
121  row_norm += iter_w->second * iter_w->second;
122 
123  return std::sqrt(row_norm);
124 }
125 
126 
135 template<typename SparseMatrixTypeT, typename NumericT, typename SizeT>
136 void precondition(SparseMatrixTypeT const & A,
137  std::vector< std::map<SizeT, NumericT> > & output,
138  ilut_tag const & tag)
139 {
140  typedef std::map<SizeT, NumericT> SparseVector;
141  typedef typename SparseVector::iterator SparseVectorIterator;
142  typedef typename std::map<SizeT, NumericT>::const_iterator OutputRowConstIterator;
143  typedef std::multimap<NumericT, std::pair<SizeT, NumericT> > TemporarySortMap;
144 
145  assert(viennacl::traits::size1(A) == output.size() && bool("Output matrix size mismatch") );
146 
147  SparseVector w;
148  TemporarySortMap temp_map;
149 
150  for (SizeT i=0; i<viennacl::traits::size1(A); ++i) // Line 1
151  {
152 /* if (i%10 == 0)
153  std::cout << i << std::endl;*/
154 
155  //line 2: set up w
156  NumericT row_norm = setup_w(A, i, w);
157  NumericT tau_i = static_cast<NumericT>(tag.get_drop_tolerance()) * row_norm;
158 
159  //line 3:
160  for (SparseVectorIterator w_k = w.begin(); w_k != w.end(); ++w_k)
161  {
162  SizeT k = w_k->first;
163  if (k >= i)
164  break;
165 
166  //line 4:
167  NumericT a_kk = output[k][k];
168  if (a_kk <= 0 && a_kk >= 0) // a_kk == 0
169  {
170  std::cerr << "ViennaCL: FATAL ERROR in ILUT(): Diagonal entry is zero in row " << k
171  << " while processing line " << i << "!" << std::endl;
172  throw "ILUT zero diagonal!";
173  }
174 
175  NumericT w_k_entry = w_k->second / a_kk;
176  w_k->second = w_k_entry;
177 
178  //line 5: (dropping rule to w_k)
179  if ( std::fabs(w_k_entry) > tau_i)
180  {
181  //line 7:
182  for (OutputRowConstIterator u_k = output[k].begin(); u_k != output[k].end(); ++u_k)
183  {
184  if (u_k->first > k)
185  w[u_k->first] -= w_k_entry * u_k->second;
186  }
187  }
188  //else
189  // w.erase(k);
190 
191  } //for w_k
192 
193  //Line 10: Apply a dropping rule to w
194  //Sort entries which are kept
195  temp_map.clear();
196  for (SparseVectorIterator w_k = w.begin(); w_k != w.end(); ++w_k)
197  {
198  SizeT k = w_k->first;
199  NumericT w_k_entry = w_k->second;
200 
201  NumericT abs_w_k = std::fabs(w_k_entry);
202  if ( (abs_w_k > tau_i) || (k == i) )//do not drop diagonal element!
203  {
204 
205  if (abs_w_k <= 0) // this can only happen for diagonal entry
206  throw "Triangular factor in ILUT singular!";
207 
208  temp_map.insert(std::make_pair(abs_w_k, std::make_pair(k, w_k_entry)));
209  }
210  }
211 
212  //Lines 10-12: write the largest p values to L and U
213  SizeT written_L = 0;
214  SizeT written_U = 0;
215  for (typename TemporarySortMap::reverse_iterator iter = temp_map.rbegin(); iter != temp_map.rend(); ++iter)
216  {
217  std::map<SizeT, NumericT> & row_i = output[i];
218  SizeT j = (iter->second).first;
219  NumericT w_j_entry = (iter->second).second;
220 
221  if (j < i) // Line 11: entry for L
222  {
223  if (written_L < tag.get_entries_per_row())
224  {
225  row_i[j] = w_j_entry;
226  ++written_L;
227  }
228  }
229  else if (j == i) // Diagonal entry is always kept
230  {
231  row_i[j] = w_j_entry;
232  }
233  else //Line 12: entry for U
234  {
235  if (written_U < tag.get_entries_per_row())
236  {
237  row_i[j] = w_j_entry;
238  ++written_U;
239  }
240  }
241  }
242 
243  w.clear(); //Line 13
244 
245  } //for i
246 }
247 
248 
251 template<typename MatrixT>
253 {
254  typedef typename MatrixT::value_type NumericType;
255 
256 public:
257  ilut_precond(MatrixT const & mat, ilut_tag const & tag) : tag_(tag), LU_(mat.size1(), mat.size2())
258  {
259  //initialize preconditioner:
260  //std::cout << "Start CPU precond" << std::endl;
261  init(mat);
262  //std::cout << "End CPU precond" << std::endl;
263  }
264 
265  template<typename VectorT>
266  void apply(VectorT & vec) const
267  {
268  //Note: Since vec can be a rather arbitrary vector type, we call the more generic version in the backend manually:
269  unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU_.handle1());
270  unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU_.handle2());
271  NumericType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<NumericType>(LU_.handle());
272 
273  viennacl::linalg::host_based::detail::csr_inplace_solve<NumericType>(row_buffer, col_buffer, elements, vec, LU_.size2(), unit_lower_tag());
274  viennacl::linalg::host_based::detail::csr_inplace_solve<NumericType>(row_buffer, col_buffer, elements, vec, LU_.size2(), upper_tag());
275  }
276 
277 private:
278  void init(MatrixT const & mat)
279  {
282  viennacl::switch_memory_context(temp, host_context);
283 
284  viennacl::copy(mat, temp);
285 
286  std::vector< std::map<unsigned int, NumericType> > LU_temp(mat.size1());
287 
288  viennacl::linalg::precondition(temp, LU_temp, tag_);
289 
290  viennacl::switch_memory_context(LU_, host_context);
291  viennacl::copy(LU_temp, LU_);
292  }
293 
294  ilut_tag const & tag_;
296 };
297 
298 
303 template<typename NumericT, unsigned int AlignmentV>
304 class ilut_precond< viennacl::compressed_matrix<NumericT, AlignmentV> >
305 {
307 
308 public:
309  ilut_precond(MatrixType const & mat, ilut_tag const & tag)
310  : tag_(tag),
311  LU_(mat.size1(), mat.size2(), viennacl::traits::context(mat))
312  {
313  //initialize preconditioner:
314  //std::cout << "Start GPU precond" << std::endl;
315  init(mat);
316  //std::cout << "End GPU precond" << std::endl;
317  }
318 
320  {
322  {
323  if (tag_.use_level_scheduling())
324  {
325  //std::cout << "Using multifrontal on GPU..." << std::endl;
327  multifrontal_L_row_index_arrays_,
328  multifrontal_L_row_buffers_,
329  multifrontal_L_col_buffers_,
330  multifrontal_L_element_buffers_,
331  multifrontal_L_row_elimination_num_list_);
332 
333  vec = viennacl::linalg::element_div(vec, multifrontal_U_diagonal_);
334 
336  multifrontal_U_row_index_arrays_,
337  multifrontal_U_row_buffers_,
338  multifrontal_U_col_buffers_,
339  multifrontal_U_element_buffers_,
340  multifrontal_U_row_elimination_num_list_);
341  }
342  else
343  {
345  viennacl::context old_context = viennacl::traits::context(vec);
346  viennacl::switch_memory_context(vec, host_context);
349  viennacl::switch_memory_context(vec, old_context);
350  }
351  }
352  else //apply ILUT directly:
353  {
356  }
357  }
358 
359 private:
360  void init(MatrixType const & mat)
361  {
363  viennacl::switch_memory_context(LU_, host_context);
364 
365  std::vector< std::map<unsigned int, NumericT> > LU_temp(mat.size1());
366 
368  {
369  viennacl::linalg::precondition(mat, LU_temp, tag_);
370  }
371  else //we need to copy to CPU
372  {
373  viennacl::compressed_matrix<NumericT> cpu_mat(mat.size1(), mat.size2(), viennacl::traits::context(mat));
374  viennacl::switch_memory_context(cpu_mat, host_context);
375 
376  cpu_mat = mat;
377 
378  viennacl::linalg::precondition(cpu_mat, LU_temp, tag_);
379  }
380 
381  viennacl::copy(LU_temp, LU_);
382 
383  if (!tag_.use_level_scheduling())
384  return;
385 
386  //
387  // multifrontal part:
388  //
389 
390  viennacl::switch_memory_context(multifrontal_U_diagonal_, host_context);
391  multifrontal_U_diagonal_.resize(LU_.size1(), false);
393 
395  multifrontal_U_diagonal_, //dummy
396  multifrontal_L_row_index_arrays_,
397  multifrontal_L_row_buffers_,
398  multifrontal_L_col_buffers_,
399  multifrontal_L_element_buffers_,
400  multifrontal_L_row_elimination_num_list_);
401 
402 
404  multifrontal_U_diagonal_,
405  multifrontal_U_row_index_arrays_,
406  multifrontal_U_row_buffers_,
407  multifrontal_U_col_buffers_,
408  multifrontal_U_element_buffers_,
409  multifrontal_U_row_elimination_num_list_);
410 
411  //
412  // Bring to device if necessary:
413  //
414 
415  // L:
416 
417  for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_row_index_arrays_.begin();
418  it != multifrontal_L_row_index_arrays_.end();
419  ++it)
420  viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat));
421 
422  for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_row_buffers_.begin();
423  it != multifrontal_L_row_buffers_.end();
424  ++it)
425  viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat));
426 
427  for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_col_buffers_.begin();
428  it != multifrontal_L_col_buffers_.end();
429  ++it)
430  viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat));
431 
432  for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_L_element_buffers_.begin();
433  it != multifrontal_L_element_buffers_.end();
434  ++it)
435  viennacl::backend::switch_memory_context<NumericT>(*it, viennacl::traits::context(mat));
436 
437 
438  // U:
439 
440  viennacl::switch_memory_context(multifrontal_U_diagonal_, viennacl::traits::context(mat));
441 
442  for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_row_index_arrays_.begin();
443  it != multifrontal_U_row_index_arrays_.end();
444  ++it)
445  viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat));
446 
447  for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_row_buffers_.begin();
448  it != multifrontal_U_row_buffers_.end();
449  ++it)
450  viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat));
451 
452  for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_col_buffers_.begin();
453  it != multifrontal_U_col_buffers_.end();
454  ++it)
455  viennacl::backend::switch_memory_context<unsigned int>(*it, viennacl::traits::context(mat));
456 
457  for (typename std::list< viennacl::backend::mem_handle >::iterator it = multifrontal_U_element_buffers_.begin();
458  it != multifrontal_U_element_buffers_.end();
459  ++it)
460  viennacl::backend::switch_memory_context<NumericT>(*it, viennacl::traits::context(mat));
461 
462 
463  }
464 
465  ilut_tag const & tag_;
467 
468  std::list<viennacl::backend::mem_handle> multifrontal_L_row_index_arrays_;
469  std::list<viennacl::backend::mem_handle> multifrontal_L_row_buffers_;
470  std::list<viennacl::backend::mem_handle> multifrontal_L_col_buffers_;
471  std::list<viennacl::backend::mem_handle> multifrontal_L_element_buffers_;
472  std::list<vcl_size_t > multifrontal_L_row_elimination_num_list_;
473 
474  viennacl::vector<NumericT> multifrontal_U_diagonal_;
475  std::list<viennacl::backend::mem_handle> multifrontal_U_row_index_arrays_;
476  std::list<viennacl::backend::mem_handle> multifrontal_U_row_buffers_;
477  std::list<viennacl::backend::mem_handle> multifrontal_U_col_buffers_;
478  std::list<viennacl::backend::mem_handle> multifrontal_U_element_buffers_;
479  std::list<vcl_size_t > multifrontal_U_row_elimination_num_list_;
480 };
481 
482 } // namespace linalg
483 } // namespace viennacl
484 
485 
486 
487 
488 #endif
489 
490 
491 
const vcl_size_t & size2() const
Returns the number of columns.
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_div > > element_div(vector_base< T > const &v1, vector_base< T > const &v2)
void inplace_solve(const matrix_base< NumericT > &A, matrix_base< NumericT > &B, SolverTagT)
Direct inplace solver for triangular systems with multiple right hand sides, i.e. A \ B (MATLAB notat...
ilut_precond(MatrixT const &mat, ilut_tag const &tag)
Definition: ilut.hpp:257
NumericT setup_w(viennacl::compressed_matrix< NumericT > const &A, SizeT row, SparseVectorT &w)
Dispatcher overload for extracting the row of nonzeros of a compressed matrix.
Definition: ilut.hpp:88
const vcl_size_t & size1() const
Returns the number of rows.
Various little tools used here and there in ViennaCL.
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
void set_entries_per_row(unsigned int e)
Definition: ilut.hpp:68
void precondition(viennacl::compressed_matrix< NumericT > &A, ilu0_tag const &)
Implementation of a ILU-preconditioner with static pattern. Optimized version for CSR matrices...
Definition: ilu0.hpp:78
This file provides the forward declarations for the main types used within ViennaCL.
bool use_level_scheduling() const
Definition: ilut.hpp:76
void level_scheduling_setup_U(viennacl::compressed_matrix< NumericT, AlignmentV > const &LU, viennacl::vector< NumericT > const &diagonal_LU, std::list< viennacl::backend::mem_handle > &row_index_arrays, std::list< viennacl::backend::mem_handle > &row_buffers, std::list< viennacl::backend::mem_handle > &col_buffers, std::list< viennacl::backend::mem_handle > &element_buffers, std::list< vcl_size_t > &row_elimination_num_list)
Definition: common.hpp:208
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:245
void level_scheduling_substitute(viennacl::vector< NumericT > &vec, std::list< viennacl::backend::mem_handle > const &row_index_arrays, std::list< viennacl::backend::mem_handle > const &row_buffers, std::list< viennacl::backend::mem_handle > const &col_buffers, std::list< viennacl::backend::mem_handle > const &element_buffers, std::list< vcl_size_t > const &row_elimination_num_list)
Definition: common.hpp:224
const handle_type & handle() const
Returns the OpenCL handle to the matrix entry array.
const handle_type & handle1() const
Returns the OpenCL handle to the row index array.
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Definition: context.hpp:39
void apply(VectorT &vec) const
Definition: ilut.hpp:266
A tag class representing an upper triangular matrix.
Definition: forwards.h:814
A tag for incomplete LU factorization with threshold (ILUT)
Definition: ilut.hpp:45
Implementation of the compressed_matrix class.
unsigned int get_entries_per_row() const
Definition: ilut.hpp:74
const handle_type & handle2() const
Returns the OpenCL handle to the column index array.
ILUT preconditioner class, can be supplied to solve()-routines.
Definition: ilut.hpp:252
Common routines for single-threaded or OpenMP-enabled execution on CPU.
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
Definition: matrix.hpp:853
viennacl::memory_types memory_type() const
Definition: context.hpp:76
void set_drop_tolerance(double tol)
Definition: ilut.hpp:61
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
void use_level_scheduling(bool b)
Definition: ilut.hpp:77
A tag class representing a lower triangular matrix with unit diagonal.
Definition: forwards.h:819
void row_info(compressed_matrix< NumericT, AlignmentV > const &mat, vector_base< NumericT > &vec, viennacl::linalg::detail::row_info_types info_selector)
void level_scheduling_setup_L(viennacl::compressed_matrix< NumericT, AlignmentV > const &LU, viennacl::vector< NumericT > const &diagonal_LU, std::list< viennacl::backend::mem_handle > &row_index_arrays, std::list< viennacl::backend::mem_handle > &row_buffers, std::list< viennacl::backend::mem_handle > &col_buffers, std::list< viennacl::backend::mem_handle > &element_buffers, std::list< vcl_size_t > &row_elimination_num_list)
Definition: common.hpp:191
Common routines used within ILU-type preconditioners.
const handle_type & handle() const
Returns the memory handle.
Definition: vector_def.hpp:128
ilut_tag(unsigned int entries_per_row=20, double drop_tolerance=1e-4, bool with_level_scheduling=false)
The constructor.
Definition: ilut.hpp:54
memory_types get_active_handle_id() const
Returns an ID for the currently active memory buffer. Other memory buffers might contain old or no da...
Definition: mem_handle.hpp:118
double get_drop_tolerance() const
Definition: ilut.hpp:66
void switch_memory_context(T &obj, viennacl::context new_ctx)
Generic convenience routine for migrating data of an object to a new memory domain.
Definition: memory.hpp:622