ViennaCL - The Vienna Computing Library  1.6.2
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
mixed_precision_cg.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_
2 #define VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include <vector>
26 #include <map>
27 #include <cmath>
28 #include "viennacl/forwards.h"
29 #include "viennacl/tools/tools.hpp"
30 #include "viennacl/linalg/ilu.hpp"
31 #include "viennacl/linalg/prod.hpp"
34 #include "viennacl/traits/size.hpp"
36 #include "viennacl/ocl/backend.hpp"
37 #include "viennacl/ocl/kernel.hpp"
39 
41 
42 namespace viennacl
43 {
44  namespace linalg
45  {
46 
50  {
51  public:
58  mixed_precision_cg_tag(double tol = 1e-8, unsigned int max_iterations = 300, float inner_tol = 1e-2f) : tol_(tol), iterations_(max_iterations), inner_tol_(inner_tol) {}
59 
61  double tolerance() const { return tol_; }
63  float inner_tolerance() const { return inner_tol_; }
65  unsigned int max_iterations() const { return iterations_; }
66 
68  unsigned int iters() const { return iters_taken_; }
69  void iters(unsigned int i) const { iters_taken_ = i; }
70 
72  double error() const { return last_error_; }
74  void error(double e) const { last_error_ = e; }
75 
76 
77  private:
78  double tol_;
79  unsigned int iterations_;
80  float inner_tol_;
81 
82  //return values from solver
83  mutable unsigned int iters_taken_;
84  mutable double last_error_;
85  };
86 
87 
88  static const char * double_float_conversion_program =
89  "#if defined(cl_khr_fp64)\n"
90  "# pragma OPENCL EXTENSION cl_khr_fp64: enable\n"
91  "#elif defined(cl_amd_fp64)\n"
92  "# pragma OPENCL EXTENSION cl_amd_fp64: enable\n"
93  "#endif\n"
94  "__kernel void assign_double_to_float(\n"
95  " __global float * vec1,\n"
96  " __global const double * vec2, \n"
97  " unsigned int size) \n"
98  "{ \n"
99  " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
100  " vec1[i] = (float)(vec2[i]);\n"
101  "};\n\n"
102  "__kernel void inplace_add_float_to_double(\n"
103  " __global double * vec1,\n"
104  " __global const float * vec2, \n"
105  " unsigned int size) \n"
106  "{ \n"
107  " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
108  " vec1[i] += (double)(vec2[i]);\n"
109  "};\n";
110 
111 
121  template<typename MatrixType, typename VectorType>
122  VectorType solve(const MatrixType & matrix, VectorType const & rhs, mixed_precision_cg_tag const & tag)
123  {
124  //typedef typename VectorType::value_type ScalarType;
126  typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type CPU_ScalarType;
127 
128  //TODO: Assert CPU_ScalarType == double
129 
130  //std::cout << "Starting CG" << std::endl;
131  vcl_size_t problem_size = viennacl::traits::size(rhs);
132  VectorType result(rhs);
133  viennacl::traits::clear(result);
134 
135  VectorType residual = rhs;
136 
137  CPU_ScalarType ip_rr = viennacl::linalg::inner_prod(rhs, rhs);
138  CPU_ScalarType new_ip_rr = 0;
139  CPU_ScalarType norm_rhs_squared = ip_rr;
140 
141  if (norm_rhs_squared <= 0) //solution is zero if RHS norm is zero
142  return result;
143 
144  static bool first = true;
145 
146  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(matrix).context());
147  if (first)
148  {
149  ctx.add_program(double_float_conversion_program, "double_float_conversion_program");
150  }
151 
152  viennacl::vector<float> residual_low_precision(problem_size, viennacl::traits::context(rhs));
153  viennacl::vector<float> result_low_precision(problem_size, viennacl::traits::context(rhs));
154  viennacl::vector<float> p_low_precision(problem_size, viennacl::traits::context(rhs));
155  viennacl::vector<float> tmp_low_precision(problem_size, viennacl::traits::context(rhs));
156  float inner_ip_rr = static_cast<float>(ip_rr);
157  float new_inner_ip_rr = 0;
158  float initial_inner_rhs_norm_squared = static_cast<float>(ip_rr);
159  float alpha;
160  float beta;
161 
162  viennacl::ocl::kernel & assign_double_to_float = ctx.get_kernel("double_float_conversion_program", "assign_double_to_float");
163  viennacl::ocl::kernel & inplace_add_float_to_double = ctx.get_kernel("double_float_conversion_program", "inplace_add_float_to_double");
164 
165  // transfer rhs to single precision:
166  viennacl::ocl::enqueue( assign_double_to_float(p_low_precision.handle().opencl_handle(),
167  rhs.handle().opencl_handle(),
168  cl_uint(rhs.size())
169  ) );
170  //std::cout << "copying p_low_precision..." << std::endl;
171  //assign_double_to_float(p_low_precision.handle(), residual.handle(), residual.size());
172  residual_low_precision = p_low_precision;
173 
174  // transfer matrix to single precision:
175  viennacl::compressed_matrix<float> matrix_low_precision(matrix.size1(), matrix.size2(), matrix.nnz(), viennacl::traits::context(rhs));
176  viennacl::backend::memory_copy(matrix.handle1(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle1()), 0, 0, sizeof(cl_uint) * (matrix.size1() + 1) );
177  viennacl::backend::memory_copy(matrix.handle2(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle2()), 0, 0, sizeof(cl_uint) * (matrix.nnz()) );
178 
179  viennacl::ocl::enqueue( assign_double_to_float(matrix_low_precision.handle().opencl_handle(),
180  matrix.handle().opencl_handle(),
181  cl_uint(matrix.nnz())
182  ) );
183  //std::cout << "copying matrix_low_precision..." << std::endl;
184  //assign_double_to_float(const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle()), matrix.handle(), matrix.nnz());
185 
186  //std::cout << "Starting CG solver iterations... " << std::endl;
187 
188 
189  for (unsigned int i = 0; i < tag.max_iterations(); ++i)
190  {
191  tag.iters(i+1);
192 
193  // lower precision 'inner iteration'
194  tmp_low_precision = viennacl::linalg::prod(matrix_low_precision, p_low_precision);
195 
196  alpha = inner_ip_rr / viennacl::linalg::inner_prod(tmp_low_precision, p_low_precision);
197  result_low_precision += alpha * p_low_precision;
198  residual_low_precision -= alpha * tmp_low_precision;
199 
200  new_inner_ip_rr = viennacl::linalg::inner_prod(residual_low_precision, residual_low_precision);
201 
202  beta = new_inner_ip_rr / inner_ip_rr;
203  inner_ip_rr = new_inner_ip_rr;
204 
205  p_low_precision = residual_low_precision + beta * p_low_precision;
206 
207 
208 
209  if (new_inner_ip_rr < tag.inner_tolerance() * initial_inner_rhs_norm_squared || i == tag.max_iterations()-1)
210  {
211  //std::cout << "outer correction at i=" << i << std::endl;
212  //result += result_low_precision;
213  viennacl::ocl::enqueue( inplace_add_float_to_double(result.handle().opencl_handle(),
214  result_low_precision.handle().opencl_handle(),
215  cl_uint(result.size())
216  ) );
217 
218  // residual = b - Ax (without introducing a temporary)
219  residual = viennacl::linalg::prod(matrix, result);
220  residual = rhs - residual;
221 
222  new_ip_rr = viennacl::linalg::inner_prod(residual, residual);
223  if (new_ip_rr / norm_rhs_squared < tag.tolerance() * tag.tolerance())//squared norms involved here
224  break;
225 
226  // p_low_precision = residual;
227  viennacl::ocl::enqueue( assign_double_to_float(p_low_precision.handle().opencl_handle(),
228  residual.handle().opencl_handle(),
229  cl_uint(residual.size())
230  ) );
231  result_low_precision.clear();
232  residual_low_precision = p_low_precision;
233  initial_inner_rhs_norm_squared = static_cast<float>(new_ip_rr);
234  inner_ip_rr = static_cast<float>(new_ip_rr);
235  }
236  }
237 
238  //store last error estimate:
239  tag.error(std::sqrt(new_ip_rr / norm_rhs_squared));
240 
241  return result;
242  }
243 
244  template<typename MatrixType, typename VectorType>
245  VectorType solve(const MatrixType & matrix, VectorType const & rhs, mixed_precision_cg_tag const & tag, viennacl::linalg::no_precond)
246  {
247  return solve(matrix, rhs, tag);
248  }
249 
250 
251  }
252 }
253 
254 #endif
Generic size and resize functionality for different vector and matrix types.
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:58
Various little tools used here and there in ViennaCL.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:54
float inner_tolerance() const
Returns the relative tolerance.
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
Definition: clear.hpp:57
This file provides the forward declarations for the main types used within ViennaCL.
A dense matrix class.
Definition: forwards.h:374
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
Definition: inner_prod.hpp:89
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
viennacl::vector< NumericT > solve(viennacl::compressed_matrix< NumericT > const &A, viennacl::vector_base< NumericT > const &rhs, bicgstab_tag const &tag, viennacl::linalg::no_precond)
Overload for the pipelined BiCGStab implementation for the ViennaCL sparse matrix types...
Definition: bicgstab.hpp:209
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:91
viennacl::ocl::program & add_program(cl_program p, std::string const &prog_name)
Adds a program to the context.
Definition: context.hpp:370
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:607
A tag class representing the use of no preconditioner.
Definition: forwards.h:833
Implementations of incomplete factorization preconditioners. Convenience header file.
unsigned int max_iterations() const
Returns the maximum number of iterations.
double tolerance() const
Returns the relative tolerance.
std::size_t vcl_size_t
Definition: forwards.h:74
Generic clear functionality for different vector and matrix types.
mixed_precision_cg_tag(double tol=1e-8, unsigned int max_iterations=300, float inner_tol=1e-2f)
The constructor.
double error() const
Returns the estimated relative error at the end of the solver run.
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Definition: result_of.hpp:238
Implementations of the OpenCL backend, where all contexts are stored in.
Proxy classes for vectors.
void memory_copy(mem_handle const &src_buffer, mem_handle &dst_buffer, vcl_size_t src_offset, vcl_size_t dst_offset, vcl_size_t bytes_to_copy)
Copies 'bytes_to_copy' bytes from address 'src_buffer + src_offset' to memory starting at address 'ds...
Definition: memory.hpp:140
void clear()
Resets all entries to zero. Does not change the size of the vector.
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:40
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:50
Representation of an OpenCL kernel in ViennaCL.
float ScalarType
Definition: fft_1d.cpp:42
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
Definition: mem_handle.hpp:89
A tag for the conjugate gradient Used for supplying solver parameters and for dispatching the solve()...
void error(double e) const
Sets the estimated relative error at the end of the solver run.
A collection of compile time type deductions.
unsigned int iters() const
Return the number of solver iterations:
const handle_type & handle() const
Returns the memory handle.
Definition: vector_def.hpp:128
Main interface routines for memory management.