ViennaCL - The Vienna Computing Library  1.5.2
viennacl/linalg/mixed_precision_cg.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_
00002 #define VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2014, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008    Portions of this software are copyright by UChicago Argonne, LLC.
00009 
00010                             -----------------
00011                   ViennaCL - The Vienna Computing Library
00012                             -----------------
00013 
00014    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00015 
00016    (A list of authors and contributors can be found in the PDF manual)
00017 
00018    License:         MIT (X11), see file LICENSE in the base directory
00019 ============================================================================= */
00020 
00025 #include <vector>
00026 #include <map>
00027 #include <cmath>
00028 #include "viennacl/forwards.h"
00029 #include "viennacl/tools/tools.hpp"
00030 #include "viennacl/linalg/ilu.hpp"
00031 #include "viennacl/linalg/prod.hpp"
00032 #include "viennacl/linalg/inner_prod.hpp"
00033 #include "viennacl/traits/clear.hpp"
00034 #include "viennacl/traits/size.hpp"
00035 #include "viennacl/meta/result_of.hpp"
00036 #include "viennacl/ocl/backend.hpp"
00037 #include "viennacl/ocl/kernel.hpp"
00038 #include "viennacl/backend/memory.hpp"
00039 
00040 #include "viennacl/vector_proxy.hpp"
00041 
00042 namespace viennacl
00043 {
00044   namespace linalg
00045   {
00046 
/** @brief Configuration and result tag for the mixed-precision conjugate gradient solver.
*
* Carries the solver parameters (outer tolerance, iteration limit, inner tolerance)
* and receives the iteration count and the final error estimate written by solve().
* The result members are 'mutable' because solve() takes the tag by const reference.
*/
class mixed_precision_cg_tag
{
  public:
    /** @brief Sets up the solver parameters; the result members start out at zero.
    *
    * @param tol              Relative tolerance on the residual norm used by solve() as the stopping criterion
    * @param max_iterations   Upper bound on the number of solver iterations
    * @param inner_tol        Relative reduction of the squared single-precision residual norm that triggers a double-precision correction in solve()
    */
    mixed_precision_cg_tag(double tol = 1e-8, unsigned int max_iterations = 300, float inner_tol = 1e-2f)
      : tol_(tol),
        iterations_(max_iterations),
        inner_tol_(inner_tol),
        iters_taken_(0),   // defined value even if queried before any solve() call
        last_error_(0)
    {}

    /** @brief Returns the relative tolerance of the outer (double precision) iteration */
    double tolerance() const { return tol_; }
    /** @brief Returns the tolerance of the inner (single precision) iteration */
    float inner_tolerance() const { return inner_tol_; }
    /** @brief Returns the maximum number of iterations */
    unsigned int max_iterations() const { return iterations_; }

    /** @brief Returns the number of iterations recorded by the last solver run (0 if none was recorded) */
    unsigned int iters() const { return iters_taken_; }
    /** @brief Records the number of iterations taken; const because the solver only holds a const reference */
    void iters(unsigned int i) const { iters_taken_ = i; }

    /** @brief Returns the error estimate recorded by the last solver run (0 if none was recorded) */
    double error() const { return last_error_; }
    /** @brief Records the error estimate; const because the solver only holds a const reference */
    void error(double e) const { last_error_ = e; }


  private:
    double tol_;
    unsigned int iterations_;
    float inner_tol_;

    //return values from solver
    mutable unsigned int iters_taken_;
    mutable double last_error_;
};
00086 
00087 
/** @brief OpenCL source of the two conversion kernels used by the mixed-precision CG solver.
*
* - assign_double_to_float:      vec1[i]  = (float)  vec2[i]  for i < size
* - inplace_add_float_to_double: vec1[i] += (double) vec2[i]  for i < size
*
* The pointer itself is const (and explicitly static), which gives the variable
* internal linkage: this header can be included from several translation units
* without producing a multiple-definition error at link time.
*/
static const char * const double_float_conversion_program =
"#if defined(cl_khr_fp64)\n"
"#  pragma OPENCL EXTENSION cl_khr_fp64: enable\n"
"#elif defined(cl_amd_fp64)\n"
"#  pragma OPENCL EXTENSION cl_amd_fp64: enable\n"
"#endif\n"
"__kernel void assign_double_to_float(\n"
"          __global float * vec1,\n"
"          __global const double * vec2, \n"
"          unsigned int size) \n"
"{ \n"
"  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
"    vec1[i] = (float)(vec2[i]);\n"
"};\n\n"
"__kernel void inplace_add_float_to_double(\n"
"          __global double * vec1,\n"
"          __global const float * vec2, \n"
"          unsigned int size) \n"
"{ \n"
"  for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n"
"    vec1[i] += (double)(vec2[i]);\n"
"};\n";
00110 
00111 
00121     template <typename MatrixType, typename VectorType>
00122     VectorType solve(const MatrixType & matrix, VectorType const & rhs, mixed_precision_cg_tag const & tag)
00123     {
00124       //typedef typename VectorType::value_type      ScalarType;
00125       typedef typename viennacl::result_of::value_type<VectorType>::type        ScalarType;
00126       typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type    CPU_ScalarType;
00127 
00128       //TODO: Assert CPU_ScalarType == double
00129 
00130       //std::cout << "Starting CG" << std::endl;
00131       vcl_size_t problem_size = viennacl::traits::size(rhs);
00132       VectorType result(rhs);
00133       viennacl::traits::clear(result);
00134 
00135       VectorType residual = rhs;
00136 
00137       CPU_ScalarType ip_rr = viennacl::linalg::inner_prod(rhs, rhs);
00138       CPU_ScalarType new_ip_rr = 0;
00139       CPU_ScalarType norm_rhs_squared = ip_rr;
00140 
00141       if (norm_rhs_squared == 0) //solution is zero if RHS norm is zero
00142         return result;
00143 
00144       static bool first = true;
00145 
00146       viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(matrix).context());
00147       if (first)
00148       {
00149         ctx.add_program(double_float_conversion_program, "double_float_conversion_program");
00150       }
00151 
00152       viennacl::vector<float> residual_low_precision(problem_size, viennacl::traits::context(rhs));
00153       viennacl::vector<float> result_low_precision(problem_size, viennacl::traits::context(rhs));
00154       viennacl::vector<float> p_low_precision(problem_size, viennacl::traits::context(rhs));
00155       viennacl::vector<float> tmp_low_precision(problem_size, viennacl::traits::context(rhs));
00156       float inner_ip_rr = static_cast<float>(ip_rr);
00157       float new_inner_ip_rr = 0;
00158       float initial_inner_rhs_norm_squared = static_cast<float>(ip_rr);
00159       float alpha;
00160       float beta;
00161 
00162       viennacl::ocl::kernel & assign_double_to_float      = ctx.get_kernel("double_float_conversion_program", "assign_double_to_float");
00163       viennacl::ocl::kernel & inplace_add_float_to_double = ctx.get_kernel("double_float_conversion_program", "inplace_add_float_to_double");
00164 
00165       // transfer rhs to single precision:
00166       viennacl::ocl::enqueue( assign_double_to_float(p_low_precision.handle().opencl_handle(),
00167                                                      rhs.handle().opencl_handle(),
00168                                                      cl_uint(rhs.size())
00169                                                     ) );
00170       //std::cout << "copying p_low_precision..." << std::endl;
00171       //assign_double_to_float(p_low_precision.handle(), residual.handle(), residual.size());
00172       residual_low_precision = p_low_precision;
00173 
00174       // transfer matrix to single precision:
00175       viennacl::compressed_matrix<float> matrix_low_precision(matrix.size1(), matrix.size2(), matrix.nnz(), viennacl::traits::context(rhs));
00176       viennacl::backend::memory_copy(matrix.handle1(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle1()), 0, 0, sizeof(cl_uint) * (matrix.size1() + 1) );
00177       viennacl::backend::memory_copy(matrix.handle2(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle2()), 0, 0, sizeof(cl_uint) * (matrix.nnz()) );
00178 
00179       viennacl::ocl::enqueue( assign_double_to_float(matrix_low_precision.handle().opencl_handle(),
00180                                                      matrix.handle().opencl_handle(),
00181                                                      cl_uint(matrix.nnz())
00182                                                     ) );
00183       //std::cout << "copying matrix_low_precision..." << std::endl;
00184       //assign_double_to_float(const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle()), matrix.handle(), matrix.nnz());
00185 
00186       //std::cout << "Starting CG solver iterations... " << std::endl;
00187 
00188 
00189       for (unsigned int i = 0; i < tag.max_iterations(); ++i)
00190       {
00191         tag.iters(i+1);
00192 
00193         // lower precision 'inner iteration'
00194         tmp_low_precision = viennacl::linalg::prod(matrix_low_precision, p_low_precision);
00195 
00196         alpha = inner_ip_rr / viennacl::linalg::inner_prod(tmp_low_precision, p_low_precision);
00197         result_low_precision += alpha * p_low_precision;
00198         residual_low_precision -= alpha * tmp_low_precision;
00199 
00200         new_inner_ip_rr = viennacl::linalg::inner_prod(residual_low_precision, residual_low_precision);
00201 
00202         beta = new_inner_ip_rr / inner_ip_rr;
00203         inner_ip_rr = new_inner_ip_rr;
00204 
00205         p_low_precision = residual_low_precision + beta * p_low_precision;
00206 
00207 
00208 
00209         if (new_inner_ip_rr < tag.inner_tolerance() * initial_inner_rhs_norm_squared || i == tag.max_iterations()-1)
00210         {
00211           //std::cout << "outer correction at i=" << i << std::endl;
00212           //result += result_low_precision;
00213           viennacl::ocl::enqueue( inplace_add_float_to_double(result.handle().opencl_handle(),
00214                                                               result_low_precision.handle().opencl_handle(),
00215                                                               cl_uint(result.size())
00216                                                              ) );
00217 
00218           // residual = b - Ax  (without introducing a temporary)
00219           residual = viennacl::linalg::prod(matrix, result);
00220           residual = rhs - residual;
00221 
00222           new_ip_rr = viennacl::linalg::inner_prod(residual, residual);
00223           if (new_ip_rr / norm_rhs_squared < tag.tolerance() *  tag.tolerance())//squared norms involved here
00224             break;
00225 
00226           // p_low_precision = residual;
00227           viennacl::ocl::enqueue( assign_double_to_float(p_low_precision.handle().opencl_handle(),
00228                                                          residual.handle().opencl_handle(),
00229                                                          cl_uint(residual.size())
00230                                                         ) );
00231           result_low_precision.clear();
00232           residual_low_precision = p_low_precision;
00233           initial_inner_rhs_norm_squared = static_cast<float>(new_ip_rr);
00234           inner_ip_rr = static_cast<float>(new_ip_rr);
00235         }
00236       }
00237 
00238       //store last error estimate:
00239       tag.error(std::sqrt(new_ip_rr / norm_rhs_squared));
00240 
00241       return result;
00242     }
00243 
00244     template <typename MatrixType, typename VectorType>
00245     VectorType solve(const MatrixType & matrix, VectorType const & rhs, mixed_precision_cg_tag const & tag, viennacl::linalg::no_precond)
00246     {
00247       return solve(matrix, rhs, tag);
00248     }
00249 
00250 
00251   }
00252 }
00253 
00254 #endif