ViennaCL - The Vienna Computing Library
1.5.2
|
00001 #ifndef VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_ 00002 #define VIENNACL_LINALG_MIXED_PRECISION_CG_HPP_ 00003 00004 /* ========================================================================= 00005 Copyright (c) 2010-2014, Institute for Microelectronics, 00006 Institute for Analysis and Scientific Computing, 00007 TU Wien. 00008 Portions of this software are copyright by UChicago Argonne, LLC. 00009 00010 ----------------- 00011 ViennaCL - The Vienna Computing Library 00012 ----------------- 00013 00014 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 00015 00016 (A list of authors and contributors can be found in the PDF manual) 00017 00018 License: MIT (X11), see file LICENSE in the base directory 00019 ============================================================================= */ 00020 00025 #include <vector> 00026 #include <map> 00027 #include <cmath> 00028 #include "viennacl/forwards.h" 00029 #include "viennacl/tools/tools.hpp" 00030 #include "viennacl/linalg/ilu.hpp" 00031 #include "viennacl/linalg/prod.hpp" 00032 #include "viennacl/linalg/inner_prod.hpp" 00033 #include "viennacl/traits/clear.hpp" 00034 #include "viennacl/traits/size.hpp" 00035 #include "viennacl/meta/result_of.hpp" 00036 #include "viennacl/ocl/backend.hpp" 00037 #include "viennacl/ocl/kernel.hpp" 00038 #include "viennacl/backend/memory.hpp" 00039 00040 #include "viennacl/vector_proxy.hpp" 00041 00042 namespace viennacl 00043 { 00044 namespace linalg 00045 { 00046 00049 class mixed_precision_cg_tag 00050 { 00051 public: 00058 mixed_precision_cg_tag(double tol = 1e-8, unsigned int max_iterations = 300, float inner_tol = 1e-2f) : tol_(tol), iterations_(max_iterations), inner_tol_(inner_tol) {} 00059 00061 double tolerance() const { return tol_; } 00063 float inner_tolerance() const { return inner_tol_; } 00065 unsigned int max_iterations() const { return iterations_; } 00066 00068 unsigned int iters() const { return iters_taken_; } 00069 void iters(unsigned int i) const { iters_taken_ = i; } 00070 00072 double error() const { return last_error_; } 00074 void error(double e) const { last_error_ = e; } 00075 00076 00077 private: 00078 double tol_; 00079 unsigned int iterations_; 00080 float inner_tol_; 00081 00082 //return values from solver 00083 mutable unsigned int iters_taken_; 00084 mutable double last_error_; 00085 }; 00086 00087 00088 const char * double_float_conversion_program = 00089 "#if defined(cl_khr_fp64)\n" 00090 "# pragma OPENCL EXTENSION cl_khr_fp64: enable\n" 00091 "#elif defined(cl_amd_fp64)\n" 00092 "# pragma OPENCL EXTENSION cl_amd_fp64: enable\n" 00093 "#endif\n" 00094 "__kernel void assign_double_to_float(\n" 00095 " __global float * vec1,\n" 00096 " __global const double * vec2, \n" 00097 " unsigned int size) \n" 00098 "{ \n" 00099 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n" 00100 " vec1[i] = (float)(vec2[i]);\n" 00101 "};\n\n" 00102 "__kernel void inplace_add_float_to_double(\n" 00103 " __global double * vec1,\n" 00104 " __global const float * vec2, \n" 00105 " unsigned int size) \n" 00106 "{ \n" 00107 " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0))\n" 00108 " vec1[i] += (double)(vec2[i]);\n" 00109 "};\n"; 00110 00111 00121 template <typename MatrixType, typename VectorType> 00122 VectorType solve(const MatrixType & matrix, VectorType const & rhs, mixed_precision_cg_tag const & tag) 00123 { 00124 //typedef typename VectorType::value_type ScalarType; 00125 typedef typename viennacl::result_of::value_type<VectorType>::type ScalarType; 00126 typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type CPU_ScalarType; 00127 00128 //TODO: Assert CPU_ScalarType == double 00129 00130 //std::cout << "Starting CG" << std::endl; 00131 vcl_size_t problem_size = viennacl::traits::size(rhs); 00132 VectorType result(rhs); 00133 viennacl::traits::clear(result); 00134 00135 VectorType residual = rhs; 00136 00137 CPU_ScalarType ip_rr = viennacl::linalg::inner_prod(rhs, rhs); 00138 CPU_ScalarType new_ip_rr = 0; 00139 CPU_ScalarType norm_rhs_squared = ip_rr; 00140 00141 if (norm_rhs_squared == 0) //solution is zero if RHS norm is zero 00142 return result; 00143 00144 static bool first = true; 00145 00146 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(matrix).context()); 00147 if (first) 00148 { 00149 ctx.add_program(double_float_conversion_program, "double_float_conversion_program"); 00150 } 00151 00152 viennacl::vector<float> residual_low_precision(problem_size, viennacl::traits::context(rhs)); 00153 viennacl::vector<float> result_low_precision(problem_size, viennacl::traits::context(rhs)); 00154 viennacl::vector<float> p_low_precision(problem_size, viennacl::traits::context(rhs)); 00155 viennacl::vector<float> tmp_low_precision(problem_size, viennacl::traits::context(rhs)); 00156 float inner_ip_rr = static_cast<float>(ip_rr); 00157 float new_inner_ip_rr = 0; 00158 float initial_inner_rhs_norm_squared = static_cast<float>(ip_rr); 00159 float alpha; 00160 float beta; 00161 00162 viennacl::ocl::kernel & assign_double_to_float = ctx.get_kernel("double_float_conversion_program", "assign_double_to_float"); 00163 viennacl::ocl::kernel & inplace_add_float_to_double = ctx.get_kernel("double_float_conversion_program", "inplace_add_float_to_double"); 00164 00165 // transfer rhs to single precision: 00166 viennacl::ocl::enqueue( assign_double_to_float(p_low_precision.handle().opencl_handle(), 00167 rhs.handle().opencl_handle(), 00168 cl_uint(rhs.size()) 00169 ) ); 00170 //std::cout << "copying p_low_precision..." << std::endl; 00171 //assign_double_to_float(p_low_precision.handle(), residual.handle(), residual.size()); 00172 residual_low_precision = p_low_precision; 00173 00174 // transfer matrix to single precision: 00175 viennacl::compressed_matrix<float> matrix_low_precision(matrix.size1(), matrix.size2(), matrix.nnz(), viennacl::traits::context(rhs)); 00176 viennacl::backend::memory_copy(matrix.handle1(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle1()), 0, 0, sizeof(cl_uint) * (matrix.size1() + 1) ); 00177 viennacl::backend::memory_copy(matrix.handle2(), const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle2()), 0, 0, sizeof(cl_uint) * (matrix.nnz()) ); 00178 00179 viennacl::ocl::enqueue( assign_double_to_float(matrix_low_precision.handle().opencl_handle(), 00180 matrix.handle().opencl_handle(), 00181 cl_uint(matrix.nnz()) 00182 ) ); 00183 //std::cout << "copying matrix_low_precision..." << std::endl; 00184 //assign_double_to_float(const_cast<viennacl::backend::mem_handle &>(matrix_low_precision.handle()), matrix.handle(), matrix.nnz()); 00185 00186 //std::cout << "Starting CG solver iterations... " << std::endl; 00187 00188 00189 for (unsigned int i = 0; i < tag.max_iterations(); ++i) 00190 { 00191 tag.iters(i+1); 00192 00193 // lower precision 'inner iteration' 00194 tmp_low_precision = viennacl::linalg::prod(matrix_low_precision, p_low_precision); 00195 00196 alpha = inner_ip_rr / viennacl::linalg::inner_prod(tmp_low_precision, p_low_precision); 00197 result_low_precision += alpha * p_low_precision; 00198 residual_low_precision -= alpha * tmp_low_precision; 00199 00200 new_inner_ip_rr = viennacl::linalg::inner_prod(residual_low_precision, residual_low_precision); 00201 00202 beta = new_inner_ip_rr / inner_ip_rr; 00203 inner_ip_rr = new_inner_ip_rr; 00204 00205 p_low_precision = residual_low_precision + beta * p_low_precision; 00206 00207 00208 00209 if (new_inner_ip_rr < tag.inner_tolerance() * initial_inner_rhs_norm_squared || i == tag.max_iterations()-1) 00210 { 00211 //std::cout << "outer correction at i=" << i << std::endl; 00212 //result += result_low_precision; 00213 viennacl::ocl::enqueue( inplace_add_float_to_double(result.handle().opencl_handle(), 00214 result_low_precision.handle().opencl_handle(), 00215 cl_uint(result.size()) 00216 ) ); 00217 00218 // residual = b - Ax (without introducing a temporary) 00219 residual = viennacl::linalg::prod(matrix, result); 00220 residual = rhs - residual; 00221 00222 new_ip_rr = viennacl::linalg::inner_prod(residual, residual); 00223 if (new_ip_rr / norm_rhs_squared < tag.tolerance() * tag.tolerance())//squared norms involved here 00224 break; 00225 00226 // p_low_precision = residual; 00227 viennacl::ocl::enqueue( assign_double_to_float(p_low_precision.handle().opencl_handle(), 00228 residual.handle().opencl_handle(), 00229 cl_uint(residual.size()) 00230 ) ); 00231 result_low_precision.clear(); 00232 residual_low_precision = p_low_precision; 00233 initial_inner_rhs_norm_squared = static_cast<float>(new_ip_rr); 00234 inner_ip_rr = static_cast<float>(new_ip_rr); 00235 } 00236 } 00237 00238 //store last error estimate: 00239 tag.error(std::sqrt(new_ip_rr / norm_rhs_squared)); 00240 00241 return result; 00242 } 00243 00244 template <typename MatrixType, typename VectorType> 00245 VectorType solve(const MatrixType & matrix, VectorType const & rhs, mixed_precision_cg_tag const & tag, viennacl::linalg::no_precond) 00246 { 00247 return solve(matrix, rhs, tag); 00248 } 00249 00250 00251 } 00252 } 00253 00254 #endif