ViennaCL — The Vienna Computing Library, version 1.5.2
00001 #ifndef VIENNACL_ELL_MATRIX_HPP_ 00002 #define VIENNACL_ELL_MATRIX_HPP_ 00003 00004 /* ========================================================================= 00005 Copyright (c) 2010-2014, Institute for Microelectronics, 00006 Institute for Analysis and Scientific Computing, 00007 TU Wien. 00008 Portions of this software are copyright by UChicago Argonne, LLC. 00009 00010 ----------------- 00011 ViennaCL - The Vienna Computing Library 00012 ----------------- 00013 00014 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 00015 00016 (A list of authors and contributors can be found in the PDF manual) 00017 00018 License: MIT (X11), see file LICENSE in the base directory 00019 ============================================================================= */ 00020 00028 #include "viennacl/forwards.h" 00029 #include "viennacl/vector.hpp" 00030 00031 #include "viennacl/tools/tools.hpp" 00032 00033 #include "viennacl/linalg/sparse_matrix_operations.hpp" 00034 00035 namespace viennacl 00036 { 00052 template<typename SCALARTYPE, unsigned int ALIGNMENT /* see forwards.h for default argument */> 00053 class ell_matrix 00054 { 00055 public: 00056 typedef viennacl::backend::mem_handle handle_type; 00057 typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType> value_type; 00058 typedef vcl_size_t size_type; 00059 00060 ell_matrix() : rows_(0), cols_(0), maxnnz_(0) {} 00061 00062 ell_matrix(viennacl::context ctx) : rows_(0), cols_(0), maxnnz_(0) 00063 { 00064 coords_.switch_active_handle_id(ctx.memory_type()); 00065 elements_.switch_active_handle_id(ctx.memory_type()); 00066 00067 #ifdef VIENNACL_WITH_OPENCL 00068 if (ctx.memory_type() == OPENCL_MEMORY) 00069 { 00070 coords_.opencl_handle().context(ctx.opencl_context()); 00071 elements_.opencl_handle().context(ctx.opencl_context()); 00072 } 00073 #endif 00074 } 00075 00076 public: 00077 vcl_size_t internal_size1() const { return viennacl::tools::align_to_multiple<vcl_size_t>(rows_, 
ALIGNMENT); } 00078 vcl_size_t internal_size2() const { return viennacl::tools::align_to_multiple<vcl_size_t>(cols_, ALIGNMENT); } 00079 00080 vcl_size_t size1() const { return rows_; } 00081 vcl_size_t size2() const { return cols_; } 00082 00083 vcl_size_t internal_maxnnz() const {return viennacl::tools::align_to_multiple<vcl_size_t>(maxnnz_, ALIGNMENT); } 00084 vcl_size_t maxnnz() const { return maxnnz_; } 00085 00086 vcl_size_t nnz() const { return rows_ * maxnnz_; } 00087 vcl_size_t internal_nnz() const { return internal_size1() * internal_maxnnz(); } 00088 00089 handle_type & handle() { return elements_; } 00090 const handle_type & handle() const { return elements_; } 00091 00092 handle_type & handle2() { return coords_; } 00093 const handle_type & handle2() const { return coords_; } 00094 00095 #if defined(_MSC_VER) && _MSC_VER < 1500 //Visual Studio 2005 needs special treatment 00096 template <typename CPU_MATRIX> 00097 friend void copy(const CPU_MATRIX & cpu_matrix, ell_matrix & gpu_matrix ); 00098 #else 00099 template <typename CPU_MATRIX, typename T, unsigned int ALIGN> 00100 friend void copy(const CPU_MATRIX & cpu_matrix, ell_matrix<T, ALIGN> & gpu_matrix ); 00101 #endif 00102 00103 private: 00104 vcl_size_t rows_; 00105 vcl_size_t cols_; 00106 vcl_size_t maxnnz_; 00107 00108 handle_type coords_; 00109 handle_type elements_; 00110 }; 00111 00112 template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT> 00113 void copy(const CPU_MATRIX& cpu_matrix, ell_matrix<SCALARTYPE, ALIGNMENT>& gpu_matrix ) 00114 { 00115 assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); 00116 assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); 00117 00118 if(cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0) 00119 { 00120 //determine max capacity for row 00121 vcl_size_t max_entries_per_row = 0; 00122 for (typename 
CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it) 00123 { 00124 vcl_size_t num_entries = 0; 00125 for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) 00126 { 00127 ++num_entries; 00128 } 00129 00130 max_entries_per_row = std::max(max_entries_per_row, num_entries); 00131 } 00132 00133 //setup GPU matrix 00134 gpu_matrix.maxnnz_ = max_entries_per_row; 00135 gpu_matrix.rows_ = cpu_matrix.size1(); 00136 gpu_matrix.cols_ = cpu_matrix.size2(); 00137 00138 vcl_size_t nnz = gpu_matrix.internal_nnz(); 00139 00140 viennacl::backend::typesafe_host_array<unsigned int> coords(gpu_matrix.handle2(), nnz); 00141 std::vector<SCALARTYPE> elements(nnz, 0); 00142 00143 // std::cout << "ELL_MATRIX copy " << gpu_matrix.maxnnz_ << " " << gpu_matrix.rows_ << " " << gpu_matrix.cols_ << " " 00144 // << gpu_matrix.internal_maxnnz() << "\n"; 00145 00146 for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it) 00147 { 00148 vcl_size_t data_index = 0; 00149 00150 for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it) 00151 { 00152 coords.set(gpu_matrix.internal_size1() * data_index + col_it.index1(), col_it.index2()); 00153 elements[gpu_matrix.internal_size1() * data_index + col_it.index1()] = *col_it; 00154 //std::cout << *col_it << "\n"; 00155 data_index++; 00156 } 00157 } 00158 00159 viennacl::backend::memory_create(gpu_matrix.handle2(), coords.raw_size(), traits::context(gpu_matrix.handle2()), coords.get()); 00160 viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(SCALARTYPE) * elements.size(), traits::context(gpu_matrix.handle()), &(elements[0])); 00161 } 00162 } 00163 00164 template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT> 00165 void copy(const ell_matrix<SCALARTYPE, ALIGNMENT>& gpu_matrix, CPU_MATRIX& cpu_matrix) 00166 { 00167 assert( 
(viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") ); 00168 assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") ); 00169 00170 if(gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0) 00171 { 00172 std::vector<SCALARTYPE> elements(gpu_matrix.internal_nnz()); 00173 viennacl::backend::typesafe_host_array<unsigned int> coords(gpu_matrix.handle2(), gpu_matrix.internal_nnz()); 00174 00175 viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE) * elements.size(), &(elements[0])); 00176 viennacl::backend::memory_read(gpu_matrix.handle2(), 0, coords.raw_size(), coords.get()); 00177 00178 for(vcl_size_t row = 0; row < gpu_matrix.size1(); row++) 00179 { 00180 for(vcl_size_t ind = 0; ind < gpu_matrix.internal_maxnnz(); ind++) 00181 { 00182 vcl_size_t offset = gpu_matrix.internal_size1() * ind + row; 00183 00184 if(elements[offset] == static_cast<SCALARTYPE>(0.0)) 00185 continue; 00186 00187 if(coords[offset] >= gpu_matrix.size2()) 00188 { 00189 std::cerr << "ViennaCL encountered invalid data " << offset << " " << ind << " " << row << " " << coords[offset] << " " << gpu_matrix.size2() << std::endl; 00190 return; 00191 } 00192 00193 cpu_matrix(row, coords[offset]) = elements[offset]; 00194 } 00195 } 00196 } 00197 } 00198 00199 00200 // 00201 // Specify available operations: 00202 // 00203 00206 namespace linalg 00207 { 00208 namespace detail 00209 { 00210 // x = A * y 00211 template <typename T, unsigned int A> 00212 struct op_executor<vector_base<T>, op_assign, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> > 00213 { 00214 static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) 00215 { 00216 // check for the special case x = A * x 00217 if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs())) 00218 { 00219 viennacl::vector<T> temp(lhs); 00220 
viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); 00221 lhs = temp; 00222 } 00223 else 00224 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs); 00225 } 00226 }; 00227 00228 template <typename T, unsigned int A> 00229 struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> > 00230 { 00231 static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) 00232 { 00233 viennacl::vector<T> temp(lhs); 00234 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); 00235 lhs += temp; 00236 } 00237 }; 00238 00239 template <typename T, unsigned int A> 00240 struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> > 00241 { 00242 static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> const & rhs) 00243 { 00244 viennacl::vector<T> temp(lhs); 00245 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp); 00246 lhs -= temp; 00247 } 00248 }; 00249 00250 00251 // x = A * vec_op 00252 template <typename T, unsigned int A, typename LHS, typename RHS, typename OP> 00253 struct op_executor<vector_base<T>, op_assign, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> > 00254 { 00255 static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) 00256 { 00257 viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs)); 00258 viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs); 00259 } 00260 }; 00261 00262 // x = A * vec_op 00263 template <typename T, unsigned int A, typename LHS, typename RHS, typename OP> 00264 struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> > 00265 { 00266 
static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) 00267 { 00268 viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs)); 00269 viennacl::vector<T> temp_result(lhs); 00270 viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); 00271 lhs += temp_result; 00272 } 00273 }; 00274 00275 // x = A * vec_op 00276 template <typename T, unsigned int A, typename LHS, typename RHS, typename OP> 00277 struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> > 00278 { 00279 static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs) 00280 { 00281 viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs)); 00282 viennacl::vector<T> temp_result(lhs); 00283 viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result); 00284 lhs -= temp_result; 00285 } 00286 }; 00287 00288 } // namespace detail 00289 } // namespace linalg 00290 00292 } 00293 00294 #endif 00295 00296