ViennaCL - The Vienna Computing Library  1.5.2
viennacl/ell_matrix.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_ELL_MATRIX_HPP_
00002 #define VIENNACL_ELL_MATRIX_HPP_
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2014, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008    Portions of this software are copyright by UChicago Argonne, LLC.
00009 
00010                             -----------------
00011                   ViennaCL - The Vienna Computing Library
00012                             -----------------
00013 
00014    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00015 
00016    (A list of authors and contributors can be found in the PDF manual)
00017 
00018    License:         MIT (X11), see file LICENSE in the base directory
00019 ============================================================================= */
00020 
00028 #include "viennacl/forwards.h"
00029 #include "viennacl/vector.hpp"
00030 
00031 #include "viennacl/tools/tools.hpp"
00032 
00033 #include "viennacl/linalg/sparse_matrix_operations.hpp"
00034 
00035 namespace viennacl
00036 {
    /** @brief Sparse matrix class using the ELLPACK (ELL) storage format.
      *
      * Every row stores the same number of entries (maxnnz_); shorter rows are
      * zero-padded. Storage is column-major over the padded slots: slot k of row r
      * lives at index internal_size1() * k + r in both the column-index buffer
      * (coords_) and the value buffer (elements_) — see the friend copy() below.
      * Efficient when row lengths are similar; strongly varying row lengths waste
      * memory through padding.
      *
      * @tparam SCALARTYPE  Floating-point entry type (float or double)
      * @tparam ALIGNMENT   Padding multiple applied to the internal sizes
      */
    template<typename SCALARTYPE, unsigned int ALIGNMENT /* see forwards.h for default argument */>
    class ell_matrix
    {
      public:
        typedef viennacl::backend::mem_handle                                                              handle_type;
        typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType>   value_type;
        typedef vcl_size_t                                                                                 size_type;

        /** @brief Default constructor: empty matrix; memory backend is chosen lazily. */
        ell_matrix() : rows_(0), cols_(0), maxnnz_(0) {}

        /** @brief Constructs an empty matrix whose buffers are placed in the given context. */
        ell_matrix(viennacl::context ctx) : rows_(0), cols_(0), maxnnz_(0)
        {
            coords_.switch_active_handle_id(ctx.memory_type());
          elements_.switch_active_handle_id(ctx.memory_type());

#ifdef VIENNACL_WITH_OPENCL
          if (ctx.memory_type() == OPENCL_MEMORY)
          {
              coords_.opencl_handle().context(ctx.opencl_context());
            elements_.opencl_handle().context(ctx.opencl_context());
          }
#endif
        }

      public:
        /** @brief Number of rows, padded up to a multiple of ALIGNMENT. */
        vcl_size_t internal_size1() const { return viennacl::tools::align_to_multiple<vcl_size_t>(rows_, ALIGNMENT); }
        /** @brief Number of columns, padded up to a multiple of ALIGNMENT. */
        vcl_size_t internal_size2() const { return viennacl::tools::align_to_multiple<vcl_size_t>(cols_, ALIGNMENT); }

        /** @brief Number of matrix rows. */
        vcl_size_t size1() const { return rows_; }
        /** @brief Number of matrix columns. */
        vcl_size_t size2() const { return cols_; }

        /** @brief Per-row slot count as actually allocated (maxnnz_ padded to ALIGNMENT). */
        vcl_size_t internal_maxnnz() const {return viennacl::tools::align_to_multiple<vcl_size_t>(maxnnz_, ALIGNMENT); }
        /** @brief Maximum number of entries stored for any row (the common row width). */
        vcl_size_t maxnnz() const { return maxnnz_; }

        /** @brief Number of stored entries including per-row zero padding (rows_ * maxnnz_). */
        vcl_size_t nnz() const { return rows_ * maxnnz_; }
        /** @brief Number of allocated slots including alignment padding in both dimensions. */
        vcl_size_t internal_nnz() const { return internal_size1() * internal_maxnnz(); }

        /** @brief Memory handle holding the entry values. */
              handle_type & handle()       { return elements_; }
        const handle_type & handle() const { return elements_; }

        /** @brief Memory handle holding the column index of each stored entry. */
              handle_type & handle2()       { return coords_; }
        const handle_type & handle2() const { return coords_; }

      #if defined(_MSC_VER) && _MSC_VER < 1500          //Visual Studio 2005 needs special treatment
        template <typename CPU_MATRIX>
        friend void copy(const CPU_MATRIX & cpu_matrix, ell_matrix & gpu_matrix );
      #else
        template <typename CPU_MATRIX, typename T, unsigned int ALIGN>
        friend void copy(const CPU_MATRIX & cpu_matrix, ell_matrix<T, ALIGN> & gpu_matrix );
      #endif

      private:
        vcl_size_t rows_;    // number of rows (unpadded)
        vcl_size_t cols_;    // number of columns (unpadded)
        vcl_size_t maxnnz_;  // entries stored per row (unpadded)

        handle_type coords_;    // column indices, layout internal_size1() x internal_maxnnz(), column-major
        handle_type elements_;  // entry values, same layout as coords_
    };
00111 
00112     template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
00113     void copy(const CPU_MATRIX& cpu_matrix, ell_matrix<SCALARTYPE, ALIGNMENT>& gpu_matrix )
00114     {
00115       assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
00116       assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );
00117 
00118       if(cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0)
00119       {
00120         //determine max capacity for row
00121         vcl_size_t max_entries_per_row = 0;
00122         for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it)
00123         {
00124           vcl_size_t num_entries = 0;
00125           for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
00126           {
00127               ++num_entries;
00128           }
00129 
00130           max_entries_per_row = std::max(max_entries_per_row, num_entries);
00131         }
00132 
00133         //setup GPU matrix
00134         gpu_matrix.maxnnz_ = max_entries_per_row;
00135         gpu_matrix.rows_ = cpu_matrix.size1();
00136         gpu_matrix.cols_ = cpu_matrix.size2();
00137 
00138         vcl_size_t nnz = gpu_matrix.internal_nnz();
00139 
00140         viennacl::backend::typesafe_host_array<unsigned int> coords(gpu_matrix.handle2(), nnz);
00141         std::vector<SCALARTYPE> elements(nnz, 0);
00142 
00143         // std::cout << "ELL_MATRIX copy " << gpu_matrix.maxnnz_ << " " << gpu_matrix.rows_ << " " << gpu_matrix.cols_ << " "
00144         //             << gpu_matrix.internal_maxnnz() << "\n";
00145 
00146         for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it)
00147         {
00148           vcl_size_t data_index = 0;
00149 
00150           for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
00151           {
00152             coords.set(gpu_matrix.internal_size1() * data_index + col_it.index1(), col_it.index2());
00153             elements[gpu_matrix.internal_size1() * data_index + col_it.index1()] = *col_it;
00154             //std::cout << *col_it << "\n";
00155               data_index++;
00156           }
00157         }
00158 
00159         viennacl::backend::memory_create(gpu_matrix.handle2(), coords.raw_size(),                   traits::context(gpu_matrix.handle2()), coords.get());
00160         viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(SCALARTYPE) * elements.size(), traits::context(gpu_matrix.handle()), &(elements[0]));
00161       }
00162     }
00163 
00164     template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
00165     void copy(const ell_matrix<SCALARTYPE, ALIGNMENT>& gpu_matrix, CPU_MATRIX& cpu_matrix)
00166     {
00167       assert( (viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
00168       assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );
00169 
00170       if(gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0)
00171       {
00172         std::vector<SCALARTYPE> elements(gpu_matrix.internal_nnz());
00173         viennacl::backend::typesafe_host_array<unsigned int> coords(gpu_matrix.handle2(), gpu_matrix.internal_nnz());
00174 
00175         viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE) * elements.size(), &(elements[0]));
00176         viennacl::backend::memory_read(gpu_matrix.handle2(), 0, coords.raw_size(), coords.get());
00177 
00178         for(vcl_size_t row = 0; row < gpu_matrix.size1(); row++)
00179         {
00180           for(vcl_size_t ind = 0; ind < gpu_matrix.internal_maxnnz(); ind++)
00181           {
00182             vcl_size_t offset = gpu_matrix.internal_size1() * ind + row;
00183 
00184             if(elements[offset] == static_cast<SCALARTYPE>(0.0))
00185                 continue;
00186 
00187             if(coords[offset] >= gpu_matrix.size2())
00188             {
00189                 std::cerr << "ViennaCL encountered invalid data " << offset << " " << ind << " " << row << " " << coords[offset] << " " << gpu_matrix.size2() << std::endl;
00190                 return;
00191             }
00192 
00193             cpu_matrix(row, coords[offset]) = elements[offset];
00194           }
00195         }
00196       }
00197     }
00198 
00199 
00200     //
00201     // Specify available operations:
00202     //
00203 
00206     namespace linalg
00207     {
00208       namespace detail
00209       {
00210         // x = A * y
00211         template <typename T, unsigned int A>
00212         struct op_executor<vector_base<T>, op_assign, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> >
00213         {
00214             static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
00215             {
00216               // check for the special case x = A * x
00217               if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
00218               {
00219                 viennacl::vector<T> temp(lhs);
00220                 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
00221                 lhs = temp;
00222               }
00223               else
00224                 viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
00225             }
00226         };
00227 
00228         template <typename T, unsigned int A>
00229         struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> >
00230         {
00231             static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
00232             {
00233               viennacl::vector<T> temp(lhs);
00234               viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
00235               lhs += temp;
00236             }
00237         };
00238 
00239         template <typename T, unsigned int A>
00240         struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> >
00241         {
00242             static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
00243             {
00244               viennacl::vector<T> temp(lhs);
00245               viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
00246               lhs -= temp;
00247             }
00248         };
00249 
00250 
00251         // x = A * vec_op
00252         template <typename T, unsigned int A, typename LHS, typename RHS, typename OP>
00253         struct op_executor<vector_base<T>, op_assign, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
00254         {
00255             static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
00256             {
00257               viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
00258               viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs);
00259             }
00260         };
00261 
00262         // x = A * vec_op
00263         template <typename T, unsigned int A, typename LHS, typename RHS, typename OP>
00264         struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
00265         {
00266             static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
00267             {
00268               viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
00269               viennacl::vector<T> temp_result(lhs);
00270               viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
00271               lhs += temp_result;
00272             }
00273         };
00274 
00275         // x = A * vec_op
00276         template <typename T, unsigned int A, typename LHS, typename RHS, typename OP>
00277         struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
00278         {
00279             static void apply(vector_base<T> & lhs, vector_expression<const ell_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
00280             {
00281               viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
00282               viennacl::vector<T> temp_result(lhs);
00283               viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
00284               lhs -= temp_result;
00285             }
00286         };
00287 
00288      } // namespace detail
00289    } // namespace linalg
00290 
00292 }
00293 
00294 #endif
00295 
00296