ViennaCL - The Vienna Computing Library  1.5.2
viennacl/matrix_proxy.hpp
#ifndef VIENNACL_MATRIX_PROXY_HPP_
#define VIENNACL_MATRIX_PROXY_HPP_

/* =========================================================================
   Copyright (c) 2010-2014, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/** @file viennacl/matrix_proxy.hpp
    @brief Proxy classes for submatrices (ranges and slices) of dense matrices.
*/

#include "viennacl/forwards.h"
#include "viennacl/range.hpp"
#include "viennacl/slice.hpp"
#include "viennacl/matrix.hpp"
#include "viennacl/linalg/matrix_operations.hpp"

namespace viennacl
{

  /** @brief Class for representing non-strided submatrices of a bigger matrix A.
    *
    * In MATLAB notation, this could for example refer to A(3:8, 6:10).
    */
  template <typename MatrixType>
  class matrix_range : public matrix_base<typename MatrixType::cpu_value_type, typename MatrixType::orientation_functor>
  {
      typedef matrix_base<typename MatrixType::cpu_value_type,
                          typename MatrixType::orientation_functor>    base_type;
      typedef matrix_range<MatrixType>                                 self_type;

    public:
      typedef typename MatrixType::orientation_category       orientation_category;

      typedef typename MatrixType::value_type     value_type;
      typedef typename viennacl::result_of::cpu_value_type<value_type>::type    cpu_value_type;
      typedef range::size_type                    size_type;
      typedef range::difference_type              difference_type;
      typedef value_type                          reference;
      typedef const value_type &                  const_reference;

      matrix_range(MatrixType & A,
                   range const & row_range,
                   range const & col_range) : base_type(A.handle(),
                                                        row_range.size(), row_range.start(), 1, A.internal_size1(),
                                                        col_range.size(), col_range.start(), 1, A.internal_size2()) {}

      using base_type::operator=;

  };
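
  // Illustrative usage sketch (not part of the header; 'M' is a hypothetical matrix):
  //
  //   viennacl::matrix<float> M(10, 10);
  //   viennacl::range rows(2, 6);   // half-open interval: row indices 2,3,4,5
  //   viennacl::range cols(3, 8);   // half-open interval: column indices 3,...,7
  //   viennacl::matrix_range<viennacl::matrix<float> > sub(M, rows, cols);
  //   // 'sub' is a non-owning 4x5 view of M; writing to it modifies M directly.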

  //
  // Copy from a host matrix to a matrix_range (CPU -> GPU)
  //

  //row_major:
  template <typename CPU_MATRIX, typename SCALARTYPE>
  void copy(const CPU_MATRIX & cpu_matrix,
            matrix_range<matrix<SCALARTYPE, row_major, 1> > & gpu_matrix_range )
  {
    assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
           && (cpu_matrix.size2() == gpu_matrix_range.size2())
           && bool("Matrix size mismatch!"));

    //A block write would also touch buffer entries outside the range, so copy
    //row by row unless the range spans full internal rows:
    if ( gpu_matrix_range.start2() != 0 || gpu_matrix_range.size2() != gpu_matrix_range.internal_size2())
    {
      std::vector<SCALARTYPE> entries(gpu_matrix_range.size2());

      //copy each row of the range separately:
      for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
      {
        for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
          entries[j] = cpu_matrix(i,j);

        vcl_size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.internal_size2() + gpu_matrix_range.start2();
        vcl_size_t num_entries = gpu_matrix_range.size2();
        viennacl::backend::memory_write(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));
      }
    }
    else
    {
      //the full block can be written in one transfer:
      std::vector<SCALARTYPE> entries(gpu_matrix_range.size1()*gpu_matrix_range.internal_size2());

      for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
        for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
          entries[i*gpu_matrix_range.internal_size2() + j] = cpu_matrix(i,j);

      vcl_size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.internal_size2();
      vcl_size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.internal_size2();
      viennacl::backend::memory_write(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));
    }
  }

  //column_major:
  template <typename CPU_MATRIX, typename SCALARTYPE>
  void copy(const CPU_MATRIX & cpu_matrix,
            matrix_range<matrix<SCALARTYPE, column_major, 1> > & gpu_matrix_range )
  {
    assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
           && (cpu_matrix.size2() == gpu_matrix_range.size2())
           && bool("Matrix size mismatch!"));

    //A block write would also touch buffer entries outside the range, so copy
    //column by column unless the range spans full internal columns:
    if ( gpu_matrix_range.start1() != 0 || gpu_matrix_range.size1() != gpu_matrix_range.internal_size1())
    {
      std::vector<SCALARTYPE> entries(gpu_matrix_range.size1());

      //copy each column of the range separately:
      for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
      {
        for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
          entries[i] = cpu_matrix(i,j);

        vcl_size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.internal_size1() + gpu_matrix_range.start1();
        vcl_size_t num_entries = gpu_matrix_range.size1();
        viennacl::backend::memory_write(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));
      }
    }
    else
    {
      //the full block can be written in one transfer:
      std::vector<SCALARTYPE> entries(gpu_matrix_range.internal_size1()*gpu_matrix_range.size2());

      for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
        for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
          entries[i + j*gpu_matrix_range.internal_size1()] = cpu_matrix(i,j);

      vcl_size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.internal_size1();
      vcl_size_t num_entries = gpu_matrix_range.internal_size1() * gpu_matrix_range.size2();
      viennacl::backend::memory_write(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));
    }
  }
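
  // Illustrative host-to-device transfer into a range (a sketch; assumes a Boost.uBLAS
  // host matrix, but any type with size1(), size2() and operator()(i,j) works):
  //
  //   boost::numeric::ublas::matrix<float> host_block(4, 5);
  //   // ... fill host_block ...
  //   viennacl::matrix<float> M(10, 10);
  //   viennacl::matrix_range<viennacl::matrix<float> > sub(M, viennacl::range(2, 6), viennacl::range(3, 8));
  //   viennacl::copy(host_block, sub);   // writes the 4x5 block to M(2:5, 3:7) on the device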


  //
  // Copy from a matrix_range to a host matrix (GPU -> CPU)
  //

  //row_major:
  template <typename CPU_MATRIX, typename SCALARTYPE>
  void copy(matrix_range<matrix<SCALARTYPE, row_major, 1> > const & gpu_matrix_range,
            CPU_MATRIX & cpu_matrix)
  {
    assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
           && (cpu_matrix.size2() == gpu_matrix_range.size2())
           && bool("Matrix size mismatch!"));

    if ( gpu_matrix_range.start2() != 0)
    {
      std::vector<SCALARTYPE> entries(gpu_matrix_range.size2());

      //copy each row of the range separately:
      for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
      {
        vcl_size_t start_offset = (gpu_matrix_range.start1() + i) * gpu_matrix_range.internal_size2() + gpu_matrix_range.start2();
        vcl_size_t num_entries = gpu_matrix_range.size2();
        viennacl::backend::memory_read(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));

        for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
          cpu_matrix(i,j) = entries[j];
      }
    }
    else
    {
      //the full block can be read in one transfer:
      std::vector<SCALARTYPE> entries(gpu_matrix_range.size1()*gpu_matrix_range.internal_size2());

      vcl_size_t start_offset = gpu_matrix_range.start1() * gpu_matrix_range.internal_size2();
      vcl_size_t num_entries = gpu_matrix_range.size1() * gpu_matrix_range.internal_size2();
      viennacl::backend::memory_read(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));

      for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
        for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
          cpu_matrix(i,j) = entries[i*gpu_matrix_range.internal_size2() + j];
    }
  }

  //column_major:
  template <typename CPU_MATRIX, typename SCALARTYPE>
  void copy(matrix_range<matrix<SCALARTYPE, column_major, 1> > const & gpu_matrix_range,
            CPU_MATRIX & cpu_matrix)
  {
    assert( (cpu_matrix.size1() == gpu_matrix_range.size1())
           && (cpu_matrix.size2() == gpu_matrix_range.size2())
           && bool("Matrix size mismatch!"));

    if ( gpu_matrix_range.start1() != 0)
    {
      std::vector<SCALARTYPE> entries(gpu_matrix_range.size1());

      //copy each column of the range separately:
      for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
      {
        vcl_size_t start_offset = (gpu_matrix_range.start2() + j) * gpu_matrix_range.internal_size1() + gpu_matrix_range.start1();
        vcl_size_t num_entries = gpu_matrix_range.size1();
        viennacl::backend::memory_read(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));

        for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
          cpu_matrix(i,j) = entries[i];
      }
    }
    else
    {
      //the full block can be read in one transfer:
      std::vector<SCALARTYPE> entries(gpu_matrix_range.internal_size1()*gpu_matrix_range.size2());

      vcl_size_t start_offset = gpu_matrix_range.start2() * gpu_matrix_range.internal_size1();
      vcl_size_t num_entries = gpu_matrix_range.internal_size1() * gpu_matrix_range.size2();
      viennacl::backend::memory_read(gpu_matrix_range.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));

      for (vcl_size_t i=0; i < gpu_matrix_range.size1(); ++i)
        for (vcl_size_t j=0; j < gpu_matrix_range.size2(); ++j)
          cpu_matrix(i,j) = entries[i + j*gpu_matrix_range.internal_size1()];
    }
  }
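
  // Illustrative device-to-host transfer (sketch, same assumptions as above):
  //
  //   boost::numeric::ublas::matrix<float> host_block(4, 5);
  //   viennacl::copy(sub, host_block);   // reads M(2:5, 3:7) back from the device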

  //
  // Convenience functions
  //
  template <typename MatrixType>
  matrix_range<MatrixType> project(MatrixType & A, viennacl::range const & r1, viennacl::range const & r2)
  {
    assert(r1.size() <= A.size1() && r2.size() <= A.size2() && bool("Size of range invalid!"));

    return matrix_range<MatrixType>(A, r1, r2);
  }
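
  // Illustrative sketch: project() saves spelling out the proxy type explicitly:
  //
  //   viennacl::matrix<float> M(10, 10);
  //   viennacl::matrix_range<viennacl::matrix<float> > sub
  //       = viennacl::project(M, viennacl::range(0, 4), viennacl::range(2, 6));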


  template <typename MatrixType>
  matrix_range<MatrixType> project(matrix_range<MatrixType> & A, viennacl::range const & r1, viennacl::range const & r2)
  {
    assert(r1.size() <= A.size1() && r2.size() <= A.size2() && bool("Size of range invalid!"));

    return matrix_range<MatrixType>(A,
                                    viennacl::range(A.start1() + r1.start(), A.start1() + r1.start() + r1.size()),
                                    viennacl::range(A.start2() + r2.start(), A.start2() + r2.start() + r2.size())
                                   );
  }
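
  // Index arithmetic for the overload above (illustrative): offsets compose, so a
  // range of a range still refers directly into the underlying matrix. E.g. if the
  // outer view covers rows [2, 8) of M, projecting rows [1, 3) from it yields a view
  // of rows [3, 5) of M: start1 = 2 + 1 = 3, size1 = 2.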


  //
  //  matrix_slice
  //

  /** @brief Class for representing strided submatrices of a bigger matrix A.
    *
    * In MATLAB notation, this could for example refer to A(3:2:8, 6:3:15).
    */
  template <typename MatrixType>
  class matrix_slice : public matrix_base<typename MatrixType::cpu_value_type, typename MatrixType::orientation_functor>
  {
      typedef matrix_base<typename MatrixType::cpu_value_type,
                          typename MatrixType::orientation_functor>    base_type;
      typedef matrix_slice<MatrixType>                                 self_type;

    public:
      typedef typename MatrixType::orientation_category       orientation_category;

      typedef typename MatrixType::value_type     value_type;
      typedef typename viennacl::result_of::cpu_value_type<value_type>::type    cpu_value_type;
      typedef range::size_type                    size_type;
      typedef range::difference_type              difference_type;
      typedef value_type                          reference;
      typedef const value_type &                  const_reference;

      matrix_slice(MatrixType & A,
                   slice const & row_slice,
                   slice const & col_slice) : base_type(A.handle(),
                                                        row_slice.size(), row_slice.start(), row_slice.stride(), A.internal_size1(),
                                                        col_slice.size(), col_slice.start(), col_slice.stride(), A.internal_size2()) {}

      using base_type::operator=;

  };
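
  // Illustrative usage sketch (hypothetical 12x12 matrix):
  //
  //   viennacl::matrix<float> M(12, 12);
  //   viennacl::slice rows(2, 2, 4);   // start 2, stride 2, size 4 -> rows 2,4,6,8
  //   viennacl::slice cols(1, 3, 3);   // start 1, stride 3, size 3 -> columns 1,4,7
  //   viennacl::matrix_slice<viennacl::matrix<float> > sub(M, rows, cols);
  //   // 'sub' is a non-owning 4x3 strided view of M.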


  //
  // Copy from a host matrix to a matrix_slice (CPU -> GPU)
  //

  //row_major:
  template <typename CPU_MATRIX, typename SCALARTYPE>
  void copy(const CPU_MATRIX & cpu_matrix,
            matrix_slice<matrix<SCALARTYPE, row_major, 1> > & gpu_matrix_slice )
  {
    assert( (cpu_matrix.size1() == gpu_matrix_slice.size1())
           && (cpu_matrix.size2() == gpu_matrix_slice.size2())
           && bool("Matrix size mismatch!"));

    if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size2() > 0) )
    {
      //number of buffer entries spanned by one row of the slice:
      vcl_size_t num_entries = (gpu_matrix_slice.size2() - 1) * gpu_matrix_slice.stride2() + 1;

      std::vector<SCALARTYPE> entries(num_entries);

      //read-modify-write each row of the slice separately, so that entries
      //between the strided elements are preserved:
      for (vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
      {
        vcl_size_t start_offset = (gpu_matrix_slice.start1() + i * gpu_matrix_slice.stride1()) * gpu_matrix_slice.internal_size2() + gpu_matrix_slice.start2();
        viennacl::backend::memory_read(gpu_matrix_slice.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));

        for (vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
          entries[j * gpu_matrix_slice.stride2()] = cpu_matrix(i,j);

        viennacl::backend::memory_write(gpu_matrix_slice.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));
      }
    }
  }

  //column_major:
  template <typename CPU_MATRIX, typename SCALARTYPE>
  void copy(const CPU_MATRIX & cpu_matrix,
            matrix_slice<matrix<SCALARTYPE, column_major, 1> > & gpu_matrix_slice )
  {
    assert( (cpu_matrix.size1() == gpu_matrix_slice.size1())
           && (cpu_matrix.size2() == gpu_matrix_slice.size2())
           && bool("Matrix size mismatch!"));

    if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size2() > 0) )
    {
      //number of buffer entries spanned by one column of the slice:
      vcl_size_t num_entries = (gpu_matrix_slice.size1() - 1) * gpu_matrix_slice.stride1() + 1;

      std::vector<SCALARTYPE> entries(num_entries);

      //read-modify-write each column of the slice separately, so that entries
      //between the strided elements are preserved:
      for (vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
      {
        vcl_size_t start_offset = gpu_matrix_slice.start1() + (gpu_matrix_slice.start2() + j * gpu_matrix_slice.stride2()) * gpu_matrix_slice.internal_size1();

        viennacl::backend::memory_read(gpu_matrix_slice.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));

        for (vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
          entries[i * gpu_matrix_slice.stride1()] = cpu_matrix(i,j);

        viennacl::backend::memory_write(gpu_matrix_slice.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));
      }
    }
  }
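
  // Illustrative host-to-device transfer into a slice (sketch, same host-matrix
  // assumptions as for ranges above):
  //
  //   boost::numeric::ublas::matrix<float> host_block(4, 3);
  //   // ... fill host_block ...
  //   viennacl::copy(host_block, sub);   // scatters host_block to rows 2,4,6,8 and columns 1,4,7 of M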


  //
  // Copy from a matrix_slice to a host matrix (GPU -> CPU)
  //

  //row_major:
  template <typename CPU_MATRIX, typename SCALARTYPE>
  void copy(matrix_slice<matrix<SCALARTYPE, row_major, 1> > const & gpu_matrix_slice,
            CPU_MATRIX & cpu_matrix)
  {
    assert( (cpu_matrix.size1() == gpu_matrix_slice.size1())
           && (cpu_matrix.size2() == gpu_matrix_slice.size2())
           && bool("Matrix size mismatch!"));

    if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size2() > 0) )
    {
      //number of buffer entries spanned by one row of the slice:
      vcl_size_t num_entries = (gpu_matrix_slice.size2() - 1) * gpu_matrix_slice.stride2() + 1;

      std::vector<SCALARTYPE> entries(num_entries);

      //read each row of the slice separately:
      for (vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
      {
        vcl_size_t start_offset = (gpu_matrix_slice.start1() + i * gpu_matrix_slice.stride1()) * gpu_matrix_slice.internal_size2() + gpu_matrix_slice.start2();

        viennacl::backend::memory_read(gpu_matrix_slice.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));

        for (vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
          cpu_matrix(i,j) = entries[j * gpu_matrix_slice.stride2()];
      }
    }
  }

  //column_major:
  template <typename CPU_MATRIX, typename SCALARTYPE>
  void copy(matrix_slice<matrix<SCALARTYPE, column_major, 1> > const & gpu_matrix_slice,
            CPU_MATRIX & cpu_matrix)
  {
    assert( (cpu_matrix.size1() == gpu_matrix_slice.size1())
           && (cpu_matrix.size2() == gpu_matrix_slice.size2())
           && bool("Matrix size mismatch!"));

    if ( (gpu_matrix_slice.size1() > 0) && (gpu_matrix_slice.size2() > 0) )
    {
      //number of buffer entries spanned by one column of the slice:
      vcl_size_t num_entries = (gpu_matrix_slice.size1() - 1) * gpu_matrix_slice.stride1() + 1;

      std::vector<SCALARTYPE> entries(num_entries);

      //read each column of the slice separately:
      for (vcl_size_t j=0; j < gpu_matrix_slice.size2(); ++j)
      {
        vcl_size_t start_offset = gpu_matrix_slice.start1() + (gpu_matrix_slice.start2() + j * gpu_matrix_slice.stride2()) * gpu_matrix_slice.internal_size1();

        viennacl::backend::memory_read(gpu_matrix_slice.handle(), sizeof(SCALARTYPE)*start_offset, sizeof(SCALARTYPE)*num_entries, &(entries[0]));

        for (vcl_size_t i=0; i < gpu_matrix_slice.size1(); ++i)
          cpu_matrix(i,j) = entries[i * gpu_matrix_slice.stride1()];
      }
    }
  }
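
  // Illustrative device-to-host transfer from a slice (sketch):
  //
  //   boost::numeric::ublas::matrix<float> host_block(4, 3);
  //   viennacl::copy(sub, host_block);   // gathers the strided entries back into host_block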

  //
  // Convenience functions
  //
  template <typename MatrixType>
  matrix_slice<MatrixType> project(MatrixType & A, viennacl::slice const & r1, viennacl::slice const & r2)
  {
    assert(r1.size() <= A.size1() && r2.size() <= A.size2() && bool("Size of slice invalid!"));

    return matrix_slice<MatrixType>(A, r1, r2);
  }
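
  // Illustrative sketch:
  //
  //   viennacl::matrix<float> M(12, 12);
  //   viennacl::matrix_slice<viennacl::matrix<float> > sub
  //       = viennacl::project(M, viennacl::slice(2, 2, 4), viennacl::slice(1, 3, 3));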

  template <typename MatrixType>
  matrix_slice<MatrixType> project(matrix_range<MatrixType> & A, viennacl::slice const & r1, viennacl::slice const & r2)
  {
    assert(r1.size() <= A.size1() && r2.size() <= A.size2() && bool("Size of slice invalid!"));

    return matrix_slice<MatrixType>(A,
                                    viennacl::slice(A.start1() + r1.start(), r1.stride(), r1.size()),
                                    viennacl::slice(A.start2() + r2.start(), r2.stride(), r2.size())
                                   );
  }

  template <typename MatrixType>
  matrix_slice<MatrixType> project(matrix_slice<MatrixType> & A, viennacl::slice const & r1, viennacl::slice const & r2)
  {
    assert(r1.size() <= A.size1() && r2.size() <= A.size2() && bool("Size of slice invalid!"));

    //note: the starts and strides of r1, r2 are relative to A,
    //so both have to be scaled by the strides of A:
    return matrix_slice<MatrixType>(A,
                                    viennacl::slice(A.start1() + A.stride1() * r1.start(), A.stride1() * r1.stride(), r1.size()),
                                    viennacl::slice(A.start2() + A.stride2() * r2.start(), A.stride2() * r2.stride(), r2.size())
                                   );
  }
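
  // Illustrative index arithmetic for the overload above: strides multiply and starts
  // scale by the parent's stride. E.g. if A takes every 3rd row of M starting at row 1,
  // then r1 = slice(2, 2, 2) selects A-rows 2 and 4, i.e. M-rows 1 + 3*2 = 7 and 7 + 3*2 = 13.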

  // TODO: Allow mix of range/slice

}

#endif