ViennaCL - The Vienna Computing Library  1.5.2
viennacl/matrix.hpp
Go to the documentation of this file.
00001 #ifndef VIENNACL_MATRIX_HPP_
00002 #define VIENNACL_MATRIX_HPP_
00003 
00004 /* =========================================================================
00005    Copyright (c) 2010-2014, Institute for Microelectronics,
00006                             Institute for Analysis and Scientific Computing,
00007                             TU Wien.
00008    Portions of this software are copyright by UChicago Argonne, LLC.
00009 
00010                             -----------------
00011                   ViennaCL - The Vienna Computing Library
00012                             -----------------
00013 
00014    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
00015 
00016    (A list of authors and contributors can be found in the PDF manual)
00017 
00018    License:         MIT (X11), see file LICENSE in the base directory
00019 ============================================================================= */
00020 
00025 #include "viennacl/forwards.h"
00026 #include "viennacl/scalar.hpp"
00027 #include "viennacl/vector.hpp"
00028 #include "viennacl/linalg/matrix_operations.hpp"
00029 #include "viennacl/linalg/sparse_matrix_operations.hpp"
00030 #include "viennacl/tools/tools.hpp"
00031 #include "viennacl/tools/matrix_size_deducer.hpp"
00032 #include "viennacl/meta/result_of.hpp"
00033 #include "viennacl/meta/enable_if.hpp"
00034 //#include "viennacl/rand/utils.hpp"
00035 #include "viennacl/traits/handle.hpp"
00036 
00037 namespace viennacl
00038 {
00043   template<typename SCALARTYPE>
00044   class implicit_matrix_base
        // Base class for matrices that are described by a single value instead of stored entries:
        // either every entry equals value(), or (when diag() is true) only the diagonal entries
        // equal value() and all off-diagonal entries are zero.
00045   {
00046     protected:
00047       typedef vcl_size_t        size_type;
            // value.first is the entry value; value.second is the flag reported by is_value_static().
00048       implicit_matrix_base(size_type size1, size_type size2, std::pair<SCALARTYPE, bool> value, bool diag) : size1_(size1), size2_(size2), value_(value), diag_(diag), zero_(0) { }
00049     public:
00050       typedef SCALARTYPE const & const_reference;
00051       typedef SCALARTYPE cpu_value_type;
00052 
00053       size_type size1() const { return size1_; }
00054       size_type size2() const { return size2_; }
00055 
00056       SCALARTYPE  value() const { return value_.first; }
00057       bool is_value_static( ) const { return value_.second; }
00058       bool diag() const { return diag_; }
00059 
00060       const_reference operator()(size_type i, size_type j) const {
              // Off-diagonal entries of a diagonal matrix are served from the stored zero_ member.
              // A conditional expression mixing value_.first with the literal 0 would create a
              // temporary, and the returned const_reference would dangle.
00061         if(diag_ && i != j) return zero_;
00062         return value_.first;
00063       }
00064 
00065     protected:
00066       size_type size1_;
00067       size_type size2_;
00068       std::pair<SCALARTYPE, bool> value_;
00069       bool diag_;
            SCALARTYPE zero_;   // backing storage for the off-diagonal zero returned by reference
00070   };
00071 
00072   //
00073   // Initializer types
00074   //
00076   template <typename SCALARTYPE>
00077   class identity_matrix
        // Initializer type describing an s-by-s identity matrix. It stores only the size, the two
        // possible entry values (1 and 0) and a viennacl::context; no matrix buffer is allocated here.
00078   {
00079     public:
00080       typedef vcl_size_t         size_type;
00081       typedef SCALARTYPE const & const_reference;
00082 
00083       identity_matrix(size_type s, viennacl::context ctx = viennacl::context())
              : size_(s),
                diag_(1),
                off_diag_(0),
                ctx_(ctx)
            {}
00084 
            // The matrix is square, so both dimensions report the same value.
00085       size_type size1() const { return size_; }
00086       size_type size2() const { return size_; }
00087       const_reference operator()(size_type i, size_type j) const
            {
              // Both results are members, so the returned reference stays valid.
              if (i == j)
                return diag_;
              return off_diag_;
            }
00088 
00089       viennacl::context context() const { return ctx_; }
00090 
00091     private:
00092       size_type size_;
00093       SCALARTYPE diag_;
00094       SCALARTYPE off_diag_;
00095       viennacl::context ctx_;
00096   };
00097 
00098 
00100   template <typename SCALARTYPE>
00101   class zero_matrix
        // Initializer type describing an s1-by-s2 matrix whose entries are all zero.
        // It stores only the dimensions, one zero value and a viennacl::context.
00102   {
00103     public:
00104       typedef vcl_size_t         size_type;
00105       typedef SCALARTYPE const & const_reference;
00106 
00107       zero_matrix(size_type s1, size_type s2, viennacl::context ctx = viennacl::context()) : size1_(s1), size2_(s2), val_(0), ctx_(ctx) {}
00108 
00109       size_type size1() const { return size1_; }
00110       size_type size2() const { return size2_; }
            // The indices are ignored: every entry refers to the same stored zero.
00111       const_reference operator()(size_type /*i*/, size_type /*j*/) const { return val_; }
00112 
00113       viennacl::context context() const { return ctx_; }
00114 
00115     private:
00116       size_type size1_;
00117       size_type size2_;
00118       SCALARTYPE val_;
00119       viennacl::context ctx_;
00120   };
00121 
00122 
00124   template <typename SCALARTYPE>
00125   class scalar_matrix
        // Initializer type describing an s1-by-s2 matrix in which every entry equals 'val'.
        // It stores only the dimensions, a copy of the value and a viennacl::context.
00126   {
00127     public:
00128       typedef vcl_size_t         size_type;
00129       typedef SCALARTYPE const & const_reference;
00130 
            // 'val' is copied into value_, so the caller's object need not outlive this one.
00131       scalar_matrix(size_type s1, size_type s2, const_reference val, viennacl::context ctx = viennacl::context()) : size1_(s1), size2_(s2), value_(val), ctx_(ctx) {}
00132 
00133       size_type size1() const { return size1_; }
00134       size_type size2() const { return size2_; }
            // The indices are ignored: every entry refers to the same stored value.
00135       const_reference operator()(size_type /*i*/, size_type /*j*/) const { return value_; }
00136 
00137       viennacl::context context() const { return ctx_; }
00138 
00139     private:
00140       size_type size1_;
00141       size_type size2_;
00142       SCALARTYPE value_;
00143       viennacl::context ctx_;
00144   };
00145 
00146 
00147 
00148 //#ifdef VIENNACL_WITH_OPENCL
00149 //  template<class SCALARTYPE, class DISTRIBUTION>
00150 //  rand::random_matrix_t<SCALARTYPE, DISTRIBUTION> random_matrix(unsigned int size1, unsigned int size2, DISTRIBUTION const & distribution){
00151 //      return rand::random_matrix_t<SCALARTYPE,DISTRIBUTION>(size1,size2,distribution);
00152 //  }
00153 //#endif
00154 
00161   template <typename LHS, typename RHS, typename OP>
00162   class matrix_expression
        // Proxy object recording the two operands of an operation identified by the tag type OP.
        // Nothing is computed here; the proxy is consumed by assignment operators such as
        // matrix_base::operator=(matrix_expression const &).
00163   {
            // Storage types for the operands. NOTE(review): judging by the trait name these are
            // references for non-scalar operands and plain values for scalars - confirm in result_of.hpp.
00164       typedef typename viennacl::result_of::reference_if_nonscalar<LHS>::type     lhs_reference_type;
00165       typedef typename viennacl::result_of::reference_if_nonscalar<RHS>::type     rhs_reference_type;
00166 
00167     public:
00168       typedef vcl_size_t       size_type;
00169 
00170       matrix_expression(LHS & lhs, RHS & rhs) : lhs_(lhs), rhs_(rhs) {}
00171 
            // Access to the left and right operand.
00174       LHS & lhs() const { return lhs_; }
00177       RHS & rhs() const { return rhs_; }
00178 
            // Result dimensions are delegated to MATRIX_SIZE_DEDUCER for this operand/operation combination.
00180       vcl_size_t size1() const { return viennacl::tools::MATRIX_SIZE_DEDUCER<LHS, RHS, OP>::size1(lhs_, rhs_); }
00181       vcl_size_t size2() const { return viennacl::tools::MATRIX_SIZE_DEDUCER<LHS, RHS, OP>::size2(lhs_, rhs_); }
00182 
00183     private:
00185       lhs_reference_type lhs_;
00187       rhs_reference_type rhs_;
00188   };
00189 
00190 
00192   struct row_iteration {};  // tag type: ROWCOL argument of matrix_iterator used for matrix_base::iterator1
00193 
00195   struct col_iteration {};  // tag type: ROWCOL argument of matrix_iterator used for matrix_base::iterator2
00196 
00197   //STL-like iterator. TODO: STL-compliance...
00199   template <typename ROWCOL, typename MATRIXTYPE>
00200   class matrix_iterator
        // Iterator over the entries of a matrix. It holds a reference to the matrix plus a
        // (row, column) position; ROWCOL (row_iteration or col_iteration) selects how
        // MATRIX_ITERATOR_INCREMENTER advances that position.
        // All observers are const-qualified so that const iterators can be read and compared.
00201   {
00202       typedef matrix_iterator<ROWCOL, MATRIXTYPE>    self_type;
00203     public:
00204       typedef typename MATRIXTYPE::value_type       value_type;
00205 
            // The iterator only references 'mat'; the matrix must outlive the iterator.
00206       matrix_iterator(MATRIXTYPE & mat,
00207                       vcl_size_t start_row,
00208                       vcl_size_t start_col) : mat_(mat), row_(start_row), col_(start_col) {}
00209 
            // Returns the entry at the current position by value.
00210       value_type operator*(void) const { return mat_(row_, col_); }
00211       self_type & operator++(void) { viennacl::tools::MATRIX_ITERATOR_INCREMENTER<ROWCOL, MATRIXTYPE>::apply(mat_, row_, col_); return *this; }
00212       self_type operator++(int) { self_type tmp = *this; ++(*this); return tmp; }
00213 
            // Only the position is compared, not the identity of the referenced matrix.
00214       bool operator==(self_type const & other) const { return (row_ == other.row_) && (col_ == other.col_); }
00215       bool operator!=(self_type const & other) const { return !(*this == other); }
00216 
            // Current row and column index.
00217       vcl_size_t index1() const { return row_; }
00218       vcl_size_t index2() const { return col_; }
00219 
            // Access to the matrix being iterated over.
00220       MATRIXTYPE & operator()(void) const { return mat_; }
00221 
00222     private:
00223       MATRIXTYPE & mat_;
00224       vcl_size_t row_;
00225       vcl_size_t col_;
00226   };
00227 
00228 
00235   template <class SCALARTYPE, typename F, typename SizeType /* see forwards.h for default type */, typename DistanceType /* see forwards.h for default type */>
00236   class matrix_base
00237   {
00238       typedef matrix_base<SCALARTYPE, F, SizeType, DistanceType>          self_type;
00239     public:
00240 
00241       typedef matrix_iterator<row_iteration, self_type >   iterator1;
00242       typedef matrix_iterator<col_iteration, self_type >   iterator2;
00243       typedef scalar<SCALARTYPE>                                                  value_type;
00244       typedef SCALARTYPE                                                          cpu_value_type;
00245       typedef SizeType                                                            size_type;
00246       typedef DistanceType                                                        difference_type;
00247       typedef viennacl::backend::mem_handle                                       handle_type;
00248       typedef F                                                                   orientation_functor;
00249       typedef typename F::orientation_category                                    orientation_category;
00250 
00251       static const size_type alignment = 128;
00252 
00253 
00255       explicit matrix_base() : size1_(0), size2_(0), start1_(0), start2_(0), stride1_(1), stride2_(1), internal_size1_(0), internal_size2_(0) {}  // empty 0x0 matrix with unit strides; no memory is allocated
00256 
00263       explicit matrix_base(size_type rows, size_type columns, viennacl::context ctx = viennacl::context())
00264           : size1_(rows), size2_(columns), start1_(0), start2_(0), stride1_(1), stride2_(1),
00265             internal_size1_(viennacl::tools::align_to_multiple<size_type>(rows, alignment)),
00266             internal_size2_(viennacl::tools::align_to_multiple<size_type>(columns, alignment))
00267       {
              // Creates a rows-by-columns matrix in the memory context 'ctx'.
              // The internal (padded) dimensions come from align_to_multiple with 'alignment' (128);
              // presumably each dimension is rounded up to a multiple of it - confirm in tools.hpp.
              // Memory is created and cleared only when both dimensions are positive.
00268         if (rows > 0 && columns > 0)
00269         {
00270           viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), ctx);
00271           clear();
00272         }
00273       }
00274 
00275 
00277       explicit matrix_base(viennacl::backend::mem_handle & h,
00278                            size_type mat_size1, size_type mat_start1, difference_type mat_stride1, size_type mat_internal_size1,
00279                            size_type mat_size2, size_type mat_start2, difference_type mat_stride2, size_type mat_internal_size2)
            // Builds a matrix on top of an existing memory handle with a caller-supplied layout
            // (size, start offset, stride and internal size per dimension). Nothing is allocated or
            // cleared here; elements_ is copy-constructed from h.
            // NOTE(review): presumably the copy shares the buffer with h - confirm in mem_handle.
00280         : size1_(mat_size1), size2_(mat_size2),
00281           start1_(mat_start1), start2_(mat_start2),
00282           stride1_(mat_stride1), stride2_(mat_stride2),
00283           internal_size1_(mat_internal_size1), internal_size2_(mat_internal_size2),
00284           elements_(h) {}
00285 
00286       template <typename LHS, typename RHS, typename OP>
00287       explicit matrix_base(matrix_expression<const LHS, const RHS, OP> const & proxy) :
00288         size1_(viennacl::traits::size1(proxy)), size2_(viennacl::traits::size2(proxy)), start1_(0), start2_(0), stride1_(1), stride2_(1),
            // The internal sizes are computed from size1_/size2_. This relies on those members being
            // declared (and therefore initialized) before internal_size1_/internal_size2_.
00289         internal_size1_(viennacl::tools::align_to_multiple<size_type>(size1_, alignment)),
00290         internal_size2_(viennacl::tools::align_to_multiple<size_type>(size2_, alignment))
00291       {
              // Constructs a matrix holding the result of the expression 'proxy'.
              // The memory domain is taken from the expression before any memory is created.
00292         elements_.switch_active_handle_id(viennacl::traits::active_handle_id(proxy));
00293         if (internal_size() > 0)
00294         {
00295           viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), viennacl::traits::context(proxy));
00296           clear();
                // Evaluate the expression into the freshly created, zeroed buffer.
00297           self_type::operator=(proxy);
00298         }
00299       }
00300 
00301       // CUDA or host memory:
00302       explicit matrix_base(SCALARTYPE * ptr_to_mem, viennacl::memory_types mem_type,
00303                            size_type mat_size1, size_type mat_start1, difference_type mat_stride1, size_type mat_internal_size1,
00304                            size_type mat_size2, size_type mat_start2, difference_type mat_stride2, size_type mat_internal_size2)
00305         : size1_(mat_size1), size2_(mat_size2),
00306           start1_(mat_start1), start2_(mat_start2),
00307           stride1_(mat_stride1), stride2_(mat_stride2),
00308           internal_size1_(mat_internal_size1), internal_size2_(mat_internal_size2)
00309       {
              // Wraps user-provided memory (CUDA or main memory) with a caller-supplied layout.
              // Nothing is allocated or copied. For any other mem_type no handle is set up, but the
              // raw size below is still recorded.
00310         if (mem_type == viennacl::CUDA_MEMORY)
00311         {
00312 #ifdef VIENNACL_WITH_CUDA
00313           elements_.switch_active_handle_id(viennacl::CUDA_MEMORY);
00314           elements_.cuda_handle().reset(reinterpret_cast<char*>(ptr_to_mem));
00315           elements_.cuda_handle().inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed.
00316 #else
                // Built without CUDA support: CUDA memory cannot be wrapped.
00317           throw cuda_not_available_exception();
00318 #endif
00319         }
00320         else if (mem_type == viennacl::MAIN_MEMORY)
00321         {
00322           elements_.switch_active_handle_id(viennacl::MAIN_MEMORY);
00323           elements_.ram_handle().reset(reinterpret_cast<char*>(ptr_to_mem));
00324           elements_.ram_handle().inc(); //prevents that the user-provided memory is deleted once the matrix object is destroyed.
00325         }
00326 
              // Record the buffer size in bytes, including padding.
00327         elements_.raw_size(sizeof(SCALARTYPE) * internal_size());
00328       }
00329 
00330 #ifdef VIENNACL_WITH_OPENCL
00331       explicit matrix_base(cl_mem mem, size_type rows, size_type columns, viennacl::context ctx = viennacl::context())
00332         : size1_(rows), size2_(columns),
00333           start1_(0), start2_(0),
00334           stride1_(1), stride2_(1),
00335           internal_size1_(rows), internal_size2_(columns)
00336       {
              // Wraps a user-provided OpenCL buffer as an unpadded rows-by-columns matrix:
              // the internal sizes equal the logical sizes, and nothing is allocated or copied.
00337         elements_.switch_active_handle_id(viennacl::OPENCL_MEMORY);
00338         elements_.opencl_handle() = mem;
00339         elements_.opencl_handle().inc();  //prevents that the user-provided memory is deleted once the matrix object is destroyed.
00340         elements_.opencl_handle().context(ctx.opencl_context());
00341         elements_.raw_size(sizeof(SCALARTYPE)*internal_size());
00342       }
00343 
00344       explicit matrix_base(cl_mem mem, viennacl::context ctx,
00345                            size_type mat_size1, size_type mat_start1, difference_type mat_stride1, size_type mat_internal_size1,
00346                            size_type mat_size2, size_type mat_start2, difference_type mat_stride2, size_type mat_internal_size2)
00347         : size1_(mat_size1), size2_(mat_size2),
00348           start1_(mat_start1), start2_(mat_start2),
00349           stride1_(mat_stride1), stride2_(mat_stride2),
00350           internal_size1_(mat_internal_size1), internal_size2_(mat_internal_size2)
00351       {
              // Wraps a user-provided OpenCL buffer with a caller-supplied layout
              // (size, start offset, stride and internal size per dimension). Nothing is allocated or copied.
00352         elements_.switch_active_handle_id(viennacl::OPENCL_MEMORY);
00353         elements_.opencl_handle() = mem;
00354         elements_.opencl_handle().inc();  //prevents that the user-provided memory is deleted once the matrix object is destroyed.
00355         elements_.opencl_handle().context(ctx.opencl_context());
00356         elements_.raw_size(sizeof(SCALARTYPE)*internal_size());
00357       }
00358 #endif
00359 
00360 
00361       self_type & operator=(const self_type & other)  //enables implicit conversions
00362       {
              // Copies the entries of 'other' into this matrix. An empty target is first resized
              // (without preserving contents) to the dimensions of 'other'; if both are empty there
              // is nothing to do.
00363         const bool target_is_empty = (internal_size() == 0);
00365         if (target_is_empty && other.internal_size() == 0)
00366           return *this;
              if (target_is_empty)
00367           resize(other.size1(), other.size2(), false);
00369 
              // The copy is expressed as *this = other * 1 via the generic 'am' kernel.
00370         viennacl::linalg::am(*this, other, cpu_value_type(1.0), 1, false, false);
00372         return *this;
00373       }
00374 
00376       /*template<class DISTRIBUTION>
00377       matrix(rand::random_matrix_t<SCALARTYPE, DISTRIBUTION> const & m) : rows_(m.size1), columns_(m.size2)
00378       {
00379         if (internal_size() > 0)
00380         {
00381           viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size());
00382           rand::buffer_dumper<SCALARTYPE, DISTRIBUTION>::dump(elements_,m.distribution,0,internal_size());
00383         }
00384       }*/
00385 
00386 
00387 
00392       template <typename LHS, typename RHS, typename OP>
00393       self_type & operator=(const matrix_expression<const LHS, const RHS, OP> & proxy)
00394       {
              // Assigns the result of a generic matrix expression to this matrix.
              // Each dimension must either match the expression or be zero (checked in debug builds).
00395         assert(  (viennacl::traits::size1(proxy) == size1() || size1() == 0)
00396               && (viennacl::traits::size2(proxy) == size2() || size2() == 0)
00397               && bool("Incompatible matrix sizes!"));
00398 
              // An empty target adopts the expression's dimensions and gets memory in its context.
00399         if (internal_size() == 0 && viennacl::traits::size1(proxy) > 0 && viennacl::traits::size2(proxy) > 0)
00400         {
00401           size1_ = viennacl::traits::size1(proxy);
00402           size2_ = viennacl::traits::size2(proxy);
00403           internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, alignment);
00404           internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, alignment);
00405           viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), viennacl::traits::context(proxy));
                // Clearing is only needed when padding exists beyond the logical dimensions.
00406           if (size1_ != internal_size1_ || size2_ != internal_size2_)
00407             clear();
00408         }
00409 
              // The evaluation itself is dispatched to the op_executor specialization for this expression type.
00410         if (internal_size() > 0)
00411           linalg::detail::op_executor<self_type, op_assign, matrix_expression<const LHS, const RHS, OP> >::apply(*this, proxy);
00412 
00413         return *this;
00414       }
00415 
00416 
00417       // A = trans(B). Currently achieved in CPU memory
00418       self_type & operator=(const matrix_expression< const self_type,
00419                                                      const self_type,
00420                                                      op_trans> & proxy)
00421       {
              // Assigns the transpose of proxy.lhs() to this matrix. The source buffer is read to the
              // host, transposed there, and the result is uploaded as the new buffer of this matrix.
              // NOTE(review): the upload replaces the complete buffer behind elements_, so entries
              // outside the addressed start/stride window are overwritten with zeros.
00422         assert( (handle() != proxy.lhs().handle()) && bool("Self-assignment of matrix transpose not implemented"));
00423         assert( ( (proxy.lhs().size1() == size2()) || (size2() == 0) ) && bool("Matrix dimensions do not match!"));
00424         assert( ( (proxy.lhs().size2() == size1()) || (size1() == 0) ) && bool("Matrix dimensions do not match!"));
00425 
              // An empty target adopts the dimensions of the transposed expression.
00426         if (internal_size() == 0 && viennacl::traits::size1(proxy) > 0 && viennacl::traits::size2(proxy) > 0)
00427         {
00428           size1_ = viennacl::traits::size1(proxy);
00429           size2_ = viennacl::traits::size2(proxy);
00430           internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, alignment);
00431           internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, alignment);
00432         }
00433 
              // Nothing to transpose for empty operands; this also avoids taking &v[0] of an empty vector below.
              if (internal_size() == 0 || proxy.lhs().internal_size() == 0)
                return *this;

00434         std::vector<SCALARTYPE> temp(proxy.lhs().internal_size());
00435 
00436         viennacl::backend::memory_read(proxy.lhs().handle(), 0, sizeof(SCALARTYPE)*proxy.lhs().internal_size(), &(temp[0]));
00437 
00438         // now transpose it
00439         std::vector<SCALARTYPE> temp_trans(internal_size());
00440 
              // Source entry (i, j) goes to destination entry (j, i). The destination row index uses
              // start1/stride1 and the column index start2/stride2, matching operator()(row, col).
00441         for (vcl_size_t i=0; i<proxy.lhs().size1(); ++i)
00442           for (vcl_size_t j=0; j<proxy.lhs().size2(); ++j)
00443             temp_trans[F::mem_index(start1() + stride1() * j,
00444                                     start2() + stride2() * i,
00445                                     internal_size1(), internal_size2())]
00446               = temp[F::mem_index(proxy.lhs().start1() + proxy.lhs().stride1() * i,
00447                                   proxy.lhs().start2() + proxy.lhs().stride2() * j,
00448                                   proxy.lhs().internal_size1(), proxy.lhs().internal_size2())];
00449 
00450         // write back
00451         viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), viennacl::traits::context(proxy), &(temp_trans[0]));
00452 
00453         return *this;
00454       }
00455 
00456       template <typename LHS, typename RHS, typename OP>
00457       self_type & operator+=(const matrix_expression<const LHS, const RHS, OP> & proxy)
00458       {
              // Adds the result of a matrix expression to this matrix in place.
              // Unlike plain assignment, the target must already be initialized with matching,
              // non-zero dimensions (checked in debug builds only).
00459         assert(  (viennacl::traits::size1(proxy) == size1())
00460               && (viennacl::traits::size2(proxy) == size2())
00461               && bool("Incompatible matrix sizes!"));
00462         assert( (size1() > 0) && bool("Matrix not yet initialized!") );
00463         assert( (size2() > 0) && bool("Matrix not yet initialized!") );
00464 
              // The evaluation is dispatched to the op_executor specialization for this expression type.
00465         linalg::detail::op_executor<self_type, op_inplace_add, matrix_expression<const LHS, const RHS, OP> >::apply(*this, proxy);
00466 
00467         return *this;
00468       }
00469 
00470       template <typename LHS, typename RHS, typename OP>
00471       self_type & operator-=(const matrix_expression<const LHS, const RHS, OP> & proxy)
00472       {
              // Subtracts the result of a matrix expression from this matrix in place.
              // Unlike plain assignment, the target must already be initialized with matching,
              // non-zero dimensions (checked in debug builds only).
00473         assert(  (viennacl::traits::size1(proxy) == size1())
00474               && (viennacl::traits::size2(proxy) == size2())
00475               && bool("Incompatible matrix sizes!"));
00476         assert( (size1() > 0) && bool("Matrix not yet initialized!") );
00477         assert( (size2() > 0) && bool("Matrix not yet initialized!") );
00478 
              // The evaluation is dispatched to the op_executor specialization for this expression type.
00479         linalg::detail::op_executor<self_type, op_inplace_sub, matrix_expression<const LHS, const RHS, OP> >::apply(*this, proxy);
00480 
00481         return *this;
00482       }
00483 
00485       self_type & operator = (identity_matrix<SCALARTYPE> const & m)
00486       {
              // Turns this matrix into an identity matrix. Each dimension must match m or be zero
              // (checked in debug builds only).
00487         assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") );
00488         assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") );
00489 
00490         if (internal_size() == 0)
00491         {
                // Empty target: adopt the dimensions of m and create zeroed memory in m's context.
00492           size1_ = m.size1();
00493           size2_ = m.size2();
00494           internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, alignment);
00495           internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, alignment);
00496           if (internal_size() > 0)
00497           {
00498             viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), m.context());
00499             clear();
00500           }
00501         }
00502         else
                // Existing target: zero all entries before the diagonal is written.
00503           viennacl::linalg::matrix_assign(*this, SCALARTYPE(0));
00504 
              // m(0,0) is the diagonal value of the identity_matrix.
00505         if (internal_size() > 0)
00506           viennacl::linalg::matrix_diagonal_assign(*this, m(0,0));
00507 
00508         return *this;
00509       }
00510 
00512       self_type & operator = (zero_matrix<SCALARTYPE> const & m)
00513       {
              // Sets all entries of this matrix to zero. Each dimension must match m or be zero
              // (checked in debug builds only).
00514         assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") );
00515         assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") );
00516 
00517         if (internal_size() == 0)
00518         {
                // Empty target: adopt the dimensions of m and create zeroed memory in m's context.
00519           size1_ = m.size1();
00520           size2_ = m.size2();
00521           internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, alignment);
00522           internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, alignment);
00523           if (internal_size() > 0)
00524           {
00525             viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), m.context());
00526             clear();
00527           }
00528         }
00529         else
00530           viennacl::linalg::matrix_assign(*this, SCALARTYPE(0));
00531 
00532         return *this;
00533       }
00534 
00536       self_type & operator = (scalar_matrix<SCALARTYPE> const & m)
00537       {
              // Sets every entry of this matrix to the value held by m. Each dimension must match m
              // or be zero (checked in debug builds only).
00538         assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") );
00539         assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") );
00540 
00541         if (internal_size() == 0)
00542         {
                // Empty target: adopt the dimensions of m and create zeroed memory in m's context.
00543           size1_ = m.size1();
00544           size2_ = m.size2();
00545           internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, alignment);
00546           internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, alignment);
00547           if (internal_size() > 0)
00548           {
00549             viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), m.context());
00550             clear();
00551           }
00552         }
00553 
              // m(0,0) is the common value of all entries of the scalar_matrix.
00554         if (internal_size() > 0)
00555         {
00556           viennacl::linalg::matrix_assign(*this, m(0,0));
00557         }
00558 
00559         return *this;
00560       }
00561 
00562 
00563       //read-write access to an element of the matrix/matrix_range/matrix_slice
00566       entry_proxy<SCALARTYPE> operator()(size_type row_index, size_type col_index)
00567       {
              // Returns a proxy for entry (row_index, col_index). The logical indices are mapped
              // through start/stride, and F::mem_index turns them into an offset in elements_.
              // No bounds check is performed.
00568         return entry_proxy<SCALARTYPE>(F::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_);
00569       }
00570 
00573       const_entry_proxy<SCALARTYPE> operator()(size_type row_index, size_type col_index) const
00574       {
              // Read-only counterpart of operator()(row_index, col_index): same index mapping,
              // but a const_entry_proxy is returned. No bounds check is performed.
00575         return const_entry_proxy<SCALARTYPE>(F::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_);
00576       }
00577 
00578       //
00579       // Operator overloads for enabling implicit conversions:
00580       //
00581       self_type & operator += (const self_type & other)
00582       {
              // In-place addition, expressed through the generic 'ambm' kernel with *this as both
              // result and first operand and a factor of 1 for each operand. No size check is done here.
00583         viennacl::linalg::ambm(*this,
00584                                 *this, SCALARTYPE(1.0), 1, false, false,
00585                                 other, SCALARTYPE(1.0), 1, false, false);
00586         return *this;
00587       }
00588 
00589       self_type & operator -= (const self_type & other)
00590       {
              // In-place subtraction via the generic 'ambm' kernel. It differs from operator+= only
              // in the last flag (true), which presumably flips the sign of the 'other' term -
              // confirm against ambm in matrix_operations.hpp. No size check is done here.
00591         viennacl::linalg::ambm(*this,
00592                                 *this, SCALARTYPE(1.0), 1, false, false,
00593                                 other, SCALARTYPE(1.0), 1, false, true);
00594         return *this;
00595       }
00596 
00599       self_type & operator *= (SCALARTYPE val)
00600       {
              // Scales this matrix by the host scalar 'val' in place, using the generic 'am' kernel
              // with *this as both result and operand.
00601         //viennacl::linalg::inplace_mult(*this, val);
00602         viennacl::linalg::am(*this,
00603                               *this, val, 1, false, false);
00604         return *this;
00605       }
00606 
00609       self_type & operator /= (SCALARTYPE val)
00610       {
              // Divides this matrix by the host scalar 'val' in place. It differs from operator*=
              // only in the fifth 'am' argument (true), which presumably requests the reciprocal
              // of 'val' - confirm against am in matrix_operations.hpp.
00611         //viennacl::linalg::inplace_mult(*this, static_cast<SCALARTYPE>(1) / val);
00612         viennacl::linalg::am(*this,
00613                               *this, val, 1, true, false);
00614         return *this;
00615       }
00616 
00617 
00619       matrix_expression<const self_type, const SCALARTYPE, op_mult> operator-() const
00620       {
              // Unary minus: returns an expression proxy for (*this) * (-1); nothing is evaluated here.
00621         return matrix_expression<const self_type, const SCALARTYPE, op_mult>(*this, SCALARTYPE(-1));
00622       }
00623 
00625       size_type size1() const { return size1_;}  // number of rows
00627       size_type size2() const { return size2_; }  // number of columns
00628 
00630       size_type start1() const { return start1_;}  // offset added to row indices (see operator())
00632       size_type start2() const { return start2_; }  // offset added to column indices (see operator())
00633 
            // NOTE(review): the strides are stored as difference_type but returned as size_type.
00635       size_type stride1() const { return stride1_;}  // multiplier applied to row indices (see operator())
00637       size_type stride2() const { return stride2_; }  // multiplier applied to column indices (see operator())
00638 
00640       void clear()
00641       {
              // Assigns zero to the matrix. The extra 'true' argument presumably extends the
              // assignment to the padding region - confirm against matrix_assign.
00642         viennacl::linalg::matrix_assign(*this, SCALARTYPE(0), true);
00643       }
00644 
00645 
00647       size_type internal_size1() const { return internal_size1_; }  // internal (padded) number of rows
00649       size_type internal_size2() const { return internal_size2_; }  // internal (padded) number of columns
00651       size_type internal_size() const { return internal_size1() * internal_size2(); }  // total number of entries in the buffer, padding included
00652 
00654             handle_type & handle()       { return elements_; }  // mutable access to the memory handle holding the entries
00656       const handle_type & handle() const { return elements_; }  // read-only access to the memory handle holding the entries
00657 
00658 
00659       viennacl::memory_types memory_domain() const
00660       {
              // Reports which memory domain is currently active for the handle
              // (MAIN_MEMORY, CUDA_MEMORY or OPENCL_MEMORY are the values used in this class).
00661         return elements_.get_active_handle_id();
00662       }
00663 
00664     protected:
00665 
00666       void set_handle(viennacl::backend::mem_handle const & h)
00667       {
              // Replaces the memory handle by a copy of h. Sizes, offsets and strides are left untouched.
00668         elements_ = h;
00669       }
00670 
00671       void switch_memory_context(viennacl::context new_ctx)
00672       {
              // Delegates to the backend to move the buffer behind elements_ to the context new_ctx.
00673         viennacl::backend::switch_memory_context<SCALARTYPE>(elements_, new_ctx);
00674       }
00675 
00676 
00684       void resize(size_type rows, size_type columns, bool preserve = true)
00685       {
              // Changes the matrix dimensions to rows x columns (both must be positive).
              // With preserve == true and existing data, the overlapping top-left part of the old
              // matrix is carried over and all remaining entries are zero. Otherwise a fresh, cleared
              // buffer is created. The transfer runs through host memory.
00686         assert( (rows > 0 && columns > 0) && bool("Check failed in matrix::resize(): Number of rows and columns must be positive!"));
00687 
00688         if (preserve && internal_size() > 0)
00689         {
                // Fetch the current buffer (padding included) to the host.
00691           std::vector< SCALARTYPE > host_old(internal_size());
00692           viennacl::backend::memory_read(elements_, 0, sizeof(SCALARTYPE)*internal_size(), &(host_old[0]));
00693 
                // Zero-initialized host image of the resized matrix, using the new padded dimensions.
                const vcl_size_t padded_rows = viennacl::tools::align_to_multiple<vcl_size_t>(rows,    alignment);
                const vcl_size_t padded_cols = viennacl::tools::align_to_multiple<vcl_size_t>(columns, alignment);
00695           std::vector< SCALARTYPE > host_new(padded_rows * padded_cols);

                // Copy only the region present in both the old and the new matrix.
00697           for (size_type r = 0; r < rows && r < size1_; ++r)
00702             for (size_type c = 0; c < columns && c < size2_; ++c)
00706               host_new[F::mem_index(r, c, padded_rows, padded_cols)]
00707                   = host_old[F::mem_index(r, c, internal_size1(), internal_size2())];
00710 
                // Adopt the new dimensions, then upload the host image as the new buffer.
00712           size1_ = rows;
00713           size2_ = columns;
00714           internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, alignment);
00715           internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, alignment);
00716           viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*host_new.size(), viennacl::traits::context(elements_), &(host_new[0]));
00717         }
00718         else
00719         {
                // Old entries are discarded: adopt the new dimensions and create a cleared buffer.
00720           size1_ = rows;
00721           size2_ = columns;
00722           internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, alignment);
00723           internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, alignment);
00724 
00725           viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), viennacl::traits::context(elements_));
00726           clear();
00727         }
00728       }
00729 
00730     private:
00731       size_type size1_;
00732       size_type size2_;
00733       size_type start1_;
00734       size_type start2_;
00735       difference_type stride1_;
00736       difference_type stride2_;
00737       size_type internal_size1_;
00738       size_type internal_size2_;
00739       handle_type elements_;
00740   }; //matrix
00741 
00742 
00743 
00750   template <class SCALARTYPE, typename F, unsigned int ALIGNMENT>
00751   class matrix : public matrix_base<SCALARTYPE, F>
        // Dense matrix type with its own storage, built on matrix_base<SCALARTYPE, F>.
        // It adds converting constructors and a public resize(); all other functionality comes
        // from the base class. ALIGNMENT is not referenced inside this class body.
00752   {
00753       typedef matrix<SCALARTYPE, F, ALIGNMENT>          self_type;
00754       typedef matrix_base<SCALARTYPE, F>                base_type;
00755     public:
00756       typedef typename base_type::size_type             size_type;
00757 
            // Empty matrix; no memory is allocated.
00759       explicit matrix() : base_type() {}
00760 
            // rows-by-columns matrix in the memory context 'ctx' (see the matching base constructor).
00767       explicit matrix(size_type rows, size_type columns, viennacl::context ctx = viennacl::context()) : base_type(rows, columns, ctx) {}
00768 
00769 #ifdef VIENNACL_WITH_OPENCL
            // Wraps a user-provided OpenCL buffer; the base constructor's default context is used.
00770       explicit matrix(cl_mem mem, size_type rows, size_type columns) : base_type(mem, rows, columns) {}
00771 #endif
00772 
            // Constructs the matrix from the result of an expression.
00773       template <typename LHS, typename RHS, typename OP>
00774       matrix(matrix_expression< LHS, RHS, OP> const & proxy) : base_type(proxy) {}
00775 
            // Constructs an identity matrix of the size and context described by m.
00777       matrix(identity_matrix<SCALARTYPE> const & m) : base_type(m.size1(), m.size2(), m.context())
00778       {
00779         if (base_type::internal_size() > 0)
00780           base_type::operator=(m);
00781       }
00782 
            // Constructs a zero matrix of the size and context described by m.
00784       matrix(zero_matrix<SCALARTYPE> const & m) : base_type(m.size1(), m.size2(), m.context())
00785       {
00786         if (base_type::internal_size() > 0)
00787           base_type::operator=(m);
00788       }
00789 
            // Constructs a matrix of the size and context described by m, with all entries set to m's value.
00791       matrix(scalar_matrix<SCALARTYPE> const & m) : base_type(m.size1(), m.size2(), m.context())
00792       {
00793         if (base_type::internal_size() > 0)
00794           base_type::operator=(m);
00795       }
00796 
            // Copies any matrix_base object: memory is created in the context of 'other',
            // then the entries are copied by the base-class assignment.
00797       matrix(const base_type & other) : base_type(other.size1(), other.size2(), viennacl::traits::context(other))
00798       {
00799         base_type::operator=(other);
00800       }
00801 
00802 
00803       //copy constructor:
00804       matrix(const self_type & other) : base_type(other.size1(), other.size2(), viennacl::traits::context(other))
00805       {
00806         base_type::operator=(other);
00807       }
00808 
00809 
00810       /*template <typename M1>
00811       self_type & operator=(const matrix_expression< const M1, const M1, op_trans> & proxy)
00812       {
00813         self_type temp(proxy.lhs());
00814         *this = trans(temp);
00815         return *this;
00816       }*/
00817 
            // Make all assignment operators of the base class available on matrix objects.
00818       using base_type::operator=;
00819 
            // Public forwarder to the protected matrix_base::resize().
00827       void resize(size_type rows, size_type columns, bool preserve = true)
00828       {
00829         base_type::resize(rows, columns, preserve);
00830       }
00831 
00832   }; //matrix
00833 
00834 
00835 
00841   template<class SCALARTYPE, typename F>
00842   std::ostream & operator<<(std::ostream & s, const matrix_base<SCALARTYPE, F> & gpu_matrix)
00843   {
00844     typedef typename matrix_base<SCALARTYPE, F>::size_type      size_type;
00845 
00846     std::vector<SCALARTYPE> tmp(gpu_matrix.internal_size());
00847     viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE) * gpu_matrix.internal_size(), &(tmp[0]));
00848 
00849     s << "[" << gpu_matrix.size1() << "," << gpu_matrix.size2() << "]";
00850 
00851     s << "(";
00852     for (size_type i = 0; i < gpu_matrix.size1(); ++i)
00853     {
00854       s << "(";
00855       for (size_type j = 0; j < gpu_matrix.size2(); ++j)
00856       {
00857         s << tmp[F::mem_index(i * gpu_matrix.stride1() + gpu_matrix.start1(), j * gpu_matrix.stride2() + gpu_matrix.start2(), gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
00858         if (j < gpu_matrix.size2() - 1)
00859           s << ",";
00860       }
00861       s << ")";
00862       if (i < gpu_matrix.size1() - 1)
00863         s << ",";
00864     }
00865     s << ")";
00866     return s;
00867   }
00868 
00874   template<typename LHS, typename RHS, typename OP>
00875   std::ostream & operator<<(std::ostream & s, const matrix_expression<LHS, RHS, OP> & expr)
00876   {
00877     typedef typename viennacl::tools::CPU_SCALAR_TYPE_DEDUCER< typename tools::CONST_REMOVER<LHS>::ResultType >::ResultType     ScalarType;
00878 
00879     matrix<ScalarType> temp = expr;
00880     s << temp;
00881     return s;
00882   }
00883 
00885   template<typename NumericT, typename F>
00886   matrix_expression< const matrix_base<NumericT, F>, const matrix_base<NumericT, F>, op_trans>
00887   trans(const matrix_base<NumericT, F> & mat)
00888   {
00889     return matrix_expression< const matrix_base<NumericT, F>, const matrix_base<NumericT, F>, op_trans>(mat, mat);
00890   }
00891 
00892   //diag():
00893   template<typename NumericT, typename F>
00894   vector_expression< const matrix_base<NumericT, F>, const int, op_matrix_diag>
00895   diag(const matrix_base<NumericT, F> & A, int k = 0)
00896   {
00897     return vector_expression< const matrix_base<NumericT, F>, const int, op_matrix_diag>(A, k);
00898   }
00899 
00900   template<typename NumericT>
00901   matrix_expression< const vector_base<NumericT>, const int, op_vector_diag>
00902   diag(const vector_base<NumericT> & v, int k = 0)
00903   {
00904     return matrix_expression< const vector_base<NumericT>, const int, op_vector_diag>(v, k);
00905   }
00906 
00907   // row():
00908   template<typename NumericT, typename F>
00909   vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_row>
00910   row(const matrix_base<NumericT, F> & A, unsigned int i)
00911   {
00912     return vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_row>(A, i);
00913   }
00914 
00915   // column():
00916   template<typename NumericT, typename F>
00917   vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_column>
00918   column(const matrix_base<NumericT, F> & A, unsigned int j)
00919   {
00920     return vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_column>(A, j);
00921   }
00922 
00924 
00925   //
00926   //cpu to gpu, generic type:
00927   //
00933   template <typename CPU_MATRIX, typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
00934   void copy(const CPU_MATRIX & cpu_matrix,
00935             matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix )
00936   {
00937     typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type      size_type;
00938 
00939     //std::cout << "Copying CPU_MATRIX!" << std::endl;
00940     //std::cout << "Size at begin: " << gpu_matrix.size1() << ", " << gpu_matrix.size2() << std::endl;
00941     if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
00942     {
00943       gpu_matrix.resize(cpu_matrix.size1(),
00944                         cpu_matrix.size2(), false);
00945     }
00946 
00947     assert( (gpu_matrix.size1() == cpu_matrix.size1()) && (gpu_matrix.size2() == cpu_matrix.size2()) && bool("Matrix dimensions mismatch.") );
00948 
00949     std::vector<SCALARTYPE> data(gpu_matrix.internal_size());
00950     for (size_type i = 0; i < gpu_matrix.size1(); ++i)
00951     {
00952       for (size_type j = 0; j < gpu_matrix.size2(); ++j)
00953         data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
00954     }
00955 
00956     viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(SCALARTYPE) * data.size(), viennacl::traits::context(gpu_matrix), &(data[0]));
00957     //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
00958     //std::cout << "Size at end: " << gpu_matrix.size1() << ", " << gpu_matrix.size2() << std::endl;
00959   }
00960 
00961   //
00962   //cpu to gpu, STL type:
00963   //
00969   template <typename SCALARTYPE, typename A1, typename A2, typename F, unsigned int ALIGNMENT>
00970   void copy(const std::vector< std::vector<SCALARTYPE, A1>, A2> & cpu_matrix,
00971             matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix )
00972   {
00973     typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type      size_type;
00974 
00975     if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
00976     {
00977       gpu_matrix.resize(cpu_matrix.size(),
00978                         cpu_matrix[0].size(),
00979                         false);
00980     }
00981 
00982     assert( (gpu_matrix.size1() == cpu_matrix.size()) && bool("Matrix dimensions mismatch.") );
00983 
00984     std::vector<SCALARTYPE> data(gpu_matrix.internal_size());
00985     for (size_type i = 0; i < gpu_matrix.size1(); ++i)
00986     {
00987       assert( (gpu_matrix.size2() == cpu_matrix[i].size()) && bool("Matrix dimensions mismatch.") );
00988 
00989       for (size_type j = 0; j < gpu_matrix.size2(); ++j)
00990         data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
00991     }
00992 
00993     viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(SCALARTYPE) * data.size(), viennacl::traits::context(gpu_matrix), &(data[0]));
00994     //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
00995   }
00996 
00997 
00998   //
00999   //cpu to gpu, raw pointer range (data is handed over as-is, without any layout conversion):
01000   //
01007   template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
01008   void fast_copy(SCALARTYPE * cpu_matrix_begin,
01009                   SCALARTYPE * cpu_matrix_end,
01010                   matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix)
01011   {
01012     viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(SCALARTYPE) * (cpu_matrix_end - cpu_matrix_begin), viennacl::traits::context(gpu_matrix), cpu_matrix_begin);
01013     /*gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
01014                                                                           sizeof(SCALARTYPE) * (cpu_matrix_end - cpu_matrix_begin),
01015                                                                           cpu_matrix_begin);*/
01016   }
01017 
01018 
01019   #ifdef VIENNACL_WITH_EIGEN
01020 
01025   template <typename F, unsigned int ALIGNMENT>
01026   void copy(const Eigen::MatrixXf & cpu_matrix,
01027             matrix<float, F, ALIGNMENT> & gpu_matrix)
01028   {
01029     typedef typename matrix<float, F, ALIGNMENT>::size_type      size_type;
01030 
01031     if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
01032     {
01033       gpu_matrix.resize(cpu_matrix.rows(),
01034                         cpu_matrix.cols(),
01035                         false);
01036     }
01037     else
01038     {
01039       assert( (gpu_matrix.size1() == static_cast<vcl_size_t>(cpu_matrix.rows()))
01040               && (gpu_matrix.size2() == static_cast<vcl_size_t>(cpu_matrix.cols()))
01041               && bool("matrix size mismatch")
01042             );
01043     }
01044 
01045     std::vector<float> data(gpu_matrix.internal_size());
01046     for (size_type i = 0; i < gpu_matrix.size1(); ++i)
01047     {
01048       for (size_type j = 0; j < gpu_matrix.size2(); ++j)
01049         data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
01050     }
01051 
01052     viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(float) * data.size(), viennacl::traits::context(gpu_matrix), &(data[0]));
01053     //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
01054   }
01055 
01061   template <typename F, unsigned int ALIGNMENT>
01062   void copy(const Eigen::MatrixXd & cpu_matrix,
01063             matrix<double, F, ALIGNMENT> & gpu_matrix)
01064   {
01065     typedef typename matrix<double, F, ALIGNMENT>::size_type      size_type;
01066 
01067     if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
01068     {
01069       gpu_matrix.resize(cpu_matrix.rows(),
01070                         cpu_matrix.cols(),
01071                         false);
01072     }
01073     else
01074     {
01075       assert( (gpu_matrix.size1() == static_cast<vcl_size_t>(cpu_matrix.rows()))
01076               && (gpu_matrix.size2() == static_cast<vcl_size_t>(cpu_matrix.cols()))
01077               && bool("matrix size mismatch")
01078             );
01079     }
01080 
01081     std::vector<double> data(gpu_matrix.internal_size());
01082     for (size_type i = 0; i < gpu_matrix.size1(); ++i)
01083     {
01084       for (size_type j = 0; j < gpu_matrix.size2(); ++j)
01085         data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
01086     }
01087 
01088     viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(double) * data.size(), viennacl::traits::context(gpu_matrix), &(data[0]));
01089     //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
01090   }
01091   #endif
01092 
01093   #ifdef VIENNACL_WITH_MTL4
01094 
01099   template <typename SCALARTYPE, typename T, typename F, unsigned int ALIGNMENT>
01100   void copy(const mtl::dense2D<SCALARTYPE, T>& cpu_matrix,
01101             matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix)
01102   {
01103     typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type      size_type;
01104 
01105     if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
01106     {
01107       gpu_matrix.resize(cpu_matrix.num_rows(),
01108                         cpu_matrix.num_cols(),
01109                         false);
01110     }
01111     else
01112     {
01113       assert( (gpu_matrix.size1() == cpu_matrix.num_rows())
01114               && (gpu_matrix.size2() == cpu_matrix.num_cols())
01115               && bool("matrix size mismatch")
01116             );
01117     }
01118 
01119     std::vector<SCALARTYPE> data(gpu_matrix.internal_size());
01120     for (size_type i = 0; i < gpu_matrix.size1(); ++i)
01121     {
01122       for (size_type j = 0; j < gpu_matrix.size2(); ++j)
01123         data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
01124     }
01125 
01126     viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(SCALARTYPE) * data.size(), viennacl::traits::context(gpu_matrix), &(data[0]));
01127     //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
01128   }
01129   #endif
01130 
01131 
01132 
01133 
01134   //
01135   //gpu to cpu, generic type
01136   //
01142   template <typename CPU_MATRIX, typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
01143   void copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix,
01144             CPU_MATRIX & cpu_matrix )
01145   {
01146     typedef typename matrix<float, F, ALIGNMENT>::size_type      size_type;
01147 
01148     if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) )
01149     {
01150       assert( viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1() && bool("Matrix dimensions mismatch: rows"));
01151 
01152       std::vector<SCALARTYPE> temp_buffer(gpu_matrix.internal_size());
01153       viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), &(temp_buffer[0]));
01154 
01155       //now copy entries to cpu_matrix:
01156       for (size_type i = 0; i < gpu_matrix.size1(); ++i)
01157       {
01158         assert( viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2() && bool("Matrix dimensions mismatch: columns"));
01159         for (size_type j = 0; j < gpu_matrix.size2(); ++j)
01160           cpu_matrix(i,j) = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
01161       }
01162     }
01163   }
01164 
01165   //gpu to cpu, STL type
01171   template <typename SCALARTYPE, typename A1, typename A2, typename F, unsigned int ALIGNMENT>
01172   void copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix,
01173             std::vector< std::vector<SCALARTYPE, A1>, A2> & cpu_matrix)
01174   {
01175     typedef typename matrix<float, F, ALIGNMENT>::size_type      size_type;
01176 
01177     if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) )
01178     {
01179       assert( (cpu_matrix.size() == gpu_matrix.size1()) && bool("Matrix dimensions mismatch: rows"));
01180 
01181       std::vector<SCALARTYPE> temp_buffer(gpu_matrix.internal_size());
01182       viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), &(temp_buffer[0]));
01183 
01184       //now copy entries to cpu_matrix:
01185       for (size_type i = 0; i < gpu_matrix.size1(); ++i)
01186       {
01187         assert( (cpu_matrix[i].size() == gpu_matrix.size2()) && bool("Matrix dimensions mismatch: columns"));
01188 
01189         for (size_type j = 0; j < gpu_matrix.size2(); ++j)
01190           cpu_matrix[i][j] = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
01191       }
01192     }
01193   }
01194 
01195   //gpu to cpu, raw pointer (the internal buffer is transferred as-is, without any layout conversion)
01201   template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
01202   void fast_copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix,
01203                   SCALARTYPE * cpu_matrix_begin)
01204   {
01205     viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), cpu_matrix_begin);
01206   }
01207 
01208 
01209 
01211 
01212 
01213   // operator +
01215   template <typename LHS1, typename RHS1, typename OP1,
01216             typename LHS2, typename RHS2, typename OP2>
01217   matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
01218                      const matrix_expression<const LHS2, const RHS2, OP2>,
01219                      op_add>
01220   operator + (matrix_expression<const LHS1, const RHS1, OP1> const & proxy1,
01221               matrix_expression<const LHS2, const RHS2, OP2> const & proxy2)
01222   {
01223     assert(    (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
01224             && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
01225             && bool("Incompatible matrix sizes!"));
01226     return matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
01227                               const matrix_expression<const LHS2, const RHS2, OP2>,
01228                               op_add>(proxy1, proxy2);
01229   }
01230 
01231   template <typename LHS1, typename RHS1, typename OP1,
01232             typename NumericT, typename F>
01233   matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
01234                      const matrix_base<NumericT, F>,
01235                      op_add>
01236   operator + (matrix_expression<const LHS1, const RHS1, OP1> const & proxy1,
01237               matrix_base<NumericT, F> const & proxy2)
01238   {
01239     assert(    (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
01240             && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
01241             && bool("Incompatible matrix sizes!"));
01242     return matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
01243                               const matrix_base<NumericT, F>,
01244                               op_add>(proxy1, proxy2);
01245   }
01246 
01247   template <typename NumericT, typename F,
01248             typename LHS2, typename RHS2, typename OP2>
01249   matrix_expression< const matrix_base<NumericT, F>,
01250                      const matrix_expression<const LHS2, const RHS2, OP2>,
01251                      op_add>
01252   operator + (matrix_base<NumericT, F> const & proxy1,
01253               matrix_expression<const LHS2, const RHS2, OP2> const & proxy2)
01254   {
01255     assert(    (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
01256             && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
01257             && bool("Incompatible matrix sizes!"));
01258     return  matrix_expression< const matrix_base<NumericT, F>,
01259                                const matrix_expression<const LHS2, const RHS2, OP2>,
01260                                op_add>(proxy1, proxy2);
01261   }
01262 
01264   template <typename NumericT, typename F>
01265   matrix_expression< const matrix_base<NumericT, F>, const matrix_base<NumericT, F>, op_add >
01266   operator + (const matrix_base<NumericT, F> & m1, const matrix_base<NumericT, F> & m2)
01267   {
01268     return matrix_expression< const matrix_base<NumericT, F>,
01269                               const matrix_base<NumericT, F>,
01270                               op_add > (m1, m2);
01271   }
01272 
01273 
01274   // operator -
01275   template <typename LHS1, typename RHS1, typename OP1,
01276             typename LHS2, typename RHS2, typename OP2>
01277   matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
01278                      const matrix_expression<const LHS2, const RHS2, OP2>,
01279                      op_sub>
01280   operator - (matrix_expression<const LHS1, const RHS1, OP1> const & proxy1,
01281               matrix_expression<const LHS2, const RHS2, OP2> const & proxy2)
01282   {
01283     assert(    (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
01284             && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
01285             && bool("Incompatible matrix sizes!"));
01286     return matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
01287                               const matrix_expression<const LHS2, const RHS2, OP2>,
01288                               op_sub>(proxy1, proxy2);
01289   }
01290 
01291   template <typename LHS1, typename RHS1, typename OP1,
01292             typename NumericT, typename F>
01293   matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
01294                      const matrix_base<NumericT, F>,
01295                      op_sub>
01296   operator - (matrix_expression<const LHS1, const RHS1, OP1> const & proxy1,
01297               matrix_base<NumericT, F> const & proxy2)
01298   {
01299     assert(    (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
01300             && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
01301             && bool("Incompatible matrix sizes!"));
01302     return matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
01303                               const matrix_base<NumericT, F>,
01304                               op_sub>(proxy1, proxy2);
01305   }
01306 
01307   template <typename NumericT, typename F,
01308             typename LHS2, typename RHS2, typename OP2>
01309   matrix_expression< const matrix_base<NumericT, F>,
01310                      const matrix_expression<const LHS2, const RHS2, OP2>,
01311                      op_sub>
01312   operator - (matrix_base<NumericT, F> const & proxy1,
01313               matrix_expression<const LHS2, const RHS2, OP2> const & proxy2)
01314   {
01315     assert(    (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
01316             && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
01317             && bool("Incompatible matrix sizes!"));
01318     return  matrix_expression< const matrix_base<NumericT, F>,
01319                                const matrix_expression<const LHS2, const RHS2, OP2>,
01320                                op_sub>(proxy1, proxy2);
01321   }
01322 
01324   template <typename NumericT, typename F>
01325   matrix_expression< const matrix_base<NumericT, F>, const matrix_base<NumericT, F>, op_sub >
01326   operator - (const matrix_base<NumericT, F> & m1, const matrix_base<NumericT, F> & m2)
01327   {
01328     return matrix_expression< const matrix_base<NumericT, F>,
01329                               const matrix_base<NumericT, F>,
01330                               op_sub > (m1, m2);
01331   }
01332 
01333 
01334 
01335   // operator *
01341   template <typename S1, typename NumericT, typename F>
01342   typename viennacl::enable_if<    viennacl::is_any_scalar<S1>::value,
01343                                 matrix_expression< const matrix_base<NumericT, F>, const S1, op_mult>
01344                               >::type
01345   operator * (S1 const & value, matrix_base<NumericT, F> const & m1)
01346   {
01347     return matrix_expression< const matrix_base<NumericT, F>, const S1, op_mult>(m1, value);
01348   }
01349 
01350 
01356   template <typename LHS, typename RHS, typename OP, typename S1>
01357   typename viennacl::enable_if< viennacl::is_any_scalar<S1>::value,
01358                                 matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult> >::type
01359   operator * (matrix_expression< LHS, RHS, OP> const & proxy,
01360               S1 const & val)
01361   {
01362     return matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult>(proxy, val);
01363   }
01364 
01365 
01371   template <typename S1, typename LHS, typename RHS, typename OP>
01372   typename viennacl::enable_if< viennacl::is_any_scalar<S1>::value,
01373                                 matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult> >::type
01374   operator * (S1 const & val,
01375               matrix_expression< LHS, RHS, OP> const & proxy)
01376   {
01377     return matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult>(proxy, val);
01378   }
01379 
01382   template <typename NumericT, typename F, typename S1>
01383   typename viennacl::enable_if< viennacl::is_any_scalar<S1>::value,
01384                                 matrix_expression< const matrix_base<NumericT, F>, const S1, op_mult> >::type
01385   operator * (matrix_base<NumericT, F> const & m1, S1 const & s1)
01386   {
01387     return matrix_expression< const matrix_base<NumericT, F>, const S1, op_mult>(m1, s1);
01388   }
01389 
01390 
01391   // operator *=
01392 
01395   template <typename NumericT, typename F, typename S1>
01396   typename viennacl::enable_if< viennacl::is_scalar<S1>::value,
01397                                 matrix_base<NumericT, F> &
01398                               >::type
01399   operator *= (matrix_base<NumericT, F> & m1, S1 const & gpu_val)
01400   {
01401     //viennacl::linalg::inplace_mult(*this, gpu_val);
01402     viennacl::linalg::am(m1,
01403                          m1, gpu_val, 1, false, (viennacl::is_flip_sign_scalar<S1>::value ? true : false));
01404     return m1;
01405   }
01406 
01407 
01408   // operator /
01409 
01410 
01416   template <typename LHS, typename RHS, typename OP, typename S1>
01417   typename viennacl::enable_if< viennacl::is_any_scalar<S1>::value,
01418                                 matrix_expression< const matrix_expression<const LHS, const RHS, OP>, const S1, op_div> >::type
01419   operator / (matrix_expression<const LHS, const RHS, OP> const & proxy,
01420               S1 const & val)
01421   {
01422     return matrix_expression< const matrix_expression<const LHS, const RHS, OP>, const S1, op_div>(proxy, val);
01423   }
01424 
01425 
01428   template <typename NumericT, typename F, typename S1>
01429   typename viennacl::enable_if< viennacl::is_any_scalar<S1>::value,
01430                                 matrix_expression< const matrix_base<NumericT, F>, const S1, op_div> >::type
01431   operator / (matrix_base<NumericT, F> const & m1, S1 const & s1)
01432   {
01433     return matrix_expression< const matrix_base<NumericT, F>, const S1, op_div>(m1, s1);
01434   }
01435 
01436 
01437   // operator /=
01438 
01441   template <typename NumericT, typename F, typename S1>
01442   typename viennacl::enable_if< viennacl::is_scalar<S1>::value,
01443                                 matrix_base<NumericT, F> &
01444                               >::type
01445   operator /= (matrix_base<NumericT, F> & m1, S1 const & gpu_val)
01446   {
01447     //viennacl::linalg::inplace_divide(*this, gpu_val);
01448     viennacl::linalg::am(m1,
01449                          m1, gpu_val, 1, true, (viennacl::is_flip_sign_scalar<S1>::value ? true : false));
01450     return m1;
01451   }
01452 
01453 
01454 
01455 
01456 
01457   // outer_prod(v1, v2) * val;
01458   template <typename NumericT, typename S1>
01459   typename viennacl::enable_if< viennacl::is_scalar<S1>::value,
01460                                 viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
01461                                                              const S1,
01462                                                              op_mult>
01463                               >::type
01464   operator*(const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy,
01465             const S1 & val)
01466   {
01467     return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
01468                                         const S1,
01469                                         op_mult>(proxy, val);
01470   }
01471 
01472   template <typename NumericT, typename S1>
01473   typename viennacl::enable_if< viennacl::is_cpu_scalar<S1>::value,
01474                                 viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
01475                                                               const NumericT,
01476                                                               op_mult>
01477                               >::type
01478   operator*(const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy,
01479             const S1 & val)
01480   {
01481     return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
01482                                         const NumericT,
01483                                         op_mult>(proxy, NumericT(val));
01484   }
01485 
01486   // val * outer_prod(v1, v2);
01487   template <typename NumericT, typename S1>
01488   typename viennacl::enable_if< viennacl::is_scalar<S1>::value,
01489                                 viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
01490                                                              const S1,
01491                                                              op_mult>
01492                               >::type
01493   operator*(const S1 & val,
01494             const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy)
01495   {
01496     return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
01497                                         const S1,
01498                                         op_mult>(proxy, val);
01499   }
01500 
01501   template<typename NumericT, typename S1>
01502   typename viennacl::enable_if< viennacl::is_cpu_scalar<S1>::value,
01503                                 viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
01504                                                              const NumericT,
01505                                                              op_mult>
01506                               >::type
01507   operator*(const S1 & val,
01508             const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy)
01509   {
01510     return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
01511                                         const NumericT,
01512                                         op_mult>(proxy, NumericT(val));
01513   }
01514 
01515 
01516 
01517   //
01518   // Specify available operations:
01519   //
01520 
01523   namespace linalg
01524   {
01525     namespace detail
01526     {
01527 
01528       // x = y
01529       template <typename T, typename F>
01530       struct op_executor<matrix_base<T, F>, op_assign, matrix_base<T, F> >
01531       {
01532         static void apply(matrix_base<T, F> & lhs, matrix_base<T, F> const & rhs)
01533         {
01534           viennacl::linalg::am(lhs, rhs, T(1), 1, false, false);
01535         }
01536       };
01537 
01538       // x += y
01539       template <typename T, typename F>
01540       struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_base<T, F> >
01541       {
01542         static void apply(matrix_base<T, F> & lhs, matrix_base<T, F> const & rhs)
01543         {
01544           viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, rhs, T(1), 1, false, false);
01545         }
01546       };
01547 
01548       // x -= y
            // Executor for in-place subtraction of a dense matrix.
            // Same ambm() call shape as the "x += y" executor, except that the last
            // boolean of the rhs operand is true; that flag is what distinguishes the
            // subtracting executors from the adding ones in this file.
01549       template <typename T, typename F>
01550       struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_base<T, F> >
01551       {
01552         static void apply(matrix_base<T, F> & lhs, matrix_base<T, F> const & rhs)
01553         {
01554           viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, rhs, T(1), 1, false, true);
01555         }
01556       };
01557 
01559 
01560 
01561       // x = alpha * y
            // Executor for assigning a scaled dense matrix.
            // The proxy carries the matrix as proxy.lhs() and the scalar as proxy.rhs();
            // both are handed to am() unchanged, with the two boolean flags cleared.
            // ScalarType is whatever scalar type the expression was built with.
01562       template <typename T, typename F, typename ScalarType>
01563       struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult> >
01564       {
01565         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult> const & proxy)
01566         {
01567           viennacl::linalg::am(lhs, proxy.lhs(), proxy.rhs(), 1, false, false);
01568         }
01569       };
01570 
01571       // x += alpha * y
            // Executor for in-place addition of a scaled dense matrix.
            // ambm() receives lhs (scaled by T(1)) as first operand and the proxy's
            // matrix/scalar pair (proxy.lhs(), proxy.rhs()) as second operand; all
            // boolean flags are cleared.
01572       template <typename T, typename F, typename ScalarType>
01573       struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult> >
01574       {
01575         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult> const & proxy)
01576         {
01577           viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, false, false);
01578         }
01579       };
01580 
01581       // x -= alpha * y
            // Executor for in-place subtraction of a scaled dense matrix.
            // Identical to the "x += alpha * y" call except that the final boolean of
            // the second operand is true, marking that term as subtracted.
01582       template <typename T, typename F, typename ScalarType>
01583       struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult> >
01584       {
01585         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult> const & proxy)
01586         {
01587           viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, false, true);
01588         }
01589       };
01590 
01591 
01593 
01594       // x = alpha * mat_expr
            // Executor for assigning a scaled, not yet evaluated matrix expression.
            // The nested expression (proxy.lhs()) is first evaluated into a temporary
            // matrix<T, F>; the scaling by proxy.rhs() is then applied by assigning
            // "temp * scalar" to lhs through the regular operators.
            // This costs one temporary matrix per evaluation.
01595       template <typename T, typename F, typename LHS, typename RHS, typename OP, typename ScalarType>
01596       struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> >
01597       {
01598           static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy)
01599           {
01600             matrix<T, F> temp(proxy.lhs());
01601             lhs = temp * proxy.rhs();
01602           }
01603       };
01604 
01605       // x += alpha * mat_expr
            // Executor for in-place addition of a scaled, not yet evaluated matrix
            // expression. The nested expression (proxy.lhs()) is evaluated into a
            // temporary matrix<T, F>, after which "temp * scalar" is added to lhs
            // through the regular operators.
01606       template <typename T, typename F, typename LHS, typename RHS, typename OP, typename ScalarType>
01607       struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> >
01608       {
01609           static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy)
01610           {
01611             matrix<T, F> temp(proxy.lhs());
01612             lhs += temp * proxy.rhs();
01613           }
01614       };
01615 
01616       // x -= alpha * mat_expr
            // Executor for in-place subtraction of a scaled, not yet evaluated matrix
            // expression. The nested expression (proxy.lhs()) is evaluated into a
            // temporary matrix<T, F>, after which "temp * scalar" is subtracted from
            // lhs through the regular operators.
01617       template <typename T, typename F, typename LHS, typename RHS, typename OP, typename ScalarType>
01618       struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> >
01619       {
01620           static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy)
01621           {
01622             matrix<T, F> temp(proxy.lhs());
01623             lhs -= temp * proxy.rhs();
01624           }
01625       };
01626 
01627 
01629 
01630       // x = y / alpha
            // Executor for assigning a dense matrix divided by a scalar.
            // Same am() call as the "x = alpha * y" executor, but with the first
            // boolean set to true; in this file that flag is set exactly for the
            // op_div forms, i.e. it requests division by proxy.rhs() instead of
            // multiplication.
01631       template <typename T, typename F, typename ScalarType>
01632       struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_base<T, F>, const ScalarType, op_div> >
01633       {
01634         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const ScalarType, op_div> const & proxy)
01635         {
01636           viennacl::linalg::am(lhs, proxy.lhs(), proxy.rhs(), 1, true, false);
01637         }
01638       };
01639 
01640       // x += y / alpha
            // Executor for in-place addition of a dense matrix divided by a scalar.
            // lhs (scaled by T(1)) is the first ambm() operand; the second operand is
            // the proxy's matrix/scalar pair with the first boolean set to true, which
            // selects division by the scalar.
01641       template <typename T, typename F, typename ScalarType>
01642       struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_base<T, F>, const ScalarType, op_div> >
01643       {
01644         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const ScalarType, op_div> const & proxy)
01645         {
01646           viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, true, false);
01647         }
01648       };
01649 
01650       // x -= y / alpha
            // Executor for in-place subtraction of a dense matrix divided by a scalar.
            // The second ambm() operand has both booleans set: the first selects
            // division by proxy.rhs(), the second marks the term as subtracted.
01651       template <typename T, typename F, typename ScalarType>
01652       struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_base<T, F>, const ScalarType, op_div> >
01653       {
01654         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const ScalarType, op_div> const & proxy)
01655         {
01656           viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, true, true);
01657         }
01658       };
01659 
01660 
01662 
01663       // x = mat_expr / alpha
            // Executor for assigning a not yet evaluated matrix expression divided by a
            // scalar. The nested expression (proxy.lhs()) is evaluated into a temporary
            // matrix<T, F>; "temp / scalar" is then assigned to lhs through the regular
            // operators. This costs one temporary matrix per evaluation.
01664       template <typename T, typename F, typename LHS, typename RHS, typename OP, typename ScalarType>
01665       struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> >
01666       {
01667           static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy)
01668           {
01669             matrix<T, F> temp(proxy.lhs());
01670             lhs = temp / proxy.rhs();
01671           }
01672       };
01673 
01674       // x += mat_expr / alpha
            // Executor for in-place addition of a not yet evaluated matrix expression
            // divided by a scalar. The nested expression (proxy.lhs()) is evaluated
            // into a temporary matrix<T, F>, after which "temp / scalar" is added to
            // lhs through the regular operators.
01675       template <typename T, typename F, typename LHS, typename RHS, typename OP, typename ScalarType>
01676       struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> >
01677       {
01678           static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy)
01679           {
01680             matrix<T, F> temp(proxy.lhs());
01681             lhs += temp / proxy.rhs();
01682           }
01683       };
01684 
01685       // x -= mat_expr / alpha
            // Executor for in-place subtraction of a not yet evaluated matrix
            // expression divided by a scalar. The nested expression (proxy.lhs()) is
            // evaluated into a temporary matrix<T, F>, after which "temp / scalar" is
            // subtracted from lhs through the regular operators.
01686       template <typename T, typename F, typename LHS, typename RHS, typename OP, typename ScalarType>
01687       struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> >
01688       {
01689           static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy)
01690           {
01691             matrix<T, F> temp(proxy.lhs());
01692             lhs -= temp / proxy.rhs();
01693           }
01694       };
01695 
01696 
01697 
01698       // generic x = mat_expr1 + mat_expr2:
            // Executor for assigning the sum of two operands to a dense matrix.
            //
            // The struct offers one fully generic apply() plus a set of more specific
            // overloads. The specific overloads cover every combination in which each
            // operand is a plain matrix, a matrix times a scalar, or a matrix divided by
            // a scalar, and evaluate the whole sum with a single ambm() call.
            //
            // In those ambm() calls each operand is passed as the group
            // (matrix, scalar, 1, bool, bool). A plain matrix uses T(1) as its scalar;
            // the first bool is true exactly for the "/ scalar" operands; the second
            // bool is false everywhere in this struct because nothing is subtracted.
            // NOTE(review): flag meanings are read off the call pattern in this file;
            // confirm against the declaration of ambm().
01699       template <typename T, typename F, typename LHS, typename RHS>
01700       struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const LHS, const RHS, op_add> >
01701       {
01702         // generic x = mat_expr1 + mat_expr2:
              // Fallback for operand types not matched by the specific overloads below.
              // If lhs aliases either operand (as reported by op_aliasing(), defined
              // elsewhere), the sum is built in a temporary and copied to lhs afterwards,
              // so that lhs is not overwritten while an operand still needs to be read.
              // Otherwise the left operand is assigned to lhs and the right operand is
              // added in place, avoiding the temporary.
              // The nested executors are selected with the struct-level LHS/RHS types.
01703         template <typename LHS1, typename RHS1>
01704         static void apply(matrix_base<T, F> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy)
01705         {
01706           bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
01707           bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
01708 
01709           if (op_aliasing_lhs || op_aliasing_rhs)
01710           {
01711             matrix_base<T, F> temp(proxy.lhs());
01712             op_executor<matrix_base<T, F>, op_inplace_add, RHS>::apply(temp, proxy.rhs());
01713             lhs = temp;
01714           }
01715           else
01716           {
01717             op_executor<matrix_base<T, F>, op_assign, LHS>::apply(lhs, proxy.lhs());
01718             op_executor<matrix_base<T, F>, op_inplace_add, RHS>::apply(lhs, proxy.rhs());
01719           }
01720         }
01721 
01722         // x = y + z
01723         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_add> const & proxy)
01724         {
01725           viennacl::linalg::ambm(lhs,
01726                                  proxy.lhs(), T(1), 1, false, false,
01727                                  proxy.rhs(), T(1), 1, false, false);
01728         }
01729 
01730         // x = alpha * y + z
01731         template <typename ScalarType>
01732         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
01733                                                                   const matrix_base<T, F>,
01734                                                                   op_add> const & proxy)
01735         {
01736           viennacl::linalg::ambm(lhs,
01737                                  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
01738                                  proxy.rhs(), T(1), 1, false, false);
01739         }
01740 
01741         // x = y / alpha + z
01742         template <typename ScalarType>
01743         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
01744                                                                   const matrix_base<T, F>,
01745                                                                   op_add> const & proxy)
01746         {
01747           viennacl::linalg::ambm(lhs,
01748                                  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
01749                                  proxy.rhs(), T(1), 1, false, false);
01750         }
01751 
01752         // x = y + beta * z
01753         template <typename ScalarType>
01754         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
01755                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
01756                                                                   op_add> const & proxy)
01757         {
01758           viennacl::linalg::ambm(lhs,
01759                                  proxy.lhs(), T(1), 1, false, false,
01760                                  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
01761         }
01762 
01763         // x = y + z / beta
01764         template <typename ScalarType>
01765         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
01766                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
01767                                                                   op_add> const & proxy)
01768         {
01769           viennacl::linalg::ambm(lhs,
01770                                  proxy.lhs(), T(1), 1, false, false,
01771                                  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
01772         }
01773 
01774         // x = alpha * y + beta * z
01775         template <typename ScalarType1, typename ScalarType2>
01776         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
01777                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
01778                                                                   op_add> const & proxy)
01779         {
01780           viennacl::linalg::ambm(lhs,
01781                                  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
01782                                  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
01783         }
01784 
01785         // x = alpha * y + z / beta
01786         template <typename ScalarType1, typename ScalarType2>
01787         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
01788                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
01789                                                                   op_add> const & proxy)
01790         {
01791           viennacl::linalg::ambm(lhs,
01792                                  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
01793                                  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
01794         }
01795 
01796         // x = y / alpha + beta * z
01797         template <typename ScalarType1, typename ScalarType2>
01798         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
01799                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
01800                                                                   op_add> const & proxy)
01801         {
01802           viennacl::linalg::ambm(lhs,
01803                                  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
01804                                  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
01805         }
01806 
01807         // x = y / alpha + z / beta
01808         template <typename ScalarType1, typename ScalarType2>
01809         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
01810                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
01811                                                                   op_add> const & proxy)
01812         {
01813           viennacl::linalg::ambm(lhs,
01814                                  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
01815                                  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
01816         }
01817       };
01818 
01819       // dense = sparse * dense
            // Executor for assigning a matrix product to a dense matrix.
            // The specialization matches any op_prod expression, but apply() is only
            // provided for right operands that are a dense matrix_base<T, F2> or the
            // transpose expression of one. The left operand type is an unconstrained
            // template parameter; by its name it is meant to be a sparse matrix type.
            // F2 may differ from the layout tag F1 of the result.
            // Both overloads hand the two operands and the result to
            // viennacl::linalg::prod_impl(), which selects the actual kernel.
01820       template <typename T, typename F1, typename LHS, typename RHS>
01821       struct op_executor<matrix_base<T, F1>, op_assign, matrix_expression<const LHS, const RHS, op_prod> >
01822       {
              // dense = sparse * dense
01823         template < typename SparseMatrixType, typename F2 >
01824         static void apply(matrix_base<T, F1> & lhs, matrix_expression<const SparseMatrixType,
01825                                                                      const viennacl::matrix_base<T, F2>,
01826                                                                      viennacl::op_prod> const & proxy)
01827         {
01828           viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), lhs);
01829         }
01830 
01831         // dense = sparse * trans(dense)
              // The transpose is not evaluated here; the op_trans expression is passed
              // on to prod_impl() as the right operand.
01832         template < typename SparseMatrixType, typename F2 >
01833         static void apply(matrix_base<T, F1> & lhs, matrix_expression<const SparseMatrixType,
01834                                                                      const viennacl::matrix_expression< const viennacl::matrix_base<T, F2>,
01835                                                                                                         const viennacl::matrix_base<T, F2>,
01836                                                                                                         viennacl::op_trans >,
01837                                                                      viennacl::op_prod> const & proxy)
01838         {
01839           viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), lhs);
01840         }
01841 
01842       };
01843 
01844       // generic x += mat_expr1 + mat_expr2:
            // Executor for adding the sum of two operands to a dense matrix in place.
            //
            // The struct offers one fully generic apply() plus a set of more specific
            // overloads. The specific overloads cover every combination in which each
            // operand is a plain matrix, a matrix times a scalar, or a matrix divided by
            // a scalar, and perform the whole update with a single ambm_m() call.
            //
            // In those ambm_m() calls each operand is passed as the group
            // (matrix, scalar, 1, bool, bool). A plain matrix uses T(1) as its scalar;
            // the first bool is true exactly for the "/ scalar" operands; the second
            // bool is false everywhere in this struct because both terms are added.
            // NOTE(review): flag meanings are read off the call pattern in this file;
            // confirm against the declaration of ambm_m().
01845       template <typename T, typename F, typename LHS, typename RHS>
01846       struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const LHS, const RHS, op_add> >
01847       {
01848         // generic x += mat_expr1 + mat_expr2:
              // Fallback for operand types not matched by the specific overloads below.
              // If lhs aliases either operand (as reported by op_aliasing(), defined
              // elsewhere), the sum is first built in a temporary and then added to lhs.
              // Otherwise both operands are added to lhs one after the other, which
              // avoids the temporary.
              // The nested executors are selected with the struct-level LHS/RHS types.
01849         template <typename LHS1, typename RHS1>
01850         static void apply(matrix_base<T, F> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy)
01851         {
01852           bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
01853           bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
01854 
01855           if (op_aliasing_lhs || op_aliasing_rhs)
01856           {
01857             matrix_base<T, F> temp(proxy.lhs());
01858             op_executor<matrix_base<T, F>, op_inplace_add, RHS>::apply(temp, proxy.rhs());
01859             lhs += temp;
01860           }
01861           else
01862           {
01863             op_executor<matrix_base<T, F>, op_inplace_add, LHS>::apply(lhs, proxy.lhs());
01864             op_executor<matrix_base<T, F>, op_inplace_add, RHS>::apply(lhs, proxy.rhs());
01865           }
01866         }
01867 
01868         // x += y + z
01869         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_add> const & proxy)
01870         {
01871           viennacl::linalg::ambm_m(lhs,
01872                                    proxy.lhs(), T(1), 1, false, false,
01873                                    proxy.rhs(), T(1), 1, false, false);
01874         }
01875 
01876         // x += alpha * y + z
01877         template <typename ScalarType>
01878         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
01879                                                                   const matrix_base<T, F>,
01880                                                                   op_add> const & proxy)
01881         {
01882           viennacl::linalg::ambm_m(lhs,
01883                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
01884                                    proxy.rhs(), T(1), 1, false, false);
01885         }
01886 
01887         // x += y / alpha + z
01888         template <typename ScalarType>
01889         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
01890                                                                   const matrix_base<T, F>,
01891                                                                   op_add> const & proxy)
01892         {
01893           viennacl::linalg::ambm_m(lhs,
01894                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
01895                                    proxy.rhs(), T(1), 1, false, false);
01896         }
01897 
01898         // x += y + beta * z
01899         template <typename ScalarType>
01900         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
01901                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
01902                                                                   op_add> const & proxy)
01903         {
01904           viennacl::linalg::ambm_m(lhs,
01905                                    proxy.lhs(), T(1), 1, false, false,
01906                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
01907         }
01908 
01909         // x += y + z / beta
01910         template <typename ScalarType>
01911         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
01912                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
01913                                                                   op_add> const & proxy)
01914         {
01915           viennacl::linalg::ambm_m(lhs,
01916                                    proxy.lhs(), T(1), 1, false, false,
01917                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
01918         }
01919 
01920         // x += alpha * y + beta * z
01921         template <typename ScalarType1, typename ScalarType2>
01922         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
01923                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
01924                                                                   op_add> const & proxy)
01925         {
01926           viennacl::linalg::ambm_m(lhs,
01927                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
01928                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
01929         }
01930 
01931         // x += alpha * y + z / beta
01932         template <typename ScalarType1, typename ScalarType2>
01933         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
01934                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
01935                                                                   op_add> const & proxy)
01936         {
01937           viennacl::linalg::ambm_m(lhs,
01938                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
01939                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
01940         }
01941 
01942         // x += y / alpha + beta * z
01943         template <typename ScalarType1, typename ScalarType2>
01944         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
01945                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
01946                                                                   op_add> const & proxy)
01947         {
01948           viennacl::linalg::ambm_m(lhs,
01949                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
01950                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
01951         }
01952 
01953         // x += y / alpha + z / beta
01954         template <typename ScalarType1, typename ScalarType2>
01955         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
01956                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
01957                                                                   op_add> const & proxy)
01958         {
01959           viennacl::linalg::ambm_m(lhs,
01960                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
01961                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
01962         }
01963       };
01964 
01965 
01966 
01967       // generic x -= mat_expr1 + mat_expr2:
            // Executor for subtracting the sum of two operands from a dense matrix in
            // place, i.e. x = x - (e1 + e2).
            //
            // The struct offers one fully generic apply() plus a set of more specific
            // overloads. The specific overloads cover every combination in which each
            // operand is a plain matrix, a matrix times a scalar, or a matrix divided by
            // a scalar, and perform the whole update with a single ambm_m() call.
            //
            // In those ambm_m() calls each operand is passed as the group
            // (matrix, scalar, 1, bool, bool). A plain matrix uses T(1) as its scalar;
            // the first bool is true exactly for the "/ scalar" operands; the second
            // bool is true for BOTH operands everywhere in this struct, because
            // subtracting a sum means subtracting each of its terms.
            // NOTE(review): flag meanings are read off the call pattern in this file;
            // confirm against the declaration of ambm_m().
01968       template <typename T, typename F, typename LHS, typename RHS>
01969       struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_add> >
01970       {
01971         // generic x -= mat_expr1 + mat_expr2:
              // Fallback for operand types not matched by the specific overloads below.
              // If lhs aliases either operand (as reported by op_aliasing(), defined
              // elsewhere), the sum e1 + e2 is first built in a temporary (note the
              // in-place ADD into temp) and the temporary is then subtracted from lhs.
              // Otherwise both operands are subtracted from lhs one after the other,
              // which avoids the temporary.
              // The nested executors are selected with the struct-level LHS/RHS types.
01972         template <typename LHS1, typename RHS1>
01973         static void apply(matrix_base<T, F> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy)
01974         {
01975           bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
01976           bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
01977 
01978           if (op_aliasing_lhs || op_aliasing_rhs)
01979           {
01980             matrix_base<T, F> temp(proxy.lhs());
01981             op_executor<matrix_base<T, F>, op_inplace_add, RHS>::apply(temp, proxy.rhs());
01982             lhs -= temp;
01983           }
01984           else
01985           {
01986             op_executor<matrix_base<T, F>, op_inplace_sub, LHS>::apply(lhs, proxy.lhs());
01987             op_executor<matrix_base<T, F>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs());
01988           }
01989         }
01990 
01991         // x -= y + z
01992         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_add> const & proxy)
01993         {
01994           viennacl::linalg::ambm_m(lhs,
01995                                    proxy.lhs(), T(1), 1, false, true,
01996                                    proxy.rhs(), T(1), 1, false, true);
01997         }
01998 
01999         // x -= alpha * y + z
02000         template <typename ScalarType>
02001         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
02002                                                                   const matrix_base<T, F>,
02003                                                                   op_add> const & proxy)
02004         {
02005           viennacl::linalg::ambm_m(lhs,
02006                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
02007                                    proxy.rhs(), T(1), 1, false, true);
02008         }
02009 
02010         // x -= y / alpha + z
02011         template <typename ScalarType>
02012         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
02013                                                                   const matrix_base<T, F>,
02014                                                                   op_add> const & proxy)
02015         {
02016           viennacl::linalg::ambm_m(lhs,
02017                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
02018                                    proxy.rhs(), T(1), 1, false, true);
02019         }
02020 
02021         // x -= y + beta * z
02022         template <typename ScalarType>
02023         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
02024                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
02025                                                                   op_add> const & proxy)
02026         {
02027           viennacl::linalg::ambm_m(lhs,
02028                                    proxy.lhs(), T(1), 1, false, true,
02029                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
02030         }
02031 
02032         // x -= y + z / beta
02033         template <typename ScalarType>
02034         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
02035                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
02036                                                                   op_add> const & proxy)
02037         {
02038           viennacl::linalg::ambm_m(lhs,
02039                                    proxy.lhs(), T(1), 1, false, true,
02040                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
02041         }
02042 
02043         // x -= alpha * y + beta * z
02044         template <typename ScalarType1, typename ScalarType2>
02045         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
02046                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
02047                                                                   op_add> const & proxy)
02048         {
02049           viennacl::linalg::ambm_m(lhs,
02050                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
02051                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
02052         }
02053 
02054         // x -= alpha * y + z / beta
02055         template <typename ScalarType1, typename ScalarType2>
02056         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
02057                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
02058                                                                   op_add> const & proxy)
02059         {
02060           viennacl::linalg::ambm_m(lhs,
02061                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
02062                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
02063         }
02064 
02065         // x -= y / alpha + beta * z
02066         template <typename ScalarType1, typename ScalarType2>
02067         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
02068                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
02069                                                                   op_add> const & proxy)
02070         {
02071           viennacl::linalg::ambm_m(lhs,
02072                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
02073                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
02074         }
02075 
02076         // x -= y / alpha + z / beta
02077         template <typename ScalarType1, typename ScalarType2>
02078         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
02079                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
02080                                                                   op_add> const & proxy)
02081         {
02082           viennacl::linalg::ambm_m(lhs,
02083                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
02084                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
02085         }
02086       };
02087 
02088 
02089 
02091 
02092 
02093 
02094       // generic x = mat_expr1 - mat_expr2:
            // Executor used when a difference (op_sub) of two operands is assigned to a
            // matrix_base<T, F>. The generic member template accepts arbitrary operand types;
            // the more specific overloads below match plain or scalar-scaled matrix_base
            // operands and forward them to a single viennacl::linalg::ambm() call.
            // NOTE(review): judging from how the arguments vary across these overloads, the two
            // booleans after each scalar appear to mean (use reciprocal, flip sign) and the
            // literal 1 is presumably a scalar count -- confirm against the declaration of ambm().
02095       template <typename T, typename F, typename LHS, typename RHS>
02096       struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const LHS, const RHS, op_sub> >
02097       {
02098         // generic x = mat_expr1 - mat_expr2:
              // Fallback for operand types not matched by one of the more specific overloads below.
              // Note that the nested executors are selected with the struct-level LHS/RHS,
              // not with this member template's LHS1/RHS1.
02099         template <typename LHS1, typename RHS1>
02100         static void apply(matrix_base<T, F> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy)
02101         {
                // presumably op_aliasing() reports whether lhs is referenced by the given operand -- TODO confirm
02102           bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
02103           bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
02104 
02105           if (op_aliasing_lhs || op_aliasing_rhs)
02106           {
                  // Build the result in a temporary so that lhs is only overwritten after
                  // both operands have been consumed.
02107             matrix_base<T, F> temp(proxy.lhs());
02108             op_executor<matrix_base<T, F>, op_inplace_sub, RHS>::apply(temp, proxy.rhs());
02109             lhs = temp;
02110           }
02111           else
02112           {
                  // No aliasing flagged: lhs = first operand, then lhs -= second operand, directly in lhs.
02113             op_executor<matrix_base<T, F>, op_assign, LHS>::apply(lhs, proxy.lhs());
02114             op_executor<matrix_base<T, F>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs());
02115           }
02116         }
02117 
02118         // x = y - z
02119         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_sub> const & proxy)
02120         {
02121           viennacl::linalg::ambm(lhs,
02122                                  proxy.lhs(), T(1), 1, false, false,
02123                                  proxy.rhs(), T(1), 1, false, true);
02124         }
02125 
02126         // x = alpha * y - z
02127         template <typename ScalarType>
02128         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
02129                                                                   const matrix_base<T, F>,
02130                                                                   op_sub> const & proxy)
02131         {
02132           viennacl::linalg::ambm(lhs,
02133                                  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
02134                                  proxy.rhs(), T(1), 1, false, true);
02135         }
02136 
02137         // x = y / alpha - z
02138         template <typename ScalarType>
02139         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
02140                                                                   const matrix_base<T, F>,
02141                                                                   op_sub> const & proxy)
02142         {
02143           viennacl::linalg::ambm(lhs,
02144                                  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
02145                                  proxy.rhs(), T(1), 1, false, true);
02146         }
02147 
02148         // x = y - beta * z
02149         template <typename ScalarType>
02150         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
02151                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
02152                                                                   op_sub> const & proxy)
02153         {
02154           viennacl::linalg::ambm(lhs,
02155                                  proxy.lhs(), T(1), 1, false, false,
02156                                  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
02157         }
02158 
02159         // x = y - z / beta
02160         template <typename ScalarType>
02161         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
02162                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
02163                                                                   op_sub> const & proxy)
02164         {
02165           viennacl::linalg::ambm(lhs,
02166                                  proxy.lhs(), T(1), 1, false, false,
02167                                  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
02168         }
02169 
02170         // x = alpha * y - beta * z
02171         template <typename ScalarType1, typename ScalarType2>
02172         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
02173                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
02174                                                                   op_sub> const & proxy)
02175         {
02176           viennacl::linalg::ambm(lhs,
02177                                  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
02178                                  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
02179         }
02180 
02181         // x = alpha * y - z / beta
02182         template <typename ScalarType1, typename ScalarType2>
02183         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
02184                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
02185                                                                   op_sub> const & proxy)
02186         {
02187           viennacl::linalg::ambm(lhs,
02188                                  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
02189                                  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
02190         }
02191 
02192         // x = y / alpha - beta * z
02193         template <typename ScalarType1, typename ScalarType2>
02194         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
02195                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
02196                                                                   op_sub> const & proxy)
02197         {
02198           viennacl::linalg::ambm(lhs,
02199                                  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
02200                                  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
02201         }
02202 
02203         // x = y / alpha - z / beta
02204         template <typename ScalarType1, typename ScalarType2>
02205         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
02206                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
02207                                                                   op_sub> const & proxy)
02208         {
02209           viennacl::linalg::ambm(lhs,
02210                                  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
02211                                  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
02212         }
02213       };
02214 
02215 
02216       // generic x += mat_expr1 - mat_expr2:
            // Executor used when a difference (op_sub) of two operands is added in place to a
            // matrix_base<T, F>. The generic member template accepts arbitrary operand types;
            // the more specific overloads below match plain or scalar-scaled matrix_base
            // operands and forward them to a single viennacl::linalg::ambm_m() call.
            // NOTE(review): judging from how the arguments vary across these overloads, the two
            // booleans after each scalar appear to mean (use reciprocal, flip sign) and the
            // literal 1 is presumably a scalar count -- confirm against the declaration of ambm_m().
02217       template <typename T, typename F, typename LHS, typename RHS>
02218       struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const LHS, const RHS, op_sub> >
02219       {
02220         // generic x += mat_expr1 - mat_expr2:
              // Fallback for operand types not matched by one of the more specific overloads below.
              // Note that the nested executors are selected with the struct-level LHS/RHS,
              // not with this member template's LHS1/RHS1.
02221         template <typename LHS1, typename RHS1>
02222         static void apply(matrix_base<T, F> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy)
02223         {
                // presumably op_aliasing() reports whether lhs is referenced by the given operand -- TODO confirm
02224           bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
02225           bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
02226 
02227           if (op_aliasing_lhs || op_aliasing_rhs)
02228           {
                  // Evaluate the whole difference into a temporary first, then accumulate it into lhs.
02229             matrix_base<T, F> temp(proxy.lhs());
02230             op_executor<matrix_base<T, F>, op_inplace_sub, RHS>::apply(temp, proxy.rhs());
02231             lhs += temp;
02232           }
02233           else
02234           {
                  // No aliasing flagged: lhs += first operand, then lhs -= second operand, directly in lhs.
02235             op_executor<matrix_base<T, F>, op_inplace_add, LHS>::apply(lhs, proxy.lhs());
02236             op_executor<matrix_base<T, F>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs());
02237           }
02238         }
02239 
02240         // x += y - z
02241         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_sub> const & proxy)
02242         {
02243           viennacl::linalg::ambm_m(lhs,
02244                                    proxy.lhs(), T(1), 1, false, false,
02245                                    proxy.rhs(), T(1), 1, false, true);
02246         }
02247 
02248         // x += alpha * y - z
02249         template <typename ScalarType>
02250         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
02251                                                                   const matrix_base<T, F>,
02252                                                                   op_sub> const & proxy)
02253         {
02254           viennacl::linalg::ambm_m(lhs,
02255                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
02256                                    proxy.rhs(), T(1), 1, false, true);
02257         }
02258 
02259         // x += y / alpha - z
02260         template <typename ScalarType>
02261         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
02262                                                                   const matrix_base<T, F>,
02263                                                                   op_sub> const & proxy)
02264         {
02265           viennacl::linalg::ambm_m(lhs,
02266                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
02267                                    proxy.rhs(), T(1), 1, false, true);
02268         }
02269 
02270         // x += y - beta * z
02271         template <typename ScalarType>
02272         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
02273                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
02274                                                                   op_sub> const & proxy)
02275         {
02276           viennacl::linalg::ambm_m(lhs,
02277                                    proxy.lhs(), T(1), 1, false, false,
02278                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
02279         }
02280 
02281         // x += y - z / beta
02282         template <typename ScalarType>
02283         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
02284                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
02285                                                                   op_sub> const & proxy)
02286         {
02287           viennacl::linalg::ambm_m(lhs,
02288                                    proxy.lhs(), T(1), 1, false, false,
02289                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
02290         }
02291 
02292         // x += alpha * y - beta * z
02293         template <typename ScalarType1, typename ScalarType2>
02294         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
02295                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
02296                                                                   op_sub> const & proxy)
02297         {
02298           viennacl::linalg::ambm_m(lhs,
02299                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
02300                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
02301         }
02302 
02303         // x += alpha * y - z / beta
02304         template <typename ScalarType1, typename ScalarType2>
02305         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
02306                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
02307                                                                   op_sub> const & proxy)
02308         {
02309           viennacl::linalg::ambm_m(lhs,
02310                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
02311                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
02312         }
02313 
02314         // x += y / alpha - beta * z
02315         template <typename ScalarType1, typename ScalarType2>
02316         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
02317                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
02318                                                                   op_sub> const & proxy)
02319         {
02320           viennacl::linalg::ambm_m(lhs,
02321                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
02322                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
02323         }
02324 
02325         // x += y / alpha - z / beta
02326         template <typename ScalarType1, typename ScalarType2>
02327         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
02328                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
02329                                                                   op_sub> const & proxy)
02330         {
02331           viennacl::linalg::ambm_m(lhs,
02332                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
02333                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
02334         }
02335       };
02336 
02337 
02338 
02339       // generic x -= mat_expr1 - mat_expr2:
            // Executor used when a difference (op_sub) of two operands is subtracted in place from
            // a matrix_base<T, F>. Since x -= (a - b) equals x - a + b, the first operand is the
            // one that gets subtracted and the second one is added back.
            // The generic member template accepts arbitrary operand types; the more specific
            // overloads below match plain or scalar-scaled matrix_base operands and forward
            // them to a single viennacl::linalg::ambm_m() call.
            // NOTE(review): judging from how the arguments vary across these overloads, the two
            // booleans after each scalar appear to mean (use reciprocal, flip sign) and the
            // literal 1 is presumably a scalar count -- confirm against the declaration of ambm_m().
02340       template <typename T, typename F, typename LHS, typename RHS>
02341       struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_sub> >
02342       {
02343         // generic x -= mat_expr1 - mat_expr2:
              // Fallback for operand types not matched by one of the more specific overloads below.
              // Note that the nested executors are selected with the struct-level LHS/RHS,
              // not with this member template's LHS1/RHS1.
02344         template <typename LHS1, typename RHS1>
02345         static void apply(matrix_base<T, F> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy)
02346         {
                // presumably op_aliasing() reports whether lhs is referenced by the given operand -- TODO confirm
02347           bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
02348           bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
02349 
02350           if (op_aliasing_lhs || op_aliasing_rhs)
02351           {
                  // Evaluate the whole difference into a temporary first, then subtract it from lhs.
02352             matrix_base<T, F> temp(proxy.lhs());
02353             op_executor<matrix_base<T, F>, op_inplace_sub, RHS>::apply(temp, proxy.rhs());
02354             lhs -= temp;
02355           }
02356           else
02357           {
                  // No aliasing flagged: lhs -= first operand, then lhs += second operand (sign of the
                  // second operand is inverted because the whole difference is being subtracted).
02358             op_executor<matrix_base<T, F>, op_inplace_sub, LHS>::apply(lhs, proxy.lhs());
02359             op_executor<matrix_base<T, F>, op_inplace_add, RHS>::apply(lhs, proxy.rhs());
02360           }
02361         }
02362 
02363         // x -= y - z
02364         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_sub> const & proxy)
02365         {
02366           viennacl::linalg::ambm_m(lhs,
02367                                    proxy.lhs(), T(1), 1, false, true,
02368                                    proxy.rhs(), T(1), 1, false, false);
02369         }
02370 
02371         // x -= alpha * y - z
02372         template <typename ScalarType>
02373         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
02374                                                                   const matrix_base<T, F>,
02375                                                                   op_sub> const & proxy)
02376         {
02377           viennacl::linalg::ambm_m(lhs,
02378                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
02379                                    proxy.rhs(), T(1), 1, false, false);
02380         }
02381 
02382         // x -= y / alpha - z
02383         template <typename ScalarType>
02384         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
02385                                                                   const matrix_base<T, F>,
02386                                                                   op_sub> const & proxy)
02387         {
02388           viennacl::linalg::ambm_m(lhs,
02389                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
02390                                    proxy.rhs(), T(1), 1, false, false);
02391         }
02392 
02393         // x -= y - beta * z
02394         template <typename ScalarType>
02395         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
02396                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
02397                                                                   op_sub> const & proxy)
02398         {
02399           viennacl::linalg::ambm_m(lhs,
02400                                    proxy.lhs(), T(1), 1, false, true,
02401                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
02402         }
02403 
02404         // x -= y - z / beta
02405         template <typename ScalarType>
02406         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
02407                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
02408                                                                   op_sub> const & proxy)
02409         {
02410           viennacl::linalg::ambm_m(lhs,
02411                                    proxy.lhs(), T(1), 1, false, true,
02412                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
02413         }
02414 
02415         // x -= alpha * y - beta * z
02416         template <typename ScalarType1, typename ScalarType2>
02417         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
02418                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
02419                                                                   op_sub> const & proxy)
02420         {
02421           viennacl::linalg::ambm_m(lhs,
02422                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
02423                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
02424         }
02425 
02426         // x -= alpha * y - z / beta
02427         template <typename ScalarType1, typename ScalarType2>
02428         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
02429                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
02430                                                                   op_sub> const & proxy)
02431         {
02432           viennacl::linalg::ambm_m(lhs,
02433                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
02434                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
02435         }
02436 
02437         // x -= y / alpha - beta * z
02438         template <typename ScalarType1, typename ScalarType2>
02439         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
02440                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
02441                                                                   op_sub> const & proxy)
02442         {
02443           viennacl::linalg::ambm_m(lhs,
02444                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
02445                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
02446         }
02447 
02448         // x -= y / alpha - z / beta
02449         template <typename ScalarType1, typename ScalarType2>
02450         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
02451                                                                   const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
02452                                                                   op_sub> const & proxy)
02453         {
02454           viennacl::linalg::ambm_m(lhs,
02455                                    proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
02456                                    proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
02457         }
02458       };
02459 
02460 
02462 
02463       template <typename T, typename F, typename LHS>
02464       struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const LHS, const int, op_vector_diag> >
02465       {
              // A = <op_vector_diag expression>: forwards the expression's vector operand
              // (proxy.lhs()), its int operand (proxy.rhs()) and the destination matrix to
              // viennacl::linalg::matrix_diag_from_vector().
              // Only a vector_base<T> operand is accepted here, independent of the struct-level LHS.
              // NOTE(review): the int is presumably the index of the diagonal to fill -- confirm.
02466         static void apply(matrix_base<T, F> & lhs, matrix_expression<const vector_base<T>, const int, op_vector_diag> const & proxy)
02467         {
02468           viennacl::linalg::matrix_diag_from_vector(proxy.lhs(), proxy.rhs(), lhs);
02469         }
02470       };
02471 
02472 
02473       template <typename T, typename LHS>
02474       struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const int, op_matrix_diag> >
02475       {
              // v = <op_matrix_diag expression>: forwards the expression's matrix operand
              // (proxy.lhs()), its int operand (proxy.rhs()) and the destination vector to
              // viennacl::linalg::matrix_diag_to_vector(). F is deduced per call, so any
              // matrix_base<T, F> operand is accepted.
              // NOTE(review): the int is presumably the index of the diagonal to read -- confirm.
02476         template <typename F>
02477         static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T, F>, const int, op_matrix_diag> const & proxy)
02478         {
02479           viennacl::linalg::matrix_diag_to_vector(proxy.lhs(), proxy.rhs(), lhs);
02480         }
02481       };
02482 
02483       template <typename T, typename LHS>
02484       struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const unsigned int, op_row> >
02485       {
              // v = <op_row expression>: forwards the expression's matrix operand (proxy.lhs()),
              // its unsigned int operand (proxy.rhs()) and the destination vector to
              // viennacl::linalg::matrix_row(). F is deduced per call, so any matrix_base<T, F>
              // operand is accepted.
              // NOTE(review): the unsigned int is presumably the row index -- confirm.
02486         template <typename F>
02487         static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T, F>, const unsigned int, op_row> const & proxy)
02488         {
02489           viennacl::linalg::matrix_row(proxy.lhs(), proxy.rhs(), lhs);
02490         }
02491       };
02492 
02493 
02494       template <typename T, typename LHS>
02495       struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const unsigned int, op_column> >
02496       {
              // v = <op_column expression>: forwards the expression's matrix operand (proxy.lhs()),
              // its unsigned int operand (proxy.rhs()) and the destination vector to
              // viennacl::linalg::matrix_column(). F is deduced per call, so any matrix_base<T, F>
              // operand is accepted.
              // NOTE(review): the unsigned int is presumably the column index -- confirm.
02497         template <typename F>
02498         static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T, F>, const unsigned int, op_column> const & proxy)
02499         {
02500           viennacl::linalg::matrix_column(proxy.lhs(), proxy.rhs(), lhs);
02501         }
02502       };
02503 
02504 
02506 
02507       // generic x = mat_expr1 .* mat_expr2:
            // Executor for assigning an element-wise binary operation to a matrix_base<T, F>.
            // OP is a template parameter, so '.*' in the comments below stands for whichever
            // operation op_element_binary<OP> denotes.
            // viennacl::linalg::element_op() is always called with two matrix_base operands:
            // an operand that is itself an expression is first evaluated into a temporary matrix<T, F>.
02508       template <typename T, typename F, typename LHS, typename RHS, typename OP>
02509       struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const LHS, const RHS, op_element_binary<OP> > >
02510       {
02511         // x = y .* z
              // Both operands are plain matrices: the expression is passed through unchanged.
02512         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> > const & proxy)
02513         {
02514           viennacl::linalg::element_op(lhs, proxy);
02515         }
02516 
02517         // x = y .* mat_expr
              // Right operand is an expression: evaluate it into temp, then combine with y.
02518         template <typename LHS2, typename RHS2, typename OP2>
02519         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_expression<const LHS2, const RHS2, OP2>, op_element_binary<OP> > const & proxy)
02520         {
02521           matrix<T, F> temp(proxy.rhs());
02522           viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(proxy.lhs(), temp));
02523         }
02524 
02525         // x = mat_expr .* z
              // Left operand is an expression: evaluate it into temp, then combine with z.
02526         template <typename LHS1, typename RHS1, typename OP1>
02527         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, const matrix_base<T, F>, op_element_binary<OP> > const & proxy)
02528         {
02529           matrix<T, F> temp(proxy.lhs());
02530           viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(temp, proxy.rhs()));
02531         }
02532 
02533         // x = mat_expr .* mat_expr
              // Both operands are expressions: evaluate each into its own temporary first.
02534         template <typename LHS1, typename RHS1, typename OP1,
02535                   typename LHS2, typename RHS2, typename OP2>
02536         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
02537                                                                   const matrix_expression<const LHS2, const RHS2, OP2>,
02538                                                                   op_element_binary<OP> > const & proxy)
02539         {
02540           matrix<T, F> temp1(proxy.lhs());
02541           matrix<T, F> temp2(proxy.rhs());
02542           viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(temp1, temp2));
02543         }
02544       };
02545 
02546       // generic x += mat_expr .* mat_expr:
            // Executor for adding an element-wise binary operation in place to a matrix_base<T, F>.
            // OP is a template parameter, so '.*' in the comments below stands for whichever
            // operation op_element_binary<OP> denotes.
            // Every overload first materializes the element-wise result in a temporary matrix<T, F>
            // and only then accumulates it with `lhs += ...`; lhs is never handed to element_op().
02547       template <typename T, typename F, typename LHS, typename RHS, typename OP>
02548       struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const LHS, const RHS, op_element_binary<OP> > >
02549       {
02550         // x += y .* z
              // Constructing temp from the expression evaluates y .* z.
02551         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> > const & proxy)
02552         {
02553           viennacl::matrix<T, F> temp(proxy);
02554           lhs += temp;
02555         }
02556 
02557         // x += y .* mat_expr
              // temp holds the evaluated right operand; temp2 (same dimensions as temp) receives y .* temp.
02558         template <typename LHS2, typename RHS2, typename OP2>
02559         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_expression<const LHS2, const RHS2, OP2>, op_element_binary<OP> > const & proxy)
02560         {
02561           matrix<T, F> temp(proxy.rhs());
02562           matrix<T, F> temp2(temp.size1(), temp.size2());
02563           viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(proxy.lhs(), temp));
02564           lhs += temp2;
02565         }
02566 
02567         // x += mat_expr .* z
              // temp holds the evaluated left operand; temp2 (same dimensions as temp) receives temp .* z.
02568         template <typename LHS1, typename RHS1, typename OP1>
02569         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, const matrix_base<T, F>, op_element_binary<OP> > const & proxy)
02570         {
02571           matrix<T, F> temp(proxy.lhs());
02572           matrix<T, F> temp2(temp.size1(), temp.size2());
02573           viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(temp, proxy.rhs()));
02574           lhs += temp2;
02575         }
02576 
02577         // x += mat_expr .* mat_expr
              // temp1/temp2 hold the evaluated operands; temp3 (sized like temp1) receives temp1 .* temp2.
02578         template <typename LHS1, typename RHS1, typename OP1,
02579                   typename LHS2, typename RHS2, typename OP2>
02580         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
02581                                                                   const matrix_expression<const LHS2, const RHS2, OP2>,
02582                                                                   op_element_binary<OP> > const & proxy)
02583         {
02584           matrix<T, F> temp1(proxy.lhs());
02585           matrix<T, F> temp2(proxy.rhs());
02586           matrix<T, F> temp3(temp1.size1(), temp1.size2());
02587           viennacl::linalg::element_op(temp3, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(temp1, temp2));
02588           lhs += temp3;
02589         }
02590       };
02591 
02592       // generic x -= mat_expr1 .* mat_expr2:
            // Executor for subtracting an element-wise binary operation in place from a matrix_base<T, F>.
            // OP is a template parameter, so '.*' in the comments below stands for whichever
            // operation op_element_binary<OP> denotes.
            // Every overload first materializes the element-wise result in a temporary matrix<T, F>
            // and only then applies it with `lhs -= ...`; lhs is never handed to element_op().
02593       template <typename T, typename F, typename LHS, typename RHS, typename OP>
02594       struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_element_binary<OP> > >
02595       {
02596 
02597         // x -= y .* z
              // Constructing temp from the expression evaluates y .* z.
02598         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> > const & proxy)
02599         {
02600           viennacl::matrix<T, F> temp(proxy);
02601           lhs -= temp;
02602         }
02603 
02604         // x -= y .* mat_expr
              // temp holds the evaluated right operand; temp2 (same dimensions as temp) receives y .* temp.
02605         template <typename LHS2, typename RHS2, typename OP2>
02606         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_expression<const LHS2, const RHS2, OP2>, op_element_binary<OP> > const & proxy)
02607         {
02608           matrix<T, F> temp(proxy.rhs());
02609           matrix<T, F> temp2(temp.size1(), temp.size2());
02610           viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(proxy.lhs(), temp));
02611           lhs -= temp2;
02612         }
02613 
02614         // x -= mat_expr .* z
              // temp holds the evaluated left operand; temp2 (same dimensions as temp) receives temp .* z.
02615         template <typename LHS1, typename RHS1, typename OP1>
02616         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, const matrix_base<T, F>, op_element_binary<OP> > const & proxy)
02617         {
02618           matrix<T, F> temp(proxy.lhs());
02619           matrix<T, F> temp2(temp.size1(), temp.size2());
02620           viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(temp, proxy.rhs()));
02621           lhs -= temp2;
02622         }
02623 
02624         // x -= mat_expr .* mat_expr
              // temp1/temp2 hold the evaluated operands; temp3 (sized like temp1) receives temp1 .* temp2.
02625         template <typename LHS1, typename RHS1, typename OP1,
02626                   typename LHS2, typename RHS2, typename OP2>
02627         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
02628                                                                      const matrix_expression<const LHS2, const RHS2, OP2>,
02629                                                                      op_element_binary<OP> > const & proxy)
02630         {
02631           matrix<T, F> temp1(proxy.lhs());
02632           matrix<T, F> temp2(proxy.rhs());
02633           matrix<T, F> temp3(temp1.size1(), temp1.size2());
02634           viennacl::linalg::element_op(temp3, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(temp1, temp2));
02635           lhs -= temp3;
02636         }
02637       };
02638 
02640 
02641       template <typename T, typename F, typename LHS, typename RHS, typename OP>
02642       struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const LHS, const RHS, op_element_unary<OP> > >
02643       {
              // Executor for assigning an element-wise unary operation op_element_unary<OP> to a
              // matrix_base<T, F>. The unary expression type still has two operand slots; both
              // overloads below require the two slots to have the same type.

02644         // x = OP(y)
              // Operand is a plain matrix: the expression is passed through unchanged.
02645         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_unary<OP> > const & proxy)
02646         {
02647           viennacl::linalg::element_op(lhs, proxy);
02648         }
02649 
02650         // x = OP(mat_expr)
              // Operand is itself an expression: evaluate proxy.rhs() into temp once and pass
              // temp for both operand slots of the rebuilt unary expression.
02651         template <typename LHS2, typename RHS2, typename OP2>
02652         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>,
02653                                                                      const matrix_expression<const LHS2, const RHS2, OP2>,
02654                                                                      op_element_unary<OP> > const & proxy)
02655         {
02656           matrix<T, F> temp(proxy.rhs());
02657           viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_unary<OP> >(temp, temp));
02658         }
02659       };
02660 
02661       template <typename T, typename F, typename LHS, typename RHS, typename OP>
02662       struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const LHS, const RHS, op_element_unary<OP> > >
02663       {
02664         // x += OP(y)
02665         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_unary<OP> > const & proxy)
02666         {
02667           matrix<T, F> temp(proxy);
02668           lhs += temp;
02669         }
02670 
02671         // x += OP(vec_expr)
02672         template <typename LHS2, typename RHS2, typename OP2>
02673         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>,
02674                                                                   const matrix_expression<const LHS2, const RHS2, OP2>,
02675                                                                   op_element_unary<OP> > const & proxy)
02676         {
02677           matrix<T, F> temp(proxy.rhs());
02678           viennacl::linalg::element_op(temp, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_unary<OP> >(temp, temp)); // inplace operation is safe here
02679           lhs += temp;
02680         }
02681       };
02682 
02683       template <typename T, typename F, typename LHS, typename RHS, typename OP>
02684       struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_element_unary<OP> > >
02685       {
02686         // x -= OP(y)
02687         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_unary<OP> > const & proxy)
02688         {
02689           matrix<T, F> temp(proxy);
02690           lhs -= temp;
02691         }
02692 
02693         // x -= OP(vec_expr)
02694         template <typename LHS2, typename RHS2, typename OP2>
02695         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>,
02696                                                                      const matrix_expression<const LHS2, const RHS2, OP2>,
02697                                                                      op_element_unary<OP> > const & proxy)
02698         {
02699           matrix<T, F> temp(proxy.rhs());
02700           viennacl::linalg::element_op(temp, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_unary<OP> >(temp, temp)); // inplace operation is safe here
02701           lhs -= temp;
02702         }
02703       };
02704 
02705 
02706 
02708 
02709       // C = A * B
02710       template <typename T, typename F, typename F1, typename F2>
02711       struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F2>, op_mat_mat_prod> >
02712       {
02713         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F2>, op_mat_mat_prod> const & rhs)
02714         {
02715           viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
02716         }
02717       };
02718 
02719       // C = A * B^T
02720       template <typename T, typename F, typename F1, typename F2>
02721       struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_base<T, F1>,
02722                                                                          const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
02723                                                                          op_mat_mat_prod> >
02724       {
02725         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F1>,
02726                                                                      const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
02727                                                                      op_mat_mat_prod> const & rhs)
02728         {
02729           viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
02730         }
02731       };
02732 
02733       // C = A^T * B
02734       template <typename T, typename F, typename F1, typename F2>
02735       struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
02736                                                                          const matrix_base<T, F2>,
02737                                                                          op_mat_mat_prod> >
02738       {
02739         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
02740                                                                      const matrix_base<T, F2>,
02741                                                                      op_mat_mat_prod> const & rhs)
02742         {
02743           viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
02744         }
02745       };
02746 
02747       // C = A^T * B^T
02748       template <typename T, typename F, typename F1, typename F2>
02749       struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
02750                                                                          const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
02751                                                                          op_mat_mat_prod> >
02752       {
02753         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
02754                                                                      const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
02755                                                                      op_mat_mat_prod> const & rhs)
02756         {
02757           viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
02758         }
02759       };
02760 
02761 
02762       // C += A * B
02763       template <typename T, typename F, typename F1, typename F2>
02764       struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F2>, op_mat_mat_prod> >
02765       {
02766         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F2>, op_mat_mat_prod> const & rhs)
02767         {
02768           viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
02769         }
02770       };
02771 
02772       // C += A * B^T
02773       template <typename T, typename F, typename F1, typename F2>
02774       struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_base<T, F1>,
02775                                                                               const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
02776                                                                               op_mat_mat_prod> >
02777       {
02778         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F1>,
02779                                                                      const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
02780                                                                      op_mat_mat_prod> const & rhs)
02781         {
02782           viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
02783         }
02784       };
02785 
02786       // C += A^T * B
02787       template <typename T, typename F, typename F1, typename F2>
02788       struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
02789                                                                               const matrix_base<T, F2>,
02790                                                                               op_mat_mat_prod> >
02791       {
02792         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
02793                                                                      const matrix_base<T, F2>,
02794                                                                      op_mat_mat_prod> const & rhs)
02795         {
02796           viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
02797         }
02798       };
02799 
02800       // C += A^T * B^T
02801       template <typename T, typename F, typename F1, typename F2>
02802       struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
02803                                                                               const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
02804                                                                               op_mat_mat_prod> >
02805       {
02806         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
02807                                                                      const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
02808                                                                      op_mat_mat_prod> const & rhs)
02809         {
02810           viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
02811         }
02812       };
02813 
02814 
02815       // C -= A * B
02816       template <typename T, typename F, typename F1, typename F2>
02817       struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F2>, op_mat_mat_prod> >
02818       {
02819         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F2>, op_mat_mat_prod> const & rhs)
02820         {
02821           viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
02822         }
02823       };
02824 
02825       // C -= A * B^T
02826       template <typename T, typename F, typename F1, typename F2>
02827       struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_base<T, F1>,
02828                                                                               const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
02829                                                                               op_mat_mat_prod> >
02830       {
02831         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F1>,
02832                                                                      const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
02833                                                                      op_mat_mat_prod> const & rhs)
02834         {
02835           viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
02836         }
02837       };
02838 
02839       // C -= A^T * B
02840       template <typename T, typename F, typename F1, typename F2>
02841       struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
02842                                                                               const matrix_base<T, F2>,
02843                                                                               op_mat_mat_prod> >
02844       {
02845         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
02846                                                                      const matrix_base<T, F2>,
02847                                                                      op_mat_mat_prod> const & rhs)
02848         {
02849           viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
02850         }
02851       };
02852 
02853       // C -= A^T * B^T
02854       template <typename T, typename F, typename F1, typename F2>
02855       struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
02856                                                                               const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
02857                                                                               op_mat_mat_prod> >
02858       {
02859         static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
02860                                                                      const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
02861                                                                      op_mat_mat_prod> const & rhs)
02862         {
02863           viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
02864         }
02865       };
02866 
02868 
02869       // y = A * x
02870       template <typename T, typename F>
02871       struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_base<T, F>, const vector_base<T>, op_prod> >
02872       {
02873         static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T, F>, const vector_base<T>, op_prod> const & rhs)
02874         {
02875           // check for x = A * x
02876           if (op_aliasing(lhs, rhs.rhs()))
02877           {
02878             vector_base<T> temp(rhs);
02879             lhs = temp;
02880           }
02881           else
02882             viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
02883         }
02884       };
02885 
02886       // y = A^T * x
02887       template <typename T, typename F>
02888       struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_trans>,
02889                                                                       const vector_base<T>,
02890                                                                       op_prod> >
02891       {
02892         static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_trans>,
02893                                                                   const vector_base<T>,
02894                                                                   op_prod> const & rhs)
02895         {
02896           // check for x = A^T * x
02897           if (op_aliasing(lhs, rhs.rhs()))
02898           {
02899             vector_base<T> temp(rhs);
02900             lhs = temp;
02901           }
02902           else
02903             viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
02904         }
02905       };
02906 
02907 
02908       // y += A * x
02909       template <typename T, typename F>
02910       struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const matrix_base<T, F>, const vector_base<T>, op_prod> >
02911       {
02912         static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T, F>, const vector_base<T>, op_prod> const & rhs)
02913         {
02914           vector_base<T> temp(rhs);
02915           lhs += temp;
02916         }
02917       };
02918 
02919       // y += A^T * x
02920       template <typename T, typename F>
02921       struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_trans>,
02922                                                                            const vector_base<T>,
02923                                                                            op_prod> >
02924       {
02925         static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_trans>,
02926                                                                   const vector_base<T>,
02927                                                                   op_prod> const & rhs)
02928         {
02929           vector_base<T> temp(rhs);
02930           lhs += temp;
02931         }
02932       };
02933 
02934 
02935       // y -= A * x
02936       template <typename T, typename F>
02937       struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const matrix_base<T, F>, const vector_base<T>, op_prod> >
02938       {
02939         static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T, F>, const vector_base<T>, op_prod> const & rhs)
02940         {
02941           vector_base<T> temp(rhs);
02942           lhs -= temp;
02943         }
02944       };
02945 
02946       // y -= A^T * x
02947       template <typename T, typename F>
02948       struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_trans>,
02949                                                                            const vector_base<T>,
02950                                                                            op_prod> >
02951       {
02952         static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_trans>,
02953                                                                   const vector_base<T>,
02954                                                                   op_prod> const & rhs)
02955         {
02956           vector_base<T> temp(rhs);
02957           lhs -= temp;
02958         }
02959       };
02960 
02961 
02962 
02964 
02965       // A = v1 * v2^T
02966       template <typename T, typename F>
02967       struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> >
02968       {
02969         static void apply(matrix_base<T, F> & lhs, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs)
02970         {
02971           lhs.clear();
02972           viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, false, rhs.lhs(), rhs.rhs());
02973         }
02974       };
02975 
02976       // A = alpha * v1 * v2^T
02977       template <typename T, typename F, typename ScalarType>
02978       struct op_executor<matrix_base<T, F>, op_assign, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
02979                                                                           const ScalarType,
02980                                                                           op_mult> >
02981       {
02982         static void apply(matrix_base<T, F> & lhs, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
02983                                                                       const ScalarType,
02984                                                                       op_mult> const & rhs)
02985         {
02986           lhs.clear();
02987           viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, false, rhs.lhs().lhs(), rhs.lhs().rhs());
02988         }
02989       };
02990 
02991       // A += v1 * v2^T
02992       template <typename T, typename F>
02993       struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> >
02994       {
02995         static void apply(matrix_base<T, F> & lhs, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs)
02996         {
02997           viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, false, rhs.lhs(), rhs.rhs());
02998         }
02999       };
03000 
03001       // A += alpha * v1 * v2^T
03002       template <typename T, typename F, typename ScalarType>
03003       struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
03004                                                                                const ScalarType,
03005                                                                                op_mult> >
03006       {
03007         static void apply(matrix_base<T, F> & lhs, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
03008                                                                       const ScalarType,
03009                                                                       op_mult> const & rhs)
03010         {
03011           viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, false, rhs.lhs().lhs(), rhs.lhs().rhs());
03012         }
03013       };
03014 
03015       // A -= v1 * v2^T
03016       template <typename T, typename F>
03017       struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> >
03018       {
03019         static void apply(matrix_base<T, F> & lhs, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs)
03020         {
03021           viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, true, rhs.lhs(), rhs.rhs());
03022         }
03023       };
03024 
03025       // A -= alpha * v1 * v2^T
03026       template <typename T, typename F, typename ScalarType>
03027       struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
03028                                                                                const ScalarType,
03029                                                                                op_mult> >
03030       {
03031         static void apply(matrix_base<T, F> & lhs, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
03032                                                                       const ScalarType,
03033                                                                       op_mult> const & rhs)
03034         {
03035           viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, true, rhs.lhs().lhs(), rhs.lhs().rhs());
03036         }
03037       };
03038 
03039 
03040     } // namespace detail
03041 
03042   } // namespace linalg
03043 
03046 } //namespace viennacl
03047 
03048 #endif