ViennaCL - The Vienna Computing Library
1.5.2
|
00001 #ifndef VIENNACL_LINALG_HOST_BASED_MATRIX_OPERATIONS_HPP_ 00002 #define VIENNACL_LINALG_HOST_BASED_MATRIX_OPERATIONS_HPP_ 00003 00004 /* ========================================================================= 00005 Copyright (c) 2010-2014, Institute for Microelectronics, 00006 Institute for Analysis and Scientific Computing, 00007 TU Wien. 00008 Portions of this software are copyright by UChicago Argonne, LLC. 00009 00010 ----------------- 00011 ViennaCL - The Vienna Computing Library 00012 ----------------- 00013 00014 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 00015 00016 (A list of authors and contributors can be found in the PDF manual) 00017 00018 License: MIT (X11), see file LICENSE in the base directory 00019 ============================================================================= */ 00020 00025 #include "viennacl/forwards.h" 00026 #include "viennacl/scalar.hpp" 00027 #include "viennacl/vector.hpp" 00028 #include "viennacl/vector_proxy.hpp" 00029 #include "viennacl/tools/tools.hpp" 00030 #include "viennacl/meta/enable_if.hpp" 00031 #include "viennacl/meta/predicate.hpp" 00032 #include "viennacl/meta/result_of.hpp" 00033 #include "viennacl/traits/size.hpp" 00034 #include "viennacl/traits/start.hpp" 00035 #include "viennacl/traits/handle.hpp" 00036 #include "viennacl/traits/stride.hpp" 00037 #include "viennacl/linalg/detail/op_applier.hpp" 00038 #include "viennacl/linalg/host_based/common.hpp" 00039 00040 namespace viennacl 00041 { 00042 namespace linalg 00043 { 00044 namespace host_based 00045 { 00046 00047 // 00048 // Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here! 00049 // 00050 00051 template <typename NumericT, typename F, typename ScalarType1> 00052 void am(matrix_base<NumericT, F> & mat1, 00053 matrix_base<NumericT, F> const & mat2, ScalarType1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha) 00054 { 00055 typedef NumericT value_type; 00056 00057 value_type * data_A = detail::extract_raw_pointer<value_type>(mat1); 00058 value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2); 00059 00060 value_type data_alpha = alpha; 00061 if (flip_sign_alpha) 00062 data_alpha = -data_alpha; 00063 00064 vcl_size_t A_start1 = viennacl::traits::start1(mat1); 00065 vcl_size_t A_start2 = viennacl::traits::start2(mat1); 00066 vcl_size_t A_inc1 = viennacl::traits::stride1(mat1); 00067 vcl_size_t A_inc2 = viennacl::traits::stride2(mat1); 00068 vcl_size_t A_size1 = viennacl::traits::size1(mat1); 00069 vcl_size_t A_size2 = viennacl::traits::size2(mat1); 00070 vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat1); 00071 vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat1); 00072 00073 vcl_size_t B_start1 = viennacl::traits::start1(mat2); 00074 vcl_size_t B_start2 = viennacl::traits::start2(mat2); 00075 vcl_size_t B_inc1 = viennacl::traits::stride1(mat2); 00076 vcl_size_t B_inc2 = viennacl::traits::stride2(mat2); 00077 vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(mat2); 00078 vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(mat2); 00079 00080 detail::matrix_array_wrapper<value_type, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2); 00081 detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2); 00082 //typedef typename detail::majority_struct_for_orientation<typename M1::orientation_category>::type index_generator_A; 00083 //typedef typename detail::majority_struct_for_orientation<typename M2::orientation_category>::type index_generator_B; 00084 00085 if (detail::is_row_major(typename F::orientation_category())) 00086 { 00087 if (reciprocal_alpha) 00088 { 00089 #ifdef VIENNACL_WITH_OPENMP 00090 #pragma omp parallel for 00091 #endif 00092 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00093 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00094 wrapper_A(row, col) = wrapper_B(row, col) / data_alpha; 00095 } 00096 else 00097 { 00098 #ifdef VIENNACL_WITH_OPENMP 00099 #pragma omp parallel for 00100 #endif 00101 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00102 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00103 wrapper_A(row, col) = wrapper_B(row, col) * data_alpha; 00104 } 00105 } 00106 else 00107 { 00108 if (reciprocal_alpha) 00109 { 00110 #ifdef VIENNACL_WITH_OPENMP 00111 #pragma omp parallel for 00112 #endif 00113 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00114 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00115 wrapper_A(row, col) = wrapper_B(row, col) / data_alpha; 00116 } 00117 else 00118 { 00119 #ifdef VIENNACL_WITH_OPENMP 00120 #pragma omp parallel for 00121 #endif 00122 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00123 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00124 wrapper_A(row, col) = wrapper_B(row, col) * data_alpha; 00125 } 00126 } 00127 } 00128 00129 00130 template <typename NumericT, typename F, 00131 typename ScalarType1, typename ScalarType2> 00132 void ambm(matrix_base<NumericT, F> & mat1, 00133 matrix_base<NumericT, F> const & mat2, ScalarType1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha, 00134 matrix_base<NumericT, F> const & mat3, ScalarType2 const & beta, vcl_size_t /*len_beta*/, bool reciprocal_beta, bool flip_sign_beta) 00135 { 00136 typedef NumericT value_type; 00137 00138 value_type * data_A = detail::extract_raw_pointer<value_type>(mat1); 00139 value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2); 00140 value_type const * data_C = detail::extract_raw_pointer<value_type>(mat3); 00141 00142 value_type data_alpha = alpha; 00143 if (flip_sign_alpha) 00144 data_alpha = -data_alpha; 00145 00146 value_type data_beta = beta; 00147 if (flip_sign_beta) 00148 data_beta = -data_beta; 00149 00150 vcl_size_t A_start1 = viennacl::traits::start1(mat1); 00151 vcl_size_t A_start2 = viennacl::traits::start2(mat1); 00152 vcl_size_t A_inc1 = viennacl::traits::stride1(mat1); 00153 vcl_size_t A_inc2 = viennacl::traits::stride2(mat1); 00154 vcl_size_t A_size1 = viennacl::traits::size1(mat1); 00155 vcl_size_t A_size2 = viennacl::traits::size2(mat1); 00156 vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat1); 00157 vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat1); 00158 00159 vcl_size_t B_start1 = viennacl::traits::start1(mat2); 00160 vcl_size_t B_start2 = viennacl::traits::start2(mat2); 00161 vcl_size_t B_inc1 = viennacl::traits::stride1(mat2); 00162 vcl_size_t B_inc2 = viennacl::traits::stride2(mat2); 00163 vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(mat2); 00164 vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(mat2); 00165 00166 vcl_size_t C_start1 = viennacl::traits::start1(mat3); 00167 vcl_size_t C_start2 = viennacl::traits::start2(mat3); 00168 vcl_size_t C_inc1 = viennacl::traits::stride1(mat3); 00169 vcl_size_t C_inc2 = viennacl::traits::stride2(mat3); 00170 vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(mat3); 00171 vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(mat3); 00172 00173 detail::matrix_array_wrapper<value_type, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2); 00174 detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2); 00175 detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2); 00176 00177 if (detail::is_row_major(typename F::orientation_category())) 00178 { 00179 if (reciprocal_alpha && reciprocal_beta) 00180 { 00181 #ifdef VIENNACL_WITH_OPENMP 00182 #pragma omp parallel for 00183 #endif 00184 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00185 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00186 wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta; 00187 } 00188 else if (reciprocal_alpha && !reciprocal_beta) 00189 { 00190 #ifdef VIENNACL_WITH_OPENMP 00191 #pragma omp parallel for 00192 #endif 00193 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00194 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00195 wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta; 00196 } 00197 else if (!reciprocal_alpha && reciprocal_beta) 00198 { 00199 #ifdef VIENNACL_WITH_OPENMP 00200 #pragma omp parallel for 00201 #endif 00202 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00203 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00204 wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta; 00205 } 00206 else if (!reciprocal_alpha && !reciprocal_beta) 00207 { 00208 #ifdef VIENNACL_WITH_OPENMP 00209 #pragma omp parallel for 00210 #endif 00211 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00212 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00213 wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta; 00214 } 00215 } 00216 else 00217 { 00218 if (reciprocal_alpha && reciprocal_beta) 00219 { 00220 #ifdef VIENNACL_WITH_OPENMP 00221 #pragma omp parallel for 00222 #endif 00223 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00224 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00225 wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta; 00226 } 00227 else if (reciprocal_alpha && !reciprocal_beta) 00228 { 00229 #ifdef VIENNACL_WITH_OPENMP 00230 #pragma omp parallel for 00231 #endif 00232 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00233 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00234 wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta; 00235 } 00236 else if (!reciprocal_alpha && reciprocal_beta) 00237 { 00238 #ifdef VIENNACL_WITH_OPENMP 00239 #pragma omp parallel for 00240 #endif 00241 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00242 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00243 wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta; 00244 } 00245 else if (!reciprocal_alpha && !reciprocal_beta) 00246 { 00247 #ifdef VIENNACL_WITH_OPENMP 00248 #pragma omp parallel for 00249 #endif 00250 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00251 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00252 wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta; 00253 } 00254 } 00255 00256 } 00257 00258 00259 template <typename NumericT, typename F, 00260 typename ScalarType1, typename ScalarType2> 00261 void ambm_m(matrix_base<NumericT, F> & mat1, 00262 matrix_base<NumericT, F> const & mat2, ScalarType1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha, 00263 matrix_base<NumericT, F> const & mat3, ScalarType2 const & beta, vcl_size_t /*len_beta*/, bool reciprocal_beta, bool flip_sign_beta) 00264 { 00265 typedef NumericT value_type; 00266 00267 value_type * data_A = detail::extract_raw_pointer<value_type>(mat1); 00268 value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2); 00269 value_type const * data_C = detail::extract_raw_pointer<value_type>(mat3); 00270 00271 value_type data_alpha = alpha; 00272 if (flip_sign_alpha) 00273 data_alpha = -data_alpha; 00274 00275 value_type data_beta = beta; 00276 if (flip_sign_beta) 00277 data_beta = -data_beta; 00278 00279 vcl_size_t A_start1 = viennacl::traits::start1(mat1); 00280 vcl_size_t A_start2 = viennacl::traits::start2(mat1); 00281 vcl_size_t A_inc1 = viennacl::traits::stride1(mat1); 00282 vcl_size_t A_inc2 = viennacl::traits::stride2(mat1); 00283 vcl_size_t A_size1 = viennacl::traits::size1(mat1); 00284 vcl_size_t A_size2 = viennacl::traits::size2(mat1); 00285 vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat1); 00286 vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat1); 00287 00288 vcl_size_t B_start1 = viennacl::traits::start1(mat2); 00289 vcl_size_t B_start2 = viennacl::traits::start2(mat2); 00290 vcl_size_t B_inc1 = viennacl::traits::stride1(mat2); 00291 vcl_size_t B_inc2 = viennacl::traits::stride2(mat2); 00292 vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(mat2); 00293 vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(mat2); 00294 00295 vcl_size_t C_start1 = viennacl::traits::start1(mat3); 00296 vcl_size_t C_start2 = viennacl::traits::start2(mat3); 00297 vcl_size_t C_inc1 = viennacl::traits::stride1(mat3); 00298 vcl_size_t C_inc2 = viennacl::traits::stride2(mat3); 00299 vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(mat3); 00300 vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(mat3); 00301 00302 detail::matrix_array_wrapper<value_type, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2); 00303 detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2); 00304 detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2); 00305 00306 //typedef typename detail::majority_struct_for_orientation<typename M1::orientation_category>::type index_generator_A; 00307 //typedef typename detail::majority_struct_for_orientation<typename M2::orientation_category>::type index_generator_B; 00308 //typedef typename detail::majority_struct_for_orientation<typename M3::orientation_category>::type index_generator_C; 00309 00310 if (detail::is_row_major(typename F::orientation_category())) 00311 { 00312 if (reciprocal_alpha && reciprocal_beta) 00313 { 00314 #ifdef VIENNACL_WITH_OPENMP 00315 #pragma omp parallel for 00316 #endif 00317 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00318 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00319 wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta; 00320 } 00321 else if (reciprocal_alpha && !reciprocal_beta) 00322 { 00323 #ifdef VIENNACL_WITH_OPENMP 00324 #pragma omp parallel for 00325 #endif 00326 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00327 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00328 wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta; 00329 } 00330 else if (!reciprocal_alpha && reciprocal_beta) 00331 { 00332 #ifdef VIENNACL_WITH_OPENMP 00333 #pragma omp parallel for 00334 #endif 00335 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00336 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00337 wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta; 00338 } 00339 else if (!reciprocal_alpha && !reciprocal_beta) 00340 { 00341 #ifdef VIENNACL_WITH_OPENMP 00342 #pragma omp parallel for 00343 #endif 00344 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00345 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00346 wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta; 00347 } 00348 } 00349 else 00350 { 00351 if (reciprocal_alpha && reciprocal_beta) 00352 { 00353 #ifdef VIENNACL_WITH_OPENMP 00354 #pragma omp parallel for 00355 #endif 00356 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00357 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00358 wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta; 00359 } 00360 else if (reciprocal_alpha && !reciprocal_beta) 00361 { 00362 #ifdef VIENNACL_WITH_OPENMP 00363 #pragma omp parallel for 00364 #endif 00365 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00366 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00367 wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta; 00368 } 00369 else if (!reciprocal_alpha && reciprocal_beta) 00370 { 00371 #ifdef VIENNACL_WITH_OPENMP 00372 #pragma omp parallel for 00373 #endif 00374 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00375 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00376 wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta; 00377 } 00378 else if (!reciprocal_alpha && !reciprocal_beta) 00379 { 00380 #ifdef VIENNACL_WITH_OPENMP 00381 #pragma omp parallel for 00382 #endif 00383 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00384 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00385 wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta; 00386 } 00387 } 00388 00389 } 00390 00391 00392 00393 00394 template <typename NumericT, typename F> 00395 void matrix_assign(matrix_base<NumericT, F> & mat, NumericT s, bool clear = false) 00396 { 00397 typedef NumericT value_type; 00398 00399 value_type * data_A = detail::extract_raw_pointer<value_type>(mat); 00400 value_type alpha = static_cast<value_type>(s); 00401 00402 vcl_size_t A_start1 = viennacl::traits::start1(mat); 00403 vcl_size_t A_start2 = viennacl::traits::start2(mat); 00404 vcl_size_t A_inc1 = viennacl::traits::stride1(mat); 00405 vcl_size_t A_inc2 = viennacl::traits::stride2(mat); 00406 vcl_size_t A_size1 = clear ? viennacl::traits::internal_size1(mat) : viennacl::traits::size1(mat); 00407 vcl_size_t A_size2 = clear ? viennacl::traits::internal_size2(mat) : viennacl::traits::size2(mat); 00408 vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat); 00409 vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat); 00410 00411 detail::matrix_array_wrapper<value_type, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2); 00412 00413 if (detail::is_row_major(typename F::orientation_category())) 00414 { 00415 #ifdef VIENNACL_WITH_OPENMP 00416 #pragma omp parallel for 00417 #endif 00418 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00419 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00420 wrapper_A(row, col) = alpha; 00421 //data_A[index_generator_A::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] 00422 // = data_B[index_generator_B::mem_index(row * B_inc1 + B_start1, col * B_inc2 + B_start2, B_internal_size1, B_internal_size2)] * alpha; 00423 } 00424 else 00425 { 00426 #ifdef VIENNACL_WITH_OPENMP 00427 #pragma omp parallel for 00428 #endif 00429 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00430 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00431 wrapper_A(row, col) = alpha; 00432 //data_A[index_generator_A::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] 00433 // = data_B[index_generator_B::mem_index(row * B_inc1 + B_start1, col * B_inc2 + B_start2, B_internal_size1, B_internal_size2)] * alpha; 00434 } 00435 } 00436 00437 00438 00439 template <typename NumericT, typename F> 00440 void matrix_diagonal_assign(matrix_base<NumericT, F> & mat, NumericT s) 00441 { 00442 typedef NumericT value_type; 00443 00444 value_type * data_A = detail::extract_raw_pointer<value_type>(mat); 00445 value_type alpha = static_cast<value_type>(s); 00446 00447 vcl_size_t A_start1 = viennacl::traits::start1(mat); 00448 vcl_size_t A_start2 = viennacl::traits::start2(mat); 00449 vcl_size_t A_inc1 = viennacl::traits::stride1(mat); 00450 vcl_size_t A_inc2 = viennacl::traits::stride2(mat); 00451 vcl_size_t A_size1 = viennacl::traits::size1(mat); 00452 //vcl_size_t A_size2 = viennacl::traits::size2(mat); 00453 vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat); 00454 vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat); 00455 00456 detail::matrix_array_wrapper<value_type, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2); 00457 00458 #ifdef VIENNACL_WITH_OPENMP 00459 #pragma omp parallel for 00460 #endif 00461 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00462 wrapper_A(row, row) = alpha; 00463 } 00464 00465 template <typename NumericT, typename F> 00466 void matrix_diag_from_vector(const vector_base<NumericT> & vec, int k, matrix_base<NumericT, F> & mat) 00467 { 00468 typedef NumericT value_type; 00469 00470 value_type *data_A = detail::extract_raw_pointer<value_type>(mat); 00471 value_type const *data_vec = detail::extract_raw_pointer<value_type>(vec); 00472 00473 vcl_size_t A_start1 = viennacl::traits::start1(mat); 00474 vcl_size_t A_start2 = viennacl::traits::start2(mat); 00475 vcl_size_t A_inc1 = viennacl::traits::stride1(mat); 00476 vcl_size_t A_inc2 = viennacl::traits::stride2(mat); 00477 //vcl_size_t A_size1 = viennacl::traits::size1(mat); 00478 //vcl_size_t A_size2 = viennacl::traits::size2(mat); 00479 vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat); 00480 vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat); 00481 00482 vcl_size_t v_start = viennacl::traits::start(vec); 00483 vcl_size_t v_inc = viennacl::traits::stride(vec); 00484 vcl_size_t v_size = viennacl::traits::size(vec); 00485 00486 detail::matrix_array_wrapper<value_type, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2); 00487 00488 vcl_size_t row_start = 0; 00489 vcl_size_t col_start = 0; 00490 00491 if (k >= 0) 00492 col_start = static_cast<vcl_size_t>(k); 00493 else 00494 row_start = static_cast<vcl_size_t>(-k); 00495 00496 matrix_assign(mat, NumericT(0)); 00497 00498 for (vcl_size_t i = 0; i < v_size; ++i) 00499 wrapper_A(row_start + i, col_start + i) = data_vec[v_start + i * v_inc]; 00500 00501 } 00502 00503 template <typename NumericT, typename F> 00504 void matrix_diag_to_vector(const matrix_base<NumericT, F> & mat, int k, vector_base<NumericT> & vec) 00505 { 00506 typedef NumericT value_type; 00507 00508 value_type const *data_A = detail::extract_raw_pointer<value_type>(mat); 00509 value_type *data_vec = detail::extract_raw_pointer<value_type>(vec); 00510 00511 vcl_size_t A_start1 = viennacl::traits::start1(mat); 00512 vcl_size_t A_start2 = viennacl::traits::start2(mat); 00513 vcl_size_t A_inc1 = viennacl::traits::stride1(mat); 00514 vcl_size_t A_inc2 = viennacl::traits::stride2(mat); 00515 //vcl_size_t A_size1 = viennacl::traits::size1(mat); 00516 //vcl_size_t A_size2 = viennacl::traits::size2(mat); 00517 vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat); 00518 vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat); 00519 00520 vcl_size_t v_start = viennacl::traits::start(vec); 00521 vcl_size_t v_inc = viennacl::traits::stride(vec); 00522 vcl_size_t v_size = viennacl::traits::size(vec); 00523 00524 detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2); 00525 00526 vcl_size_t row_start = 0; 00527 vcl_size_t col_start = 0; 00528 00529 if (k >= 0) 00530 col_start = static_cast<vcl_size_t>(k); 00531 else 00532 row_start = static_cast<vcl_size_t>(-k); 00533 00534 for (vcl_size_t i = 0; i < v_size; ++i) 00535 data_vec[v_start + i * v_inc] = wrapper_A(row_start + i, col_start + i); 00536 } 00537 00538 template <typename NumericT, typename F> 00539 void matrix_row(const matrix_base<NumericT, F> & mat, unsigned int i, vector_base<NumericT> & vec) 00540 { 00541 typedef NumericT value_type; 00542 00543 value_type const *data_A = detail::extract_raw_pointer<value_type>(mat); 00544 value_type *data_vec = detail::extract_raw_pointer<value_type>(vec); 00545 00546 vcl_size_t A_start1 = viennacl::traits::start1(mat); 00547 vcl_size_t A_start2 = viennacl::traits::start2(mat); 00548 vcl_size_t A_inc1 = viennacl::traits::stride1(mat); 00549 vcl_size_t A_inc2 = viennacl::traits::stride2(mat); 00550 //vcl_size_t A_size1 = viennacl::traits::size1(mat); 00551 //vcl_size_t A_size2 = viennacl::traits::size2(mat); 00552 vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat); 00553 vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat); 00554 00555 vcl_size_t v_start = viennacl::traits::start(vec); 00556 vcl_size_t v_inc = viennacl::traits::stride(vec); 00557 vcl_size_t v_size = viennacl::traits::size(vec); 00558 00559 detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2); 00560 00561 for (vcl_size_t j = 0; j < v_size; ++j) 00562 data_vec[v_start + j * v_inc] = wrapper_A(i, j); 00563 } 00564 00565 template <typename NumericT, typename F> 00566 void matrix_column(const matrix_base<NumericT, F> & mat, unsigned int j, vector_base<NumericT> & vec) 00567 { 00568 typedef NumericT value_type; 00569 00570 value_type const *data_A = detail::extract_raw_pointer<value_type>(mat); 00571 value_type *data_vec = detail::extract_raw_pointer<value_type>(vec); 00572 00573 vcl_size_t A_start1 = viennacl::traits::start1(mat); 00574 vcl_size_t A_start2 = viennacl::traits::start2(mat); 00575 vcl_size_t A_inc1 = viennacl::traits::stride1(mat); 00576 vcl_size_t A_inc2 = viennacl::traits::stride2(mat); 00577 //vcl_size_t A_size1 = viennacl::traits::size1(mat); 00578 //vcl_size_t A_size2 = viennacl::traits::size2(mat); 00579 vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat); 00580 vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat); 00581 00582 vcl_size_t v_start = viennacl::traits::start(vec); 00583 vcl_size_t v_inc = viennacl::traits::stride(vec); 00584 vcl_size_t v_size = viennacl::traits::size(vec); 00585 00586 detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2); 00587 00588 for (vcl_size_t i = 0; i < v_size; ++i) 00589 data_vec[v_start + i * v_inc] = wrapper_A(i, j); 00590 } 00591 00592 // 00594 // 00595 00596 // Binary operations A = B .* C and A = B ./ C 00597 00603 template <typename NumericT, typename F, typename OP> 00604 void element_op(matrix_base<NumericT, F> & A, 00605 matrix_expression<const matrix_base<NumericT, F>, const matrix_base<NumericT, F>, op_element_binary<OP> > const & proxy) 00606 { 00607 typedef NumericT value_type; 00608 typedef viennacl::linalg::detail::op_applier<op_element_binary<OP> > OpFunctor; 00609 00610 value_type * data_A = detail::extract_raw_pointer<value_type>(A); 00611 value_type const * data_B = detail::extract_raw_pointer<value_type>(proxy.lhs()); 00612 value_type const * data_C = detail::extract_raw_pointer<value_type>(proxy.rhs()); 00613 00614 vcl_size_t A_start1 = viennacl::traits::start1(A); 00615 vcl_size_t A_start2 = viennacl::traits::start2(A); 00616 vcl_size_t A_inc1 = viennacl::traits::stride1(A); 00617 vcl_size_t A_inc2 = viennacl::traits::stride2(A); 00618 vcl_size_t A_size1 = viennacl::traits::size1(A); 00619 vcl_size_t A_size2 = viennacl::traits::size2(A); 00620 vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A); 00621 vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A); 00622 00623 vcl_size_t B_start1 = viennacl::traits::start1(proxy.lhs()); 00624 vcl_size_t B_start2 = viennacl::traits::start2(proxy.lhs()); 00625 vcl_size_t B_inc1 = viennacl::traits::stride1(proxy.lhs()); 00626 vcl_size_t B_inc2 = viennacl::traits::stride2(proxy.lhs()); 00627 vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(proxy.lhs()); 00628 vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(proxy.lhs()); 00629 00630 vcl_size_t C_start1 = viennacl::traits::start1(proxy.rhs()); 00631 vcl_size_t C_start2 = viennacl::traits::start2(proxy.rhs()); 00632 vcl_size_t C_inc1 = viennacl::traits::stride1(proxy.rhs()); 00633 vcl_size_t C_inc2 = viennacl::traits::stride2(proxy.rhs()); 00634 vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(proxy.rhs()); 00635 vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(proxy.rhs()); 00636 00637 detail::matrix_array_wrapper<value_type, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2); 00638 detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2); 00639 detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2); 00640 00641 if (detail::is_row_major(typename F::orientation_category())) 00642 { 00643 #ifdef VIENNACL_WITH_OPENMP 00644 #pragma omp parallel for 00645 #endif 00646 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00647 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00648 OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col), wrapper_C(row, col)); 00649 //data_A[index_generator_A::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] 00650 // = data_B[index_generator_B::mem_index(row * B_inc1 + B_start1, col * B_inc2 + B_start2, B_internal_size1, B_internal_size2)] * alpha 00651 // + data_C[index_generator_C::mem_index(row * C_inc1 + C_start1, col * C_inc2 + C_start2, C_internal_size1, C_internal_size2)] * beta; 00652 } 00653 else 00654 { 00655 #ifdef VIENNACL_WITH_OPENMP 00656 #pragma omp parallel for 00657 #endif 00658 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00659 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00660 OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col), wrapper_C(row, col)); 00661 00662 //data_A[index_generator_A::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] 00663 // = data_B[index_generator_B::mem_index(row * B_inc1 + B_start1, col * B_inc2 + B_start2, B_internal_size1, B_internal_size2)] * alpha 00664 // + data_C[index_generator_C::mem_index(row * C_inc1 + C_start1, col * C_inc2 + C_start2, C_internal_size1, C_internal_size2)] * beta; 00665 } 00666 } 00667 00668 // Unary operations 00669 00670 // A = op(B) 00671 template <typename NumericT, typename F, typename OP> 00672 void element_op(matrix_base<NumericT, F> & A, 00673 matrix_expression<const matrix_base<NumericT, F>, const matrix_base<NumericT, F>, op_element_unary<OP> > const & proxy) 00674 { 00675 typedef NumericT value_type; 00676 typedef viennacl::linalg::detail::op_applier<op_element_unary<OP> > OpFunctor; 00677 00678 value_type * data_A = detail::extract_raw_pointer<value_type>(A); 00679 value_type const * data_B = detail::extract_raw_pointer<value_type>(proxy.lhs()); 00680 00681 vcl_size_t A_start1 = viennacl::traits::start1(A); 00682 vcl_size_t A_start2 = viennacl::traits::start2(A); 00683 vcl_size_t A_inc1 = viennacl::traits::stride1(A); 00684 vcl_size_t A_inc2 = viennacl::traits::stride2(A); 00685 vcl_size_t A_size1 = viennacl::traits::size1(A); 00686 vcl_size_t A_size2 = viennacl::traits::size2(A); 00687 vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A); 00688 vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A); 00689 00690 vcl_size_t B_start1 = viennacl::traits::start1(proxy.lhs()); 00691 vcl_size_t B_start2 = viennacl::traits::start2(proxy.lhs()); 00692 vcl_size_t B_inc1 = viennacl::traits::stride1(proxy.lhs()); 00693 vcl_size_t B_inc2 = viennacl::traits::stride2(proxy.lhs()); 00694 vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(proxy.lhs()); 00695 vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(proxy.lhs()); 00696 00697 detail::matrix_array_wrapper<value_type, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2); 00698 detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2); 00699 00700 if (detail::is_row_major(typename F::orientation_category())) 00701 { 00702 #ifdef VIENNACL_WITH_OPENMP 00703 #pragma omp parallel for 00704 #endif 00705 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00706 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00707 OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col)); 00708 } 00709 else 00710 { 00711 #ifdef VIENNACL_WITH_OPENMP 00712 #pragma omp parallel for 00713 #endif 00714 for (long col = 0; col < static_cast<long>(A_size2); ++col) 00715 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00716 OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col)); 00717 } 00718 } 00719 00720 00721 00722 // 00724 // 00725 00726 // A * x 00727 00736 template <typename NumericT, typename F> 00737 void prod_impl(const matrix_base<NumericT, F> & mat, 00738 const vector_base<NumericT> & vec, 00739 vector_base<NumericT> & result) 00740 { 00741 typedef NumericT value_type; 00742 00743 value_type const * data_A = detail::extract_raw_pointer<value_type>(mat); 00744 value_type const * data_x = detail::extract_raw_pointer<value_type>(vec); 00745 value_type * data_result = detail::extract_raw_pointer<value_type>(result); 00746 00747 vcl_size_t A_start1 = viennacl::traits::start1(mat); 00748 vcl_size_t A_start2 = viennacl::traits::start2(mat); 00749 vcl_size_t A_inc1 = viennacl::traits::stride1(mat); 00750 vcl_size_t A_inc2 = viennacl::traits::stride2(mat); 00751 vcl_size_t A_size1 = viennacl::traits::size1(mat); 00752 vcl_size_t A_size2 = viennacl::traits::size2(mat); 00753 vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat); 00754 vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat); 00755 00756 vcl_size_t start1 = viennacl::traits::start(vec); 00757 vcl_size_t inc1 = viennacl::traits::stride(vec); 00758 00759 vcl_size_t start2 = viennacl::traits::start(result); 00760 vcl_size_t inc2 = viennacl::traits::stride(result); 00761 00762 if (detail::is_row_major(typename F::orientation_category())) 00763 { 00764 #ifdef VIENNACL_WITH_OPENMP 00765 #pragma omp parallel for 00766 #endif 00767 for (long row = 0; row < static_cast<long>(A_size1); ++row) 00768 { 00769 value_type temp = 0; 00770 for (vcl_size_t col = 0; col < A_size2; ++col) 00771 temp += data_A[viennacl::row_major::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * data_x[col * inc1 + start1]; 00772 00773 data_result[row * inc2 + start2] = temp; 00774 } 00775 } 00776 else 00777 { 00778 { 00779 value_type temp = data_x[start1]; 00780 for (vcl_size_t row = 0; row < A_size1; ++row) 00781 data_result[row * inc2 + start2] = data_A[viennacl::column_major::mem_index(row * A_inc1 + A_start1, A_start2, A_internal_size1, A_internal_size2)] * temp; 00782 } 00783 for (vcl_size_t col = 1; col < A_size2; ++col) //run through matrix sequentially 00784 { 00785 value_type temp = data_x[col * inc1 + start1]; 00786 for (vcl_size_t row = 0; row < A_size1; ++row) 00787 data_result[row * inc2 + start2] += data_A[viennacl::column_major::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * temp; 00788 } 00789 } 00790 } 00791 00792 00793 // trans(A) * x 00794 00803 template <typename NumericT, typename F> 00804 void prod_impl(const viennacl::matrix_expression< const matrix_base<NumericT, F>, const matrix_base<NumericT, F>, op_trans> & mat_trans, 00805 const vector_base<NumericT> & vec, 00806 vector_base<NumericT> & result) 00807 { 00808 typedef NumericT value_type; 00809 00810 value_type const * data_A = detail::extract_raw_pointer<value_type>(mat_trans.lhs()); 00811 value_type const * data_x = detail::extract_raw_pointer<value_type>(vec); 00812 value_type * data_result = detail::extract_raw_pointer<value_type>(result); 00813 00814 vcl_size_t A_start1 = viennacl::traits::start1(mat_trans.lhs()); 00815 vcl_size_t A_start2 = viennacl::traits::start2(mat_trans.lhs()); 00816 vcl_size_t A_inc1 = viennacl::traits::stride1(mat_trans.lhs()); 00817 vcl_size_t A_inc2 = viennacl::traits::stride2(mat_trans.lhs()); 00818 vcl_size_t A_size1 = viennacl::traits::size1(mat_trans.lhs()); 00819 vcl_size_t A_size2 = viennacl::traits::size2(mat_trans.lhs()); 00820 vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat_trans.lhs()); 00821 vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat_trans.lhs()); 00822 00823 vcl_size_t start1 = viennacl::traits::start(vec); 00824 vcl_size_t inc1 = viennacl::traits::stride(vec); 00825 00826 vcl_size_t start2 = viennacl::traits::start(result); 00827 vcl_size_t inc2 = viennacl::traits::stride(result); 00828 00829 if (detail::is_row_major(typename F::orientation_category())) 00830 { 00831 { 00832 value_type temp = data_x[start1]; 00833 for (vcl_size_t row = 0; row < A_size2; ++row) 00834 data_result[row * inc2 + start2] = data_A[viennacl::row_major::mem_index(A_start1, row * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * temp; 00835 } 00836 00837 for (vcl_size_t col = 1; col < A_size1; ++col) //run through matrix sequentially 00838 { 00839 value_type temp = data_x[col * inc1 + start1]; 00840 for (vcl_size_t row = 0; row < A_size2; ++row) 00841 { 00842 data_result[row * inc2 + start2] += data_A[viennacl::row_major::mem_index(col * A_inc1 + A_start1, row * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * temp; 00843 } 00844 } 00845 } 00846 else 00847 { 00848 #ifdef VIENNACL_WITH_OPENMP 00849 #pragma omp parallel for 00850 #endif 00851 for (long row = 0; row < static_cast<long>(A_size2); ++row) 00852 { 00853 value_type temp = 0; 00854 for (vcl_size_t col = 0; col < A_size1; ++col) 00855 temp += data_A[viennacl::column_major::mem_index(col * A_inc1 + A_start1, row * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * data_x[col * inc1 + start1]; 00856 00857 data_result[row * inc2 + start2] = temp; 00858 } 00859 } 00860 } 00861 00862 00863 // 00865 // 00866 00867 namespace detail 00868 { 00869 template <typename A, typename B, typename C, typename NumericT> 00870 void prod(A & a, B & b, C & c, 00871 vcl_size_t C_size1, vcl_size_t C_size2, vcl_size_t A_size2, 00872 NumericT alpha, NumericT beta) 00873 { 00874 #ifdef VIENNACL_WITH_OPENMP 00875 #pragma omp parallel for 00876 #endif 00877 for (long i=0; i<static_cast<long>(C_size1); ++i) 00878 { 00879 for (vcl_size_t j=0; j<C_size2; ++j) 00880 { 00881 NumericT temp = 0; 00882 for (vcl_size_t k=0; k<A_size2; ++k) 00883 temp += a(i, k) * b(k, j); 00884 00885 temp *= alpha; 00886 if (beta != 0) 00887 temp += beta * c(i,j); 00888 c(i,j) = temp; 00889 } 00890 } 00891 } 00892 00893 } 00894 00900 template <typename NumericT, typename F1, typename F2, typename F3, typename ScalarType > 00901 void prod_impl(const matrix_base<NumericT, F1> & A, 00902 const matrix_base<NumericT, F2> & B, 00903 matrix_base<NumericT, F3> & C, 00904 ScalarType alpha, 00905 ScalarType beta) 00906 { 00907 typedef NumericT value_type; 00908 00909 value_type const * data_A = detail::extract_raw_pointer<value_type>(A); 00910 value_type const * data_B = detail::extract_raw_pointer<value_type>(B); 00911 value_type * data_C = detail::extract_raw_pointer<value_type>(C); 00912 00913 vcl_size_t A_start1 = viennacl::traits::start1(A); 00914 vcl_size_t A_start2 = viennacl::traits::start2(A); 00915 vcl_size_t A_inc1 = viennacl::traits::stride1(A); 00916 vcl_size_t A_inc2 = viennacl::traits::stride2(A); 00917 vcl_size_t A_size2 = viennacl::traits::size2(A); 00918 vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A); 00919 vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A); 00920 00921 vcl_size_t B_start1 = viennacl::traits::start1(B); 00922 vcl_size_t B_start2 = viennacl::traits::start2(B); 00923 vcl_size_t B_inc1 = viennacl::traits::stride1(B); 00924 vcl_size_t B_inc2 = viennacl::traits::stride2(B); 00925 vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(B); 00926 vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(B); 00927 00928 vcl_size_t C_start1 = viennacl::traits::start1(C); 00929 vcl_size_t C_start2 = viennacl::traits::start2(C); 00930 vcl_size_t C_inc1 = viennacl::traits::stride1(C); 00931 vcl_size_t C_inc2 = viennacl::traits::stride2(C); 00932 vcl_size_t C_size1 = viennacl::traits::size1(C); 00933 vcl_size_t C_size2 = viennacl::traits::size2(C); 00934 vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(C); 00935 vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(C); 00936 00937 detail::matrix_array_wrapper<value_type const, typename F1::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2); 00938 detail::matrix_array_wrapper<value_type const, typename F2::orientation_category, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2); 00939 detail::matrix_array_wrapper<value_type, typename F3::orientation_category, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2); 00940 00941 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta)); 00942 } 00943 00944 00945 00951 template <typename NumericT, typename F1, typename F2, typename F3, typename ScalarType > 00952 void prod_impl(const viennacl::matrix_expression< const matrix_base<NumericT, F1>, 00953 const matrix_base<NumericT, F1>, 00954 op_trans> & A, 00955 const matrix_base<NumericT, F2> & B, 00956 matrix_base<NumericT, F3> & C, 00957 ScalarType alpha, 00958 ScalarType beta) 00959 { 00960 typedef NumericT value_type; 00961 00962 value_type const * data_A = detail::extract_raw_pointer<value_type>(A.lhs()); 00963 value_type const * data_B = detail::extract_raw_pointer<value_type>(B); 00964 value_type * data_C = detail::extract_raw_pointer<value_type>(C); 00965 00966 vcl_size_t A_start1 = viennacl::traits::start1(A.lhs()); 00967 vcl_size_t A_start2 = viennacl::traits::start2(A.lhs()); 00968 vcl_size_t A_inc1 = viennacl::traits::stride1(A.lhs()); 00969 vcl_size_t A_inc2 = viennacl::traits::stride2(A.lhs()); 00970 vcl_size_t A_size1 = viennacl::traits::size1(A.lhs()); 00971 vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A.lhs()); 00972 vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A.lhs()); 00973 00974 vcl_size_t B_start1 = viennacl::traits::start1(B); 00975 vcl_size_t B_start2 = viennacl::traits::start2(B); 00976 vcl_size_t B_inc1 = viennacl::traits::stride1(B); 00977 vcl_size_t B_inc2 = viennacl::traits::stride2(B); 00978 vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(B); 00979 vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(B); 00980 00981 vcl_size_t C_start1 = viennacl::traits::start1(C); 00982 vcl_size_t C_start2 = viennacl::traits::start2(C); 00983 vcl_size_t C_inc1 = viennacl::traits::stride1(C); 00984 vcl_size_t C_inc2 = viennacl::traits::stride2(C); 00985 vcl_size_t C_size1 = viennacl::traits::size1(C); 00986 vcl_size_t C_size2 = viennacl::traits::size2(C); 00987 vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(C); 00988 vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(C); 00989 00990 detail::matrix_array_wrapper<value_type const, typename F1::orientation_category, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2); 00991 detail::matrix_array_wrapper<value_type const, typename F2::orientation_category, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2); 00992 detail::matrix_array_wrapper<value_type, typename F3::orientation_category, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2); 00993 00994 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta)); 00995 } 00996 00997 00998 00999 01005 template <typename NumericT, typename F1, typename F2, typename F3, typename ScalarType > 01006 void prod_impl(const matrix_base<NumericT, F1> & A, 01007 const viennacl::matrix_expression< const matrix_base<NumericT, F2>, const matrix_base<NumericT, F2>, op_trans> & B, 01008 matrix_base<NumericT, F3> & C, 01009 ScalarType alpha, 01010 ScalarType beta) 01011 { 01012 typedef NumericT value_type; 01013 01014 value_type const * data_A = detail::extract_raw_pointer<value_type>(A); 01015 value_type const * data_B = detail::extract_raw_pointer<value_type>(B.lhs()); 01016 value_type * data_C = detail::extract_raw_pointer<value_type>(C); 01017 01018 vcl_size_t A_start1 = viennacl::traits::start1(A); 01019 vcl_size_t A_start2 = viennacl::traits::start2(A); 01020 vcl_size_t A_inc1 = viennacl::traits::stride1(A); 01021 vcl_size_t A_inc2 = viennacl::traits::stride2(A); 01022 vcl_size_t A_size2 = viennacl::traits::size2(A); 01023 vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A); 01024 vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A); 01025 01026 vcl_size_t B_start1 = viennacl::traits::start1(B.lhs()); 01027 vcl_size_t B_start2 = viennacl::traits::start2(B.lhs()); 01028 vcl_size_t B_inc1 = viennacl::traits::stride1(B.lhs()); 01029 vcl_size_t B_inc2 = viennacl::traits::stride2(B.lhs()); 01030 vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(B.lhs()); 01031 vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(B.lhs()); 01032 01033 vcl_size_t C_start1 = viennacl::traits::start1(C); 01034 vcl_size_t C_start2 = viennacl::traits::start2(C); 01035 vcl_size_t C_inc1 = viennacl::traits::stride1(C); 01036 vcl_size_t C_inc2 = viennacl::traits::stride2(C); 01037 vcl_size_t C_size1 = viennacl::traits::size1(C); 01038 vcl_size_t C_size2 = viennacl::traits::size2(C); 01039 vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(C); 01040 vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(C); 01041 01042 detail::matrix_array_wrapper<value_type const, typename F1::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2); 01043 detail::matrix_array_wrapper<value_type const, typename F2::orientation_category, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2); 01044 detail::matrix_array_wrapper<value_type, typename F3::orientation_category, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2); 01045 01046 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta)); 01047 } 01048 01049 01050 01056 template <typename NumericT, typename F1, typename F2, typename F3, typename ScalarType > 01057 void prod_impl(const viennacl::matrix_expression< const matrix_base<NumericT, F1>, const matrix_base<NumericT, F1>, op_trans> & A, 01058 const viennacl::matrix_expression< const matrix_base<NumericT, F2>, const matrix_base<NumericT, F2>, op_trans> & B, 01059 matrix_base<NumericT, F3> & C, 01060 ScalarType alpha, 01061 ScalarType beta) 01062 { 01063 typedef NumericT value_type; 01064 01065 value_type const * data_A = detail::extract_raw_pointer<value_type>(A.lhs()); 01066 value_type const * data_B = detail::extract_raw_pointer<value_type>(B.lhs()); 01067 value_type * data_C = detail::extract_raw_pointer<value_type>(C); 01068 01069 vcl_size_t A_start1 = viennacl::traits::start1(A.lhs()); 01070 vcl_size_t A_start2 = viennacl::traits::start2(A.lhs()); 01071 vcl_size_t A_inc1 = viennacl::traits::stride1(A.lhs()); 01072 vcl_size_t A_inc2 = viennacl::traits::stride2(A.lhs()); 01073 vcl_size_t A_size1 = viennacl::traits::size1(A.lhs()); 01074 vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A.lhs()); 01075 vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A.lhs()); 01076 01077 vcl_size_t B_start1 = viennacl::traits::start1(B.lhs()); 01078 vcl_size_t B_start2 = viennacl::traits::start2(B.lhs()); 01079 vcl_size_t B_inc1 = viennacl::traits::stride1(B.lhs()); 01080 vcl_size_t B_inc2 = viennacl::traits::stride2(B.lhs()); 01081 vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(B.lhs()); 01082 vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(B.lhs()); 01083 01084 vcl_size_t C_start1 = viennacl::traits::start1(C); 01085 vcl_size_t C_start2 = viennacl::traits::start2(C); 01086 vcl_size_t C_inc1 = viennacl::traits::stride1(C); 01087 vcl_size_t C_inc2 = viennacl::traits::stride2(C); 01088 vcl_size_t C_size1 = viennacl::traits::size1(C); 01089 vcl_size_t C_size2 = viennacl::traits::size2(C); 01090 vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(C); 01091 vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(C); 01092 01093 detail::matrix_array_wrapper<value_type const, typename F1::orientation_category, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2); 01094 detail::matrix_array_wrapper<value_type const, typename F2::orientation_category, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2); 01095 detail::matrix_array_wrapper<value_type, typename F3::orientation_category, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2); 01096 01097 detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta)); 01098 } 01099 01100 01101 01102 01103 // 01105 // 01106 01107 01119 template <typename NumericT, typename F, typename S1> 01120 void scaled_rank_1_update(matrix_base<NumericT, F> & mat1, 01121 S1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha, 01122 const vector_base<NumericT> & vec1, 01123 const vector_base<NumericT> & vec2) 01124 { 01125 typedef NumericT value_type; 01126 01127 value_type * data_A = detail::extract_raw_pointer<value_type>(mat1); 01128 value_type const * data_v1 = detail::extract_raw_pointer<value_type>(vec1); 01129 value_type const * data_v2 = detail::extract_raw_pointer<value_type>(vec2); 01130 01131 vcl_size_t A_start1 = viennacl::traits::start1(mat1); 01132 vcl_size_t A_start2 = viennacl::traits::start2(mat1); 01133 vcl_size_t A_inc1 = viennacl::traits::stride1(mat1); 01134 vcl_size_t A_inc2 = viennacl::traits::stride2(mat1); 01135 vcl_size_t A_size1 = viennacl::traits::size1(mat1); 01136 vcl_size_t A_size2 = viennacl::traits::size2(mat1); 01137 vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat1); 01138 vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat1); 01139 01140 vcl_size_t start1 = viennacl::traits::start(vec1); 01141 vcl_size_t inc1 = viennacl::traits::stride(vec1); 01142 01143 vcl_size_t start2 = viennacl::traits::start(vec2); 01144 vcl_size_t inc2 = viennacl::traits::stride(vec2); 01145 01146 value_type data_alpha = alpha; 01147 if (flip_sign_alpha) 01148 data_alpha = -data_alpha; 01149 if (reciprocal_alpha) 01150 data_alpha = static_cast<value_type>(1) / data_alpha; 01151 01152 if (detail::is_row_major(typename F::orientation_category())) 01153 { 01154 for (vcl_size_t row = 0; row < A_size1; ++row) 01155 { 01156 value_type value_v1 = data_alpha * data_v1[row * inc1 + start1]; 01157 for (vcl_size_t col = 0; col < A_size2; ++col) 01158 data_A[viennacl::row_major::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] += value_v1 * data_v2[col * inc2 + start2]; 01159 } 01160 } 01161 else 01162 { 01163 for (vcl_size_t col = 0; col < A_size2; ++col) //run through matrix sequentially 01164 { 01165 value_type value_v2 = data_alpha * data_v2[col * inc2 + start2]; 01166 for (vcl_size_t row = 0; row < A_size1; ++row) 01167 data_A[viennacl::column_major::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] += data_v1[row * inc1 + start1] * value_v2; 01168 } 01169 } 01170 } 01171 01172 } // namespace host_based 01173 } //namespace linalg 01174 } //namespace viennacl 01175 01176 01177 #endif