ViennaCL - The Vienna Computing Library  1.6.2
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
matrix_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_HOST_BASED_MATRIX_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_HOST_BASED_MATRIX_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include "viennacl/forwards.h"
26 #include "viennacl/scalar.hpp"
27 #include "viennacl/vector.hpp"
29 #include "viennacl/tools/tools.hpp"
33 #include "viennacl/traits/size.hpp"
39 
40 namespace viennacl
41 {
42 namespace linalg
43 {
44 namespace host_based
45 {
46 
47 //
48 // Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here!
49 //
50 
51 template<typename NumericT,
52  typename SizeT, typename DistanceT>
55 {
56  const NumericT * temp_proxy = detail::extract_raw_pointer<NumericT>(proxy.lhs());
57  NumericT * temp = detail::extract_raw_pointer<NumericT>(temp_trans);
58 
59  vcl_size_t proxy_int_size1=proxy.lhs().internal_size1();
60  vcl_size_t proxy_int_size2=proxy.lhs().internal_size2();
61  vcl_size_t temp_int_size1=temp_trans.internal_size1();
62  vcl_size_t temp_int_size2=temp_trans.internal_size2();
63 
64 #ifdef VIENNACL_WITH_OPENMP
65  #pragma omp parallel for
66 #endif
67  for (long i2 = 0; i2 < static_cast<long>(proxy_int_size1*proxy_int_size2); ++i2)
68  {
69  vcl_size_t row = vcl_size_t(i2) / proxy_int_size2;
70  vcl_size_t col = vcl_size_t(i2) % proxy_int_size2;
71 
72  if (row < proxy.lhs().size1() && col < proxy.lhs().size2())
73  {
74  if (proxy.lhs().row_major())
75  {
76  vcl_size_t pos = row_major::mem_index(proxy.lhs().start1() + proxy.lhs().stride1() * row,
77  proxy.lhs().start2() + proxy.lhs().stride2() * col,
78  proxy_int_size1, proxy_int_size2);
79  vcl_size_t new_pos = row_major::mem_index(temp_trans.start2() + temp_trans.stride2() * col,
80  temp_trans.start1() + temp_trans.stride1() * row, temp_int_size1,
81  temp_int_size2);
82  temp[new_pos] = temp_proxy[pos];
83  }
84  else
85  {
86  vcl_size_t pos = column_major::mem_index(proxy.lhs().start1() + proxy.lhs().stride1() * row,
87  proxy.lhs().start2() + proxy.lhs().stride2() * col, proxy_int_size1,
88  proxy_int_size2);
89  vcl_size_t new_pos = column_major::mem_index(temp_trans.start2() + temp_trans.stride2() * col,
90  temp_trans.start1() + temp_trans.stride1() * row, temp_int_size1,
91  temp_int_size2);
92  temp[new_pos] = temp_proxy[pos];
93  }
94  }
95  }
96 }
97 
98 template<typename NumericT, typename ScalarT1>
100  matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha)
101 {
102  assert(mat1.row_major() == mat2.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));
103 
104  typedef NumericT value_type;
105 
106  value_type * data_A = detail::extract_raw_pointer<value_type>(mat1);
107  value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);
108 
109  value_type data_alpha = alpha;
110  if (flip_sign_alpha)
111  data_alpha = -data_alpha;
112 
113  vcl_size_t A_start1 = viennacl::traits::start1(mat1);
114  vcl_size_t A_start2 = viennacl::traits::start2(mat1);
115  vcl_size_t A_inc1 = viennacl::traits::stride1(mat1);
116  vcl_size_t A_inc2 = viennacl::traits::stride2(mat1);
117  vcl_size_t A_size1 = viennacl::traits::size1(mat1);
118  vcl_size_t A_size2 = viennacl::traits::size2(mat1);
119  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat1);
120  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat1);
121 
122  vcl_size_t B_start1 = viennacl::traits::start1(mat2);
123  vcl_size_t B_start2 = viennacl::traits::start2(mat2);
124  vcl_size_t B_inc1 = viennacl::traits::stride1(mat2);
125  vcl_size_t B_inc2 = viennacl::traits::stride2(mat2);
126  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(mat2);
127  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(mat2);
128 
129  if (mat1.row_major())
130  {
131  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
132  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
133 
134  if (reciprocal_alpha)
135  {
136 #ifdef VIENNACL_WITH_OPENMP
137  #pragma omp parallel for
138 #endif
139  for (long row = 0; row < static_cast<long>(A_size1); ++row)
140  for (vcl_size_t col = 0; col < A_size2; ++col)
141  wrapper_A(row, col) = wrapper_B(row, col) / data_alpha;
142  }
143  else
144  {
145 #ifdef VIENNACL_WITH_OPENMP
146  #pragma omp parallel for
147 #endif
148  for (long row = 0; row < static_cast<long>(A_size1); ++row)
149  for (vcl_size_t col = 0; col < A_size2; ++col)
150  wrapper_A(row, col) = wrapper_B(row, col) * data_alpha;
151  }
152  }
153  else
154  {
155  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
156  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
157 
158  if (reciprocal_alpha)
159  {
160 #ifdef VIENNACL_WITH_OPENMP
161  #pragma omp parallel for
162 #endif
163  for (long col = 0; col < static_cast<long>(A_size2); ++col)
164  for (vcl_size_t row = 0; row < A_size1; ++row)
165  wrapper_A(row, col) = wrapper_B(row, col) / data_alpha;
166  }
167  else
168  {
169 #ifdef VIENNACL_WITH_OPENMP
170  #pragma omp parallel for
171 #endif
172  for (long col = 0; col < static_cast<long>(A_size2); ++col)
173  for (vcl_size_t row = 0; row < A_size1; ++row)
174  wrapper_A(row, col) = wrapper_B(row, col) * data_alpha;
175  }
176  }
177 }
178 
179 
180 template<typename NumericT,
181  typename ScalarT1, typename ScalarT2>
183  matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
184  matrix_base<NumericT> const & mat3, ScalarT2 const & beta, vcl_size_t /*len_beta*/, bool reciprocal_beta, bool flip_sign_beta)
185 {
186  assert(mat1.row_major() == mat2.row_major() && mat1.row_major() == mat3.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));
187 
188  typedef NumericT value_type;
189 
190  value_type * data_A = detail::extract_raw_pointer<value_type>(mat1);
191  value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);
192  value_type const * data_C = detail::extract_raw_pointer<value_type>(mat3);
193 
194  value_type data_alpha = alpha;
195  if (flip_sign_alpha)
196  data_alpha = -data_alpha;
197 
198  value_type data_beta = beta;
199  if (flip_sign_beta)
200  data_beta = -data_beta;
201 
202  vcl_size_t A_start1 = viennacl::traits::start1(mat1);
203  vcl_size_t A_start2 = viennacl::traits::start2(mat1);
204  vcl_size_t A_inc1 = viennacl::traits::stride1(mat1);
205  vcl_size_t A_inc2 = viennacl::traits::stride2(mat1);
206  vcl_size_t A_size1 = viennacl::traits::size1(mat1);
207  vcl_size_t A_size2 = viennacl::traits::size2(mat1);
208  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat1);
209  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat1);
210 
211  vcl_size_t B_start1 = viennacl::traits::start1(mat2);
212  vcl_size_t B_start2 = viennacl::traits::start2(mat2);
213  vcl_size_t B_inc1 = viennacl::traits::stride1(mat2);
214  vcl_size_t B_inc2 = viennacl::traits::stride2(mat2);
215  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(mat2);
216  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(mat2);
217 
218  vcl_size_t C_start1 = viennacl::traits::start1(mat3);
219  vcl_size_t C_start2 = viennacl::traits::start2(mat3);
220  vcl_size_t C_inc1 = viennacl::traits::stride1(mat3);
221  vcl_size_t C_inc2 = viennacl::traits::stride2(mat3);
222  vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(mat3);
223  vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(mat3);
224 
225  if (mat1.row_major())
226  {
227  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
228  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
229  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
230 
231  if (reciprocal_alpha && reciprocal_beta)
232  {
233 #ifdef VIENNACL_WITH_OPENMP
234  #pragma omp parallel for
235 #endif
236  for (long row = 0; row < static_cast<long>(A_size1); ++row)
237  for (vcl_size_t col = 0; col < A_size2; ++col)
238  wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
239  }
240  else if (reciprocal_alpha && !reciprocal_beta)
241  {
242 #ifdef VIENNACL_WITH_OPENMP
243  #pragma omp parallel for
244 #endif
245  for (long row = 0; row < static_cast<long>(A_size1); ++row)
246  for (vcl_size_t col = 0; col < A_size2; ++col)
247  wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
248  }
249  else if (!reciprocal_alpha && reciprocal_beta)
250  {
251 #ifdef VIENNACL_WITH_OPENMP
252  #pragma omp parallel for
253 #endif
254  for (long row = 0; row < static_cast<long>(A_size1); ++row)
255  for (vcl_size_t col = 0; col < A_size2; ++col)
256  wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
257  }
258  else if (!reciprocal_alpha && !reciprocal_beta)
259  {
260 #ifdef VIENNACL_WITH_OPENMP
261  #pragma omp parallel for
262 #endif
263  for (long row = 0; row < static_cast<long>(A_size1); ++row)
264  for (vcl_size_t col = 0; col < A_size2; ++col)
265  wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
266  }
267  }
268  else
269  {
270  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
271  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
272  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
273 
274  if (reciprocal_alpha && reciprocal_beta)
275  {
276 #ifdef VIENNACL_WITH_OPENMP
277  #pragma omp parallel for
278 #endif
279  for (long col = 0; col < static_cast<long>(A_size2); ++col)
280  for (vcl_size_t row = 0; row < A_size1; ++row)
281  wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
282  }
283  else if (reciprocal_alpha && !reciprocal_beta)
284  {
285 #ifdef VIENNACL_WITH_OPENMP
286  #pragma omp parallel for
287 #endif
288  for (long col = 0; col < static_cast<long>(A_size2); ++col)
289  for (vcl_size_t row = 0; row < A_size1; ++row)
290  wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
291  }
292  else if (!reciprocal_alpha && reciprocal_beta)
293  {
294 #ifdef VIENNACL_WITH_OPENMP
295  #pragma omp parallel for
296 #endif
297  for (long col = 0; col < static_cast<long>(A_size2); ++col)
298  for (vcl_size_t row = 0; row < A_size1; ++row)
299  wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
300  }
301  else if (!reciprocal_alpha && !reciprocal_beta)
302  {
303 #ifdef VIENNACL_WITH_OPENMP
304  #pragma omp parallel for
305 #endif
306  for (long col = 0; col < static_cast<long>(A_size2); ++col)
307  for (vcl_size_t row = 0; row < A_size1; ++row)
308  wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
309  }
310  }
311 
312 }
313 
314 
315 template<typename NumericT,
316  typename ScalarT1, typename ScalarT2>
318  matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
319  matrix_base<NumericT> const & mat3, ScalarT2 const & beta, vcl_size_t /*len_beta*/, bool reciprocal_beta, bool flip_sign_beta)
320 {
321  assert(mat1.row_major() == mat2.row_major() && mat1.row_major() == mat3.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));
322 
323  typedef NumericT value_type;
324 
325  value_type * data_A = detail::extract_raw_pointer<value_type>(mat1);
326  value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);
327  value_type const * data_C = detail::extract_raw_pointer<value_type>(mat3);
328 
329  value_type data_alpha = alpha;
330  if (flip_sign_alpha)
331  data_alpha = -data_alpha;
332 
333  value_type data_beta = beta;
334  if (flip_sign_beta)
335  data_beta = -data_beta;
336 
337  vcl_size_t A_start1 = viennacl::traits::start1(mat1);
338  vcl_size_t A_start2 = viennacl::traits::start2(mat1);
339  vcl_size_t A_inc1 = viennacl::traits::stride1(mat1);
340  vcl_size_t A_inc2 = viennacl::traits::stride2(mat1);
341  vcl_size_t A_size1 = viennacl::traits::size1(mat1);
342  vcl_size_t A_size2 = viennacl::traits::size2(mat1);
343  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat1);
344  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat1);
345 
346  vcl_size_t B_start1 = viennacl::traits::start1(mat2);
347  vcl_size_t B_start2 = viennacl::traits::start2(mat2);
348  vcl_size_t B_inc1 = viennacl::traits::stride1(mat2);
349  vcl_size_t B_inc2 = viennacl::traits::stride2(mat2);
350  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(mat2);
351  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(mat2);
352 
353  vcl_size_t C_start1 = viennacl::traits::start1(mat3);
354  vcl_size_t C_start2 = viennacl::traits::start2(mat3);
355  vcl_size_t C_inc1 = viennacl::traits::stride1(mat3);
356  vcl_size_t C_inc2 = viennacl::traits::stride2(mat3);
357  vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(mat3);
358  vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(mat3);
359 
360  if (mat1.row_major())
361  {
362  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
363  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
364  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
365 
366  if (reciprocal_alpha && reciprocal_beta)
367  {
368 #ifdef VIENNACL_WITH_OPENMP
369  #pragma omp parallel for
370 #endif
371  for (long row = 0; row < static_cast<long>(A_size1); ++row)
372  for (vcl_size_t col = 0; col < A_size2; ++col)
373  wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
374  }
375  else if (reciprocal_alpha && !reciprocal_beta)
376  {
377 #ifdef VIENNACL_WITH_OPENMP
378  #pragma omp parallel for
379 #endif
380  for (long row = 0; row < static_cast<long>(A_size1); ++row)
381  for (vcl_size_t col = 0; col < A_size2; ++col)
382  wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
383  }
384  else if (!reciprocal_alpha && reciprocal_beta)
385  {
386 #ifdef VIENNACL_WITH_OPENMP
387  #pragma omp parallel for
388 #endif
389  for (long row = 0; row < static_cast<long>(A_size1); ++row)
390  for (vcl_size_t col = 0; col < A_size2; ++col)
391  wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
392  }
393  else if (!reciprocal_alpha && !reciprocal_beta)
394  {
395 #ifdef VIENNACL_WITH_OPENMP
396  #pragma omp parallel for
397 #endif
398  for (long row = 0; row < static_cast<long>(A_size1); ++row)
399  for (vcl_size_t col = 0; col < A_size2; ++col)
400  wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
401  }
402  }
403  else
404  {
405  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
406  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
407  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
408 
409  if (reciprocal_alpha && reciprocal_beta)
410  {
411 #ifdef VIENNACL_WITH_OPENMP
412  #pragma omp parallel for
413 #endif
414  for (long col = 0; col < static_cast<long>(A_size2); ++col)
415  for (vcl_size_t row = 0; row < A_size1; ++row)
416  wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
417  }
418  else if (reciprocal_alpha && !reciprocal_beta)
419  {
420 #ifdef VIENNACL_WITH_OPENMP
421  #pragma omp parallel for
422 #endif
423  for (long col = 0; col < static_cast<long>(A_size2); ++col)
424  for (vcl_size_t row = 0; row < A_size1; ++row)
425  wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
426  }
427  else if (!reciprocal_alpha && reciprocal_beta)
428  {
429 #ifdef VIENNACL_WITH_OPENMP
430  #pragma omp parallel for
431 #endif
432  for (long col = 0; col < static_cast<long>(A_size2); ++col)
433  for (vcl_size_t row = 0; row < A_size1; ++row)
434  wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
435  }
436  else if (!reciprocal_alpha && !reciprocal_beta)
437  {
438 #ifdef VIENNACL_WITH_OPENMP
439  #pragma omp parallel for
440 #endif
441  for (long col = 0; col < static_cast<long>(A_size2); ++col)
442  for (vcl_size_t row = 0; row < A_size1; ++row)
443  wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
444  }
445  }
446 
447 }
448 
449 
450 
451 
452 template<typename NumericT>
453 void matrix_assign(matrix_base<NumericT> & mat, NumericT s, bool clear = false)
454 {
455  typedef NumericT value_type;
456 
457  value_type * data_A = detail::extract_raw_pointer<value_type>(mat);
458  value_type alpha = static_cast<value_type>(s);
459 
460  vcl_size_t A_start1 = viennacl::traits::start1(mat);
461  vcl_size_t A_start2 = viennacl::traits::start2(mat);
466  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
467  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);
468 
469  if (mat.row_major())
470  {
471  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
472 
473 #ifdef VIENNACL_WITH_OPENMP
474  #pragma omp parallel for
475 #endif
476  for (long row = 0; row < static_cast<long>(A_size1); ++row)
477  for (vcl_size_t col = 0; col < A_size2; ++col)
478  wrapper_A(static_cast<vcl_size_t>(row), col) = alpha;
479  //data_A[index_generator_A::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]
480  // = data_B[index_generator_B::mem_index(row * B_inc1 + B_start1, col * B_inc2 + B_start2, B_internal_size1, B_internal_size2)] * alpha;
481  }
482  else
483  {
484  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
485 
486 #ifdef VIENNACL_WITH_OPENMP
487  #pragma omp parallel for
488 #endif
489  for (long col = 0; col < static_cast<long>(A_size2); ++col)
490  for (vcl_size_t row = 0; row < A_size1; ++row)
491  wrapper_A(row, static_cast<vcl_size_t>(col)) = alpha;
492  //data_A[index_generator_A::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]
493  // = data_B[index_generator_B::mem_index(row * B_inc1 + B_start1, col * B_inc2 + B_start2, B_internal_size1, B_internal_size2)] * alpha;
494  }
495 }
496 
497 
498 
499 template<typename NumericT>
501 {
502  typedef NumericT value_type;
503 
504  value_type * data_A = detail::extract_raw_pointer<value_type>(mat);
505  value_type alpha = static_cast<value_type>(s);
506 
507  vcl_size_t A_start1 = viennacl::traits::start1(mat);
508  vcl_size_t A_start2 = viennacl::traits::start2(mat);
511  vcl_size_t A_size1 = viennacl::traits::size1(mat);
512  //vcl_size_t A_size2 = viennacl::traits::size2(mat);
513  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
514  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);
515 
516  if (mat.row_major())
517  {
518  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
519 
520 #ifdef VIENNACL_WITH_OPENMP
521  #pragma omp parallel for
522 #endif
523  for (long row = 0; row < static_cast<long>(A_size1); ++row)
524  wrapper_A(row, row) = alpha;
525  }
526  else
527  {
528  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
529 
530 #ifdef VIENNACL_WITH_OPENMP
531  #pragma omp parallel for
532 #endif
533  for (long row = 0; row < static_cast<long>(A_size1); ++row)
534  wrapper_A(row, row) = alpha;
535  }
536 }
537 
538 template<typename NumericT>
540 {
541  typedef NumericT value_type;
542 
543  value_type *data_A = detail::extract_raw_pointer<value_type>(mat);
544  value_type const *data_vec = detail::extract_raw_pointer<value_type>(vec);
545 
546  vcl_size_t A_start1 = viennacl::traits::start1(mat);
547  vcl_size_t A_start2 = viennacl::traits::start2(mat);
550  //vcl_size_t A_size1 = viennacl::traits::size1(mat);
551  //vcl_size_t A_size2 = viennacl::traits::size2(mat);
552  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
553  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);
554 
555  vcl_size_t v_start = viennacl::traits::start(vec);
557  vcl_size_t v_size = viennacl::traits::size(vec);
558 
559  vcl_size_t row_start = 0;
560  vcl_size_t col_start = 0;
561 
562  if (k >= 0)
563  col_start = static_cast<vcl_size_t>(k);
564  else
565  row_start = static_cast<vcl_size_t>(-k);
566 
567  matrix_assign(mat, NumericT(0));
568 
569  if (mat.row_major())
570  {
571  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
572 
573  for (vcl_size_t i = 0; i < v_size; ++i)
574  wrapper_A(row_start + i, col_start + i) = data_vec[v_start + i * v_inc];
575  }
576  else
577  {
578  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
579 
580  for (vcl_size_t i = 0; i < v_size; ++i)
581  wrapper_A(row_start + i, col_start + i) = data_vec[v_start + i * v_inc];
582  }
583 }
584 
585 template<typename NumericT>
587 {
588  typedef NumericT value_type;
589 
590  value_type const * data_A = detail::extract_raw_pointer<value_type>(mat);
591  value_type * data_vec = detail::extract_raw_pointer<value_type>(vec);
592 
593  vcl_size_t A_start1 = viennacl::traits::start1(mat);
594  vcl_size_t A_start2 = viennacl::traits::start2(mat);
597  //vcl_size_t A_size1 = viennacl::traits::size1(mat);
598  //vcl_size_t A_size2 = viennacl::traits::size2(mat);
599  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
600  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);
601 
602  vcl_size_t v_start = viennacl::traits::start(vec);
604  vcl_size_t v_size = viennacl::traits::size(vec);
605 
606  vcl_size_t row_start = 0;
607  vcl_size_t col_start = 0;
608 
609  if (k >= 0)
610  col_start = static_cast<vcl_size_t>(k);
611  else
612  row_start = static_cast<vcl_size_t>(-k);
613 
614  if (mat.row_major())
615  {
616  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
617 
618  for (vcl_size_t i = 0; i < v_size; ++i)
619  data_vec[v_start + i * v_inc] = wrapper_A(row_start + i, col_start + i);
620  }
621  else
622  {
623  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
624 
625  for (vcl_size_t i = 0; i < v_size; ++i)
626  data_vec[v_start + i * v_inc] = wrapper_A(row_start + i, col_start + i);
627  }
628 }
629 
630 template<typename NumericT>
631 void matrix_row(const matrix_base<NumericT> & mat, unsigned int i, vector_base<NumericT> & vec)
632 {
633  typedef NumericT value_type;
634 
635  value_type const * data_A = detail::extract_raw_pointer<value_type>(mat);
636  value_type * data_vec = detail::extract_raw_pointer<value_type>(vec);
637 
638  vcl_size_t A_start1 = viennacl::traits::start1(mat);
639  vcl_size_t A_start2 = viennacl::traits::start2(mat);
642  //vcl_size_t A_size1 = viennacl::traits::size1(mat);
643  //vcl_size_t A_size2 = viennacl::traits::size2(mat);
644  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
645  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);
646 
647  vcl_size_t v_start = viennacl::traits::start(vec);
649  vcl_size_t v_size = viennacl::traits::size(vec);
650 
651  if (mat.row_major())
652  {
653  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
654 
655  for (vcl_size_t j = 0; j < v_size; ++j)
656  data_vec[v_start + j * v_inc] = wrapper_A(static_cast<vcl_size_t>(i), j);
657  }
658  else
659  {
660  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
661 
662  for (vcl_size_t j = 0; j < v_size; ++j)
663  data_vec[v_start + j * v_inc] = wrapper_A(static_cast<vcl_size_t>(i), j);
664  }
665 }
666 
667 template<typename NumericT>
668 void matrix_column(const matrix_base<NumericT> & mat, unsigned int j, vector_base<NumericT> & vec)
669 {
670  typedef NumericT value_type;
671 
672  value_type const * data_A = detail::extract_raw_pointer<value_type>(mat);
673  value_type * data_vec = detail::extract_raw_pointer<value_type>(vec);
674 
675  vcl_size_t A_start1 = viennacl::traits::start1(mat);
676  vcl_size_t A_start2 = viennacl::traits::start2(mat);
679  //vcl_size_t A_size1 = viennacl::traits::size1(mat);
680  //vcl_size_t A_size2 = viennacl::traits::size2(mat);
681  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
682  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);
683 
684  vcl_size_t v_start = viennacl::traits::start(vec);
686  vcl_size_t v_size = viennacl::traits::size(vec);
687 
688  if (mat.row_major())
689  {
690  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
691 
692  for (vcl_size_t i = 0; i < v_size; ++i)
693  data_vec[v_start + i * v_inc] = wrapper_A(i, static_cast<vcl_size_t>(j));
694  }
695  else
696  {
697  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
698 
699  for (vcl_size_t i = 0; i < v_size; ++i)
700  data_vec[v_start + i * v_inc] = wrapper_A(i, static_cast<vcl_size_t>(j));
701  }
702 }
703 
704 //
706 //
707 
708 // Binary operations A = B .* C and A = B ./ C
709 
715 template<typename NumericT, typename OpT>
718 {
719  assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!"));
720 
721  typedef NumericT value_type;
723 
724  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
725  value_type const * data_B = detail::extract_raw_pointer<value_type>(proxy.lhs());
726  value_type const * data_C = detail::extract_raw_pointer<value_type>(proxy.rhs());
727 
728  vcl_size_t A_start1 = viennacl::traits::start1(A);
729  vcl_size_t A_start2 = viennacl::traits::start2(A);
732  vcl_size_t A_size1 = viennacl::traits::size1(A);
733  vcl_size_t A_size2 = viennacl::traits::size2(A);
734  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
735  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
736 
737  vcl_size_t B_start1 = viennacl::traits::start1(proxy.lhs());
738  vcl_size_t B_start2 = viennacl::traits::start2(proxy.lhs());
739  vcl_size_t B_inc1 = viennacl::traits::stride1(proxy.lhs());
740  vcl_size_t B_inc2 = viennacl::traits::stride2(proxy.lhs());
741  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(proxy.lhs());
742  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(proxy.lhs());
743 
744  vcl_size_t C_start1 = viennacl::traits::start1(proxy.rhs());
745  vcl_size_t C_start2 = viennacl::traits::start2(proxy.rhs());
746  vcl_size_t C_inc1 = viennacl::traits::stride1(proxy.rhs());
747  vcl_size_t C_inc2 = viennacl::traits::stride2(proxy.rhs());
748  vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(proxy.rhs());
749  vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(proxy.rhs());
750 
751  if (A.row_major())
752  {
753  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
754  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
755  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
756 
757 #ifdef VIENNACL_WITH_OPENMP
758  #pragma omp parallel for
759 #endif
760  for (long row = 0; row < static_cast<long>(A_size1); ++row)
761  for (vcl_size_t col = 0; col < A_size2; ++col)
762  OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col), wrapper_C(row, col));
763  //data_A[index_generator_A::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]
764  // = data_B[index_generator_B::mem_index(row * B_inc1 + B_start1, col * B_inc2 + B_start2, B_internal_size1, B_internal_size2)] * alpha
765  // + data_C[index_generator_C::mem_index(row * C_inc1 + C_start1, col * C_inc2 + C_start2, C_internal_size1, C_internal_size2)] * beta;
766  }
767  else
768  {
769  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
770  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
771  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
772 
773 #ifdef VIENNACL_WITH_OPENMP
774  #pragma omp parallel for
775 #endif
776  for (long col = 0; col < static_cast<long>(A_size2); ++col)
777  for (vcl_size_t row = 0; row < A_size1; ++row)
778  OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col), wrapper_C(row, col));
779 
780  //data_A[index_generator_A::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]
781  // = data_B[index_generator_B::mem_index(row * B_inc1 + B_start1, col * B_inc2 + B_start2, B_internal_size1, B_internal_size2)] * alpha
782  // + data_C[index_generator_C::mem_index(row * C_inc1 + C_start1, col * C_inc2 + C_start2, C_internal_size1, C_internal_size2)] * beta;
783  }
784 }
785 
786 // Unary operations
787 
788 // A = op(B)
789 template<typename NumericT, typename OpT>
792 {
793  assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!"));
794 
795  typedef NumericT value_type;
797 
798  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
799  value_type const * data_B = detail::extract_raw_pointer<value_type>(proxy.lhs());
800 
801  vcl_size_t A_start1 = viennacl::traits::start1(A);
802  vcl_size_t A_start2 = viennacl::traits::start2(A);
805  vcl_size_t A_size1 = viennacl::traits::size1(A);
806  vcl_size_t A_size2 = viennacl::traits::size2(A);
807  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
808  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
809 
810  vcl_size_t B_start1 = viennacl::traits::start1(proxy.lhs());
811  vcl_size_t B_start2 = viennacl::traits::start2(proxy.lhs());
812  vcl_size_t B_inc1 = viennacl::traits::stride1(proxy.lhs());
813  vcl_size_t B_inc2 = viennacl::traits::stride2(proxy.lhs());
814  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(proxy.lhs());
815  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(proxy.lhs());
816 
817  if (A.row_major())
818  {
819  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
820  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
821 
822 #ifdef VIENNACL_WITH_OPENMP
823  #pragma omp parallel for
824 #endif
825  for (long row = 0; row < static_cast<long>(A_size1); ++row)
826  for (vcl_size_t col = 0; col < A_size2; ++col)
827  OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col));
828  }
829  else
830  {
831  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
832  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
833 
834 #ifdef VIENNACL_WITH_OPENMP
835  #pragma omp parallel for
836 #endif
837  for (long col = 0; col < static_cast<long>(A_size2); ++col)
838  for (vcl_size_t row = 0; row < A_size1; ++row)
839  OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col));
840  }
841 }
842 
843 
844 
//
// Matrix-Vector product
//
849 // A * x
850 
860 template<typename NumericT>
861 void prod_impl(const matrix_base<NumericT> & mat, bool trans,
862  const vector_base<NumericT> & vec,
863  vector_base<NumericT> & result)
864 {
865  typedef NumericT value_type;
866 
867  value_type const * data_A = detail::extract_raw_pointer<value_type>(mat);
868  value_type const * data_x = detail::extract_raw_pointer<value_type>(vec);
869  value_type * data_result = detail::extract_raw_pointer<value_type>(result);
870 
871  vcl_size_t A_start1 = viennacl::traits::start1(mat);
872  vcl_size_t A_start2 = viennacl::traits::start2(mat);
875  vcl_size_t A_size1 = viennacl::traits::size1(mat);
876  vcl_size_t A_size2 = viennacl::traits::size2(mat);
877  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
878  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);
879 
882 
884  vcl_size_t inc2 = viennacl::traits::stride(result);
885 
886  if (mat.row_major())
887  {
888  if (trans)
889  {
890  {
891  value_type temp = data_x[start1];
892  for (vcl_size_t row = 0; row < A_size2; ++row)
893  data_result[row * inc2 + start2] = data_A[viennacl::row_major::mem_index(A_start1, row * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * temp;
894  }
895 
896  for (vcl_size_t col = 1; col < A_size1; ++col) //run through matrix sequentially
897  {
898  value_type temp = data_x[col * inc1 + start1];
899  for (vcl_size_t row = 0; row < A_size2; ++row)
900  {
901  data_result[row * inc2 + start2] += data_A[viennacl::row_major::mem_index(col * A_inc1 + A_start1, row * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * temp;
902  }
903  }
904  }
905  else
906  {
907 #ifdef VIENNACL_WITH_OPENMP
908  #pragma omp parallel for
909 #endif
910  for (long row = 0; row < static_cast<long>(A_size1); ++row)
911  {
912  value_type temp = 0;
913  for (vcl_size_t col = 0; col < A_size2; ++col)
914  temp += data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(row) * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * data_x[col * inc1 + start1];
915 
916  data_result[static_cast<vcl_size_t>(row) * inc2 + start2] = temp;
917  }
918  }
919  }
920  else
921  {
922  if (!trans)
923  {
924  {
925  value_type temp = data_x[start1];
926  for (vcl_size_t row = 0; row < A_size1; ++row)
927  data_result[row * inc2 + start2] = data_A[viennacl::column_major::mem_index(row * A_inc1 + A_start1, A_start2, A_internal_size1, A_internal_size2)] * temp;
928  }
929  for (vcl_size_t col = 1; col < A_size2; ++col) //run through matrix sequentially
930  {
931  value_type temp = data_x[col * inc1 + start1];
932  for (vcl_size_t row = 0; row < A_size1; ++row)
933  data_result[row * inc2 + start2] += data_A[viennacl::column_major::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * temp;
934  }
935  }
936  else
937  {
938 #ifdef VIENNACL_WITH_OPENMP
939  #pragma omp parallel for
940 #endif
941  for (long row = 0; row < static_cast<long>(A_size2); ++row)
942  {
943  value_type temp = 0;
944  for (vcl_size_t col = 0; col < A_size1; ++col)
945  temp += data_A[viennacl::column_major::mem_index(col * A_inc1 + A_start1, static_cast<vcl_size_t>(row) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * data_x[col * inc1 + start1];
946 
947  data_result[static_cast<vcl_size_t>(row) * inc2 + start2] = temp;
948  }
949  }
950  }
951 }
952 
953 
954 
//
// Matrix-Matrix product
//
958 
959 namespace detail
960 {
961  template<typename MatrixAccT1, typename MatrixAccT2, typename MatrixAccT3, typename NumericT>
962  void prod(MatrixAccT1 & A, MatrixAccT2 & B, MatrixAccT3 & C,
963  vcl_size_t C_size1, vcl_size_t C_size2, vcl_size_t A_size2,
964  NumericT alpha, NumericT beta)
965  {
966  if (C_size1 == 0 || C_size2 == 0 || A_size2 == 0)
967  return;
968 
969  static const vcl_size_t blocksize = 64;
970 
971  vcl_size_t num_blocks_C1 = (C_size1 - 1) / blocksize + 1;
972  vcl_size_t num_blocks_C2 = (C_size2 - 1) / blocksize + 1;
973  vcl_size_t num_blocks_A2 = (A_size2 - 1) / blocksize + 1;
974 
975  //
976  // outer loop pair: Run over all blocks with indices (block_idx_i, block_idx_j) of the result matrix C:
977  //
978 #ifdef VIENNACL_WITH_OPENMP
979  #pragma omp parallel for
980 #endif
981  for (long block_idx_i2=0; block_idx_i2<static_cast<long>(num_blocks_C1); ++block_idx_i2)
982  {
983  // thread-local auxiliary buffers
984  std::vector<NumericT> buffer_A(blocksize * blocksize); // row-major
985  std::vector<NumericT> buffer_B(blocksize * blocksize); // column-major
986  std::vector<NumericT> buffer_C(blocksize * blocksize); // row-major
987 
988  vcl_size_t block_idx_i = static_cast<vcl_size_t>(block_idx_i2);
989  for (vcl_size_t block_idx_j=0; block_idx_j<num_blocks_C2; ++block_idx_j)
990  {
991  // Reset block matrix:
992  std::fill(buffer_C.begin(), buffer_C.end(), NumericT(0));
993 
994  vcl_size_t offset_i = block_idx_i*blocksize;
995  vcl_size_t offset_j = block_idx_j*blocksize;
996 
997  // C(block_idx_i, block_idx_i) += A(block_idx_i, block_idx_k) * B(block_idx_k, block_idx_j)
998  for (vcl_size_t block_idx_k=0; block_idx_k<num_blocks_A2; ++block_idx_k)
999  {
1000  // flush buffers:
1001  std::fill(buffer_A.begin(), buffer_A.end(), NumericT(0));
1002  std::fill(buffer_B.begin(), buffer_B.end(), NumericT(0));
1003 
1004  vcl_size_t offset_k = block_idx_k*blocksize;
1005 
1006  // load current data:
1007  for (vcl_size_t i = offset_i; i < std::min(offset_i + blocksize, C_size1); ++i)
1008  for (vcl_size_t k = offset_k; k < std::min(offset_k + blocksize, A_size2); ++k)
1009  buffer_A[(i - offset_i) * blocksize + (k - offset_k)] = A(i, k);
1010 
1011  for (vcl_size_t j = offset_j; j < std::min(offset_j + blocksize, C_size2); ++j)
1012  for (vcl_size_t k = offset_k; k < std::min(offset_k + blocksize, A_size2); ++k)
1013  buffer_B[(k - offset_k) + (j - offset_j) * blocksize] = B(k, j);
1014 
1015  // multiply (this is the hot spot in terms of flops)
1016  for (vcl_size_t i = 0; i < blocksize; ++i)
1017  {
1018  NumericT const * ptrA = &(buffer_A[i*blocksize]);
1019  for (vcl_size_t j = 0; j < blocksize; ++j)
1020  {
1021  NumericT const * ptrB = &(buffer_B[j*blocksize]);
1022 
1023  NumericT temp = NumericT(0);
1024  for (vcl_size_t k = 0; k < blocksize; ++k)
1025  temp += ptrA[k] * ptrB[k]; // buffer_A[i*blocksize + k] * buffer_B[k + j*blocksize];
1026 
1027  buffer_C[i*blocksize + j] += temp;
1028  }
1029  }
1030  }
1031 
1032  // write result:
1033  if (beta > 0 || beta < 0)
1034  {
1035  for (vcl_size_t i = offset_i; i < std::min(offset_i + blocksize, C_size1); ++i)
1036  for (vcl_size_t j = offset_j; j < std::min(offset_j + blocksize, C_size2); ++j)
1037  C(i,j) = beta * C(i,j) + alpha * buffer_C[(i - offset_i) * blocksize + (j - offset_j)];
1038  }
1039  else
1040  {
1041  for (vcl_size_t i = offset_i; i < std::min(offset_i + blocksize, C_size1); ++i)
1042  for (vcl_size_t j = offset_j; j < std::min(offset_j + blocksize, C_size2); ++j)
1043  C(i,j) = alpha * buffer_C[(i - offset_i) * blocksize + (j - offset_j)];
1044  }
1045 
1046  } // for block j
1047  } // for block i
1048 
1049  } // prod()
1050 
1051 } // namespace detail
1052 
1058 template<typename NumericT, typename ScalarT1, typename ScalarT2 >
1059 void prod_impl(const matrix_base<NumericT> & A, bool trans_A,
1060  const matrix_base<NumericT> & B, bool trans_B,
1062  ScalarT1 alpha,
1063  ScalarT2 beta)
1064 {
1065  typedef NumericT value_type;
1066 
1067  value_type const * data_A = detail::extract_raw_pointer<value_type>(A);
1068  value_type const * data_B = detail::extract_raw_pointer<value_type>(B);
1069  value_type * data_C = detail::extract_raw_pointer<value_type>(C);
1070 
1071  vcl_size_t A_start1 = viennacl::traits::start1(A);
1072  vcl_size_t A_start2 = viennacl::traits::start2(A);
1075  vcl_size_t A_size1 = viennacl::traits::size1(A);
1076  vcl_size_t A_size2 = viennacl::traits::size2(A);
1077  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
1078  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
1079 
1080  vcl_size_t B_start1 = viennacl::traits::start1(B);
1081  vcl_size_t B_start2 = viennacl::traits::start2(B);
1084  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(B);
1085  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(B);
1086 
1087  vcl_size_t C_start1 = viennacl::traits::start1(C);
1088  vcl_size_t C_start2 = viennacl::traits::start2(C);
1091  vcl_size_t C_size1 = viennacl::traits::size1(C);
1092  vcl_size_t C_size2 = viennacl::traits::size2(C);
1093  vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(C);
1094  vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(C);
1095 
1096  if (!trans_A && !trans_B)
1097  {
1098  if (A.row_major() && B.row_major() && C.row_major())
1099  {
1100  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1101  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1102  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1103 
1104  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1105  }
1106  else if (A.row_major() && B.row_major() && !C.row_major())
1107  {
1108  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1109  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1110  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1111 
1112  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1113  }
1114  else if (A.row_major() && !B.row_major() && C.row_major())
1115  {
1116  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1117  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1118  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1119 
1120  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1121  }
1122  else if (A.row_major() && !B.row_major() && !C.row_major())
1123  {
1124  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1125  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1126  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1127 
1128  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1129  }
1130  else if (!A.row_major() && B.row_major() && C.row_major())
1131  {
1132  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1133  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1134  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1135 
1136  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1137  }
1138  else if (!A.row_major() && B.row_major() && !C.row_major())
1139  {
1140  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1141  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1142  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1143 
1144  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1145  }
1146  else if (!A.row_major() && !B.row_major() && C.row_major())
1147  {
1148  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1149  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1150  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1151 
1152  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1153  }
1154  else
1155  {
1156  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1157  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1158  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1159 
1160  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1161  }
1162  }
1163  else if (!trans_A && trans_B)
1164  {
1165  if (A.row_major() && B.row_major() && C.row_major())
1166  {
1167  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1168  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1169  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1170 
1171  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1172  }
1173  else if (A.row_major() && B.row_major() && !C.row_major())
1174  {
1175  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1176  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1177  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1178 
1179  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1180  }
1181  else if (A.row_major() && !B.row_major() && C.row_major())
1182  {
1183  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1184  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1185  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1186 
1187  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1188  }
1189  else if (A.row_major() && !B.row_major() && !C.row_major())
1190  {
1191  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1192  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1193  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1194 
1195  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1196  }
1197  else if (!A.row_major() && B.row_major() && C.row_major())
1198  {
1199  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1200  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1201  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1202 
1203  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1204  }
1205  else if (!A.row_major() && B.row_major() && !C.row_major())
1206  {
1207  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1208  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1209  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1210 
1211  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1212  }
1213  else if (!A.row_major() && !B.row_major() && C.row_major())
1214  {
1215  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1216  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1217  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1218 
1219  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1220  }
1221  else
1222  {
1223  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1224  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1225  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1226 
1227  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1228  }
1229  }
1230  else if (trans_A && !trans_B)
1231  {
1232  if (A.row_major() && B.row_major() && C.row_major())
1233  {
1234  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1235  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1236  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1237 
1238  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1239  }
1240  else if (A.row_major() && B.row_major() && !C.row_major())
1241  {
1242  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1243  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1244  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1245 
1246  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1247  }
1248  else if (A.row_major() && !B.row_major() && C.row_major())
1249  {
1250  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1251  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1252  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1253 
1254  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1255  }
1256  else if (A.row_major() && !B.row_major() && !C.row_major())
1257  {
1258  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1259  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1260  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1261 
1262  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1263  }
1264  else if (!A.row_major() && B.row_major() && C.row_major())
1265  {
1266  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1267  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1268  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1269 
1270  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1271  }
1272  else if (!A.row_major() && B.row_major() && !C.row_major())
1273  {
1274  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1275  detail::matrix_array_wrapper<value_type const, row_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1276  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1277 
1278  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1279  }
1280  else if (!A.row_major() && !B.row_major() && C.row_major())
1281  {
1282  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1283  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1284  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1285 
1286  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1287  }
1288  else
1289  {
1290  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1291  detail::matrix_array_wrapper<value_type const, column_major, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1292  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1293 
1294  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1295  }
1296  }
1297  else if (trans_A && trans_B)
1298  {
1299  if (A.row_major() && B.row_major() && C.row_major())
1300  {
1301  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1302  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1303  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1304 
1305  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1306  }
1307  else if (A.row_major() && B.row_major() && !C.row_major())
1308  {
1309  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1310  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1311  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1312 
1313  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1314  }
1315  else if (A.row_major() && !B.row_major() && C.row_major())
1316  {
1317  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1318  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1319  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1320 
1321  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1322  }
1323  else if (A.row_major() && !B.row_major() && !C.row_major())
1324  {
1325  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1326  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1327  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1328 
1329  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1330  }
1331  else if (!A.row_major() && B.row_major() && C.row_major())
1332  {
1333  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1334  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1335  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1336 
1337  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1338  }
1339  else if (!A.row_major() && B.row_major() && !C.row_major())
1340  {
1341  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1342  detail::matrix_array_wrapper<value_type const, row_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1343  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1344 
1345  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1346  }
1347  else if (!A.row_major() && !B.row_major() && C.row_major())
1348  {
1349  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1350  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1351  detail::matrix_array_wrapper<value_type, row_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1352 
1353  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1354  }
1355  else
1356  {
1357  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1358  detail::matrix_array_wrapper<value_type const, column_major, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1359  detail::matrix_array_wrapper<value_type, column_major, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1360 
1361  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1362  }
1363  }
1364 }
1365 
1366 
1367 
1368 
1369 //
1371 //
1372 
1373 
1385 template<typename NumericT, typename ScalarT>
1387  ScalarT const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
1388  const vector_base<NumericT> & vec1,
1389  const vector_base<NumericT> & vec2)
1390 {
1391  typedef NumericT value_type;
1392 
1393  value_type * data_A = detail::extract_raw_pointer<value_type>(mat1);
1394  value_type const * data_v1 = detail::extract_raw_pointer<value_type>(vec1);
1395  value_type const * data_v2 = detail::extract_raw_pointer<value_type>(vec2);
1396 
1397  vcl_size_t A_start1 = viennacl::traits::start1(mat1);
1398  vcl_size_t A_start2 = viennacl::traits::start2(mat1);
1399  vcl_size_t A_inc1 = viennacl::traits::stride1(mat1);
1400  vcl_size_t A_inc2 = viennacl::traits::stride2(mat1);
1401  vcl_size_t A_size1 = viennacl::traits::size1(mat1);
1402  vcl_size_t A_size2 = viennacl::traits::size2(mat1);
1403  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat1);
1404  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat1);
1405 
1407  vcl_size_t inc1 = viennacl::traits::stride(vec1);
1408 
1410  vcl_size_t inc2 = viennacl::traits::stride(vec2);
1411 
1412  value_type data_alpha = alpha;
1413  if (flip_sign_alpha)
1414  data_alpha = -data_alpha;
1415  if (reciprocal_alpha)
1416  data_alpha = static_cast<value_type>(1) / data_alpha;
1417 
1418  if (mat1.row_major())
1419  {
1420  for (vcl_size_t row = 0; row < A_size1; ++row)
1421  {
1422  value_type value_v1 = data_alpha * data_v1[row * inc1 + start1];
1423  for (vcl_size_t col = 0; col < A_size2; ++col)
1424  data_A[viennacl::row_major::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] += value_v1 * data_v2[col * inc2 + start2];
1425  }
1426  }
1427  else
1428  {
1429  for (vcl_size_t col = 0; col < A_size2; ++col) //run through matrix sequentially
1430  {
1431  value_type value_v2 = data_alpha * data_v2[col * inc2 + start2];
1432  for (vcl_size_t row = 0; row < A_size1; ++row)
1433  data_A[viennacl::column_major::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] += data_v1[row * inc1 + start1] * value_v2;
1434  }
1435  }
1436 }
1437 
1438 
1446 template <typename NumericT, typename S1>
1448  vector_base<S1> & D,
1449  vector_base<S1> & S
1450  )
1451 
1452  {
1453  typedef NumericT value_type;
1454 
1455  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
1456  value_type * data_D = detail::extract_raw_pointer<value_type>(D);
1457  value_type * data_S = detail::extract_raw_pointer<value_type>(S);
1458 
1459  vcl_size_t A_start1 = viennacl::traits::start1(A);
1460  vcl_size_t A_start2 = viennacl::traits::start2(A);
1463  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
1464  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
1465 
1469 
1473 
1474  vcl_size_t size = std::min(size1, size2);
1475  if (A.row_major())
1476  {
1477 #ifdef VIENNACL_WITH_OPENMP
1478  #pragma omp parallel for
1479 #endif
1480  for(long i2 = 0; i2 < long(size) - 1; i2++)
1481  {
1482  vcl_size_t i = vcl_size_t(i2);
1483  data_D[start1 + inc1 * i] = data_A[viennacl::row_major::mem_index(i * A_inc1 + A_start1, i * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1484  data_S[start2 + inc2 * (i + 1)] = data_A[viennacl::row_major::mem_index(i * A_inc1 + A_start1, (i + 1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1485  }
1486  data_D[start1 + inc1 * (size-1)] = data_A[viennacl::row_major::mem_index((size-1) * A_inc1 + A_start1, (size-1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1487 
1488  }
1489  else
1490  {
1491 #ifdef VIENNACL_WITH_OPENMP
1492  #pragma omp parallel for
1493 #endif
1494  for(long i2 = 0; i2 < long(size) - 1; i2++)
1495  {
1496  vcl_size_t i = vcl_size_t(i2);
1497  data_D[start1 + inc1 * i] = data_A[viennacl::column_major::mem_index(i * A_inc1 + A_start1, i * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1498  data_S[start2 + inc2 * (i + 1)] = data_A[viennacl::column_major::mem_index(i * A_inc1 + A_start1, (i + 1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1499  }
1500  data_D[start1 + inc1 * (size-1)] = data_A[viennacl::column_major::mem_index((size-1) * A_inc1 + A_start1, (size-1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1501  }
1502 
1503  }
1504 
1505 
1506 
1507  template <typename NumericT, typename VectorType>
1509  VectorType & dh,
1510  VectorType & sh
1511  )
1512  {
1513 
1515 
1516  }
1517 
1524  template <typename NumericT>
1527  vcl_size_t start)
1528  {
1529  typedef NumericT value_type;
1530  NumericT ss = 0;
1531  vcl_size_t row_start = start + 1;
1532 
1533  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
1534  value_type * data_D = detail::extract_raw_pointer<value_type>(D);
1535 
1536  vcl_size_t A_start1 = viennacl::traits::start1(A);
1537  vcl_size_t A_start2 = viennacl::traits::start2(A);
1540  vcl_size_t A_size1 = viennacl::traits::size1(A);
1541  vcl_size_t A_size2 = viennacl::traits::size2(A);
1542  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
1543  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
1544 
1547 
1548  if (A.row_major())
1549  {
1550  for(vcl_size_t i = 0; i < A_size2; i++)
1551  {
1552  ss = 0;
1553  for(vcl_size_t j = row_start; j < A_size1; j++)
1554  ss = ss + data_D[start1 + inc1 * j] * data_A[viennacl::row_major::mem_index((j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1555 #ifdef VIENNACL_WITH_OPENMP
1556  #pragma omp parallel for
1557 #endif
1558  for(long j = static_cast<long>(row_start); j < static_cast<long>(A_size1); j++)
1559  data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] =
1560  data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] -
1561  (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)]* ss);
1562  }
1563  }
1564  else
1565  {
1566  for(vcl_size_t i = 0; i < A_size2; i++)
1567  {
1568  ss = 0;
1569  for(vcl_size_t j = row_start; j < A_size1; j++)
1570  ss = ss + data_D[start1 + inc1 * j] * data_A[viennacl::column_major::mem_index((j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1571 #ifdef VIENNACL_WITH_OPENMP
1572  #pragma omp parallel for
1573 #endif
1574  for(long j = static_cast<long>(row_start); j < static_cast<long>(A_size1); j++)
1575  data_A[viennacl::column_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]=
1576  data_A[viennacl::column_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] -
1577  (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)]* ss);
1578  }
1579  }
1580 
1581  }
1582 
1589  template <typename NumericT>
1592  {
1593  typedef NumericT value_type;
1594  NumericT ss = 0;
1595 
1596  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
1597  value_type * data_D = detail::extract_raw_pointer<value_type>(D);
1598 
1599  vcl_size_t A_start1 = viennacl::traits::start1(A);
1600  vcl_size_t A_start2 = viennacl::traits::start2(A);
1603  vcl_size_t A_size1 = viennacl::traits::size1(A);
1604  vcl_size_t A_size2 = viennacl::traits::size2(A);
1605  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
1606  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
1607 
1610 
1611  if (A.row_major())
1612  {
1613  for(vcl_size_t i = 0; i < A_size1; i++)
1614  {
1615  ss = 0;
1616  for(vcl_size_t j = 0; j < A_size2; j++) // ss = ss + D[j] * A(i, j)
1617  ss = ss + (data_D[start1 + inc1 * j] * data_A[viennacl::row_major::mem_index((i) * A_inc1 + A_start1, (j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]);
1618 
1619  NumericT sum_Av = ss;
1620 #ifdef VIENNACL_WITH_OPENMP
1621  #pragma omp parallel for
1622 #endif
1623  for(long j = 0; j < static_cast<long>(A_size2); j++) // A(i, j) = A(i, j) - 2 * D[j] * sum_Av
1624  data_A[viennacl::row_major::mem_index((i) * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] =
1625  data_A[viennacl::row_major::mem_index((i) * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] - (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)] * sum_Av);
1626  }
1627  }
1628  else
1629  {
1630  for(vcl_size_t i = 0; i < A_size1; i++)
1631  {
1632  ss = 0;
1633  for(vcl_size_t j = 0; j < A_size2; j++) // ss = ss + D[j] * A(i, j)
1634  ss = ss + (data_D[start1 + inc1 * j] * data_A[viennacl::column_major::mem_index((i) * A_inc1 + A_start1, (j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]);
1635 
1636  NumericT sum_Av = ss;
1637 #ifdef VIENNACL_WITH_OPENMP
1638  #pragma omp parallel for
1639 #endif
1640  for(long j = 0; j < static_cast<long>(A_size2); j++) // A(i, j) = A(i, j) - 2 * D[j] * sum_Av
1641  data_A[viennacl::column_major::mem_index((i) * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] =
1642  data_A[viennacl::column_major::mem_index((i) * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] - (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)] * sum_Av);
1643  }
1644  }
1645 
1646 
1647  }
1648 
1655  template <typename NumericT>
1658  vcl_size_t A_size1)
1659 
1660  {
1661  NumericT beta = 2;
1663  viennacl::matrix<NumericT> Q_temp = Q;
1664  viennacl::vector<NumericT> vcl_D = D;
1665 
1666 
1667  viennacl::linalg::host_based::scaled_rank_1_update(vcl_P, beta, 1, 0, 1, vcl_D, vcl_D);
1668  Q = prod(Q_temp, vcl_P);
1669 
1670  }
1671 
1681  template<typename NumericT>
1683  vector_base<NumericT> & tmp1,
1684  vector_base<NumericT> & tmp2,
1685  int l,
1686  int m
1687  )
1688  {
1689  typedef NumericT value_type;
1690 
1691  value_type * data_Q = detail::extract_raw_pointer<value_type>(Q);
1692  value_type * data_tmp1 = detail::extract_raw_pointer<value_type>(tmp1);
1693  value_type * data_tmp2 = detail::extract_raw_pointer<value_type>(tmp2);
1694 
1695  vcl_size_t Q_start1 = viennacl::traits::start1(Q);
1696  vcl_size_t Q_start2 = viennacl::traits::start2(Q);
1699  vcl_size_t Q_size1 = viennacl::traits::size1(Q);
1700  vcl_size_t Q_internal_size1 = viennacl::traits::internal_size1(Q);
1701  vcl_size_t Q_internal_size2 = viennacl::traits::internal_size2(Q);
1702 
1704  vcl_size_t inc1 = viennacl::traits::stride(tmp1);
1705 
1707  vcl_size_t inc2 = viennacl::traits::stride(tmp2);
1708 
1709  if (Q.row_major())
1710  {
1711  for( int i = m - 1; i >= l; i--)
1712  {
1713 #ifdef VIENNACL_WITH_OPENMP
1714  #pragma omp parallel for
1715 #endif
1716  for(long k = 0; k < static_cast<long>(Q_size1); k++)
1717  {
1718 
1719  // h = data_Q(k, i+1);
1720  NumericT h = data_Q[viennacl::row_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i + 1) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)];
1721 
1722  // Q(k, i+1) = tmp2[i] * Q(k, i) + tmp1[i]*h;
1723  data_Q[viennacl::row_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i + 1) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] = data_tmp2[start2 + inc2 * vcl_size_t(i)] *
1724  data_Q[viennacl::row_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] + data_tmp1[start1 + inc1 * vcl_size_t(i)] * h;
1725 
1726  // Q(k, i) = tmp1[i] * Q(k, i) - tmp2[i]*h;
1727  data_Q[viennacl::row_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] = data_tmp1[start1 + inc1 * vcl_size_t(i)] *
1728  data_Q[viennacl::row_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] - data_tmp2[start2 + inc2 * vcl_size_t(i)]*h;
1729  }
1730  }
1731  }
1732  else // column_major
1733  {
1734  for( int i = m - 1; i >= l; i--)
1735  {
1736 #ifdef VIENNACL_WITH_OPENMP
1737  #pragma omp parallel for
1738 #endif
1739  for(long k = 0; k < static_cast<long>(Q_size1); k++)
1740  {
1741 
1742  // h = data_Q(k, i+1);
1743  NumericT h = data_Q[viennacl::column_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i + 1) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)];
1744 
1745  // Q(k, i+1) = tmp2[i] * Q(k, i) + tmp1[i]*h;
1746  data_Q[viennacl::column_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i + 1) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] = data_tmp2[start2 + inc2 * vcl_size_t(i)] *
1747  data_Q[viennacl::column_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] + data_tmp1[start1 + inc1 * vcl_size_t(i)] * h;
1748 
1749  // Q(k, i) = tmp1[i] * Q(k, i) - tmp2[i]*h;
1750  data_Q[viennacl::column_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] = data_tmp1[start1 + inc1 * vcl_size_t(i)] *
1751  data_Q[viennacl::column_major::mem_index(static_cast<vcl_size_t>(k) * Q_inc1 + Q_start1, vcl_size_t(i) * Q_inc2 + Q_start2, Q_internal_size1, Q_internal_size2)] - data_tmp2[start2 + inc2 * vcl_size_t(i)]*h;
1752  }
1753  }
1754  }
1755 
1756  }
1757 
1758 
1768  template <typename NumericT, typename S1>
1770  vector_base<S1> & V,
1771  vcl_size_t row_start,
1772  vcl_size_t col_start,
1773  bool copy_col
1774  )
1775  {
1776  typedef NumericT value_type;
1777 
1778  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
1779  value_type * data_V = detail::extract_raw_pointer<value_type>(V);
1780 
1781  vcl_size_t A_start1 = viennacl::traits::start1(A);
1782  vcl_size_t A_start2 = viennacl::traits::start2(A);
1785  vcl_size_t A_size1 = viennacl::traits::size1(A);
1786  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
1787  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
1788 
1789 
1790  if(copy_col)
1791  {
1792  if (A.row_major())
1793  {
1794 #ifdef VIENNACL_WITH_OPENMP
1795  #pragma omp parallel for
1796 #endif
1797  for(long i = static_cast<long>(row_start); i < static_cast<long>(A_size1); i++)
1798  {
1799  data_V[i - static_cast<long>(row_start)] = data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(i) * A_inc1 + A_start1, col_start * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1800  }
1801  }
1802  else
1803  {
1804 #ifdef VIENNACL_WITH_OPENMP
1805  #pragma omp parallel for
1806 #endif
1807  for(long i = static_cast<long>(row_start); i < static_cast<long>(A_size1); i++)
1808  {
1809  data_V[i - static_cast<long>(row_start)] = data_A[viennacl::column_major::mem_index(static_cast<vcl_size_t>(i) * A_inc1 + A_start1, col_start * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1810  }
1811  }
1812  }
1813  else
1814  {
1815  if (A.row_major())
1816  {
1817 #ifdef VIENNACL_WITH_OPENMP
1818  #pragma omp parallel for
1819 #endif
1820  for(long i = static_cast<long>(col_start); i < static_cast<long>(A_size1); i++)
1821  {
1822  data_V[i - static_cast<long>(col_start)] = data_A[viennacl::row_major::mem_index(row_start * A_inc1 + A_start1, static_cast<vcl_size_t>(i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1823  }
1824  }
1825  else
1826  {
1827 #ifdef VIENNACL_WITH_OPENMP
1828  #pragma omp parallel for
1829 #endif
1830  for(long i = static_cast<long>(col_start); i < static_cast<long>(A_size1); i++)
1831  {
1832  data_V[i - static_cast<long>(col_start)] = data_A[viennacl::column_major::mem_index(row_start * A_inc1 + A_start1, static_cast<vcl_size_t>(i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
1833  }
1834  }
1835  }
1836  }
1837 
1844  template<typename NumericT>
1846  vector_base<NumericT>& vec2)
1847  {
1849  vcl_size_t inc1 = viennacl::traits::stride(vec1);
1851 
1853  vcl_size_t inc2 = viennacl::traits::stride(vec2);
1854 
1855  vec2[start2] = vec1[start1];
1856  for(vcl_size_t i = 1; i < size1; i++)
1857  {
1858  vec2[i * inc2 + start2] = vec2[(i - 1) * inc2 + start2] + vec1[i * inc1 + start1];
1859 
1860  }
1861  }
1862 
1869  template<typename NumericT>
1871  vector_base<NumericT>& vec2)
1872  {
1874  vcl_size_t inc1 = viennacl::traits::stride(vec1);
1876 
1878  vcl_size_t inc2 = viennacl::traits::stride(vec2);
1879 
1880 
1881  vec2[start2] = 0;
1882  for(vcl_size_t i = 1; i < size1; i++)
1883  {
1884  vec2[i * inc2 + start2] = vec2[(i - 1) * inc2 + start2] + vec1[(i - 1) * inc1 + start1];
1885 
1886  }
1887  }
1888 
1889 } // namespace host_based
1890 } //namespace linalg
1891 } //namespace viennacl
1892 
1893 
1894 #endif
void fill(MatrixType &matrix, vcl_size_t row_index, vcl_size_t col_index, NumericT value)
Generic filler routine for setting an entry of a matrix to a particular value.
Definition: fill.hpp:46
static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t, vcl_size_t num_cols)
Returns the memory offset for entry (i,j) of a dense matrix.
Definition: forwards.h:313
result_of::size_type< matrix_base< NumericT > >::type stride1(matrix_base< NumericT > const &s)
Definition: stride.hpp:55
void bidiag_pack_impl(matrix_base< NumericT > &A, vector_base< S1 > &D, vector_base< S1 > &S)
This function stores the diagonal and the superdiagonal of a matrix in two vectors.
void matrix_diag_to_vector(const matrix_base< NumericT > &mat, int k, vector_base< NumericT > &vec)
void exclusive_scan(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
This function implements an exclusive scan.
Generic size and resize functionality for different vector and matrix types.
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
Various little tools used here and there in ViennaCL.
vcl_size_t internal_size1(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
Definition: size.hpp:279
void matrix_assign(matrix_base< NumericT > &mat, NumericT s, bool clear=false)
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
Worker class for decomposing expression templates.
Definition: op_applier.hpp:43
vcl_size_t internal_size2(matrix_base< NumericT > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Definition: size.hpp:287
Expression template class for representing a tree of expressions which ultimately result in a matrix...
Definition: forwards.h:340
size_type stride2() const
Returns the number of columns.
Definition: matrix_def.hpp:225
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:45
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
Definition: clear.hpp:57
This file provides the forward declarations for the main types used within ViennaCL.
result_of::size_type< T >::type start1(T const &obj)
Definition: start.hpp:65
A dense matrix class.
Definition: forwards.h:374
Determines row and column increments for matrices and matrix proxies.
Represents a vector consisting of 1 at a given index and zeros otherwise. To be used as an initialize...
Definition: matrix_def.hpp:69
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:245
void house_update_A_right(matrix_base< NumericT > &A, vector_base< NumericT > &D)
This function applies a householder transformation to a matrix: A <- A * P with a householder reflect...
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:91
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
result_of::size_type< T >::type start2(T const &obj)
Definition: start.hpp:84
Helper array for accessing a strided submatrix embedded in a larger matrix.
Definition: common.hpp:73
void copy_vec(matrix_base< NumericT > &A, vector_base< S1 > &V, vcl_size_t row_start, vcl_size_t col_start, bool copy_col)
This function copies a row or a column from a matrix to a vector.
void prod_impl(const matrix_base< NumericT > &mat, bool trans, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
void ambm_m(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
void matrix_column(const matrix_base< NumericT > &mat, unsigned int j, vector_base< NumericT > &vec)
void matrix_diagonal_assign(matrix_base< NumericT > &mat, NumericT s)
void element_op(matrix_base< NumericT > &A, matrix_expression< const matrix_base< NumericT >, const matrix_base< NumericT >, op_element_binary< OpT > > const &proxy)
Implementation of the element-wise operations A = B .* C and A = B ./ C (using MATLAB syntax) ...
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:44
void house_update_QL(matrix_base< NumericT > &Q, vector_base< NumericT > &D, vcl_size_t A_size1)
This function updates the matrix Q, which is needed for the computation of the eigenvectors.
size_type stride1() const
Returns the number of rows.
Definition: matrix_def.hpp:223
void ambm(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT > const &mat3, ScalarT2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
std::size_t vcl_size_t
Definition: forwards.h:74
Common routines for single-threaded or OpenMP-enabled execution on CPU.
Proxy classes for vectors.
result_of::size_type< matrix_base< NumericT > >::type stride2(matrix_base< NumericT > const &s)
Definition: stride.hpp:65
void scaled_rank_1_update(matrix_base< NumericT > &mat1, ScalarT const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2)
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update...
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
Definition: matrix.hpp:853
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
void matrix_row(const matrix_base< NumericT > &mat, unsigned int i, vector_base< NumericT > &vec)
void am(matrix_base< NumericT > &mat1, matrix_base< NumericT > const &mat2, ScalarT1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha)
void inclusive_scan(vector_base< NumericT > &vec1, vector_base< NumericT > &vec2)
This function implements an inclusive scan.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
bool row_major() const
Definition: matrix_def.hpp:239
void givens_next(matrix_base< NumericT > &Q, vector_base< NumericT > &tmp1, vector_base< NumericT > &tmp2, int l, int m)
This function updates the matrix Q. It is part of the tql2 algorithm.
void trans(const matrix_expression< const matrix_base< NumericT, SizeT, DistanceT >, const matrix_base< NumericT, SizeT, DistanceT >, op_trans > &proxy, matrix_base< NumericT > &temp_trans)
void bidiag_pack(matrix_base< NumericT > &A, VectorType &dh, VectorType &sh)
A tag class representing transposed matrices.
Definition: forwards.h:219
size_type start2() const
Returns the number of columns.
Definition: matrix_def.hpp:221
void house_update_A_left(matrix_base< NumericT > &A, vector_base< NumericT > &D, vcl_size_t start)
This function applies a householder transformation to a matrix. A <- P * A with a householder reflect...
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
Definition: forwards.h:129
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:231
void prod(MatrixAccT1 &A, MatrixAccT2 &B, MatrixAccT3 &C, vcl_size_t C_size1, vcl_size_t C_size2, vcl_size_t A_size2, NumericT alpha, NumericT beta)
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Definition: matrix_def.hpp:229
static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t num_rows, vcl_size_t)
Returns the memory offset for entry (i,j) of a dense matrix.
Definition: forwards.h:330
T min(const T &lhs, const T &rhs)
Minimum.
Definition: util.hpp:45
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
void matrix_diag_from_vector(const vector_base< NumericT > &vec, int k, matrix_base< NumericT > &mat)
Defines the action of certain unary and binary operators and its arguments (for host execution)...
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
Definition: forwards.h:133
Implementation of the ViennaCL scalar class.
A collection of compile time type deductions.
Simple enable-if variant that uses the SFINAE pattern.
size_type start1() const
Returns the number of rows.
Definition: matrix_def.hpp:219