#ifndef VIENNACL_LINALG_HOST_BASED_MATRIX_OPERATIONS_HPP_
#define VIENNACL_LINALG_HOST_BASED_MATRIX_OPERATIONS_HPP_
// trans(): writes the transpose of proxy.lhs() into temp_trans (host backend).
template<typename NumericT, typename SizeT, typename DistanceT>
void trans(const matrix_expression<const matrix_base<NumericT, SizeT, DistanceT>,
                                   const matrix_base<NumericT, SizeT, DistanceT>, op_trans> & proxy,
           matrix_base<NumericT> & temp_trans)
{
  const NumericT * temp_proxy = detail::extract_raw_pointer<NumericT>(proxy.lhs());
  NumericT * temp = detail::extract_raw_pointer<NumericT>(temp_trans);

  vcl_size_t proxy_int_size1 = proxy.lhs().internal_size1();
  vcl_size_t proxy_int_size2 = proxy.lhs().internal_size2();

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long i2 = 0; i2 < static_cast<long>(proxy_int_size1 * proxy_int_size2); ++i2)
  {
    // (row, col) are recovered from the flat index i2; entry (row, col) of the source is
    // written to entry (col, row) of temp_trans (new_pos, computed analogously to pos).
    if (row < proxy.lhs().size1() && col < proxy.lhs().size2())
    {
      if (proxy.lhs().row_major())
      {
        vcl_size_t pos = viennacl::row_major::mem_index(proxy.lhs().start1() + proxy.lhs().stride1() * row,
                                                        proxy.lhs().start2() + proxy.lhs().stride2() * col,
                                                        proxy_int_size1, proxy_int_size2);
        temp[new_pos] = temp_proxy[pos];
      }
      else
      {
        vcl_size_t pos = viennacl::column_major::mem_index(proxy.lhs().start1() + proxy.lhs().stride1() * row,
                                                           proxy.lhs().start2() + proxy.lhs().stride2() * col,
                                                           proxy_int_size1, proxy_int_size2);
        temp[new_pos] = temp_proxy[pos];
      }
    }
  }
}
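// --- Illustrative sketch (not part of the original header) --------------------------
// The mem_index() helpers used above boil down to the usual dense-layout arithmetic;
// written out here for plain, unpadded buffers:
inline vcl_size_t row_major_index_sketch(vcl_size_t i, vcl_size_t j, vcl_size_t num_cols)
{
  return i * num_cols + j;   // entries of one row are contiguous in memory
}
inline vcl_size_t column_major_index_sketch(vcl_size_t i, vcl_size_t j, vcl_size_t num_rows)
{
  return i + j * num_rows;   // entries of one column are contiguous in memory
}
// The real helpers additionally receive the internal (padded) sizes, which is why the
// loop above runs over internal_size1() * internal_size2() and skips padding entries
// via the size1()/size2() check.
// -------------------------------------------------------------------------------------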
// am(): mat1 = alpha * mat2, with alpha optionally sign-flipped and/or applied as 1/alpha.
template<typename NumericT, typename ScalarT1>
void am(matrix_base<NumericT> & mat1,
        matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha)
{
  assert(mat1.row_major() == mat2.row_major() && bool("Addition/subtraction on mixed matrix layouts not supported yet!"));

  typedef NumericT value_type;

  value_type       * data_A = detail::extract_raw_pointer<value_type>(mat1);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);

  value_type data_alpha = alpha;
  if (flip_sign_alpha)
    data_alpha = -data_alpha;
  if (mat1.row_major())
  {
    // wrapper_A / wrapper_B provide (row, col) access to the raw buffers
    if (reciprocal_alpha)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha;
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha;
    }
  }
  else   // column-major storage: iterate columns in the outer (parallel) loop
  {
    if (reciprocal_alpha)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha;
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha;
    }
  }
}
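// --- Illustrative sketch (not part of the original header) --------------------------
// The coefficient handling shared by am(), ambm() and ambm_m(), condensed into one
// helper: flip_sign negates the coefficient, reciprocal turns the multiplication into a
// division. The kernels above fold the sign flip in up front and then branch only on the
// reciprocal flag, so the inner loops stay branch-free.
template<typename T>
T apply_coefficient_sketch(T value, T coeff, bool reciprocal, bool flip_sign)
{
  if (flip_sign)
    coeff = -coeff;
  return reciprocal ? value / coeff : value * coeff;
}
// -------------------------------------------------------------------------------------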
// ambm(): mat1 = alpha * mat2 + beta * mat3.
template<typename NumericT, typename ScalarT1, typename ScalarT2>
void ambm(matrix_base<NumericT> & mat1,
          matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha,
          matrix_base<NumericT> const & mat3, ScalarT2 const & beta,  vcl_size_t, bool reciprocal_beta,  bool flip_sign_beta)
{
  typedef NumericT value_type;

  value_type       * data_A = detail::extract_raw_pointer<value_type>(mat1);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);
  value_type const * data_C = detail::extract_raw_pointer<value_type>(mat3);

  value_type data_alpha = alpha;
  if (flip_sign_alpha)
    data_alpha = -data_alpha;

  value_type data_beta = beta;
  if (flip_sign_beta)
    data_beta = -data_beta;
  if (mat1.row_major())
  {
    if (reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
    }
    else if (!reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (!reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
    }
  }
  else   // column-major storage
  {
    if (reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
    }
    else if (!reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (!reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
    }
  }
}
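// Usage sketch (assumption: the matrices reside in main memory, so the host backend in
// this file is selected by the dispatcher, and expressions of this shape end up in ambm()):
//
//   viennacl::matrix<double> A(16, 16), B(16, 16), C(16, 16);
//   A = 2.0 * B + C / 3.0;   // alpha = 2 (multiplicative), beta = 3 (applied as reciprocal)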
// ambm_m(): mat1 += alpha * mat2 + beta * mat3.
template<typename NumericT, typename ScalarT1, typename ScalarT2>
void ambm_m(matrix_base<NumericT> & mat1,
            matrix_base<NumericT> const & mat2, ScalarT1 const & alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha,
            matrix_base<NumericT> const & mat3, ScalarT2 const & beta,  vcl_size_t, bool reciprocal_beta,  bool flip_sign_beta)
{
  typedef NumericT value_type;

  value_type       * data_A = detail::extract_raw_pointer<value_type>(mat1);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);
  value_type const * data_C = detail::extract_raw_pointer<value_type>(mat3);

  value_type data_alpha = alpha;
  if (flip_sign_alpha)
    data_alpha = -data_alpha;

  value_type data_beta = beta;
  if (flip_sign_beta)
    data_beta = -data_beta;
  if (mat1.row_major())
  {
    if (reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
    }
    else if (!reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (!reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
        for (vcl_size_t col = 0; col < A_size2; ++col)
          wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
    }
  }
  else   // column-major storage
  {
    if (reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
    }
    else if (!reciprocal_alpha && reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
    }
    else if (!reciprocal_alpha && !reciprocal_beta)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long col = 0; col < static_cast<long>(A_size2); ++col)
        for (vcl_size_t row = 0; row < A_size1; ++row)
          wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
    }
  }
}
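// Usage sketch (same assumptions as above); in-place variants of the expression are
// forwarded to ambm_m():
//
//   A += 2.0 * B + 3.0 * C;
//   A -= 2.0 * B + 3.0 * C;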
// matrix_assign(): sets every entry of mat to s (when 'clear' is set, the loop bounds
// cover the padded internal region as well).
template<typename NumericT>
void matrix_assign(matrix_base<NumericT> & mat, NumericT s, bool clear = false)
{
  typedef NumericT value_type;

  value_type * data_A = detail::extract_raw_pointer<value_type>(mat);
  value_type alpha = static_cast<value_type>(s);

  if (mat.row_major())
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      for (vcl_size_t col = 0; col < A_size2; ++col)
        wrapper_A(static_cast<vcl_size_t>(row), col) = alpha;
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(A_size2); ++col)
      for (vcl_size_t row = 0; row < A_size1; ++row)
        wrapper_A(row, static_cast<vcl_size_t>(col)) = alpha;
  }
}
// matrix_diagonal_assign(): writes s to the diagonal entries of mat.
template<typename NumericT>
void matrix_diagonal_assign(matrix_base<NumericT> & mat, NumericT s)
{
  typedef NumericT value_type;

  value_type * data_A = detail::extract_raw_pointer<value_type>(mat);
  value_type alpha = static_cast<value_type>(s);

  if (mat.row_major())
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      wrapper_A(row, row) = alpha;
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      wrapper_A(row, row) = alpha;
  }
}
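// Usage sketch (assumption: the public initializer types dispatch to matrix_assign() and
// matrix_diagonal_assign() for matrices held in main memory):
//
//   viennacl::matrix<double> A(8, 8);
//   A = viennacl::scalar_matrix<double>(8, 8, 3.0);   // all entries set to 3.0
//   A = viennacl::identity_matrix<double>(8);         // zeros, then ones on the diagonal
//   A.clear();                                        // all entries set to 0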
// matrix_diag_from_vector(): writes vec onto the k-th (sub- or super-)diagonal of mat,
// starting at (row_start, col_start).
template<typename NumericT>
void matrix_diag_from_vector(const vector_base<NumericT> & vec, int k, matrix_base<NumericT> & mat)
{
  typedef NumericT value_type;

  value_type       * data_A   = detail::extract_raw_pointer<value_type>(mat);
  value_type const * data_vec = detail::extract_raw_pointer<value_type>(vec);

  // ... (row-major branch)
      wrapper_A(row_start + i, col_start + i) = data_vec[v_start + i * v_inc];
  // ... (column-major branch)
      wrapper_A(row_start + i, col_start + i) = data_vec[v_start + i * v_inc];
}
// matrix_diag_to_vector(): extracts the k-th diagonal of mat into vec.
template<typename NumericT>
void matrix_diag_to_vector(const matrix_base<NumericT> & mat, int k, vector_base<NumericT> & vec)
{
  typedef NumericT value_type;

  value_type const * data_A   = detail::extract_raw_pointer<value_type>(mat);
  value_type       * data_vec = detail::extract_raw_pointer<value_type>(vec);

  // ... (row-major branch)
      data_vec[v_start + i * v_inc] = wrapper_A(row_start + i, col_start + i);
  // ... (column-major branch)
      data_vec[v_start + i * v_inc] = wrapper_A(row_start + i, col_start + i);
}
// matrix_row(): copies row i of mat into vec.
template<typename NumericT>
void matrix_row(const matrix_base<NumericT> & mat, unsigned int i, vector_base<NumericT> & vec)
{
  typedef NumericT value_type;

  value_type const * data_A   = detail::extract_raw_pointer<value_type>(mat);
  value_type       * data_vec = detail::extract_raw_pointer<value_type>(vec);

  // ... (row-major branch)
      data_vec[v_start + j * v_inc] = wrapper_A(static_cast<vcl_size_t>(i), j);
  // ... (column-major branch)
      data_vec[v_start + j * v_inc] = wrapper_A(static_cast<vcl_size_t>(i), j);
}
// matrix_column(): copies column j of mat into vec.
template<typename NumericT>
void matrix_column(const matrix_base<NumericT> & mat, unsigned int j, vector_base<NumericT> & vec)
{
  typedef NumericT value_type;

  value_type const * data_A   = detail::extract_raw_pointer<value_type>(mat);
  value_type       * data_vec = detail::extract_raw_pointer<value_type>(vec);

  // ... (row-major branch)
      data_vec[v_start + i * v_inc] = wrapper_A(i, static_cast<vcl_size_t>(j));
  // ... (column-major branch)
      data_vec[v_start + i * v_inc] = wrapper_A(i, static_cast<vcl_size_t>(j));
}
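// Usage sketch (assumption: the free functions viennacl::row(), viennacl::column() and
// viennacl::diag() produce expressions that are evaluated by matrix_row(), matrix_column()
// and the matrix_diag_*() kernels above):
//
//   viennacl::matrix<double> A(5, 5);
//   viennacl::vector<double> r = viennacl::row(A, 2u);      // third row of A
//   viennacl::vector<double> c = viennacl::column(A, 0u);   // first column of A
//   viennacl::vector<double> d = viennacl::diag(A, 0);      // main diagonal of A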
// element_op() (binary): A = B .* C or A = B ./ C (MATLAB notation), depending on OpT.
template<typename NumericT, typename OpT>
void element_op(matrix_base<NumericT> & A,
                matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_binary<OpT> > const & proxy)
{
  assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!"));

  typedef NumericT value_type;
  // OpFunctor below is the op_applier for op_element_binary<OpT>.

  value_type       * data_A = detail::extract_raw_pointer<value_type>(A);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(proxy.lhs());
  value_type const * data_C = detail::extract_raw_pointer<value_type>(proxy.rhs());

  if (A.row_major())
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      for (vcl_size_t col = 0; col < A_size2; ++col)
        OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col), wrapper_C(row, col));
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(A_size2); ++col)
      for (vcl_size_t row = 0; row < A_size1; ++row)
        OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col), wrapper_C(row, col));
  }
}
// element_op() (unary): A = op(B) applied entry-wise (e.g. element_sin, element_exp).
template<typename NumericT, typename OpT>
void element_op(matrix_base<NumericT> & A,
                matrix_expression<const matrix_base<NumericT>, const matrix_base<NumericT>, op_element_unary<OpT> > const & proxy)
{
  assert(A.row_major() == proxy.lhs().row_major() && A.row_major() == proxy.rhs().row_major() && bool("Element-wise operations on mixed matrix layouts not supported yet!"));

  typedef NumericT value_type;

  value_type       * data_A = detail::extract_raw_pointer<value_type>(A);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(proxy.lhs());

  if (A.row_major())
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long row = 0; row < static_cast<long>(A_size1); ++row)
      for (vcl_size_t col = 0; col < A_size2; ++col)
        OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col));
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long col = 0; col < static_cast<long>(A_size2); ++col)
      for (vcl_size_t row = 0; row < A_size1; ++row)
        OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col));
  }
}
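// Usage sketch (assumption: element_prod() / element_div() from the public API build
// op_element_binary expressions that end up in the binary element_op() above):
//
//   viennacl::matrix<double> A(4, 4), B(4, 4), C(4, 4);
//   A = viennacl::linalg::element_prod(B, C);   // A(i,j) = B(i,j) * C(i,j)
//   A = viennacl::linalg::element_div(B, C);    // A(i,j) = B(i,j) / C(i,j)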
// prod_impl(): matrix-vector product, result = A * x (or A^T * x if 'trans' is set).
template<typename NumericT>
void prod_impl(const matrix_base<NumericT> & mat, bool trans,
               const vector_base<NumericT> & vec, vector_base<NumericT> & result)
{
  typedef NumericT value_type;

  value_type const * data_A      = detail::extract_raw_pointer<value_type>(mat);
  value_type const * data_x      = detail::extract_raw_pointer<value_type>(vec);
  value_type       * data_result = detail::extract_raw_pointer<value_type>(result);

  if (mat.row_major())
  {
    if (trans)
    {
      {
        value_type temp = data_x[start1];
        // ... initialize data_result with the contribution of the first row of A
      }
      for (vcl_size_t col = 1; col < A_size1; ++col)   // stream through the matrix row by row
      {
        value_type temp = data_x[col * inc1 + start1];
        // ... accumulate the contribution of this row into data_result
      }
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long row = 0; row < static_cast<long>(A_size1); ++row)
      {
        value_type temp = 0;
        for (vcl_size_t col = 0; col < A_size2; ++col)
          temp += data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(row) * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * data_x[col * inc1 + start1];
        data_result[static_cast<vcl_size_t>(row) * inc2 + start2] = temp;
      }
    }
  }
  else
  {
    if (!trans)
    {
      {
        value_type temp = data_x[start1];
        // ... initialize data_result with the contribution of the first column of A
      }
      for (vcl_size_t col = 1; col < A_size2; ++col)   // stream through the matrix column by column
      {
        value_type temp = data_x[col * inc1 + start1];
        // ... accumulate the contribution of this column into data_result
      }
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long row = 0; row < static_cast<long>(A_size2); ++row)
      {
        value_type temp = 0;
        for (vcl_size_t col = 0; col < A_size1; ++col)
          temp += data_A[viennacl::column_major::mem_index(col * A_inc1 + A_start1, static_cast<vcl_size_t>(row) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * data_x[col * inc1 + start1];
        data_result[static_cast<vcl_size_t>(row) * inc2 + start2] = temp;
      }
    }
  }
}
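// Usage sketch (assumption: viennacl::linalg::prod() forwards to prod_impl() above when
// the operands live in main memory):
//
//   viennacl::matrix<double> A(300, 200);
//   viennacl::vector<double> x(200), y(300);
//   y = viennacl::linalg::prod(A, x);                    // y = A * x
//   x = viennacl::linalg::prod(viennacl::trans(A), y);   // x = A^T * y (trans == true)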
// detail::prod(): blocked GEMM kernel, C = beta * C + alpha * A * B. A, B and C are
// layout- and transposition-aware accessor wrappers; blocks of A and B are packed into
// contiguous buffers so that the innermost loop runs over consecutive memory.
template<typename MatrixAccT1, typename MatrixAccT2, typename MatrixAccT3, typename NumericT>
void prod(MatrixAccT1 & A, MatrixAccT2 & B, MatrixAccT3 & C,
          vcl_size_t C_size1, vcl_size_t C_size2, vcl_size_t A_size2,
          NumericT alpha, NumericT beta)
{
  if (C_size1 == 0 || C_size2 == 0 || A_size2 == 0)
    return;

  vcl_size_t num_blocks_C1 = (C_size1 - 1) / blocksize + 1;
  vcl_size_t num_blocks_C2 = (C_size2 - 1) / blocksize + 1;
  vcl_size_t num_blocks_A2 = (A_size2 - 1) / blocksize + 1;

#ifdef VIENNACL_WITH_OPENMP
  #pragma omp parallel for
#endif
  for (long block_idx_i2 = 0; block_idx_i2 < static_cast<long>(num_blocks_C1); ++block_idx_i2)
  {
    // thread-local packed buffers for the current blocks
    std::vector<NumericT> buffer_A(blocksize * blocksize);
    std::vector<NumericT> buffer_B(blocksize * blocksize);
    std::vector<NumericT> buffer_C(blocksize * blocksize);

    for (vcl_size_t block_idx_j = 0; block_idx_j < num_blocks_C2; ++block_idx_j)
    {
      std::fill(buffer_C.begin(), buffer_C.end(), NumericT(0));

      for (vcl_size_t block_idx_k = 0; block_idx_k < num_blocks_A2; ++block_idx_k)
      {
        std::fill(buffer_A.begin(), buffer_A.end(), NumericT(0));
        std::fill(buffer_B.begin(), buffer_B.end(), NumericT(0));

        // pack the current blocks (buffer_A row-wise, buffer_B column-wise):
            buffer_A[(i - offset_i) * blocksize + (k - offset_k)] = A(i, k);
            buffer_B[(k - offset_k) + (j - offset_j) * blocksize] = B(k, j);

        // multiply the packed blocks:
            NumericT const * ptrA = &(buffer_A[i*blocksize]);
            NumericT const * ptrB = &(buffer_B[j*blocksize]);

            NumericT temp = NumericT(0);
            for (vcl_size_t k = 0; k < blocksize; ++k)
              temp += ptrA[k] * ptrB[k];

            buffer_C[i*blocksize + j] += temp;
      }

      // write the accumulated block back to C:
      if (beta > 0 || beta < 0)   // beta != 0
            C(i,j) = beta * C(i,j) + alpha * buffer_C[(i - offset_i) * blocksize + (j - offset_j)];
      else
            C(i,j) = alpha * buffer_C[(i - offset_i) * blocksize + (j - offset_j)];
    }
  }
}
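// --- Illustrative sketch (not part of the original header) --------------------------
// The same cache-blocking idea on plain row-major arrays, without the padding, OpenMP
// and layout-abstracting wrappers used above; the alpha/beta handling of detail::prod()
// is omitted, so this simply computes C += A * B block by block. Assumes std::min is
// available (the surrounding code already uses std::fill from <algorithm>).
inline void blocked_gemm_sketch(double const * A, double const * B, double * C,
                                vcl_size_t M, vcl_size_t N, vcl_size_t K,
                                vcl_size_t blocksize)
{
  for (vcl_size_t i0 = 0; i0 < M; i0 += blocksize)
    for (vcl_size_t j0 = 0; j0 < N; j0 += blocksize)
      for (vcl_size_t k0 = 0; k0 < K; k0 += blocksize)
        for (vcl_size_t i = i0; i < std::min(i0 + blocksize, M); ++i)
          for (vcl_size_t j = j0; j < std::min(j0 + blocksize, N); ++j)
          {
            double temp = 0;
            for (vcl_size_t k = k0; k < std::min(k0 + blocksize, K); ++k)
              temp += A[i*K + k] * B[k*N + j];   // row-major: A is M x K, B is K x N
            C[i*N + j] += temp;
          }
}
// -------------------------------------------------------------------------------------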
// prod_impl(): dispatcher for the dense matrix-matrix product C = alpha * op(A) * op(B) + beta * C,
// where op() is either the identity or the transpose, selected by trans_A / trans_B.
template<typename NumericT, typename ScalarT1, typename ScalarT2>
  typedef NumericT value_type;

  value_type const * data_A = detail::extract_raw_pointer<value_type>(A);
  value_type const * data_B = detail::extract_raw_pointer<value_type>(B);
  value_type       * data_C = detail::extract_raw_pointer<value_type>(C);

  // For each transposition case, the raw buffers are wrapped in (const_)matrix_array_wrapper
  // objects, one variant per row-major/column-major combination of A, B and C (eight cases
  // per branch, all ending in the same call; the wrapper definitions are omitted here):
  if (!trans_A && !trans_B)
  {
    detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
  }
  else if (!trans_A && trans_B)
  {
    detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
  }
  else if (trans_A && !trans_B)
  {
    // A enters transposed, so the inner dimension is A_size1:
    detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
  }
  else if (trans_A && trans_B)
  {
    detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
  }
}
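// Usage sketch (assumption: viennacl::linalg::prod() for two matrices is forwarded to this
// dispatcher and hence to the blocked kernel above):
//
//   viennacl::matrix<double> A(100, 50), B(50, 80), C(100, 80);
//   C = viennacl::linalg::prod(A, B);    // C = A * B
//   C += viennacl::linalg::prod(A, B);   // accumulate: C = C + A * B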
// scaled_rank_1_update(): mat1 += alpha * vec1 * vec2^T.
template<typename NumericT, typename ScalarT>
void scaled_rank_1_update(matrix_base<NumericT> & mat1,
                          ScalarT const & alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha,
                          const vector_base<NumericT> & vec1, const vector_base<NumericT> & vec2)
{
  typedef NumericT value_type;

  value_type       * data_A  = detail::extract_raw_pointer<value_type>(mat1);
  value_type const * data_v1 = detail::extract_raw_pointer<value_type>(vec1);
  value_type const * data_v2 = detail::extract_raw_pointer<value_type>(vec2);

  value_type data_alpha = alpha;
  if (flip_sign_alpha)
    data_alpha = -data_alpha;
  if (reciprocal_alpha)
    data_alpha = static_cast<value_type>(1) / data_alpha;

  if (mat1.row_major())
  {
    // for each row: scale v1(row) once, then stream through the row of mat1
      value_type value_v1 = data_alpha * data_v1[row * inc1 + start1];
      for (vcl_size_t col = 0; col < A_size2; ++col)
        // wrapper_A(row, col) += value_v1 * v2(col)
  }
  else
  {
    // column-major: scale v2(col) once per column instead
      for (vcl_size_t col = 0; col < A_size2; ++col)
      {
        value_type value_v2 = data_alpha * data_v2[col * inc2 + start2];
        // ...
      }
  }
}
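// Usage sketch (assumption: viennacl::linalg::outer_prod() expressions are evaluated by
// scaled_rank_1_update()):
//
//   viennacl::matrix<double> A(10, 12);
//   viennacl::vector<double> x(10), y(12);
//   A += 2.0 * viennacl::linalg::outer_prod(x, y);   // A += 2 * x * y^T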
// bidiag_pack_impl(): copies the diagonal of A into D and its superdiagonal into S.
template<typename NumericT, typename S1>
void bidiag_pack_impl(matrix_base<NumericT> & A, vector_base<S1> & D, vector_base<S1> & S)
{
  typedef NumericT value_type;

  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
  value_type * data_D = detail::extract_raw_pointer<value_type>(D);
  value_type * data_S = detail::extract_raw_pointer<value_type>(S);

  if (A.row_major())
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long i2 = 0; i2 < long(size) - 1; i2++)
    {
      vcl_size_t i = static_cast<vcl_size_t>(i2);
      data_D[start1 + inc1 * i]       = data_A[viennacl::row_major::mem_index(i * A_inc1 + A_start1, i * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
      data_S[start2 + inc2 * (i + 1)] = data_A[viennacl::row_major::mem_index(i * A_inc1 + A_start1, (i + 1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
    }
    data_D[start1 + inc1 * (size-1)] = data_A[viennacl::row_major::mem_index((size-1) * A_inc1 + A_start1, (size-1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
  }
  else
  {
#ifdef VIENNACL_WITH_OPENMP
    #pragma omp parallel for
#endif
    for (long i2 = 0; i2 < long(size) - 1; i2++)
    {
      vcl_size_t i = static_cast<vcl_size_t>(i2);
      data_S[start2 + inc2 * (i + 1)] = data_A[viennacl::column_major::mem_index(i * A_inc1 + A_start1, (i + 1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
    }
    data_D[start1 + inc1 * (size-1)] = data_A[viennacl::column_major::mem_index((size-1) * A_inc1 + A_start1, (size-1) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
  }
}
// bidiag_pack(): thin wrapper forwarding the vector bases of dh and sh to bidiag_pack_impl().
template<typename NumericT, typename VectorType>
// house_update_A_left(): applies a Householder reflection from the left, A <- P * A with
// P = I - 2 * v * v^T; the reflector v is stored in D.
template<typename NumericT>
void house_update_A_left(matrix_base<NumericT> & A, vector_base<NumericT> & D, vcl_size_t start)
{
  typedef NumericT value_type;

  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
  value_type * data_D = detail::extract_raw_pointer<value_type>(D);

  if (A.row_major())
  {
    // for each column i: ss = v^T * A(:, i), then subtract 2 * v_j * ss from column i
      for (vcl_size_t j = row_start; j < A_size1; j++)
        ss = ss + data_D[start1 + inc1 * j] * data_A[viennacl::row_major::mem_index((j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];

#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long j = static_cast<long>(row_start); j < static_cast<long>(A_size1); j++)
        data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] =
          data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] -
          (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)] * ss);
  }
  else
  {
      for (vcl_size_t j = row_start; j < A_size1; j++)
        ss = ss + data_D[start1 + inc1 * j] * data_A[viennacl::column_major::mem_index((j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];

#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long j = static_cast<long>(row_start); j < static_cast<long>(A_size1); j++)
        data_A[viennacl::column_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] =
          data_A[viennacl::column_major::mem_index(static_cast<vcl_size_t>(j) * A_inc1 + A_start1, (i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] -
          (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)] * ss);
  }
}
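// --- Illustrative sketch (not part of the original header) --------------------------
// Applying P = I - 2 * v * v^T to a single column x of length n, the way the two loops
// above do it (v is assumed to be normalized):
inline void house_apply_left_sketch(double const * v, double * x, vcl_size_t n)
{
  double ss = 0;
  for (vcl_size_t j = 0; j < n; ++j)   // ss = v^T x
    ss += v[j] * x[j];
  for (vcl_size_t j = 0; j < n; ++j)   // x <- x - 2 * v * (v^T x)
    x[j] -= 2.0 * v[j] * ss;
}
// -------------------------------------------------------------------------------------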
// house_update_A_right(): applies a Householder reflection from the right, A <- A * P
// with P = I - 2 * v * v^T; the reflector v is stored in D.
template<typename NumericT>
void house_update_A_right(matrix_base<NumericT> & A, vector_base<NumericT> & D)
{
  typedef NumericT value_type;

  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
  value_type * data_D = detail::extract_raw_pointer<value_type>(D);

  if (A.row_major())
  {
    // for each row i: ss = A(i, :) * v, then subtract 2 * ss * v_j from row i
        ss = ss + (data_D[start1 + inc1 * j] * data_A[viennacl::row_major::mem_index((i) * A_inc1 + A_start1, (j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]);

      NumericT sum_Av = ss;
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long j = 0; j < static_cast<long>(A_size2); j++)
        data_A[viennacl::row_major::mem_index((i) * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] =
          data_A[viennacl::row_major::mem_index((i) * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] - (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)] * sum_Av);
  }
  else
  {
        ss = ss + (data_D[start1 + inc1 * j] * data_A[viennacl::column_major::mem_index((i) * A_inc1 + A_start1, (j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]);

      NumericT sum_Av = ss;
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long j = 0; j < static_cast<long>(A_size2); j++)
        data_A[viennacl::column_major::mem_index((i) * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] =
          data_A[viennacl::column_major::mem_index((i) * A_inc1 + A_start1, static_cast<vcl_size_t>(j) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] - (2 * data_D[start1 + inc1 * static_cast<vcl_size_t>(j)] * sum_Av);
  }
}
// house_update_QL(): updates Q with the Householder reflector built from D,
// Q <- Q * P (needed for assembling the eigenvectors).
template<typename NumericT>
  // ...
  Q = prod(Q_temp, vcl_P);
// givens_next(): applies the Givens rotations stored in (tmp1, tmp2) to columns l..m of Q
// (part of the tql2 eigenvalue algorithm).
template<typename NumericT>
void givens_next(matrix_base<NumericT> & Q, vector_base<NumericT> & tmp1, vector_base<NumericT> & tmp2, int l, int m)
{
  typedef NumericT value_type;

  value_type * data_Q    = detail::extract_raw_pointer<value_type>(Q);
  value_type * data_tmp1 = detail::extract_raw_pointer<value_type>(tmp1);
  value_type * data_tmp2 = detail::extract_raw_pointer<value_type>(tmp2);

  if (Q.row_major())
  {
    for (int i = m - 1; i >= l; i--)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long k = 0; k < static_cast<long>(Q_size1); k++)
        // ... rotate entries (k, i) and (k, i+1) of Q using the coefficients tmp1[i], tmp2[i]
    }
  }
  else
  {
    for (int i = m - 1; i >= l; i--)
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long k = 0; k < static_cast<long>(Q_size1); k++)
        // ... same rotation with column-major indexing
    }
  }
}
// copy_vec(): copies a column (copy_col == true) or a row of A, starting at
// (row_start, col_start), into the vector V.
template<typename NumericT, typename S1>
void copy_vec(matrix_base<NumericT> & A, vector_base<S1> & V,
              vcl_size_t row_start, vcl_size_t col_start, bool copy_col)
{
  typedef NumericT value_type;

  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
  value_type * data_V = detail::extract_raw_pointer<value_type>(V);

  if (copy_col)
  {
    if (A.row_major())
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long i = static_cast<long>(row_start); i < static_cast<long>(A_size1); i++)
        data_V[i - static_cast<long>(row_start)] = data_A[viennacl::row_major::mem_index(static_cast<vcl_size_t>(i) * A_inc1 + A_start1, col_start * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long i = static_cast<long>(row_start); i < static_cast<long>(A_size1); i++)
        data_V[i - static_cast<long>(row_start)] = data_A[viennacl::column_major::mem_index(static_cast<vcl_size_t>(i) * A_inc1 + A_start1, col_start * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
    }
  }
  else   // copy a row
  {
    if (A.row_major())
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long i = static_cast<long>(col_start); i < static_cast<long>(A_size1); i++)
        data_V[i - static_cast<long>(col_start)] = data_A[viennacl::row_major::mem_index(row_start * A_inc1 + A_start1, static_cast<vcl_size_t>(i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
    }
    else
    {
#ifdef VIENNACL_WITH_OPENMP
      #pragma omp parallel for
#endif
      for (long i = static_cast<long>(col_start); i < static_cast<long>(A_size1); i++)
        data_V[i - static_cast<long>(col_start)] = data_A[viennacl::column_major::mem_index(row_start * A_inc1 + A_start1, static_cast<vcl_size_t>(i) * A_inc2 + A_start2, A_internal_size1, A_internal_size2)];
    }
  }
}
// inclusive_scan(): vec2[i] = vec1[0] + ... + vec1[i]
template<typename NumericT>
    // ...
    vec2[i * inc2 + start2] = vec2[(i - 1) * inc2 + start2] + vec1[i * inc1 + start1];

// exclusive_scan(): vec2[i] = vec1[0] + ... + vec1[i-1], with vec2[0] = 0
template<typename NumericT>
    // ...
    vec2[i * inc2 + start2] = vec2[(i - 1) * inc2 + start2] + vec1[(i - 1) * inc1 + start1];
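// --- Illustrative sketch (not part of the original header) --------------------------
// Scan semantics on plain arrays, matching the update formulas above: the inclusive scan
// keeps the current element, the exclusive scan is shifted by one and starts at zero.
//   input:     1 2 3 4
//   inclusive: 1 3 6 10
//   exclusive: 0 1 3 6
inline void inclusive_scan_sketch(double const * in, double * out, vcl_size_t n)
{
  double running = 0;
  for (vcl_size_t i = 0; i < n; ++i)
    out[i] = (running += in[i]);
}
inline void exclusive_scan_sketch(double const * in, double * out, vcl_size_t n)
{
  double running = 0;
  for (vcl_size_t i = 0; i < n; ++i)
  {
    out[i] = running;
    running += in[i];
  }
}
// -------------------------------------------------------------------------------------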