ViennaCL - The Vienna Computing Library  1.6.2
Free open-source GPU-accelerated linear algebra and solver library.
vector_operations.hpp
1 #ifndef VIENNACL_LINALG_HOST_BASED_VECTOR_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_HOST_BASED_VECTOR_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 /** @file viennacl/linalg/host_based/vector_operations.hpp
22  @brief Implementations of vector operations using a plain single-threaded or OpenMP-enabled execution on CPU
23 */
24 
25 #include <cmath>
26 #include <algorithm> //for std::max and std::min
27 
28 #include "viennacl/forwards.h"
29 #include "viennacl/scalar.hpp"
30 #include "viennacl/tools/tools.hpp"
31 #include "viennacl/meta/predicate.hpp"
32 #include "viennacl/meta/enable_if.hpp"
33 #include "viennacl/traits/size.hpp"
34 #include "viennacl/traits/start.hpp"
35 #include "viennacl/linalg/host_based/common.hpp"
36 #include "viennacl/linalg/detail/op_applier.hpp"
37 #include "viennacl/traits/stride.hpp"
38 
39 
40 // Minimum vector size for using OpenMP on vector operations:
41 #ifndef VIENNACL_OPENMP_VECTOR_MIN_SIZE
42  #define VIENNACL_OPENMP_VECTOR_MIN_SIZE 5000
43 #endif
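// Example (a sketch; it relies only on the #ifndef guard above): the threshold can be
// overridden by defining the macro before any ViennaCL header is included,
//
//   #define VIENNACL_OPENMP_VECTOR_MIN_SIZE 10000   // parallelize only vectors with more than 10000 entries
//   #include "viennacl/vector.hpp"
//
// or equivalently on the compiler command line, e.g. -DVIENNACL_OPENMP_VECTOR_MIN_SIZE=10000.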
44 
45 namespace viennacl
46 {
47 namespace linalg
48 {
49 namespace host_based
50 {
51 namespace detail
52 {
53  template<typename NumericT>
54  NumericT flip_sign(NumericT val) { return -val; }
55  inline unsigned long flip_sign(unsigned long val) { return val; }
56  inline unsigned int flip_sign(unsigned int val) { return val; }
57  inline unsigned short flip_sign(unsigned short val) { return val; }
58  inline unsigned char flip_sign(unsigned char val) { return val; }
59 }
60 
61 //
62 // Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here!
63 //
64 
65 template<typename NumericT, typename ScalarT1>
66 void av(vector_base<NumericT> & vec1,
67  vector_base<NumericT> const & vec2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha)
68 {
69  typedef NumericT value_type;
70 
71  value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
72  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
73 
74  value_type data_alpha = alpha;
75  if (flip_sign_alpha)
76  data_alpha = detail::flip_sign(data_alpha);
77 
78  vcl_size_t start1 = viennacl::traits::start(vec1);
79  vcl_size_t inc1 = viennacl::traits::stride(vec1);
80  vcl_size_t size1 = viennacl::traits::size(vec1);
81 
82  vcl_size_t start2 = viennacl::traits::start(vec2);
83  vcl_size_t inc2 = viennacl::traits::stride(vec2);
84 
85  if (reciprocal_alpha)
86  {
87 #ifdef VIENNACL_WITH_OPENMP
88  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
89 #endif
90  for (long i = 0; i < static_cast<long>(size1); ++i)
91  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha;
92  }
93  else
94  {
95 #ifdef VIENNACL_WITH_OPENMP
96  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
97 #endif
98  for (long i = 0; i < static_cast<long>(size1); ++i)
99  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha;
100  }
101 }
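// Reading aid (a sketch inferred from the loop bodies above, not an official API note):
// av() realizes vec1 = alpha * vec2 for the host backend. The flags describe how the
// dispatcher hands over alpha:
//   - flip_sign_alpha: negate alpha first (a no-op for unsigned integer types, cf. detail::flip_sign)
//   - reciprocal_alpha: divide instead of multiply, i.e. vec1[i] = vec2[i] / alpha
// With both flags set the loop therefore computes vec1[i] = vec2[i] / (-alpha).
// avbv() and avbv_v() below extend the same scheme to vec1 = alpha*vec2 + beta*vec3 and
// vec1 += alpha*vec2 + beta*vec3, respectively.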
102 
103 
104 template<typename NumericT, typename ScalarT1, typename ScalarT2>
105 void avbv(vector_base<NumericT> & vec1,
106  vector_base<NumericT> const & vec2, ScalarT1 const & alpha, vcl_size_t /* len_alpha */, bool reciprocal_alpha, bool flip_sign_alpha,
107  vector_base<NumericT> const & vec3, ScalarT2 const & beta, vcl_size_t /* len_beta */, bool reciprocal_beta, bool flip_sign_beta)
108 {
109  typedef NumericT value_type;
110 
111  value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
112  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
113  value_type const * data_vec3 = detail::extract_raw_pointer<value_type>(vec3);
114 
115  value_type data_alpha = alpha;
116  if (flip_sign_alpha)
117  data_alpha = detail::flip_sign(data_alpha);
118 
119  value_type data_beta = beta;
120  if (flip_sign_beta)
121  data_beta = detail::flip_sign(data_beta);
122 
123  vcl_size_t start1 = viennacl::traits::start(vec1);
124  vcl_size_t inc1 = viennacl::traits::stride(vec1);
125  vcl_size_t size1 = viennacl::traits::size(vec1);
126 
127  vcl_size_t start2 = viennacl::traits::start(vec2);
128  vcl_size_t inc2 = viennacl::traits::stride(vec2);
129 
130  vcl_size_t start3 = viennacl::traits::start(vec3);
131  vcl_size_t inc3 = viennacl::traits::stride(vec3);
132 
133  if (reciprocal_alpha)
134  {
135  if (reciprocal_beta)
136  {
137 #ifdef VIENNACL_WITH_OPENMP
138  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
139 #endif
140  for (long i = 0; i < static_cast<long>(size1); ++i)
141  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
142  }
143  else
144  {
145 #ifdef VIENNACL_WITH_OPENMP
146  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
147 #endif
148  for (long i = 0; i < static_cast<long>(size1); ++i)
149  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
150  }
151  }
152  else
153  {
154  if (reciprocal_beta)
155  {
156 #ifdef VIENNACL_WITH_OPENMP
157  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
158 #endif
159  for (long i = 0; i < static_cast<long>(size1); ++i)
160  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
161  }
162  else
163  {
164 #ifdef VIENNACL_WITH_OPENMP
165  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
166 #endif
167  for (long i = 0; i < static_cast<long>(size1); ++i)
168  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
169  }
170  }
171 }
172 
173 
174 template<typename NumericT, typename ScalarT1, typename ScalarT2>
175 void avbv_v(vector_base<NumericT> & vec1,
176  vector_base<NumericT> const & vec2, ScalarT1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
177  vector_base<NumericT> const & vec3, ScalarT2 const & beta, vcl_size_t /*len_beta*/, bool reciprocal_beta, bool flip_sign_beta)
178 {
179  typedef NumericT value_type;
180 
181  value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
182  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
183  value_type const * data_vec3 = detail::extract_raw_pointer<value_type>(vec3);
184 
185  value_type data_alpha = alpha;
186  if (flip_sign_alpha)
187  data_alpha = detail::flip_sign(data_alpha);
188 
189  value_type data_beta = beta;
190  if (flip_sign_beta)
191  data_beta = detail::flip_sign(data_beta);
192 
193  vcl_size_t start1 = viennacl::traits::start(vec1);
194  vcl_size_t inc1 = viennacl::traits::stride(vec1);
195  vcl_size_t size1 = viennacl::traits::size(vec1);
196 
197  vcl_size_t start2 = viennacl::traits::start(vec2);
198  vcl_size_t inc2 = viennacl::traits::stride(vec2);
199 
200  vcl_size_t start3 = viennacl::traits::start(vec3);
201  vcl_size_t inc3 = viennacl::traits::stride(vec3);
202 
203  if (reciprocal_alpha)
204  {
205  if (reciprocal_beta)
206  {
207 #ifdef VIENNACL_WITH_OPENMP
208  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
209 #endif
210  for (long i = 0; i < static_cast<long>(size1); ++i)
211  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] += data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
212  }
213  else
214  {
215 #ifdef VIENNACL_WITH_OPENMP
216  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
217 #endif
218  for (long i = 0; i < static_cast<long>(size1); ++i)
219  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] += data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] / data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
220  }
221  }
222  else
223  {
224  if (reciprocal_beta)
225  {
226 #ifdef VIENNACL_WITH_OPENMP
227  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
228 #endif
229  for (long i = 0; i < static_cast<long>(size1); ++i)
230  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] += data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] / data_beta;
231  }
232  else
233  {
234 #ifdef VIENNACL_WITH_OPENMP
235  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
236 #endif
237  for (long i = 0; i < static_cast<long>(size1); ++i)
238  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] += data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] * data_alpha + data_vec3[static_cast<vcl_size_t>(i)*inc3+start3] * data_beta;
239  }
240  }
241 }
242 
243 
244 
245 
246 /** @brief Assign a constant value to a vector (-range/-slice)
247 *
248 * @param vec1   The vector to which the value should be assigned
249 * @param alpha  The value to be assigned
250 * @param up_to_internal_size  Whether 'alpha' should also be written to the padded memory beyond the current vector size (used for fast copy operations)
251 */
252 template<typename NumericT>
253 void vector_assign(vector_base<NumericT> & vec1, const NumericT & alpha, bool up_to_internal_size = false)
254 {
255  typedef NumericT value_type;
256 
257  value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
258 
259  vcl_size_t start1 = viennacl::traits::start(vec1);
260  vcl_size_t inc1 = viennacl::traits::stride(vec1);
261  vcl_size_t size1 = viennacl::traits::size(vec1);
262  vcl_size_t loop_bound = up_to_internal_size ? vec1.internal_size() : size1; //Note: Do NOT use traits::internal_size() here, because vector proxies don't require padding.
263 
264  value_type data_alpha = static_cast<value_type>(alpha);
265 
266 #ifdef VIENNACL_WITH_OPENMP
267  #pragma omp parallel for if (loop_bound > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
268 #endif
269  for (long i = 0; i < static_cast<long>(loop_bound); ++i)
270  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_alpha;
271 }
272 
273 
274 /** @brief Swaps the contents of two vectors, data is copied
275 *
276 * @param vec1   The first vector (or -range, or -slice)
277 * @param vec2   The second vector (or -range, or -slice)
278 */
279 template<typename NumericT>
280 void vector_swap(vector_base<NumericT> & vec1, vector_base<NumericT> & vec2)
281 {
282  typedef NumericT value_type;
283 
284  value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
285  value_type * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
286 
287  vcl_size_t start1 = viennacl::traits::start(vec1);
288  vcl_size_t inc1 = viennacl::traits::stride(vec1);
289  vcl_size_t size1 = viennacl::traits::size(vec1);
290 
291  vcl_size_t start2 = viennacl::traits::start(vec2);
292  vcl_size_t inc2 = viennacl::traits::stride(vec2);
293 
294 #ifdef VIENNACL_WITH_OPENMP
295  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
296 #endif
297  for (long i = 0; i < static_cast<long>(size1); ++i)
298  {
299  value_type temp = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2];
300  data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] = data_vec1[static_cast<vcl_size_t>(i)*inc1+start1];
301  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = temp;
302  }
303 }
304 
305 
307 
308 /** @brief Implementation of the element-wise operation v1 = v2 .* v3 and v1 = v2 ./ v3 (using MATLAB syntax)
309 *
310 * @param vec1   The result vector (or -range, or -slice)
311 * @param proxy  The proxy object holding v2, v3 and the binary operation
312 */
313 template<typename NumericT, typename OpT>
314 void element_op(vector_base<NumericT> & vec1,
315  vector_expression<const vector_base<NumericT>, const vector_base<NumericT>, op_element_binary<OpT> > const & proxy)
316 {
317  typedef NumericT value_type;
318  typedef viennacl::linalg::detail::op_applier<op_element_binary<OpT> > OpFunctor;
319 
320  value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
321  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(proxy.lhs());
322  value_type const * data_vec3 = detail::extract_raw_pointer<value_type>(proxy.rhs());
323 
324  vcl_size_t start1 = viennacl::traits::start(vec1);
325  vcl_size_t inc1 = viennacl::traits::stride(vec1);
326  vcl_size_t size1 = viennacl::traits::size(vec1);
327 
328  vcl_size_t start2 = viennacl::traits::start(proxy.lhs());
329  vcl_size_t inc2 = viennacl::traits::stride(proxy.lhs());
330 
331  vcl_size_t start3 = viennacl::traits::start(proxy.rhs());
332  vcl_size_t inc3 = viennacl::traits::stride(proxy.rhs());
333 
334 #ifdef VIENNACL_WITH_OPENMP
335  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
336 #endif
337  for (long i = 0; i < static_cast<long>(size1); ++i)
338  OpFunctor::apply(data_vec1[static_cast<vcl_size_t>(i)*inc1+start1], data_vec2[static_cast<vcl_size_t>(i)*inc2+start2], data_vec3[static_cast<vcl_size_t>(i)*inc3+start3]);
339 }
340 
341 /** @brief Implementation of the element-wise operation v1 = OP(v2), where OP is a unary operation such as sin, cos, exp or abs
342 *
343 * @param vec1   The result vector (or -range, or -slice)
344 * @param proxy  The proxy object holding v2 and the unary operation
345 */
346 template<typename NumericT, typename OpT>
347 void element_op(vector_base<NumericT> & vec1,
348  vector_expression<const vector_base<NumericT>, const vector_base<NumericT>, op_element_unary<OpT> > const & proxy)
349 {
350  typedef NumericT value_type;
351  typedef viennacl::linalg::detail::op_applier<op_element_unary<OpT> > OpFunctor;
352 
353  value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
354  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(proxy.lhs());
355 
356  vcl_size_t start1 = viennacl::traits::start(vec1);
357  vcl_size_t inc1 = viennacl::traits::stride(vec1);
358  vcl_size_t size1 = viennacl::traits::size(vec1);
359 
360  vcl_size_t start2 = viennacl::traits::start(proxy.lhs());
361  vcl_size_t inc2 = viennacl::traits::stride(proxy.lhs());
362 
363 #ifdef VIENNACL_WITH_OPENMP
364  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
365 #endif
366  for (long i = 0; i < static_cast<long>(size1); ++i)
367  OpFunctor::apply(data_vec1[static_cast<vcl_size_t>(i)*inc1+start1], data_vec2[static_cast<vcl_size_t>(i)*inc2+start2]);
368 }
369 
370 
372 
373 
374 //implementation of inner product:
375 
376 namespace detail
377 {
378 
379 // the following circumvents problems when trying to use a variable of template parameter type for a reduction.
380 // Such a behavior is not covered by the OpenMP standard, hence we manually apply some preprocessor magic to resolve the problem.
381 // See https://github.com/viennacl/viennacl-dev/issues/112 for a detailed explanation and discussion.
382 
383 #define VIENNACL_INNER_PROD_IMPL_1(RESULTSCALART, TEMPSCALART) \
384  inline RESULTSCALART inner_prod_impl(RESULTSCALART const * data_vec1, vcl_size_t start1, vcl_size_t inc1, vcl_size_t size1, \
385  RESULTSCALART const * data_vec2, vcl_size_t start2, vcl_size_t inc2) { \
386  TEMPSCALART temp = 0;
387 
388 #define VIENNACL_INNER_PROD_IMPL_2(RESULTSCALART) \
389  for (long i = 0; i < static_cast<long>(size1); ++i) \
390  temp += data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] * data_vec2[static_cast<vcl_size_t>(i)*inc2+start2]; \
391  return static_cast<RESULTSCALART>(temp); \
392  }
393 
394 // char
395 VIENNACL_INNER_PROD_IMPL_1(char, int)
396 #ifdef VIENNACL_WITH_OPENMP
397  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
398 #endif
399 VIENNACL_INNER_PROD_IMPL_2(char)
400 
401 VIENNACL_INNER_PROD_IMPL_1(unsigned char, int)
402 #ifdef VIENNACL_WITH_OPENMP
403  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
404 #endif
405 VIENNACL_INNER_PROD_IMPL_2(unsigned char)
406 
407 
408 // short
409 VIENNACL_INNER_PROD_IMPL_1(short, int)
410 #ifdef VIENNACL_WITH_OPENMP
411  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
412 #endif
413 VIENNACL_INNER_PROD_IMPL_2(short)
414 
415 VIENNACL_INNER_PROD_IMPL_1(unsigned short, int)
416 #ifdef VIENNACL_WITH_OPENMP
417  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
418 #endif
419 VIENNACL_INNER_PROD_IMPL_2(unsigned short)
420 
421 
422 // int
423 VIENNACL_INNER_PROD_IMPL_1(int, int)
424 #ifdef VIENNACL_WITH_OPENMP
425  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
426 #endif
427 VIENNACL_INNER_PROD_IMPL_2(int)
428 
429 VIENNACL_INNER_PROD_IMPL_1(unsigned int, unsigned int)
430 #ifdef VIENNACL_WITH_OPENMP
431  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
432 #endif
433 VIENNACL_INNER_PROD_IMPL_2(unsigned int)
434 
435 
436 // long
437 VIENNACL_INNER_PROD_IMPL_1(long, long)
438 #ifdef VIENNACL_WITH_OPENMP
439  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
440 #endif
441 VIENNACL_INNER_PROD_IMPL_2(long)
442 
443 VIENNACL_INNER_PROD_IMPL_1(unsigned long, unsigned long)
444 #ifdef VIENNACL_WITH_OPENMP
445  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
446 #endif
447 VIENNACL_INNER_PROD_IMPL_2(unsigned long)
448 
449 
450 // float
451 VIENNACL_INNER_PROD_IMPL_1(float, float)
452 #ifdef VIENNACL_WITH_OPENMP
453  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
454 #endif
455 VIENNACL_INNER_PROD_IMPL_2(float)
456 
457 // double
458 VIENNACL_INNER_PROD_IMPL_1(double, double)
459 #ifdef VIENNACL_WITH_OPENMP
460  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
461 #endif
462 VIENNACL_INNER_PROD_IMPL_2(double)
463 
464 #undef VIENNACL_INNER_PROD_IMPL_1
465 #undef VIENNACL_INNER_PROD_IMPL_2
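// Illustration (a sketch obtained by expanding the two macros by hand, not generated code):
// for the float case, the sequence VIENNACL_INNER_PROD_IMPL_1(float, float) / pragma /
// VIENNACL_INNER_PROD_IMPL_2(float) above is equivalent to writing
//
//   inline float inner_prod_impl(float const * data_vec1, vcl_size_t start1, vcl_size_t inc1, vcl_size_t size1,
//                                float const * data_vec2, vcl_size_t start2, vcl_size_t inc2) {
//     float temp = 0;
//   #ifdef VIENNACL_WITH_OPENMP
//     #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
//   #endif
//     for (long i = 0; i < static_cast<long>(size1); ++i)
//       temp += data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] * data_vec2[static_cast<vcl_size_t>(i)*inc2+start2];
//     return static_cast<float>(temp);
//   }
//
// Splitting each overload across two macros keeps the #pragma outside of any macro body
// (a #pragma cannot be emitted by macro expansion) and gives the reduction variable 'temp'
// a concrete type in every overload, which works around the issue described above.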
466 }
467 
468 /** @brief Computes the inner product of two vectors - implementation. Library users should call inner_prod(vec1, vec2) instead.
469 *
470 * @param vec1 The first vector
471 * @param vec2 The second vector
472 * @param result The result scalar
473 */
474 template<typename NumericT, typename ScalarT>
475 void inner_prod_impl(vector_base<NumericT> const & vec1,
476  vector_base<NumericT> const & vec2,
477  ScalarT & result)
478 {
479  typedef NumericT value_type;
480 
481  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
482  value_type const * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
483 
484  vcl_size_t start1 = viennacl::traits::start(vec1);
485  vcl_size_t inc1 = viennacl::traits::stride(vec1);
486  vcl_size_t size1 = viennacl::traits::size(vec1);
487 
488  vcl_size_t start2 = viennacl::traits::start(vec2);
489  vcl_size_t inc2 = viennacl::traits::stride(vec2);
490 
491  result = detail::inner_prod_impl(data_vec1, start1, inc1, size1,
492  data_vec2, start2, inc2); //Note: Assignment to result might be expensive, thus a temporary is introduced here
493 }
494 
495 template<typename NumericT>
496 void inner_prod_impl(vector_base<NumericT> const & x,
497  vector_tuple<NumericT> const & vec_tuple,
498  vector_base<NumericT> & result)
499 {
500  typedef NumericT value_type;
501 
502  value_type const * data_x = detail::extract_raw_pointer<value_type>(x);
503 
504  vcl_size_t start_x = viennacl::traits::start(x);
505  vcl_size_t inc_x = viennacl::traits::stride(x);
506  vcl_size_t size_x = viennacl::traits::size(x);
507 
508  std::vector<value_type> temp(vec_tuple.const_size());
509  std::vector<value_type const *> data_y(vec_tuple.const_size());
510  std::vector<vcl_size_t> start_y(vec_tuple.const_size());
511  std::vector<vcl_size_t> stride_y(vec_tuple.const_size());
512 
513  for (vcl_size_t j=0; j<vec_tuple.const_size(); ++j)
514  {
515  data_y[j] = detail::extract_raw_pointer<value_type>(vec_tuple.const_at(j));
516  start_y[j] = viennacl::traits::start(vec_tuple.const_at(j));
517  stride_y[j] = viennacl::traits::stride(vec_tuple.const_at(j));
518  }
519 
520  // Note: No OpenMP here, because OpenMP cannot perform a reduction over the 'temp' array. The savings in memory bandwidth are expected to still justify this approach.
521  for (vcl_size_t i = 0; i < size_x; ++i)
522  {
523  value_type entry_x = data_x[i*inc_x+start_x];
524  for (vcl_size_t j=0; j < vec_tuple.const_size(); ++j)
525  temp[j] += entry_x * data_y[j][i*stride_y[j]+start_y[j]];
526  }
527 
528  for (vcl_size_t j=0; j < vec_tuple.const_size(); ++j)
529  result[j] = temp[j]; //Note: Assignment to result might be expensive, thus 'temp' is used for accumulation
530 }
531 
532 
533 namespace detail
534 {
535 
536 #define VIENNACL_NORM_1_IMPL_1(RESULTSCALART, TEMPSCALART) \
537  inline RESULTSCALART norm_1_impl(RESULTSCALART const * data_vec1, vcl_size_t start1, vcl_size_t inc1, vcl_size_t size1) { \
538  TEMPSCALART temp = 0;
539 
540 #define VIENNACL_NORM_1_IMPL_2(RESULTSCALART, TEMPSCALART) \
541  for (long i = 0; i < static_cast<long>(size1); ++i) \
542  temp += static_cast<TEMPSCALART>(std::fabs(static_cast<double>(data_vec1[static_cast<vcl_size_t>(i)*inc1+start1]))); \
543  return static_cast<RESULTSCALART>(temp); \
544  }
545 
546 // char
547 VIENNACL_NORM_1_IMPL_1(char, int)
548 #ifdef VIENNACL_WITH_OPENMP
549  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
550 #endif
551 VIENNACL_NORM_1_IMPL_2(char, int)
552 
553 VIENNACL_NORM_1_IMPL_1(unsigned char, int)
554 #ifdef VIENNACL_WITH_OPENMP
555  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
556 #endif
557 VIENNACL_NORM_1_IMPL_2(unsigned char, int)
558 
559 // short
560 VIENNACL_NORM_1_IMPL_1(short, int)
561 #ifdef VIENNACL_WITH_OPENMP
562  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
563 #endif
564 VIENNACL_NORM_1_IMPL_2(short, int)
565 
566 VIENNACL_NORM_1_IMPL_1(unsigned short, int)
567 #ifdef VIENNACL_WITH_OPENMP
568  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
569 #endif
570 VIENNACL_NORM_1_IMPL_2(unsigned short, int)
571 
572 
573 // int
574 VIENNACL_NORM_1_IMPL_1(int, int)
575 #ifdef VIENNACL_WITH_OPENMP
576  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
577 #endif
578 VIENNACL_NORM_1_IMPL_2(int, int)
579 
580 VIENNACL_NORM_1_IMPL_1(unsigned int, unsigned int)
581 #ifdef VIENNACL_WITH_OPENMP
582  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
583 #endif
584 VIENNACL_NORM_1_IMPL_2(unsigned int, unsigned int)
585 
586 
587 // long
588 VIENNACL_NORM_1_IMPL_1(long, long)
589 #ifdef VIENNACL_WITH_OPENMP
590  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
591 #endif
592 VIENNACL_NORM_1_IMPL_2(long, long)
593 
594 VIENNACL_NORM_1_IMPL_1(unsigned long, unsigned long)
595 #ifdef VIENNACL_WITH_OPENMP
596  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
597 #endif
598 VIENNACL_NORM_1_IMPL_2(unsigned long, unsigned long)
599 
600 
601 // float
602 VIENNACL_NORM_1_IMPL_1(float, float)
603 #ifdef VIENNACL_WITH_OPENMP
604  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
605 #endif
606 VIENNACL_NORM_1_IMPL_2(float, float)
607 
608 // double
609 VIENNACL_NORM_1_IMPL_1(double, double)
610 #ifdef VIENNACL_WITH_OPENMP
611  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
612 #endif
613 VIENNACL_NORM_1_IMPL_2(double, double)
614 
615 #undef VIENNACL_NORM_1_IMPL_1
616 #undef VIENNACL_NORM_1_IMPL_2
617 
618 }
619 
620 /** @brief Computes the l^1-norm of a vector
621 *
622 * @param vec1 The vector
623 * @param result The result scalar
624 */
625 template<typename NumericT, typename ScalarT>
626 void norm_1_impl(vector_base<NumericT> const & vec1,
627  ScalarT & result)
628 {
629  typedef NumericT value_type;
630 
631  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
632 
633  vcl_size_t start1 = viennacl::traits::start(vec1);
634  vcl_size_t inc1 = viennacl::traits::stride(vec1);
635  vcl_size_t size1 = viennacl::traits::size(vec1);
636 
637  result = detail::norm_1_impl(data_vec1, start1, inc1, size1); //Note: Assignment to result might be expensive, thus using a temporary for accumulation
638 }
639 
640 
641 
642 namespace detail
643 {
644 
645 #define VIENNACL_NORM_2_IMPL_1(RESULTSCALART, TEMPSCALART) \
646  inline RESULTSCALART norm_2_impl(RESULTSCALART const * data_vec1, vcl_size_t start1, vcl_size_t inc1, vcl_size_t size1) { \
647  TEMPSCALART temp = 0;
648 
649 #define VIENNACL_NORM_2_IMPL_2(RESULTSCALART, TEMPSCALART) \
650  for (long i = 0; i < static_cast<long>(size1); ++i) { \
651  RESULTSCALART data = data_vec1[static_cast<vcl_size_t>(i)*inc1+start1]; \
652  temp += static_cast<TEMPSCALART>(data * data); \
653  } \
654  return static_cast<RESULTSCALART>(temp); \
655  }
656 
657 // char
658 VIENNACL_NORM_2_IMPL_1(char, int)
659 #ifdef VIENNACL_WITH_OPENMP
660  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
661 #endif
662 VIENNACL_NORM_2_IMPL_2(char, int)
663 
664 VIENNACL_NORM_2_IMPL_1(unsigned char, int)
665 #ifdef VIENNACL_WITH_OPENMP
666  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
667 #endif
668 VIENNACL_NORM_2_IMPL_2(unsigned char, int)
669 
670 
671 // short
672 VIENNACL_NORM_2_IMPL_1(short, int)
673 #ifdef VIENNACL_WITH_OPENMP
674  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
675 #endif
676 VIENNACL_NORM_2_IMPL_2(short, int)
677 
678 VIENNACL_NORM_2_IMPL_1(unsigned short, int)
679 #ifdef VIENNACL_WITH_OPENMP
680  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
681 #endif
682 VIENNACL_NORM_2_IMPL_2(unsigned short, int)
683 
684 
685 // int
686 VIENNACL_NORM_2_IMPL_1(int, int)
687 #ifdef VIENNACL_WITH_OPENMP
688  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
689 #endif
690 VIENNACL_NORM_2_IMPL_2(int, int)
691 
692 VIENNACL_NORM_2_IMPL_1(unsigned int, unsigned int)
693 #ifdef VIENNACL_WITH_OPENMP
694  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
695 #endif
696 VIENNACL_NORM_2_IMPL_2(unsigned int, unsigned int)
697 
698 
699 // long
700 VIENNACL_NORM_2_IMPL_1(long, long)
701 #ifdef VIENNACL_WITH_OPENMP
702  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
703 #endif
704 VIENNACL_NORM_2_IMPL_2(long, long)
705 
706 VIENNACL_NORM_2_IMPL_1(unsigned long, unsigned long)
707 #ifdef VIENNACL_WITH_OPENMP
708  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
709 #endif
710 VIENNACL_NORM_2_IMPL_2(unsigned long, unsigned long)
711 
712 
713 // float
714 VIENNACL_NORM_2_IMPL_1(float, float)
715 #ifdef VIENNACL_WITH_OPENMP
716  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
717 #endif
718 VIENNACL_NORM_2_IMPL_2(float, float)
719 
720 // double
721 VIENNACL_NORM_2_IMPL_1(double, double)
722 #ifdef VIENNACL_WITH_OPENMP
723  #pragma omp parallel for reduction(+: temp) if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
724 #endif
725 VIENNACL_NORM_2_IMPL_2(double, double)
726 
727 #undef VIENNACL_NORM_2_IMPL_1
728 #undef VIENNACL_NORM_2_IMPL_2
729 
730 }
731 
732 
733 /** @brief Computes the l^2-norm of a vector - implementation
734 *
735 * @param vec1 The vector
736 * @param result The result scalar
737 */
738 template<typename NumericT, typename ScalarT>
739 void norm_2_impl(vector_base<NumericT> const & vec1,
740  ScalarT & result)
741 {
742  typedef NumericT value_type;
743 
744  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
745 
746  vcl_size_t start1 = viennacl::traits::start(vec1);
747  vcl_size_t inc1 = viennacl::traits::stride(vec1);
748  vcl_size_t size1 = viennacl::traits::size(vec1);
749 
750  result = std::sqrt(detail::norm_2_impl(data_vec1, start1, inc1, size1)); //Note: Assignment to result might be expensive, thus 'temp' is used for accumulation
751 }
752 
753 /** @brief Computes the supremum-norm of a vector
754 *
755 * @param vec1 The vector
756 * @param result The result scalar
757 */
758 template<typename NumericT, typename ScalarT>
759 void norm_inf_impl(vector_base<NumericT> const & vec1,
760  ScalarT & result)
761 {
762  typedef NumericT value_type;
763 
764  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
765 
766  vcl_size_t start1 = viennacl::traits::start(vec1);
767  vcl_size_t inc1 = viennacl::traits::stride(vec1);
768  vcl_size_t size1 = viennacl::traits::size(vec1);
769 
770  value_type temp = 0;
771 
772  // Note: No max() reduction in OpenMP yet
773  for (vcl_size_t i = 0; i < size1; ++i)
774  temp = std::max<value_type>(temp, static_cast<value_type>(std::fabs(static_cast<double>(data_vec1[i*inc1+start1])))); //casting to double in order to avoid problems if T is an integer type
775 
776  result = temp; //Note: Assignment to result might be expensive, thus 'temp' is used for accumulation
777 }
778 
779 //This function should return a CPU scalar, otherwise statements like
780 // vcl_rhs[index_norm_inf(vcl_rhs)]
781 // are ambiguous
782 /** @brief Computes the index of the first entry that is equal to the supremum-norm in modulus
783 *
784 * @param vec1 The vector
785 * @return The index of the first entry that attains the supremum-norm (returned as a CPU scalar)
786 */
787 template<typename NumericT>
788 vcl_size_t index_norm_inf(vector_base<NumericT> const & vec1)
789 {
790  typedef NumericT value_type;
791 
792  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
793 
794  vcl_size_t start1 = viennacl::traits::start(vec1);
795  vcl_size_t inc1 = viennacl::traits::stride(vec1);
796  vcl_size_t size1 = viennacl::traits::size(vec1);
797 
798  value_type temp = 0;
799  value_type data;
800  vcl_size_t index = start1;
801 
802  // Note: No suitable reduction in OpenMP yet
803  for (vcl_size_t i = 0; i < size1; ++i)
804  {
805  data = static_cast<value_type>(std::fabs(static_cast<double>(data_vec1[i*inc1+start1]))); //casting to double in order to avoid problems if T is an integer type
806  if (data > temp)
807  {
808  index = i;
809  temp = data;
810  }
811  }
812 
813  return index;
814 }
815 
816 /** @brief Computes the maximum of a vector
817 *
818 * @param vec1 The vector
819 * @param result The result scalar
820 */
821 template<typename NumericT, typename ScalarT>
822 void max_impl(vector_base<NumericT> const & vec1,
823  ScalarT & result)
824 {
825  typedef NumericT value_type;
826 
827  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
828 
829  vcl_size_t start1 = viennacl::traits::start(vec1);
830  vcl_size_t inc1 = viennacl::traits::stride(vec1);
831  vcl_size_t size1 = viennacl::traits::size(vec1);
832 
833  value_type temp = data_vec1[start1];
834 
835  // Note: No max() reduction in OpenMP yet
836  for (vcl_size_t i = 1; i < size1; ++i)
837  temp = std::max<value_type>(temp, data_vec1[i*inc1+start1]);
838 
839  result = temp; //Note: Assignment to result might be expensive, thus 'temp' is used for accumulation
840 }
841 
842 /** @brief Computes the minimum of a vector
843 *
844 * @param vec1 The vector
845 * @param result The result scalar
846 */
847 template<typename NumericT, typename ScalarT>
848 void min_impl(vector_base<NumericT> const & vec1,
849  ScalarT & result)
850 {
851  typedef NumericT value_type;
852 
853  value_type const * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
854 
858 
859  value_type temp = data_vec1[start1];
860 
861  // Note: No min() reduction in OpenMP yet
862  for (vcl_size_t i = 1; i < size1; ++i)
863  temp = std::min<value_type>(temp, data_vec1[i*inc1+start1]);
864 
865  result = temp; //Note: Assignment to result might be expensive, thus 'temp' is used for accumulation
866 }
867 
868 
869 
870 /** @brief Computes a plane rotation of two vectors
871 *
872 * Computes (x,y) <- (alpha * x + beta * y, -beta * x + alpha * y)
873 *
874 * @param vec1   The first vector
875 * @param vec2   The second vector
876 * @param alpha  The first transformation coefficient
877 * @param beta   The second transformation coefficient
878 */
879 template<typename NumericT>
880 void plane_rotation(vector_base<NumericT> & vec1,
881  vector_base<NumericT> & vec2,
882  NumericT alpha, NumericT beta)
883 {
884  typedef NumericT value_type;
885 
886  value_type * data_vec1 = detail::extract_raw_pointer<value_type>(vec1);
887  value_type * data_vec2 = detail::extract_raw_pointer<value_type>(vec2);
888 
889  vcl_size_t start1 = viennacl::traits::start(vec1);
890  vcl_size_t inc1 = viennacl::traits::stride(vec1);
891  vcl_size_t size1 = viennacl::traits::size(vec1);
892 
893  vcl_size_t start2 = viennacl::traits::start(vec2);
894  vcl_size_t inc2 = viennacl::traits::stride(vec2);
895 
896  value_type data_alpha = alpha;
897  value_type data_beta = beta;
898 
899 #ifdef VIENNACL_WITH_OPENMP
900  #pragma omp parallel for if (size1 > VIENNACL_OPENMP_VECTOR_MIN_SIZE)
901 #endif
902  for (long i = 0; i < static_cast<long>(size1); ++i)
903  {
904  value_type temp1 = data_vec1[static_cast<vcl_size_t>(i)*inc1+start1];
905  value_type temp2 = data_vec2[static_cast<vcl_size_t>(i)*inc2+start2];
906 
907  data_vec1[static_cast<vcl_size_t>(i)*inc1+start1] = data_alpha * temp1 + data_beta * temp2;
908  data_vec2[static_cast<vcl_size_t>(i)*inc2+start2] = data_alpha * temp2 - data_beta * temp1;
909  }
910 }
911 
912 } //namespace host_based
913 } //namespace linalg
914 } //namespace viennacl
915 
916 
917 #endif
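Usage sketch (assumes only the public ViennaCL API, i.e. viennacl::vector, viennacl::copy, viennacl::linalg::inner_prod and viennacl::linalg::norm_2, and the standard header layout). When ViennaCL runs on the host backend, these calls dispatch to the inner_prod_impl() and norm_2_impl() kernels defined in this file; compiling with -DVIENNACL_WITH_OPENMP and the compiler's OpenMP flag enables the parallel loops guarded by VIENNACL_OPENMP_VECTOR_MIN_SIZE.

#include <vector>
#include <iostream>

#include "viennacl/vector.hpp"
#include "viennacl/linalg/inner_prod.hpp"
#include "viennacl/linalg/norm_2.hpp"

int main()
{
  std::vector<float> cpu_x(10000, 1.0f), cpu_y(10000, 2.0f);

  viennacl::vector<float> x(cpu_x.size()), y(cpu_y.size());
  viennacl::copy(cpu_x, x);  // transfer host data into ViennaCL vectors
  viennacl::copy(cpu_y, y);

  float dot  = viennacl::linalg::inner_prod(x, y);  // host backend: inner_prod_impl() above
  float nrm2 = viennacl::linalg::norm_2(x);         // host backend: norm_2_impl() above

  std::cout << "dot = " << dot << ", norm_2 = " << nrm2 << std::endl;
  return 0;
}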