ViennaCL - The Vienna Computing Library  1.6.2
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
vector_float_double.cpp
Go to the documentation of this file.
1 /* =========================================================================
2  Copyright (c) 2010-2014, Institute for Microelectronics,
3  Institute for Analysis and Scientific Computing,
4  TU Wien.
5  Portions of this software are copyright by UChicago Argonne, LLC.
6 
7  -----------------
8  ViennaCL - The Vienna Computing Library
9  -----------------
10 
11  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
12 
13  (A list of authors and contributors can be found in the PDF manual)
14 
15  License: MIT (X11), see file LICENSE in the base directory
16 ============================================================================= */
17 
18 
23 //
24 // *** System
25 //
26 #include <iostream>
27 #include <iomanip>
28 #include <cmath>
29 
30 //
31 // *** ViennaCL
32 //
33 //#define VIENNACL_DEBUG_ALL
34 
35 #include "viennacl/vector.hpp"
42 
43 #include "Random.hpp"
44 
45 
/**
 * Strided, non-owning view onto a host array.
 *
 * Element `i` of the view refers to `p_values[start_idx + i * increment]`.
 * Only the raw pointer is stored: the underlying buffer is neither copied nor
 * released by this class, and no bounds checking is performed.
 *
 * NOTE(review): the scraped listing had lost the `class vector_proxy` line and
 * carried a listing number in front of every line; both are repaired here.
 *
 * @tparam NumericT  Element type of the underlying array.
 */
template<typename NumericT>
class vector_proxy
{
public:
  /**
   * @param p_values      Pointer to the first element of the underlying array.
   * @param start_idx     Offset (in elements) of the first viewed entry.
   * @param increment     Distance (in elements) between consecutive viewed entries.
   * @param num_elements  Number of entries reported by size().
   */
  vector_proxy(NumericT * p_values, std::size_t start_idx, std::size_t increment, std::size_t num_elements)
    : values_(p_values), start_(start_idx), inc_(increment), size_(num_elements) {}

  /** Read-only access to the index-th viewed entry (unchecked). */
  NumericT const & operator[](std::size_t index) const { return values_[start_ + index * inc_]; }

  /** Mutable access to the index-th viewed entry (unchecked). */
  NumericT       & operator[](std::size_t index)       { return values_[start_ + index * inc_]; }

  /** Number of entries in the view, as passed to the constructor. */
  std::size_t size() const { return size_; }

private:
  NumericT * values_;  // underlying storage, not owned
  std::size_t start_;  // offset of the first viewed entry
  std::size_t inc_;    // stride between viewed entries
  std::size_t size_;   // number of viewed entries
};
64 
65 template<typename NumericT>
67 {
68  std::vector<NumericT> std_vec(host_vec.size());
69 
70  for (std::size_t i=0; i<host_vec.size(); ++i)
71  std_vec[i] = host_vec[i];
72 
73  viennacl::copy(std_vec.begin(), std_vec.end(), vcl_vec.begin());
74 }
75 
76 template<typename NumericT>
78 {
79  std::vector<NumericT> std_vec(vcl_vec.size());
80 
81  viennacl::copy(vcl_vec.begin(), vcl_vec.end(), std_vec.begin());
82 
83  for (std::size_t i=0; i<host_vec.size(); ++i)
84  host_vec[i] = std_vec[i];
85 }
86 
87 
88 //
89 // -------------------------------------------------------------
90 //
/**
 * Signed relative difference of two host scalars.
 *
 * Returns (s1 - s2) / max(|s1|, |s2|), or 0 when |s1 - s2| is not greater
 * than zero (which includes the case s1 == s2 == 0, avoiding a division by zero).
 *
 * NOTE(review): the declaration line was missing from the scraped listing and
 * has been reconstructed (the name follows the call `diff(t1, t2)` in check()).
 * The listing also skips one source line at the top of the body; presumably a
 * device synchronisation call - confirm against the upstream file.
 *
 * @param s1  First value.
 * @param s2  Second value.
 * @return    Signed relative difference; callers take the absolute value.
 */
template<typename ScalarType>
ScalarType diff(ScalarType const & s1, ScalarType const & s2)
{
  if (std::fabs(s1 - s2) > 0 )
    return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2));
  return 0;
}
99 //
100 // -------------------------------------------------------------
101 //
102 template<typename ScalarType>
104 {
106  if (std::fabs(s1 - s2) > 0 )
107  return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2));
108  return 0;
109 }
110 //
111 // -------------------------------------------------------------
112 //
113 template<typename ScalarType>
115 {
117  if (std::fabs(s1 - s2) > 0 )
118  return (s1 - s2) / std::max(std::fabs(s1), std::fabs(s2));
119  return 0;
120 }
121 //
122 // -------------------------------------------------------------
123 //
124 template<typename ScalarType, typename ViennaCLVectorType>
125 ScalarType diff(vector_proxy<ScalarType> const & v1, ViennaCLVectorType const & vcl_vec)
126 {
127  std::vector<ScalarType> v2_cpu(vcl_vec.size());
129  viennacl::copy(vcl_vec, v2_cpu);
130 
131  for (unsigned int i=0;i<v1.size(); ++i)
132  {
133  if ( std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ) > 0 )
134  v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) / std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) );
135  else
136  v2_cpu[i] = 0.0;
137  }
138 
139  ScalarType ret = 0;
140  for (std::size_t i=0; i<v2_cpu.size(); ++i)
141  ret = std::max(ret, std::fabs(v2_cpu[i]));
142  return ret;
143 }
144 
145 
/**
 * Compares two quantities via diff() and reports whether they agree within a
 * relative tolerance.
 *
 * Prints "# Error! Relative difference: <value>" to std::cout on failure.
 *
 * @param t1       First operand, forwarded to diff().
 * @param t2       Second operand, forwarded to diff().
 * @param epsilon  Largest accepted absolute value of diff(t1, t2).
 * @return         EXIT_SUCCESS if |diff(t1, t2)| <= epsilon, EXIT_FAILURE otherwise.
 */
template<typename T1, typename T2>
int check(T1 const & t1, T2 const & t2, double epsilon)
{
  double const temp = std::fabs(diff(t1, t2));

  // Written as !(temp <= epsilon) rather than (temp > epsilon) so that a NaN
  // difference is reported as a failure instead of silently passing.
  if (!(temp <= epsilon))
  {
    std::cout << "# Error! Relative difference: " << temp << std::endl;
    return EXIT_FAILURE;
  }
  return EXIT_SUCCESS;
}
159 
160 
161 //
162 // -------------------------------------------------------------
163 //
164 template< typename NumericT, typename Epsilon, typename HostVectorType, typename ViennaCLVectorType1, typename ViennaCLVectorType2 >
165 int test(Epsilon const& epsilon,
166  HostVectorType & host_v1, HostVectorType & host_v2,
167  ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2)
168 {
169  int retval = EXIT_SUCCESS;
170 
171  NumericT cpu_result = 42.0;
172  viennacl::scalar<NumericT> gpu_result = 43.0;
173 
174  //
175  // Initializer:
176  //
177  std::cout << "Checking for zero_vector initializer..." << std::endl;
178  for (std::size_t i=0; i<host_v1.size(); ++i)
179  host_v1[i] = NumericT(0);
180  vcl_v1 = viennacl::zero_vector<NumericT>(vcl_v1.size());
181  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
182  return EXIT_FAILURE;
183 
184  std::cout << "Checking for scalar_vector initializer..." << std::endl;
185  for (std::size_t i=0; i<host_v1.size(); ++i)
186  host_v1[i] = NumericT(cpu_result);
187  vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), cpu_result);
188  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
189  return EXIT_FAILURE;
190 
191  for (std::size_t i=0; i<host_v1.size(); ++i)
192  host_v1[i] = NumericT(gpu_result);
193  vcl_v1 = viennacl::scalar_vector<NumericT>(vcl_v1.size(), gpu_result);
194  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
195  return EXIT_FAILURE;
196 
197  std::cout << "Checking for unit_vector initializer..." << std::endl;
198  for (std::size_t i=0; i<host_v1.size(); ++i)
199  host_v1[i] = NumericT(0);
200  host_v1[5] = NumericT(1);
201  vcl_v1 = viennacl::unit_vector<NumericT>(vcl_v1.size(), 5);
202  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
203  return EXIT_FAILURE;
204 
205 
206  for (std::size_t i=0; i<host_v1.size(); ++i)
207  {
208  host_v1[i] = NumericT(1.0) + random<NumericT>();
209  host_v2[i] = NumericT(1.0) + random<NumericT>();
210  }
211 
212  proxy_copy(host_v1, vcl_v1); //resync
213  proxy_copy(host_v2, vcl_v2);
214 
215  std::cout << "Checking for successful copy..." << std::endl;
216  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
217  return EXIT_FAILURE;
218  if (check(host_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
219  return EXIT_FAILURE;
220 
221  //
222  // Part 1: Norms and inner product
223  //
224 
225  // --------------------------------------------------------------------------
226  std::cout << "Testing inner_prod..." << std::endl;
227  cpu_result = 0;
228  for (std::size_t i=0; i<host_v1.size(); ++i)
229  cpu_result += host_v1[i] * host_v2[i];
230  NumericT cpu_result2 = viennacl::linalg::inner_prod(vcl_v1, vcl_v2);
231  gpu_result = viennacl::linalg::inner_prod(vcl_v1, vcl_v2);
232 
233  std::cout << "Reference: " << cpu_result << std::endl;
234  std::cout << cpu_result2 << std::endl;
235  std::cout << gpu_result << std::endl;
236  if (check(cpu_result, cpu_result2, epsilon) != EXIT_SUCCESS)
237  return EXIT_FAILURE;
238  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
239  return EXIT_FAILURE;
240 
241  cpu_result = 0;
242  for (std::size_t i=0; i<host_v1.size(); ++i)
243  cpu_result += (host_v1[i] + host_v2[i]) * (host_v2[i] - host_v1[i]);
244  NumericT cpu_result3 = viennacl::linalg::inner_prod(vcl_v1 + vcl_v2, vcl_v2 - vcl_v1);
245  gpu_result = viennacl::linalg::inner_prod(vcl_v1 + vcl_v2, vcl_v2 - vcl_v1);
246 
247  std::cout << "Reference: " << cpu_result << std::endl;
248  std::cout << cpu_result3 << std::endl;
249  std::cout << gpu_result << std::endl;
250  if (check(cpu_result, cpu_result3, epsilon) != EXIT_SUCCESS)
251  return EXIT_FAILURE;
252  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
253  return EXIT_FAILURE;
254 
255  // --------------------------------------------------------------------------
256  std::cout << "Testing norm_1..." << std::endl;
257  cpu_result = 0;
258  for (std::size_t i=0; i<host_v1.size(); ++i)
259  cpu_result += std::fabs(host_v1[i]);
260  gpu_result = viennacl::linalg::norm_1(vcl_v1);
261 
262  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
263  return EXIT_FAILURE;
264 
265  gpu_result = 2 * cpu_result; //reset
266  cpu_result = 0;
267  for (std::size_t i=0; i<host_v1.size(); ++i)
268  cpu_result += std::fabs(host_v1[i]);
269  gpu_result = cpu_result;
270  cpu_result = 0;
271  cpu_result = viennacl::linalg::norm_1(vcl_v1);
272 
273  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
274  return EXIT_FAILURE;
275 
276  cpu_result = 0;
277  for (std::size_t i=0; i<host_v1.size(); ++i)
278  cpu_result += std::fabs(host_v1[i] + host_v2[i]);
279  gpu_result = cpu_result;
280  cpu_result = 0;
281  cpu_result = viennacl::linalg::norm_1(vcl_v1 + vcl_v2);
282 
283  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
284  return EXIT_FAILURE;
285 
286  // --------------------------------------------------------------------------
287  std::cout << "Testing norm_2..." << std::endl;
288  cpu_result = 0;
289  for (std::size_t i=0; i<host_v1.size(); ++i)
290  cpu_result += host_v1[i] * host_v1[i];
291  cpu_result = std::sqrt(cpu_result);
292  gpu_result = viennacl::linalg::norm_2(vcl_v1);
293 
294  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
295  return EXIT_FAILURE;
296 
297  gpu_result = 2 * cpu_result; //reset
298  cpu_result = 0;
299  for (std::size_t i=0; i<host_v1.size(); ++i)
300  cpu_result += host_v1[i] * host_v1[i];
301  gpu_result = std::sqrt(cpu_result);
302  cpu_result = viennacl::linalg::norm_2(vcl_v1);
303 
304  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
305  return EXIT_FAILURE;
306 
307  cpu_result = 0;
308  for (std::size_t i=0; i<host_v1.size(); ++i)
309  cpu_result += (host_v1[i] + host_v2[i]) * (host_v1[i] + host_v2[i]);
310  gpu_result = std::sqrt(cpu_result);
311  cpu_result = viennacl::linalg::norm_2(vcl_v1 + vcl_v2);
312 
313  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
314  return EXIT_FAILURE;
315 
316  // --------------------------------------------------------------------------
317  std::cout << "Testing norm_inf..." << std::endl;
318  cpu_result = std::fabs(host_v1[0]);
319  for (std::size_t i=0; i<host_v1.size(); ++i)
320  cpu_result = std::max(std::fabs(host_v1[i]), cpu_result);
321  gpu_result = viennacl::linalg::norm_inf(vcl_v1);
322 
323  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
324  return EXIT_FAILURE;
325 
326  gpu_result = 2 * cpu_result; //reset
327  cpu_result = std::fabs(host_v1[0]);
328  for (std::size_t i=0; i<host_v1.size(); ++i)
329  cpu_result = std::max(std::fabs(host_v1[i]), cpu_result);
330  gpu_result = cpu_result;
331  cpu_result = 0;
332  cpu_result = viennacl::linalg::norm_inf(vcl_v1);
333 
334  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
335  return EXIT_FAILURE;
336 
337  cpu_result = std::fabs(host_v1[0]);
338  for (std::size_t i=0; i<host_v1.size(); ++i)
339  cpu_result = std::max(std::fabs(host_v1[i] + host_v2[i]), cpu_result);
340  gpu_result = cpu_result;
341  cpu_result = 0;
342  cpu_result = viennacl::linalg::norm_inf(vcl_v1 + vcl_v2);
343 
344  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
345  return EXIT_FAILURE;
346 
347  // --------------------------------------------------------------------------
348  std::cout << "Testing index_norm_inf..." << std::endl;
349  std::size_t cpu_index = 0;
350  cpu_result = std::fabs(host_v1[0]);
351  for (std::size_t i=0; i<host_v1.size(); ++i)
352  {
353  if (std::fabs(host_v1[i]) > cpu_result)
354  {
355  cpu_result = std::fabs(host_v1[i]);
356  cpu_index = i;
357  }
358  }
359  std::size_t gpu_index = viennacl::linalg::index_norm_inf(vcl_v1);
360 
361  if (check(static_cast<NumericT>(cpu_index), static_cast<NumericT>(gpu_index), epsilon) != EXIT_SUCCESS)
362  return EXIT_FAILURE;
363  // --------------------------------------------------------------------------
364  cpu_result = host_v1[cpu_index];
365  gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1)];
366 
367  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
368  return EXIT_FAILURE;
369 
370  cpu_result = std::fabs(host_v1[0] + host_v2[0]);
371  for (std::size_t i=0; i<host_v1.size(); ++i)
372  {
373  if (std::fabs(host_v1[i] + host_v2[i]) > cpu_result)
374  {
375  cpu_result = std::fabs(host_v1[i] + host_v2[i]);
376  cpu_index = i;
377  }
378  }
379  cpu_result = host_v1[cpu_index];
380  gpu_result = vcl_v1[viennacl::linalg::index_norm_inf(vcl_v1 + vcl_v2)];
381 
382  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
383  return EXIT_FAILURE;
384 
385 
386  // --------------------------------------------------------------------------
387  std::cout << "Testing max..." << std::endl;
388  cpu_result = host_v1[0];
389  for (std::size_t i=0; i<host_v1.size(); ++i)
390  cpu_result = std::max<NumericT>(cpu_result, host_v1[i]);
391  gpu_result = viennacl::linalg::max(vcl_v1);
392 
393  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
394  return EXIT_FAILURE;
395 
396  cpu_result = host_v1[0];
397  for (std::size_t i=0; i<host_v1.size(); ++i)
398  cpu_result = std::max<NumericT>(cpu_result, host_v1[i]);
399  gpu_result = cpu_result;
400  cpu_result *= 2; //reset
401  cpu_result = viennacl::linalg::max(vcl_v1);
402 
403  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
404  return EXIT_FAILURE;
405 
406  cpu_result = host_v1[0] + host_v2[0];
407  for (std::size_t i=0; i<host_v1.size(); ++i)
408  cpu_result = std::max<NumericT>(cpu_result, host_v1[i] + host_v2[i]);
409  gpu_result = cpu_result;
410  cpu_result *= 2; //reset
411  cpu_result = viennacl::linalg::max(vcl_v1 + vcl_v2);
412 
413  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
414  return EXIT_FAILURE;
415 
416 
417  // --------------------------------------------------------------------------
418  std::cout << "Testing min..." << std::endl;
419  cpu_result = host_v1[0];
420  for (std::size_t i=0; i<host_v1.size(); ++i)
421  cpu_result = std::min<NumericT>(cpu_result, host_v1[i]);
422  gpu_result = viennacl::linalg::min(vcl_v1);
423 
424  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
425  return EXIT_FAILURE;
426 
427  cpu_result = host_v1[0];
428  for (std::size_t i=0; i<host_v1.size(); ++i)
429  cpu_result = std::min<NumericT>(cpu_result, host_v1[i]);
430  gpu_result = cpu_result;
431  cpu_result *= 2; //reset
432  cpu_result = viennacl::linalg::min(vcl_v1);
433 
434  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
435  return EXIT_FAILURE;
436 
437  cpu_result = host_v1[0] + host_v2[0];
438  for (std::size_t i=0; i<host_v1.size(); ++i)
439  cpu_result = std::min<NumericT>(cpu_result, host_v1[i] + host_v2[i]);
440  gpu_result = cpu_result;
441  cpu_result *= 2; //reset
442  cpu_result = viennacl::linalg::min(vcl_v1 + vcl_v2);
443 
444  if (check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
445  return EXIT_FAILURE;
446 
447 
448 
449  //
450  // Plane rotation and assignments
451  //
452 
453  // --------------------------------------------------------------------------
454 
455  for (std::size_t i=0; i<host_v1.size(); ++i)
456  {
457  NumericT temp = NumericT(1.1) * host_v1[i] + NumericT(2.3) * host_v2[i];
458  host_v2[i] = - NumericT(2.3) * host_v1[i] + NumericT(1.1) * host_v2[i];
459  host_v1[i] = temp;
460  }
461  viennacl::linalg::plane_rotation(vcl_v1, vcl_v2, NumericT(1.1), NumericT(2.3));
462 
463  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
464  return EXIT_FAILURE;
465  if (check(host_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
466  return EXIT_FAILURE;
467 
468  // --------------------------------------------------------------------------
469 
470  std::cout << "Testing assignments..." << std::endl;
471  NumericT val = static_cast<NumericT>(1e-1);
472  for (size_t i=0; i < host_v1.size(); ++i)
473  host_v1[i] = val;
474 
475  for (size_t i=0; i < vcl_v1.size(); ++i)
476  vcl_v1(i) = val;
477 
478  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
479  return EXIT_FAILURE;
480 
481  std::cout << "Testing assignments via iterators..." << std::endl;
482 
483  host_v1[2] = static_cast<NumericT>(1.9);
484  vcl_v1[2] = static_cast<NumericT>(1.9);
485 
486  host_v1[2] = static_cast<NumericT>(1.5);
487  typename ViennaCLVectorType1::iterator vcl_v1_it = vcl_v1.begin();
488  ++vcl_v1_it;
489  ++vcl_v1_it;
490  *vcl_v1_it = static_cast<NumericT>(1.5);
491 
492  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
493  return EXIT_FAILURE;
494 
495  //
496  // multiplication and division of vectors by scalars
497  //
498  for (std::size_t i=0; i < host_v1.size(); ++i)
499  {
500  host_v1[i] = NumericT(1.0) + random<NumericT>();
501  host_v2[i] = NumericT(3.1415) * host_v1[i];
502  }
503  proxy_copy(host_v1, vcl_v1); //resync
504  proxy_copy(host_v2, vcl_v2);
505 
506  std::cout << "Testing scaling with CPU scalar..." << std::endl;
507  NumericT alpha = static_cast<NumericT>(1.7182);
508  viennacl::scalar<NumericT> gpu_alpha = alpha;
509 
510  for (std::size_t i=0; i < host_v1.size(); ++i)
511  host_v1[i] *= NumericT(long(alpha));
512  vcl_v1 *= long(alpha);
513 
514  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
515  return EXIT_FAILURE;
516 
517  for (std::size_t i=0; i < host_v1.size(); ++i)
518  host_v1[i] *= NumericT(float(alpha));
519  vcl_v1 *= float(alpha);
520 
521  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
522  return EXIT_FAILURE;
523 
524  for (std::size_t i=0; i < host_v1.size(); ++i)
525  host_v1[i] *= NumericT(double(alpha));
526  vcl_v1 *= double(alpha);
527 
528  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
529  return EXIT_FAILURE;
530 
531 
532  std::cout << "Testing scaling with GPU scalar..." << std::endl;
533  for (std::size_t i=0; i < host_v1.size(); ++i)
534  host_v1[i] *= alpha;
535  vcl_v1 *= gpu_alpha;
536 
537  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
538  return EXIT_FAILURE;
539 
540  std::cout << "Testing scaling with scalar expression..." << std::endl;
541  cpu_result = 0;
542  for (std::size_t i=0; i < host_v1.size(); ++i)
543  cpu_result += host_v1[i] * host_v2[i];
544  for (std::size_t i=0; i < host_v1.size(); ++i)
545  host_v1[i] *= cpu_result;
546  vcl_v1 *= viennacl::linalg::inner_prod(vcl_v1, vcl_v2);
547 
548  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
549  return EXIT_FAILURE;
550 
551  NumericT beta = static_cast<NumericT>(1.4153);
552  viennacl::scalar<NumericT> gpu_beta = beta;
553 
554  std::cout << "Testing shrinking with CPU scalar..." << std::endl;
555  for (std::size_t i=0; i < host_v1.size(); ++i)
556  host_v1[i] /= NumericT(long(beta));
557  vcl_v1 /= long(beta);
558 
559  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
560  return EXIT_FAILURE;
561 
562  for (std::size_t i=0; i < host_v1.size(); ++i)
563  host_v1[i] /= NumericT(float(beta));
564  vcl_v1 /= float(beta);
565 
566  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
567  return EXIT_FAILURE;
568 
569  for (std::size_t i=0; i < host_v1.size(); ++i)
570  host_v1[i] /= NumericT(double(beta));
571  vcl_v1 /= double(beta);
572 
573  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
574  return EXIT_FAILURE;
575 
576 
577  std::cout << "Testing shrinking with GPU scalar..." << std::endl;
578  for (std::size_t i=0; i < host_v1.size(); ++i)
579  host_v1[i] /= beta;
580  vcl_v1 /= gpu_beta;
581 
582  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
583  return EXIT_FAILURE;
584 
585 
586 
587  //
588  // add and inplace_add of vectors
589  //
590  for (size_t i=0; i < host_v1.size(); ++i)
591  {
592  host_v1[i] = NumericT(1.0) + random<NumericT>();
593  host_v2[i] = NumericT(3.1415) * host_v1[i];
594  }
595  proxy_copy(host_v1, vcl_v1); //resync
596  proxy_copy(host_v2, vcl_v2);
597 
598  std::cout << "Testing add on vector..." << std::endl;
599 
600  std::cout << "Checking for successful copy..." << std::endl;
601  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
602  return EXIT_FAILURE;
603  if (check(host_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
604  return EXIT_FAILURE;
605 
606  for (size_t i=0; i < host_v1.size(); ++i)
607  host_v1[i] = host_v1[i] + host_v2[i];
608  vcl_v1 = vcl_v1 + vcl_v2;
609 
610  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
611  return EXIT_FAILURE;
612 
613  std::cout << "Testing add on vector with flipsign..." << std::endl;
614  for (size_t i=0; i < host_v1.size(); ++i)
615  host_v1[i] = - host_v1[i] + host_v2[i];
616  vcl_v1 = - vcl_v1 + vcl_v2;
617 
618  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
619  return EXIT_FAILURE;
620 
621  std::cout << "Testing inplace-add on vector..." << std::endl;
622  for (size_t i=0; i < host_v1.size(); ++i)
623  host_v1[i] += host_v2[i];
624  vcl_v1 += vcl_v2;
625 
626  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
627  return EXIT_FAILURE;
628 
629  std::cout << "Testing assignment to vector with vector multiplied by scalar expression..." << std::endl;
630  cpu_result = 0;
631  for (std::size_t i=0; i < host_v1.size(); ++i)
632  cpu_result += host_v1[i] * host_v2[i];
633  for (std::size_t i=0; i < host_v1.size(); ++i)
634  host_v1[i] = cpu_result * host_v2[i];
635  //host_v1 = inner_prod(host_v1, host_v2) * host_v2;
636  vcl_v1 = viennacl::linalg::inner_prod(vcl_v1, vcl_v2) * vcl_v2;
637 
638  //
639  // subtract and inplace_subtract of vectors
640  //
641  std::cout << "Testing sub on vector..." << std::endl;
642  for (std::size_t i=0; i < host_v1.size(); ++i)
643  host_v2[i] = NumericT(3.1415) * host_v1[i];
644  proxy_copy(host_v1, vcl_v1);
645  proxy_copy(host_v2, vcl_v2);
646 
647  for (std::size_t i=0; i < host_v1.size(); ++i)
648  host_v1[i] = host_v1[i] - host_v2[i];
649  vcl_v1 = vcl_v1 - vcl_v2;
650 
651  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
652  return EXIT_FAILURE;
653 
654  std::cout << "Testing inplace-sub on vector..." << std::endl;
655  for (std::size_t i=0; i < host_v1.size(); ++i)
656  host_v1[i] -= host_v2[i];
657  vcl_v1 -= vcl_v2;
658 
659  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
660  return EXIT_FAILURE;
661 
662 
663 
664  //
665  // multiply-add
666  //
667  std::cout << "Testing multiply-add on vector with CPU scalar (right)..." << std::endl;
668  for (size_t i=0; i < host_v1.size(); ++i)
669  {
670  host_v1[i] = NumericT(1.0) + random<NumericT>();
671  host_v2[i] = NumericT(3.1415) * host_v1[i];
672  }
673  proxy_copy(host_v1, vcl_v1);
674  proxy_copy(host_v2, vcl_v2);
675 
676  for (std::size_t i=0; i < host_v1.size(); ++i)
677  host_v1[i] = host_v1[i] + host_v2[i] * NumericT(float(alpha));
678  vcl_v1 = vcl_v1 + vcl_v2 * float(alpha);
679 
680  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
681  return EXIT_FAILURE;
682 
683  for (std::size_t i=0; i < host_v1.size(); ++i)
684  host_v1[i] = host_v1[i] + host_v2[i] * NumericT(double(alpha));
685  vcl_v1 = vcl_v1 + vcl_v2 * double(alpha);
686 
687  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
688  return EXIT_FAILURE;
689 
690 
691  std::cout << "Testing multiply-add on vector with CPU scalar (left)..." << std::endl;
692  for (std::size_t i=0; i < host_v1.size(); ++i)
693  host_v2[i] = NumericT(3.1415) * host_v1[i];
694  proxy_copy(host_v1, vcl_v1);
695  proxy_copy(host_v2, vcl_v2);
696 
697  for (std::size_t i=0; i < host_v1.size(); ++i)
698  host_v1[i] = NumericT(long(alpha)) * host_v1[i] + host_v2[i];
699  vcl_v1 = long(alpha) * vcl_v1 + vcl_v2;
700 
701  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
702  return EXIT_FAILURE;
703 
704  for (std::size_t i=0; i < host_v1.size(); ++i)
705  host_v1[i] = NumericT(float(alpha)) * host_v1[i] + host_v2[i];
706  vcl_v1 = float(alpha) * vcl_v1 + vcl_v2;
707 
708  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
709  return EXIT_FAILURE;
710 
711  for (std::size_t i=0; i < host_v1.size(); ++i)
712  host_v1[i] = NumericT(double(alpha)) * host_v1[i] + host_v2[i];
713  vcl_v1 = double(alpha) * vcl_v1 + vcl_v2;
714 
715  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
716  return EXIT_FAILURE;
717 
718 
719  std::cout << "Testing multiply-add on vector with CPU scalar (both)..." << std::endl;
720  for (std::size_t i=0; i < host_v1.size(); ++i)
721  host_v2[i] = NumericT(3.1415) * host_v1[i];
722  proxy_copy(host_v1, vcl_v1);
723  proxy_copy(host_v2, vcl_v2);
724 
725  for (std::size_t i=0; i < host_v1.size(); ++i)
726  host_v1[i] = NumericT(long(alpha)) * host_v1[i] + NumericT(long(beta)) * host_v2[i];
727  vcl_v1 = long(alpha) * vcl_v1 + long(beta) * vcl_v2;
728 
729  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
730  return EXIT_FAILURE;
731 
732  for (std::size_t i=0; i < host_v1.size(); ++i)
733  host_v1[i] = NumericT(float(alpha)) * host_v1[i] + NumericT(float(beta)) * host_v2[i];
734  vcl_v1 = float(alpha) * vcl_v1 + float(beta) * vcl_v2;
735 
736  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
737  return EXIT_FAILURE;
738 
739  for (std::size_t i=0; i < host_v1.size(); ++i)
740  host_v1[i] = NumericT(double(alpha)) * host_v1[i] + NumericT(double(beta)) * host_v2[i];
741  vcl_v1 = double(alpha) * vcl_v1 + double(beta) * vcl_v2;
742 
743  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
744  return EXIT_FAILURE;
745 
746 
747  std::cout << "Testing inplace multiply-add on vector with CPU scalar..." << std::endl;
748  for (std::size_t i=0; i < host_v1.size(); ++i)
749  host_v2[i] = NumericT(3.1415) * host_v1[i];
750  proxy_copy(host_v1, vcl_v1);
751  proxy_copy(host_v2, vcl_v2);
752 
753  for (std::size_t i=0; i < host_v1.size(); ++i)
754  host_v1[i] += host_v2[i] * NumericT(long(alpha));
755  vcl_v1 += vcl_v2 * long(alpha);
756 
757  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
758  return EXIT_FAILURE;
759 
760  for (std::size_t i=0; i < host_v1.size(); ++i)
761  host_v1[i] += host_v2[i] * NumericT(float(alpha));
762  vcl_v1 += vcl_v2 * float(alpha);
763 
764  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
765  return EXIT_FAILURE;
766 
767  for (std::size_t i=0; i < host_v1.size(); ++i)
768  host_v1[i] += NumericT(double(alpha)) * host_v2[i];
769  vcl_v1 += double(alpha) * vcl_v2;
770 
771  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
772  return EXIT_FAILURE;
773 
774 
775  std::cout << "Testing multiply-add on vector with GPU scalar (right)..." << std::endl;
776  for (std::size_t i=0; i < host_v1.size(); ++i)
777  host_v2[i] = NumericT(3.1415) * host_v1[i];
778  proxy_copy(host_v1, vcl_v1);
779  proxy_copy(host_v2, vcl_v2);
780 
781  for (std::size_t i=0; i < host_v1.size(); ++i)
782  host_v1[i] = host_v1[i] + alpha * host_v2[i];
783  vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2;
784 
785  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
786  return EXIT_FAILURE;
787 
788  std::cout << "Testing multiply-add on vector with GPU scalar (left)..." << std::endl;
789  for (std::size_t i=0; i < host_v1.size(); ++i)
790  host_v2[i] = NumericT(3.1415) * host_v1[i];
791  proxy_copy(host_v1, vcl_v1);
792  proxy_copy(host_v2, vcl_v2);
793 
794  for (std::size_t i=0; i < host_v1.size(); ++i)
795  host_v1[i] = host_v1[i] + alpha * host_v2[i];
796  vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2;
797 
798  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
799  return EXIT_FAILURE;
800 
801  std::cout << "Testing multiply-add on vector with GPU scalar (both)..." << std::endl;
802  for (std::size_t i=0; i < host_v1.size(); ++i)
803  host_v2[i] = NumericT(3.1415) * host_v1[i];
804  proxy_copy(host_v1, vcl_v1);
805  proxy_copy(host_v2, vcl_v2);
806 
807  for (std::size_t i=0; i < host_v1.size(); ++i)
808  host_v1[i] = alpha * host_v1[i] + beta * host_v2[i];
809  vcl_v1 = gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
810 
811  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
812  return EXIT_FAILURE;
813 
814 
815  std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, adding)..." << std::endl;
816  for (std::size_t i=0; i < host_v1.size(); ++i)
817  host_v2[i] = NumericT(3.1415) * host_v1[i];
818  proxy_copy(host_v1, vcl_v1);
819  proxy_copy(host_v2, vcl_v2);
820 
821  for (std::size_t i=0; i < host_v1.size(); ++i)
822  host_v1[i] += alpha * host_v1[i] + beta * host_v2[i];
823  vcl_v1 += gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
824 
825  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
826  return EXIT_FAILURE;
827 
828  std::cout << "Testing inplace multiply-add on vector with GPU scalar (both, subtracting)..." << std::endl;
829  for (std::size_t i=0; i < host_v1.size(); ++i)
830  host_v2[i] = NumericT(3.1415) * host_v1[i];
831  proxy_copy(host_v1, vcl_v1);
832  proxy_copy(host_v2, vcl_v2);
833 
834  for (std::size_t i=0; i < host_v1.size(); ++i)
835  host_v1[i] += alpha * host_v1[i] - beta * host_v2[i];
836  vcl_v1 += gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
837 
838  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
839  return EXIT_FAILURE;
840 
841 
842 
843  std::cout << "Testing inplace multiply-add on vector with GPU scalar..." << std::endl;
844  for (std::size_t i=0; i < host_v1.size(); ++i)
845  host_v2[i] = NumericT(3.1415) * host_v1[i];
846  proxy_copy(host_v1, vcl_v1);
847  proxy_copy(host_v2, vcl_v2);
848 
849  for (std::size_t i=0; i < host_v1.size(); ++i)
850  host_v1[i] += alpha * host_v2[i];
851  vcl_v1 += gpu_alpha * vcl_v2;
852 
853  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
854  return EXIT_FAILURE;
855 
856 
857  //
858  // division-add
859  //
860  std::cout << "Testing division-add on vector with CPU scalar (right)..." << std::endl;
861  for (size_t i=0; i < host_v1.size(); ++i)
862  {
863  host_v1[i] = NumericT(1.0) + random<NumericT>();
864  host_v2[i] = NumericT(3.1415) * host_v1[i];
865  }
866  proxy_copy(host_v1, vcl_v1);
867  proxy_copy(host_v2, vcl_v2);
868 
869  for (std::size_t i=0; i < host_v1.size(); ++i)
870  host_v1[i] = host_v1[i] + host_v2[i] / NumericT(long(alpha));
871  vcl_v1 = vcl_v1 + vcl_v2 / long(alpha);
872 
873  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
874  return EXIT_FAILURE;
875 
876  for (std::size_t i=0; i < host_v1.size(); ++i)
877  host_v1[i] = host_v1[i] + host_v2[i] / NumericT(float(alpha));
878  vcl_v1 = vcl_v1 + vcl_v2 / float(alpha);
879 
880  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
881  return EXIT_FAILURE;
882 
883  for (std::size_t i=0; i < host_v1.size(); ++i)
884  host_v1[i] = host_v1[i] + host_v2[i] / NumericT(double(alpha));
885  vcl_v1 = vcl_v1 + vcl_v2 / double(alpha);
886 
887  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
888  return EXIT_FAILURE;
889 
890 
891  std::cout << "Testing division-add on vector with CPU scalar (left)..." << std::endl;
892  for (std::size_t i=0; i < host_v1.size(); ++i)
893  host_v2[i] = NumericT(3.1415) * host_v1[i];
894  proxy_copy(host_v1, vcl_v1);
895  proxy_copy(host_v2, vcl_v2);
896 
897  for (std::size_t i=0; i < host_v1.size(); ++i)
898  host_v1[i] = host_v1[i] / NumericT(float(alpha)) + host_v2[i];
899  vcl_v1 = vcl_v1 / float(alpha) + vcl_v2;
900 
901  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
902  return EXIT_FAILURE;
903 
904  for (std::size_t i=0; i < host_v1.size(); ++i)
905  host_v1[i] = host_v1[i] / NumericT(double(alpha)) + host_v2[i];
906  vcl_v1 = vcl_v1 / double(alpha) + vcl_v2;
907 
908  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
909  return EXIT_FAILURE;
910 
911 
912  std::cout << "Testing division-add on vector with CPU scalar (both)..." << std::endl;
913  for (std::size_t i=0; i < host_v1.size(); ++i)
914  host_v2[i] = NumericT(3.1415) * host_v1[i];
915  proxy_copy(host_v1, vcl_v1);
916  proxy_copy(host_v2, vcl_v2);
917 
918  for (std::size_t i=0; i < host_v1.size(); ++i)
919  host_v1[i] = host_v1[i] / NumericT(float(alpha)) + host_v2[i] / NumericT(float(beta));
920  vcl_v1 = vcl_v1 / float(alpha) + vcl_v2 / float(beta);
921 
922  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
923  return EXIT_FAILURE;
924 
925  for (std::size_t i=0; i < host_v1.size(); ++i)
926  host_v1[i] = host_v1[i] / NumericT(double(alpha)) + host_v2[i] / NumericT(double(beta));
927  vcl_v1 = vcl_v1 / double(alpha) + vcl_v2 / double(beta);
928 
929  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
930  return EXIT_FAILURE;
931 
932  std::cout << "Testing division-multiply-add on vector with CPU scalar..." << std::endl;
933  for (std::size_t i=0; i < host_v1.size(); ++i)
934  host_v2[i] = NumericT(3.1415) * host_v1[i];
935  proxy_copy(host_v1, vcl_v1);
936  proxy_copy(host_v2, vcl_v2);
937 
938  for (std::size_t i=0; i < host_v1.size(); ++i)
939  host_v1[i] = host_v1[i] / alpha + host_v2[i] * beta;
940  vcl_v1 = vcl_v1 / alpha + vcl_v2 * beta;
941 
942  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
943  return EXIT_FAILURE;
944 
945 
946  std::cout << "Testing multiply-division-add on vector with CPU scalar..." << std::endl;
947  for (std::size_t i=0; i < host_v1.size(); ++i)
948  host_v2[i] = NumericT(3.1415) * host_v1[i];
949  proxy_copy(host_v1, vcl_v1);
950  proxy_copy(host_v2, vcl_v2);
951 
952  for (std::size_t i=0; i < host_v1.size(); ++i)
953  host_v1[i] = host_v1[i] * alpha + host_v2[i] / beta;
954  vcl_v1 = vcl_v1 * alpha + vcl_v2 / beta;
955 
956  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
957  return EXIT_FAILURE;
958 
959 
960 
961  std::cout << "Testing inplace division-add on vector with CPU scalar..." << std::endl;
962  for (std::size_t i=0; i < host_v1.size(); ++i)
963  host_v2[i] = NumericT(3.1415) * host_v1[i];
964  proxy_copy(host_v1, vcl_v1);
965  proxy_copy(host_v2, vcl_v2);
966 
967  for (std::size_t i=0; i < host_v1.size(); ++i)
968  host_v1[i] += host_v2[i] / alpha;
969  vcl_v1 += vcl_v2 / alpha;
970 
971  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
972  return EXIT_FAILURE;
973 
974 
975  std::cout << "Testing division-add on vector with GPU scalar (right)..." << std::endl;
976  for (std::size_t i=0; i < host_v1.size(); ++i)
977  host_v2[i] = NumericT(3.1415) * host_v1[i];
978  proxy_copy(host_v1, vcl_v1);
979  proxy_copy(host_v2, vcl_v2);
980 
981  for (std::size_t i=0; i < host_v1.size(); ++i)
982  host_v1[i] = host_v1[i] + host_v2[i] / alpha;
983  vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha;
984 
985  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
986  return EXIT_FAILURE;
987 
988  std::cout << "Testing division-add on vector with GPU scalar (left)..." << std::endl;
989  for (std::size_t i=0; i < host_v1.size(); ++i)
990  host_v2[i] = NumericT(3.1415) * host_v1[i];
991  proxy_copy(host_v1, vcl_v1);
992  proxy_copy(host_v2, vcl_v2);
993 
994  for (std::size_t i=0; i < host_v1.size(); ++i)
995  host_v1[i] = host_v1[i] + host_v2[i] / alpha;
996  vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha;
997 
998  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
999  return EXIT_FAILURE;
1000 
1001  std::cout << "Testing division-add on vector with GPU scalar (both)..." << std::endl;
1002  for (std::size_t i=0; i < host_v1.size(); ++i)
1003  host_v2[i] = NumericT(3.1415) * host_v1[i];
1004  proxy_copy(host_v1, vcl_v1);
1005  proxy_copy(host_v2, vcl_v2);
1006 
1007  for (std::size_t i=0; i < host_v1.size(); ++i)
1008  host_v1[i] = host_v1[i] / alpha + host_v2[i] / beta;
1009  vcl_v1 = vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
1010 
1011  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1012  return EXIT_FAILURE;
1013 
1014 
1015  std::cout << "Testing inplace division-add on vector with GPU scalar (both, adding)..." << std::endl;
1016  for (std::size_t i=0; i < host_v1.size(); ++i)
1017  host_v2[i] = NumericT(3.1415) * host_v1[i];
1018  proxy_copy(host_v1, vcl_v1);
1019  proxy_copy(host_v2, vcl_v2);
1020 
1021  for (std::size_t i=0; i < host_v1.size(); ++i)
1022  host_v1[i] += host_v1[i] / alpha + host_v2[i] / beta;
1023  vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
1024 
1025  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1026  return EXIT_FAILURE;
1027 
1028  std::cout << "Testing inplace division-add on vector with GPU scalar (both, subtracting)..." << std::endl;
1029  for (std::size_t i=0; i < host_v1.size(); ++i)
1030  host_v2[i] = NumericT(3.1415) * host_v1[i];
1031  proxy_copy(host_v1, vcl_v1);
1032  proxy_copy(host_v2, vcl_v2);
1033 
1034  for (std::size_t i=0; i < host_v1.size(); ++i)
1035  host_v1[i] += host_v1[i] / alpha - host_v2[i] / beta;
1036  vcl_v1 += vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
1037 
1038  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1039  return EXIT_FAILURE;
1040 
1041  std::cout << "Testing inplace division-multiply-add on vector with GPU scalar (adding)..." << std::endl;
1042  for (std::size_t i=0; i < host_v1.size(); ++i)
1043  host_v2[i] = NumericT(3.1415) * host_v1[i];
1044  proxy_copy(host_v1, vcl_v1);
1045  proxy_copy(host_v2, vcl_v2);
1046 
1047  for (std::size_t i=0; i < host_v1.size(); ++i)
1048  host_v1[i] += host_v1[i] / alpha + host_v2[i] * beta;
1049  vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta;
1050 
1051  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1052  return EXIT_FAILURE;
1053 
1054  std::cout << "Testing inplace multiply-division-add on vector with GPU scalar (subtracting)..." << std::endl;
1055  for (std::size_t i=0; i < host_v1.size(); ++i)
1056  host_v2[i] = NumericT(3.1415) * host_v1[i];
1057  proxy_copy(host_v1, vcl_v1);
1058  proxy_copy(host_v2, vcl_v2);
1059 
1060  for (std::size_t i=0; i < host_v1.size(); ++i)
1061  host_v1[i] += host_v1[i] * alpha - host_v2[i] / beta;
1062  vcl_v1 += vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
1063 
1064  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1065  return EXIT_FAILURE;
1066 
1067 
1068 
1069  std::cout << "Testing inplace division-add on vector with GPU scalar..." << std::endl;
1070  for (std::size_t i=0; i < host_v1.size(); ++i)
1071  host_v2[i] = NumericT(3.1415) * host_v1[i];
1072  proxy_copy(host_v1, vcl_v1);
1073  proxy_copy(host_v2, vcl_v2);
1074 
1075  for (std::size_t i=0; i < host_v1.size(); ++i)
1076  host_v1[i] += host_v2[i] * alpha;
1077  vcl_v1 += vcl_v2 * gpu_alpha;
1078 
1079  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1080  return EXIT_FAILURE;
1081 
1082 
1083 
1084  //
1085  // multiply-subtract
1086  //
1087  std::cout << "Testing multiply-subtract on vector with CPU scalar (right)..." << std::endl;
1088  for (size_t i=0; i < host_v1.size(); ++i)
1089  {
1090  host_v1[i] = NumericT(1.0) + random<NumericT>();
1091  host_v2[i] = NumericT(3.1415) * host_v1[i];
1092  }
1093  proxy_copy(host_v1, vcl_v1);
1094  proxy_copy(host_v2, vcl_v2);
1095 
1096  for (std::size_t i=0; i < host_v1.size(); ++i)
1097  host_v1[i] = host_v1[i] - alpha * host_v2[i];
1098  vcl_v1 = vcl_v1 - alpha * vcl_v2;
1099 
1100  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1101  return EXIT_FAILURE;
1102 
1103 
1104  std::cout << "Testing multiply-subtract on vector with CPU scalar (left)..." << std::endl;
1105  for (std::size_t i=0; i < host_v1.size(); ++i)
1106  host_v2[i] = NumericT(3.1415) * host_v1[i];
1107  proxy_copy(host_v1, vcl_v1);
1108  proxy_copy(host_v2, vcl_v2);
1109 
1110  for (std::size_t i=0; i < host_v1.size(); ++i)
1111  host_v1[i] = alpha * host_v1[i] - host_v2[i];
1112  vcl_v1 = alpha * vcl_v1 - vcl_v2;
1113 
1114  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1115  return EXIT_FAILURE;
1116 
1117  std::cout << "Testing multiply-subtract on vector with CPU scalar (both)..." << std::endl;
1118  for (std::size_t i=0; i < host_v1.size(); ++i)
1119  host_v2[i] = NumericT(3.1415) * host_v1[i];
1120  proxy_copy(host_v1, vcl_v1);
1121  proxy_copy(host_v2, vcl_v2);
1122 
1123  for (std::size_t i=0; i < host_v1.size(); ++i)
1124  host_v1[i] = alpha * host_v1[i] - beta * host_v2[i];
1125  vcl_v1 = alpha * vcl_v1 - beta * vcl_v2;
1126 
1127  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1128  return EXIT_FAILURE;
1129 
1130 
1131  std::cout << "Testing inplace multiply-subtract on vector with CPU scalar..." << std::endl;
1132  for (std::size_t i=0; i < host_v1.size(); ++i)
1133  host_v2[i] = NumericT(3.1415) * host_v1[i];
1134  proxy_copy(host_v1, vcl_v1);
1135  proxy_copy(host_v2, vcl_v2);
1136 
1137  for (std::size_t i=0; i < host_v1.size(); ++i)
1138  host_v1[i] -= alpha * host_v2[i];
1139  vcl_v1 -= alpha * vcl_v2;
1140 
1141  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1142  return EXIT_FAILURE;
1143 
1144 
1145  std::cout << "Testing multiply-subtract on vector with GPU scalar (right)..." << std::endl;
1146  for (std::size_t i=0; i < host_v1.size(); ++i)
1147  host_v2[i] = NumericT(3.1415) * host_v1[i];
1148  proxy_copy(host_v1, vcl_v1);
1149  proxy_copy(host_v2, vcl_v2);
1150 
1151  for (std::size_t i=0; i < host_v1.size(); ++i)
1152  host_v1[i] = host_v1[i] - alpha * host_v2[i];
1153  vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2;
1154 
1155  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1156  return EXIT_FAILURE;
1157 
1158  std::cout << "Testing multiply-subtract on vector with GPU scalar (left)..." << std::endl;
1159  for (std::size_t i=0; i < host_v1.size(); ++i)
1160  host_v2[i] = NumericT(3.1415) * host_v1[i];
1161  proxy_copy(host_v1, vcl_v1);
1162  proxy_copy(host_v2, vcl_v2);
1163 
1164  for (std::size_t i=0; i < host_v1.size(); ++i)
1165  host_v1[i] = host_v1[i] - alpha * host_v2[i];
1166  vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2;
1167 
1168  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1169  return EXIT_FAILURE;
1170 
1171  std::cout << "Testing multiply-subtract on vector with GPU scalar (both)..." << std::endl;
1172  for (std::size_t i=0; i < host_v1.size(); ++i)
1173  host_v2[i] = NumericT(3.1415) * host_v1[i];
1174  proxy_copy(host_v1, vcl_v1);
1175  proxy_copy(host_v2, vcl_v2);
1176 
1177  for (std::size_t i=0; i < host_v1.size(); ++i)
1178  host_v1[i] = alpha * host_v1[i] - beta * host_v2[i];
1179  vcl_v1 = gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
1180 
1181  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1182  return EXIT_FAILURE;
1183 
1184  std::cout << "Testing inplace multiply-subtract on vector with GPU scalar (both, adding)..." << std::endl;
1185  for (std::size_t i=0; i < host_v1.size(); ++i)
1186  host_v2[i] = NumericT(3.1415) * host_v1[i];
1187  proxy_copy(host_v1, vcl_v1);
1188  proxy_copy(host_v2, vcl_v2);
1189 
1190  for (std::size_t i=0; i < host_v1.size(); ++i)
1191  host_v1[i] -= alpha * host_v1[i] + beta * host_v2[i];
1192  vcl_v1 -= gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
1193 
1194  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1195  return EXIT_FAILURE;
1196 
1197  std::cout << "Testing inplace multiply-subtract on vector with GPU scalar (both, subtracting)..." << std::endl;
1198  for (std::size_t i=0; i < host_v1.size(); ++i)
1199  host_v2[i] = NumericT(3.1415) * host_v1[i];
1200  proxy_copy(host_v1, vcl_v1);
1201  proxy_copy(host_v2, vcl_v2);
1202 
1203  for (std::size_t i=0; i < host_v1.size(); ++i)
1204  host_v1[i] -= alpha * host_v1[i] - beta * host_v2[i];
1205  vcl_v1 -= gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
1206 
1207  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1208  return EXIT_FAILURE;
1209 
1210 
1211  std::cout << "Testing inplace multiply-subtract on vector with GPU scalar..." << std::endl;
1212  for (std::size_t i=0; i < host_v1.size(); ++i)
1213  host_v2[i] = NumericT(3.1415) * host_v1[i];
1214  proxy_copy(host_v1, vcl_v1);
1215  proxy_copy(host_v2, vcl_v2);
1216 
1217  for (std::size_t i=0; i < host_v1.size(); ++i)
1218  host_v1[i] -= alpha * host_v2[i];
1219  vcl_v1 -= gpu_alpha * vcl_v2;
1220 
1221  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1222  return EXIT_FAILURE;
1223 
1224 
1225 
1226  //
1227  // division-subtract
1228  //
1229  std::cout << "Testing division-subtract on vector with CPU scalar (right)..." << std::endl;
1230  for (size_t i=0; i < host_v1.size(); ++i)
1231  {
1232  host_v1[i] = NumericT(1.0) + random<NumericT>();
1233  host_v2[i] = NumericT(3.1415) * host_v1[i];
1234  }
1235  proxy_copy(host_v1, vcl_v1);
1236  proxy_copy(host_v2, vcl_v2);
1237 
1238  for (std::size_t i=0; i < host_v1.size(); ++i)
1239  host_v1[i] = host_v1[i] - host_v2[i] / alpha;
1240  vcl_v1 = vcl_v1 - vcl_v2 / alpha;
1241 
1242  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1243  return EXIT_FAILURE;
1244 
1245 
1246  std::cout << "Testing division-subtract on vector with CPU scalar (left)..." << std::endl;
1247  for (std::size_t i=0; i < host_v1.size(); ++i)
1248  host_v2[i] = NumericT(3.1415) * host_v1[i];
1249  proxy_copy(host_v1, vcl_v1);
1250  proxy_copy(host_v2, vcl_v2);
1251 
1252  for (std::size_t i=0; i < host_v1.size(); ++i)
1253  host_v1[i] = host_v1[i] / alpha - host_v2[i];
1254  vcl_v1 = vcl_v1 / alpha - vcl_v2;
1255 
1256  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1257  return EXIT_FAILURE;
1258 
1259  std::cout << "Testing division-subtract on vector with CPU scalar (both)..." << std::endl;
1260  for (std::size_t i=0; i < host_v1.size(); ++i)
1261  host_v2[i] = NumericT(3.1415) * host_v1[i];
1262  proxy_copy(host_v1, vcl_v1);
1263  proxy_copy(host_v2, vcl_v2);
1264 
1265  for (std::size_t i=0; i < host_v1.size(); ++i)
1266  host_v1[i] = host_v1[i] / alpha - host_v2[i] / alpha;
1267  vcl_v1 = vcl_v1 / alpha - vcl_v2 / alpha;
1268 
1269  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1270  return EXIT_FAILURE;
1271 
1272 
1273  std::cout << "Testing inplace division-subtract on vector with CPU scalar..." << std::endl;
1274  for (std::size_t i=0; i < host_v1.size(); ++i)
1275  host_v2[i] = NumericT(3.1415) * host_v1[i];
1276  proxy_copy(host_v1, vcl_v1);
1277  proxy_copy(host_v2, vcl_v2);
1278 
1279  for (std::size_t i=0; i < host_v1.size(); ++i)
1280  host_v1[i] -= host_v2[i] / alpha;
1281  vcl_v1 -= vcl_v2 / alpha;
1282 
1283  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1284  return EXIT_FAILURE;
1285 
1286  std::cout << "Testing inplace division-subtract on vector with GPU scalar..." << std::endl;
1287  for (std::size_t i=0; i < host_v1.size(); ++i)
1288  host_v2[i] = NumericT(3.1415) * host_v1[i];
1289  proxy_copy(host_v1, vcl_v1);
1290  proxy_copy(host_v2, vcl_v2);
1291 
1292  for (std::size_t i=0; i < host_v1.size(); ++i)
1293  host_v1[i] -= host_v2[i] / alpha;
1294  vcl_v1 -= vcl_v2 / gpu_alpha;
1295 
1296  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1297  return EXIT_FAILURE;
1298 
1299 
1300  std::cout << "Testing division-subtract on vector with GPU scalar (right)..." << std::endl;
1301  for (std::size_t i=0; i < host_v1.size(); ++i)
1302  host_v2[i] = NumericT(3.1415) * host_v1[i];
1303  proxy_copy(host_v1, vcl_v1);
1304  proxy_copy(host_v2, vcl_v2);
1305 
1306  for (std::size_t i=0; i < host_v1.size(); ++i)
1307  host_v1[i] = host_v1[i] - host_v2[i] / alpha;
1308  vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha;
1309 
1310  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1311  return EXIT_FAILURE;
1312 
1313  std::cout << "Testing division-subtract on vector with GPU scalar (left)..." << std::endl;
1314  for (std::size_t i=0; i < host_v1.size(); ++i)
1315  host_v2[i] = NumericT(3.1415) * host_v1[i];
1316  proxy_copy(host_v1, vcl_v1);
1317  proxy_copy(host_v2, vcl_v2);
1318 
1319  for (std::size_t i=0; i < host_v1.size(); ++i)
1320  host_v1[i] = host_v1[i] - host_v2[i] / alpha;
1321  vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha;
1322 
1323  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1324  return EXIT_FAILURE;
1325 
1326  std::cout << "Testing division-subtract on vector with GPU scalar (both)..." << std::endl;
1327  for (std::size_t i=0; i < host_v1.size(); ++i)
1328  host_v2[i] = NumericT(3.1415) * host_v1[i];
1329  proxy_copy(host_v1, vcl_v1);
1330  proxy_copy(host_v2, vcl_v2);
1331 
1332  for (std::size_t i=0; i < host_v1.size(); ++i)
1333  host_v1[i] = host_v1[i] / alpha - host_v2[i] / beta;
1334  vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
1335 
1336  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1337  return EXIT_FAILURE;
1338 
1339  std::cout << "Testing inplace division-subtract on vector with GPU scalar (both, adding)..." << std::endl;
1340  for (std::size_t i=0; i < host_v1.size(); ++i)
1341  host_v2[i] = NumericT(3.1415) * host_v1[i];
1342  proxy_copy(host_v1, vcl_v1);
1343  proxy_copy(host_v2, vcl_v2);
1344 
1345  for (std::size_t i=0; i < host_v1.size(); ++i)
1346  host_v1[i] -= host_v1[i] / alpha + host_v2[i] / beta;
1347  vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
1348 
1349  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1350  return EXIT_FAILURE;
1351 
1352  std::cout << "Testing inplace division-subtract on vector with GPU scalar (both, subtracting)..." << std::endl;
1353  for (std::size_t i=0; i < host_v1.size(); ++i)
1354  host_v2[i] = NumericT(3.1415) * host_v1[i];
1355  proxy_copy(host_v1, vcl_v1);
1356  proxy_copy(host_v2, vcl_v2);
1357 
1358  for (std::size_t i=0; i < host_v1.size(); ++i)
1359  host_v1[i] -= host_v1[i] / alpha - host_v2[i] / beta;
1360  vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
1361 
1362  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1363  return EXIT_FAILURE;
1364 
1365  std::cout << "Testing multiply-division-subtract on vector with GPU scalar..." << std::endl;
1366  for (std::size_t i=0; i < host_v1.size(); ++i)
1367  host_v2[i] = NumericT(3.1415) * host_v1[i];
1368  proxy_copy(host_v1, vcl_v1);
1369  proxy_copy(host_v2, vcl_v2);
1370 
1371  for (std::size_t i=0; i < host_v1.size(); ++i)
1372  host_v1[i] = host_v1[i] * alpha - host_v2[i] / beta;
1373  vcl_v1 = vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
1374 
1375  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1376  return EXIT_FAILURE;
1377 
1378  std::cout << "Testing division-multiply-subtract on vector with GPU scalar..." << std::endl;
1379  for (std::size_t i=0; i < host_v1.size(); ++i)
1380  host_v2[i] = NumericT(3.1415) * host_v1[i];
1381  proxy_copy(host_v1, vcl_v1);
1382  proxy_copy(host_v2, vcl_v2);
1383 
1384  for (std::size_t i=0; i < host_v1.size(); ++i)
1385  host_v1[i] = host_v1[i] / alpha - host_v2[i] * beta;
1386  vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta;
1387 
1388  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1389  return EXIT_FAILURE;
1390 
1391  std::cout << "Testing inplace multiply-division-subtract on vector with GPU scalar (adding)..." << std::endl;
1392  for (std::size_t i=0; i < host_v1.size(); ++i)
1393  host_v2[i] = NumericT(3.1415) * host_v1[i];
1394  proxy_copy(host_v1, vcl_v1);
1395  proxy_copy(host_v2, vcl_v2);
1396 
1397  for (std::size_t i=0; i < host_v1.size(); ++i)
1398  host_v1[i] -= host_v1[i] * alpha + host_v2[i] / beta;
1399  vcl_v1 -= vcl_v1 * gpu_alpha + vcl_v2 / gpu_beta;
1400 
1401  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1402  return EXIT_FAILURE;
1403 
1404  std::cout << "Testing inplace division-multiply-subtract on vector with GPU scalar (adding)..." << std::endl;
1405  for (std::size_t i=0; i < host_v1.size(); ++i)
1406  host_v2[i] = NumericT(3.1415) * host_v1[i];
1407  proxy_copy(host_v1, vcl_v1);
1408  proxy_copy(host_v2, vcl_v2);
1409 
1410  for (std::size_t i=0; i < host_v1.size(); ++i)
1411  host_v1[i] -= host_v1[i] / alpha + host_v2[i] * beta;
1412  vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta;
1413 
1414  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1415  return EXIT_FAILURE;
1416 
1417  std::cout << "Testing inplace multiply-division-subtract on vector with GPU scalar (subtracting)..." << std::endl;
1418  for (std::size_t i=0; i < host_v1.size(); ++i)
1419  host_v2[i] = NumericT(3.1415) * host_v1[i];
1420  proxy_copy(host_v1, vcl_v1);
1421  proxy_copy(host_v2, vcl_v2);
1422 
1423  for (std::size_t i=0; i < host_v1.size(); ++i)
1424  host_v1[i] -= host_v1[i] * alpha - host_v2[i] / beta;
1425  vcl_v1 -= vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
1426 
1427  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1428  return EXIT_FAILURE;
1429 
1430  std::cout << "Testing inplace division-multiply-subtract on vector with GPU scalar (subtracting)..." << std::endl;
1431  for (std::size_t i=0; i < host_v1.size(); ++i)
1432  host_v2[i] = NumericT(3.1415) * host_v1[i];
1433  proxy_copy(host_v1, vcl_v1);
1434  proxy_copy(host_v2, vcl_v2);
1435 
1436  for (std::size_t i=0; i < host_v1.size(); ++i)
1437  host_v1[i] -= host_v1[i] / alpha - host_v2[i] * beta;
1438  vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta;
1439 
1440  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1441  return EXIT_FAILURE;
1442 
1443 
1444  std::cout << "Testing inplace division-subtract on vector with GPU scalar..." << std::endl;
1445  for (std::size_t i=0; i < host_v1.size(); ++i)
1446  host_v2[i] = NumericT(3.1415) * host_v1[i];
1447  proxy_copy(host_v1, vcl_v1);
1448  proxy_copy(host_v2, vcl_v2);
1449 
1450  for (std::size_t i=0; i < host_v1.size(); ++i)
1451  host_v1[i] -= alpha * host_v2[i];
1452  vcl_v1 -= gpu_alpha * vcl_v2;
1453 
1454  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1455  return EXIT_FAILURE;
1456 
1457 
1458 
1459  //
1460  // More complicated expressions (for ensuring the operator overloads work correctly)
1461  //
1462  for (std::size_t i=0; i < host_v1.size(); ++i)
1463  {
1464  host_v1[i] = NumericT(1.0) + random<NumericT>();
1465  host_v2[i] = NumericT(3.1415) * host_v1[i];
1466  }
1467  proxy_copy(host_v1, vcl_v1);
1468  proxy_copy(host_v2, vcl_v2);
1469 
1470  std::cout << "Testing three vector additions..." << std::endl;
1471  for (std::size_t i=0; i < host_v1.size(); ++i)
1472  host_v1[i] = host_v2[i] + host_v1[i] + host_v2[i];
1473  vcl_v1 = vcl_v2 + vcl_v1 + vcl_v2;
1474 
1475  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1476  return EXIT_FAILURE;
1477 
1478 
1479  for (std::size_t i=0; i < host_v1.size(); ++i)
1480  host_v2[i] = NumericT(3.1415) * host_v1[i];
1481  proxy_copy(host_v1, vcl_v1);
1482  proxy_copy(host_v2, vcl_v2);
1483 
1484  std::cout << "Testing complicated vector expression with CPU scalar..." << std::endl;
1485  for (std::size_t i=0; i < host_v1.size(); ++i)
1486  host_v1[i] = beta * (host_v1[i] - alpha * host_v2[i]);
1487  vcl_v1 = beta * (vcl_v1 - alpha * vcl_v2);
1488 
1489  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1490  return EXIT_FAILURE;
1491 
1492  std::cout << "Testing complicated vector expression with GPU scalar..." << std::endl;
1493  for (std::size_t i=0; i < host_v1.size(); ++i)
1494  host_v1[i] = beta * (host_v1[i] - alpha * host_v2[i]);
1495  vcl_v1 = gpu_beta * (vcl_v1 - gpu_alpha * vcl_v2);
1496 
1497  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1498  return EXIT_FAILURE;
1499 
1500  // --------------------------------------------------------------------------
1501  for (std::size_t i=0; i < host_v1.size(); ++i)
1502  host_v2[i] = NumericT(3.1415) * host_v1[i];
1503  proxy_copy(host_v1, vcl_v1);
1504  proxy_copy(host_v2, vcl_v2);
1505 
1506  std::cout << "Testing swap..." << std::endl;
1507  for (std::size_t i=0; i < host_v1.size(); ++i)
1508  {
1509  NumericT temp = host_v1[i];
1510  host_v1[i] = host_v2[i];
1511  host_v2[i] = temp;
1512  }
1513  swap(vcl_v1, vcl_v2);
1514 
1515  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1516  return EXIT_FAILURE;
1517 
1518  // --------------------------------------------------------------------------
1519  for (std::size_t i=0; i<host_v1.size(); ++i)
1520  {
1521  host_v1[i] = NumericT(1.0) + random<NumericT>();
1522  host_v2[i] = NumericT(5.0) + random<NumericT>();
1523  }
1524 
1525  proxy_copy(host_v1, vcl_v1);
1526  proxy_copy(host_v2, vcl_v2);
1527 
1528  std::cout << "Testing unary operator-..." << std::endl;
1529  for (std::size_t i=0; i < host_v1.size(); ++i)
1530  host_v1[i] = - host_v2[i];
1531  vcl_v1 = - vcl_v2;
1532 
1533  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1534  return EXIT_FAILURE;
1535 
1536 
1537  std::cout << "Testing elementwise multiplication..." << std::endl;
1538  std::cout << " v1 = element_prod(v1, v2);" << std::endl;
1539  for (std::size_t i=0; i < host_v1.size(); ++i)
1540  host_v1[i] = host_v1[i] * host_v2[i];
1541  vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2);
1542 
1543  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1544  return EXIT_FAILURE;
1545 
1546  std::cout << " v1 += element_prod(v1, v2);" << std::endl;
1547  for (std::size_t i=0; i < host_v1.size(); ++i)
1548  host_v1[i] += host_v1[i] * host_v2[i];
1549  vcl_v1 += viennacl::linalg::element_prod(vcl_v1, vcl_v2);
1550 
1551  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1552  return EXIT_FAILURE;
1553 
1554  std::cout << " v1 -= element_prod(v1, v2);" << std::endl;
1555  for (std::size_t i=0; i < host_v1.size(); ++i)
1556  host_v1[i] -= host_v1[i] * host_v2[i];
1557  vcl_v1 -= viennacl::linalg::element_prod(vcl_v1, vcl_v2);
1558 
1559  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1560  return EXIT_FAILURE;
1561 
1563  std::cout << " v1 = element_prod(v1 + v2, v2);" << std::endl;
1564  for (std::size_t i=0; i < host_v1.size(); ++i)
1565  host_v1[i] = (host_v1[i] + host_v2[i]) * host_v2[i];
1566  vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2);
1567 
1568  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1569  return EXIT_FAILURE;
1570 
1571  std::cout << " v1 += element_prod(v1 + v2, v2);" << std::endl;
1572  for (std::size_t i=0; i < host_v1.size(); ++i)
1573  host_v1[i] += (host_v1[i] + host_v2[i]) * host_v2[i];
1574  vcl_v1 += viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2);
1575 
1576  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1577  return EXIT_FAILURE;
1578 
1579  std::cout << " v1 -= element_prod(v1 + v2, v2);" << std::endl;
1580  for (std::size_t i=0; i < host_v1.size(); ++i)
1581  host_v1[i] -= (host_v1[i] + host_v2[i]) * host_v2[i];
1582  vcl_v1 -= viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2);
1583 
1584  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1585  return EXIT_FAILURE;
1586 
1588  std::cout << " v1 = element_prod(v1, v2 + v1);" << std::endl;
1589  for (std::size_t i=0; i < host_v1.size(); ++i)
1590  host_v1[i] = host_v1[i] * (host_v2[i] + host_v1[i]);
1591  vcl_v1 = viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1);
1592 
1593  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1594  return EXIT_FAILURE;
1595 
1596  std::cout << " v1 += element_prod(v1, v2 + v1);" << std::endl;
1597  for (std::size_t i=0; i < host_v1.size(); ++i)
1598  host_v1[i] += host_v1[i] * (host_v2[i] + host_v1[i]);
1599  vcl_v1 += viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1);
1600 
1601  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1602  return EXIT_FAILURE;
1603 
1604  std::cout << " v1 -= element_prod(v1, v2 + v1);" << std::endl;
1605  for (std::size_t i=0; i < host_v1.size(); ++i)
1606  host_v1[i] -= host_v1[i] * (host_v2[i] + host_v1[i]);
1607  vcl_v1 -= viennacl::linalg::element_prod(vcl_v1, vcl_v2 + vcl_v1);
1608 
1609  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1610  return EXIT_FAILURE;
1611 
1613  std::cout << " v1 = element_prod(v1 + v2, v2 + v1);" << std::endl;
1614  for (std::size_t i=0; i < host_v1.size(); ++i)
1615  host_v1[i] = (host_v1[i] + host_v2[i]) * (host_v2[i] + host_v1[i]);
1616  vcl_v1 = viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1617 
1618  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1619  return EXIT_FAILURE;
1620 
1621  std::cout << " v1 += element_prod(v1 + v2, v2 + v1);" << std::endl;
1622  for (std::size_t i=0; i < host_v1.size(); ++i)
1623  host_v1[i] += (host_v1[i] + host_v2[i]) * (host_v2[i] + host_v1[i]);
1624  vcl_v1 += viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1625 
1626  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1627  return EXIT_FAILURE;
1628 
1629  std::cout << " v1 -= element_prod(v1 + v2, v2 + v1);" << std::endl;
1630  for (std::size_t i=0; i < host_v1.size(); ++i)
1631  host_v1[i] -= (host_v1[i] + host_v2[i]) * (host_v2[i] + host_v1[i]);
1632  vcl_v1 -= viennacl::linalg::element_prod(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1633 
1634  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1635  return EXIT_FAILURE;
1636 
1637 
1638  std::cout << "Testing elementwise division..." << std::endl;
1639  for (std::size_t i=0; i<host_v1.size(); ++i)
1640  {
1641  host_v1[i] = NumericT(1.0) + random<NumericT>();
1642  host_v2[i] = NumericT(5.0) + random<NumericT>();
1643  }
1644 
1645  proxy_copy(host_v1, vcl_v1);
1646  proxy_copy(host_v2, vcl_v2);
1647 
1648  for (std::size_t i=0; i < host_v1.size(); ++i)
1649  host_v1[i] = host_v1[i] / host_v2[i];
1650  vcl_v1 = viennacl::linalg::element_div(vcl_v1, vcl_v2);
1651 
1652  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1653  return EXIT_FAILURE;
1654 
1655  for (std::size_t i=0; i < host_v1.size(); ++i)
1656  host_v1[i] += host_v1[i] / host_v2[i];
1657  vcl_v1 += viennacl::linalg::element_div(vcl_v1, vcl_v2);
1658 
1659  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1660  return EXIT_FAILURE;
1661 
1662  for (std::size_t i=0; i < host_v1.size(); ++i)
1663  host_v1[i] -= host_v1[i] / host_v2[i];
1664  vcl_v1 -= viennacl::linalg::element_div(vcl_v1, vcl_v2);
1665 
1666  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1667  return EXIT_FAILURE;
1668 
1670  for (std::size_t i=0; i < host_v1.size(); ++i)
1671  host_v1[i] = (host_v1[i] + host_v2[i]) / host_v2[i];
1672  vcl_v1 = viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2);
1673 
1674  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1675  return EXIT_FAILURE;
1676 
1677  for (std::size_t i=0; i < host_v1.size(); ++i)
1678  host_v1[i] += (host_v1[i] + host_v2[i]) / host_v2[i];
1679  vcl_v1 += viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2);
1680 
1681  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1682  return EXIT_FAILURE;
1683 
1684  for (std::size_t i=0; i < host_v1.size(); ++i)
1685  host_v1[i] -= (host_v1[i] + host_v2[i]) / host_v2[i];
1686  vcl_v1 -= viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2);
1687 
1688  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1689  return EXIT_FAILURE;
1690 
1692  for (std::size_t i=0; i < host_v1.size(); ++i)
1693  host_v1[i] = host_v1[i] / (host_v2[i] + host_v1[i]);
1694  vcl_v1 = viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1);
1695 
1696  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1697  return EXIT_FAILURE;
1698 
1699  for (std::size_t i=0; i < host_v1.size(); ++i)
1700  host_v1[i] += host_v1[i] / (host_v2[i] + host_v1[i]);
1701  vcl_v1 += viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1);
1702 
1703  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1704  return EXIT_FAILURE;
1705 
1706  for (std::size_t i=0; i < host_v1.size(); ++i)
1707  host_v1[i] -= host_v1[i] / (host_v2[i] + host_v1[i]);
1708  vcl_v1 -= viennacl::linalg::element_div(vcl_v1, vcl_v2 + vcl_v1);
1709 
1710  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1711  return EXIT_FAILURE;
1712 
1714  for (std::size_t i=0; i < host_v1.size(); ++i)
1715  host_v1[i] = (host_v1[i] + host_v2[i]) / (host_v2[i] + host_v1[i]);
1716  vcl_v1 = viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1717 
1718  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1719  return EXIT_FAILURE;
1720 
1721  for (std::size_t i=0; i < host_v1.size(); ++i)
1722  host_v1[i] += (host_v1[i] + host_v2[i]) / (host_v2[i] + host_v1[i]);
1723  vcl_v1 += viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1724 
1725  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1726  return EXIT_FAILURE;
1727 
1728  for (std::size_t i=0; i < host_v1.size(); ++i)
1729  host_v1[i] -= (host_v1[i] + host_v2[i]) / (host_v2[i] + host_v1[i]);
1730  vcl_v1 -= viennacl::linalg::element_div(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1731 
1732  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1733  return EXIT_FAILURE;
1734 
1735 
1736  std::cout << "Testing elementwise power function..." << std::endl;
1737  for (std::size_t i=0; i<host_v1.size(); ++i)
1738  {
1739  host_v1[i] = NumericT(1.1) + NumericT(0.5) * random<NumericT>();
1740  host_v2[i] = NumericT(1.1) + NumericT(0.5) * random<NumericT>();
1741  }
1742  std::vector<NumericT> std_v3(host_v1.size());
1743  vector_proxy<NumericT> host_v3(&std_v3[0], 0, 1, host_v1.size());
1744 
1745  proxy_copy(host_v1, vcl_v1);
1746  proxy_copy(host_v2, vcl_v2);
1747 
1748  for (std::size_t i=0; i<host_v3.size(); ++i)
1749  host_v3[i] = std::pow(host_v1[i], host_v2[i]);
1750  vcl_v1 = viennacl::linalg::element_pow(vcl_v1, vcl_v2);
1751 
1752  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1753  {
1754  std::cerr << "** Failure in v1 = pow(v1, v2);" << std::endl;
1755  return EXIT_FAILURE;
1756  }
1757 
1758  proxy_copy(host_v1, vcl_v1);
1759  for (std::size_t i=0; i<host_v3.size(); ++i)
1760  host_v3[i] = host_v1[i];
1761  for (std::size_t i=0; i<host_v3.size(); ++i)
1762  host_v3[i] += std::pow(host_v1[i], host_v2[i]);
1763  vcl_v1 += viennacl::linalg::element_pow(vcl_v1, vcl_v2);
1764 
1765  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1766  {
1767  std::cerr << "** Failure in v1 += pow(v1, v2);" << std::endl;
1768  return EXIT_FAILURE;
1769  }
1770 
1771  proxy_copy(host_v1, vcl_v1);
1772  for (std::size_t i=0; i<host_v3.size(); ++i)
1773  host_v3[i] = host_v1[i];
1774  for (std::size_t i=0; i<host_v3.size(); ++i)
1775  host_v3[i] -= std::pow(host_v1[i], host_v2[i]);
1776  vcl_v1 -= viennacl::linalg::element_pow(vcl_v1, vcl_v2);
1777 
1778  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1779  {
1780  std::cerr << "** Failure in v1 -= pow(v1, v2);" << std::endl;
1781  return EXIT_FAILURE;
1782  }
1783 
1785  proxy_copy(host_v1, vcl_v1);
1786  for (std::size_t i=0; i<host_v3.size(); ++i)
1787  host_v3[i] = host_v1[i];
1788  for (std::size_t i=0; i<host_v3.size(); ++i)
1789  host_v3[i] = std::pow(host_v1[i] + host_v2[i], host_v2[i]);
1790  vcl_v1 = viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2);
1791 
1792  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1793  {
1794  std::cerr << "** Failure in v1 = pow(v1 + v2, v2);" << std::endl;
1795  return EXIT_FAILURE;
1796  }
1797 
1798  proxy_copy(host_v1, vcl_v1);
1799  for (std::size_t i=0; i<host_v3.size(); ++i)
1800  host_v3[i] = host_v1[i];
1801  for (std::size_t i=0; i<host_v3.size(); ++i)
1802  host_v3[i] += std::pow(host_v1[i] + host_v2[i], host_v2[i]);
1803  vcl_v1 += viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2);
1804 
1805  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1806  {
1807  std::cerr << "** Failure in v1 += pow(v1 + v2, v2);" << std::endl;
1808  return EXIT_FAILURE;
1809  }
1810 
1811  proxy_copy(host_v1, vcl_v1);
1812  for (std::size_t i=0; i<host_v3.size(); ++i)
1813  host_v3[i] = host_v1[i];
1814  for (std::size_t i=0; i<host_v3.size(); ++i)
1815  host_v3[i] -= std::pow(host_v1[i] + host_v2[i], host_v2[i]);
1816  vcl_v1 -= viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2);
1817 
1818  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1819  {
1820  std::cerr << "** Failure in v1 -= pow(v1 + v2, v2);" << std::endl;
1821  return EXIT_FAILURE;
1822  }
1823 
1825  proxy_copy(host_v1, vcl_v1);
1826  for (std::size_t i=0; i<host_v3.size(); ++i)
1827  host_v3[i] = host_v1[i];
1828  for (std::size_t i=0; i<host_v3.size(); ++i)
1829  host_v3[i] = std::pow(host_v1[i], host_v2[i] + host_v1[i]);
1830  vcl_v1 = viennacl::linalg::element_pow(vcl_v1, vcl_v2 + vcl_v1);
1831 
1832  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1833  {
1834  std::cerr << "** Failure in v1 = pow(v1, v2 + v1);" << std::endl;
1835  return EXIT_FAILURE;
1836  }
1837 
1838  proxy_copy(host_v1, vcl_v1);
1839  for (std::size_t i=0; i<host_v3.size(); ++i)
1840  host_v3[i] = host_v1[i];
1841  for (std::size_t i=0; i<host_v3.size(); ++i)
1842  host_v3[i] += std::pow(host_v1[i], host_v2[i] + host_v1[i]);
1843  vcl_v1 += viennacl::linalg::element_pow(vcl_v1, vcl_v2 + vcl_v1);
1844 
1845  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1846  {
1847  std::cerr << "** Failure in v1 += pow(v1, v2 + v1);" << std::endl;
1848  return EXIT_FAILURE;
1849  }
1850 
1851  proxy_copy(host_v1, vcl_v1);
1852  for (std::size_t i=0; i<host_v3.size(); ++i)
1853  host_v3[i] = host_v1[i];
1854  for (std::size_t i=0; i<host_v3.size(); ++i)
1855  host_v3[i] -= std::pow(host_v1[i], host_v2[i] + host_v1[i]);
1856  vcl_v1 -= viennacl::linalg::element_pow(vcl_v1, vcl_v2 + vcl_v1);
1857 
1858  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1859  {
1860  std::cerr << "** Failure in v1 -= pow(v1, v2 + v1);" << std::endl;
1861  return EXIT_FAILURE;
1862  }
1863 
1865  proxy_copy(host_v1, vcl_v1);
1866  for (std::size_t i=0; i<host_v3.size(); ++i)
1867  host_v3[i] = host_v1[i];
1868  for (std::size_t i=0; i<host_v3.size(); ++i)
1869  host_v3[i] = std::pow(host_v1[i] + host_v2[i], host_v2[i] + host_v1[i]);
1870  vcl_v1 = viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1871 
1872  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1873  {
1874  std::cerr << "** Failure in v1 = pow(v1 + v2, v2 + v1);" << std::endl;
1875  return EXIT_FAILURE;
1876  }
1877 
1878  proxy_copy(host_v1, vcl_v1);
1879  for (std::size_t i=0; i<host_v3.size(); ++i)
1880  host_v3[i] = host_v1[i];
1881  for (std::size_t i=0; i<host_v3.size(); ++i)
1882  host_v3[i] += std::pow(host_v1[i] + host_v2[i], host_v2[i] + host_v1[i]);
1883  vcl_v1 += viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1884 
1885  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1886  {
1887  std::cerr << "** Failure in v1 += pow(v1 + v2, v2 + v1);" << std::endl;
1888  return EXIT_FAILURE;
1889  }
1890 
1891  proxy_copy(host_v1, vcl_v1);
1892  for (std::size_t i=0; i<host_v3.size(); ++i)
1893  host_v3[i] = host_v1[i];
1894  for (std::size_t i=0; i<host_v3.size(); ++i)
1895  host_v3[i] -= std::pow(host_v1[i] + host_v2[i], host_v2[i] + host_v1[i]);
1896  vcl_v1 -= viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1897 
1898  if (check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1899  {
1900  std::cerr << "** Failure in v1 -= pow(v1 + v2, v2 + v1);" << std::endl;
1901  return EXIT_FAILURE;
1902  }
1903 
1904  std::cout << "Testing unary elementwise operations..." << std::endl;
1905  for (size_t i=0; i < host_v1.size(); ++i)
1906  host_v1[i] = random<NumericT>() / NumericT(4);
1907 
// Expands to the test sequence for one unary elementwise function FUNCNAME.
//
// The expansion relies on names from the scope it is expanded in: host_v1, host_v2
// (host reference vectors), vcl_v1, vcl_v2 (ViennaCL counterparts) and epsilon.
// It first sets host_v2 = 3.1415 * host_v1 and copies both host vectors to the
// ViennaCL vectors, then compares std::FUNCNAME on the host against
// viennacl::linalg::element_FUNCNAME for the six operations
//   v1  = f(v2),  v1  = f(v1 + v2),
//   v1 += f(v1),  v1 += f(v1 + v2),
//   v1 -= f(v2),  v1 -= f(v1 + v2).
// On the first mismatch reported by check() a message is printed and the
// enclosing function returns EXIT_FAILURE.
//
// Only /* ... */ comments are used inside the macro body, because a // comment
// would swallow the continued lines that follow it.
#define GENERATE_UNARY_OP_TEST(FUNCNAME) \
  for (std::size_t i=0; i<host_v1.size(); ++i) \
    host_v2[i] = NumericT(3.1415) * host_v1[i]; \
  proxy_copy(host_v1, vcl_v1); \
  proxy_copy(host_v2, vcl_v2); \
  \
  /* v1 = f(v2) */ \
  for (std::size_t i=0; i<host_v1.size(); ++i) \
    host_v1[i] = std::FUNCNAME(host_v2[i]); \
  vcl_v1 = viennacl::linalg::element_##FUNCNAME(vcl_v2); \
  \
  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
  { \
    std::cout << "Failure at v1 = " << #FUNCNAME << "(v2)" << std::endl; \
    return EXIT_FAILURE; \
  } \
  \
  /* v1 = f(v1 + v2) */ \
  for (std::size_t i=0; i<host_v1.size(); ++i) \
    host_v1[i] = std::FUNCNAME(host_v1[i] + host_v2[i]); \
  vcl_v1 = viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
  \
  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
  { \
    std::cout << "Failure at v1 = " << #FUNCNAME << "(v1 + v2)" << std::endl; \
    return EXIT_FAILURE; \
  } \
  \
  /* v1 += f(v1): the argument is v1 itself, and the message says so */ \
  for (std::size_t i=0; i<host_v1.size(); ++i) \
    host_v1[i] += std::FUNCNAME(host_v1[i]); \
  vcl_v1 += viennacl::linalg::element_##FUNCNAME(vcl_v1); \
  \
  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
  { \
    std::cout << "Failure at v1 += " << #FUNCNAME << "(v1)" << std::endl; \
    return EXIT_FAILURE; \
  } \
  \
  /* v1 += f(v1 + v2) */ \
  for (std::size_t i=0; i<host_v1.size(); ++i) \
    host_v1[i] += std::FUNCNAME(host_v1[i] + host_v2[i]); \
  vcl_v1 += viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
  \
  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
  { \
    std::cout << "Failure at v1 += " << #FUNCNAME << "(v1 + v2)" << std::endl; \
    return EXIT_FAILURE; \
  } \
  \
  /* v1 -= f(v2) */ \
  for (std::size_t i=0; i<host_v1.size(); ++i) \
    host_v1[i] -= std::FUNCNAME(host_v2[i]); \
  vcl_v1 -= viennacl::linalg::element_##FUNCNAME(vcl_v2); \
  \
  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
  { \
    std::cout << "Failure at v1 -= " << #FUNCNAME << "(v2)" << std::endl; \
    return EXIT_FAILURE; \
  } \
  \
  /* v1 -= f(v1 + v2) */ \
  for (std::size_t i=0; i<host_v1.size(); ++i) \
    host_v1[i] -= std::FUNCNAME(host_v1[i] + host_v2[i]); \
  vcl_v1 -= viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
  \
  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
  { \
    std::cout << "Failure at v1 -= " << #FUNCNAME << "(v1 + v2)" << std::endl; \
    return EXIT_FAILURE; \
  }

1975  GENERATE_UNARY_OP_TEST(cosh);
1976  for (std::size_t i=0; i < host_v1.size(); ++i)
1977  host_v1[i] = random<NumericT>() / NumericT(4);
1979  GENERATE_UNARY_OP_TEST(floor);
1980  GENERATE_UNARY_OP_TEST(fabs);
1982  GENERATE_UNARY_OP_TEST(log10);
1984  GENERATE_UNARY_OP_TEST(sinh);
1985  GENERATE_UNARY_OP_TEST(fabs);
1986  //GENERATE_UNARY_OP_TEST(abs); //OpenCL allows abs on integers only
1987  GENERATE_UNARY_OP_TEST(sqrt);
1989  GENERATE_UNARY_OP_TEST(tanh);
1990 
1991  // --------------------------------------------------------------------------
1992  for (std::size_t i=0; i<host_v1.size(); ++i)
1993  host_v2[i] = NumericT(3.1415) * host_v1[i];
1994  proxy_copy(host_v1, vcl_v1);
1995  proxy_copy(host_v2, vcl_v2);
1996 
1997  std::cout << "Testing another complicated vector expression with CPU scalars..." << std::endl;
1998  for (std::size_t i=0; i<host_v1.size(); ++i)
1999  host_v1[i] = host_v2[i] / alpha + beta * (host_v1[i] - alpha*host_v2[i]);
2000  vcl_v1 = vcl_v2 / alpha + beta * (vcl_v1 - alpha*vcl_v2);
2001 
2002  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
2003  return EXIT_FAILURE;
2004 
2005  std::cout << "Testing another complicated vector expression with GPU scalars..." << std::endl;
2006  for (std::size_t i=0; i<host_v1.size(); ++i)
2007  host_v2[i] = NumericT(3.1415) * host_v1[i];
2008  proxy_copy(host_v1, vcl_v1);
2009  proxy_copy(host_v2, vcl_v2);
2010 
2011  for (std::size_t i=0; i<host_v1.size(); ++i)
2012  host_v1[i] = host_v2[i] / alpha + beta * (host_v1[i] - alpha*host_v2[i]);
2013  vcl_v1 = vcl_v2 / gpu_alpha + gpu_beta * (vcl_v1 - gpu_alpha*vcl_v2);
2014 
2015  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
2016  return EXIT_FAILURE;
2017 
2018 
2019  std::cout << "Testing lenghty sum of scaled vectors..." << std::endl;
2020  for (std::size_t i=0; i<host_v1.size(); ++i)
2021  host_v2[i] = NumericT(3.1415) * host_v1[i];
2022  proxy_copy(host_v1, vcl_v1);
2023  proxy_copy(host_v2, vcl_v2);
2024 
2025  for (std::size_t i=0; i<host_v1.size(); ++i)
2026  host_v1[i] = host_v2[i] / alpha + beta * host_v1[i] - alpha * host_v2[i] + beta * host_v1[i] - alpha * host_v1[i];
2027  vcl_v1 = vcl_v2 / gpu_alpha + gpu_beta * vcl_v1 - alpha * vcl_v2 + beta * vcl_v1 - alpha * vcl_v1;
2028 
2029  if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
2030  return EXIT_FAILURE;
2031 
2032  // --------------------------------------------------------------------------
2033  return retval;
2034 }
2035 
2036 
2037 template< typename NumericT, typename Epsilon >
2038 int test(Epsilon const& epsilon)
2039 {
2040  int retval = EXIT_SUCCESS;
2041  std::size_t size = 24656;
2042 
2043  std::cout << "Running tests for vector of size " << size << std::endl;
2044 
2045  //
2046  // Set up host objects
2047  //
2048  std::vector<NumericT> std_full_vec(size);
2049  std::vector<NumericT> std_full_vec2(std_full_vec.size());
2050 
2051  for (std::size_t i=0; i<std_full_vec.size(); ++i)
2052  {
2053  std_full_vec[i] = NumericT(1.0) + random<NumericT>();
2054  std_full_vec2[i] = NumericT(1.0) + random<NumericT>();
2055  }
2056 
2057  std::size_t r1_start = std_full_vec.size() / 4;
2058  std::size_t r1_stop = 2 * std_full_vec.size() / 4;
2059  std::size_t r2_start = 2 * std_full_vec2.size() / 4;
2060  std::size_t r2_stop = 3 * std_full_vec2.size() / 4;
2061  vector_proxy<NumericT> host_range_vec (&std_full_vec[0], r1_start, 1, r1_stop - r1_start);
2062  vector_proxy<NumericT> host_range_vec2(&std_full_vec2[0], r2_start, 1, r2_stop - r2_start);
2063 
2064  std::size_t s1_start = std_full_vec.size() / 4;
2065  std::size_t s1_inc = 3;
2066  std::size_t s1_size = std_full_vec.size() / 4;
2067  std::size_t s2_start = 2 * std_full_vec2.size() / 4;
2068  std::size_t s2_inc = 2;
2069  std::size_t s2_size = std_full_vec2.size() / 4;
2070  vector_proxy<NumericT> host_slice_vec (&std_full_vec[0], s1_start, s1_inc, s1_size);
2071  vector_proxy<NumericT> host_slice_vec2(&std_full_vec2[0], s2_start, s2_inc, s2_size);
2072 
2073  //
2074  // Set up ViennaCL objects
2075  //
2076  viennacl::vector<NumericT> vcl_full_vec(std_full_vec.size());
2077  viennacl::vector<NumericT> vcl_full_vec2(std_full_vec2.size());
2078 
2079  viennacl::fast_copy(std_full_vec.begin(), std_full_vec.end(), vcl_full_vec.begin());
2080  viennacl::copy(std_full_vec2.begin(), std_full_vec2.end(), vcl_full_vec2.begin());
2081 
2082  viennacl::range vcl_r1( vcl_full_vec.size() / 4, 2 * vcl_full_vec.size() / 4);
2083  viennacl::range vcl_r2(2 * vcl_full_vec2.size() / 4, 3 * vcl_full_vec2.size() / 4);
2084  viennacl::vector_range< viennacl::vector<NumericT> > vcl_range_vec(vcl_full_vec, vcl_r1);
2085  viennacl::vector_range< viennacl::vector<NumericT> > vcl_range_vec2(vcl_full_vec2, vcl_r2);
2086 
2087  {
2088  viennacl::vector<NumericT> vcl_short_vec(vcl_range_vec);
2089  viennacl::vector<NumericT> vcl_short_vec2 = vcl_range_vec2;
2090 
2091  std::vector<NumericT> std_short_vec(host_range_vec.size());
2092  for (std::size_t i=0; i<std_short_vec.size(); ++i)
2093  std_short_vec[i] = host_range_vec[i];
2094  vector_proxy<NumericT> host_short_vec(&std_short_vec[0], 0, 1, std_short_vec.size());
2095 
2096  std::vector<NumericT> std_short_vec2(host_range_vec2.size());
2097  for (std::size_t i=0; i<std_short_vec2.size(); ++i)
2098  std_short_vec2[i] = host_range_vec2[i];
2099  vector_proxy<NumericT> host_short_vec2(&std_short_vec2[0], 0, 1, std_short_vec.size());
2100 
2101  std::cout << "Testing creation of vectors from range..." << std::endl;
2102  if (check(host_short_vec, vcl_short_vec, epsilon) != EXIT_SUCCESS)
2103  return EXIT_FAILURE;
2104  if (check(host_short_vec2, vcl_short_vec2, epsilon) != EXIT_SUCCESS)
2105  return EXIT_FAILURE;
2106  }
2107 
2108  viennacl::slice vcl_s1( vcl_full_vec.size() / 4, 3, vcl_full_vec.size() / 4);
2109  viennacl::slice vcl_s2(2 * vcl_full_vec2.size() / 4, 2, vcl_full_vec2.size() / 4);
2110  viennacl::vector_slice< viennacl::vector<NumericT> > vcl_slice_vec(vcl_full_vec, vcl_s1);
2111  viennacl::vector_slice< viennacl::vector<NumericT> > vcl_slice_vec2(vcl_full_vec2, vcl_s2);
2112 
2113  viennacl::vector<NumericT> vcl_short_vec(vcl_slice_vec);
2114  viennacl::vector<NumericT> vcl_short_vec2 = vcl_slice_vec2;
2115 
2116  std::vector<NumericT> std_short_vec(host_slice_vec.size());
2117  for (std::size_t i=0; i<std_short_vec.size(); ++i)
2118  std_short_vec[i] = host_slice_vec[i];
2119  vector_proxy<NumericT> host_short_vec(&std_short_vec[0], 0, 1, std_short_vec.size());
2120 
2121  std::vector<NumericT> std_short_vec2(host_slice_vec2.size());
2122  for (std::size_t i=0; i<std_short_vec2.size(); ++i)
2123  std_short_vec2[i] = host_slice_vec2[i];
2124  vector_proxy<NumericT> host_short_vec2(&std_short_vec2[0], 0, 1, std_short_vec.size());
2125 
2126  std::cout << "Testing creation of vectors from slice..." << std::endl;
2127  if (check(host_short_vec, vcl_short_vec, epsilon) != EXIT_SUCCESS)
2128  return EXIT_FAILURE;
2129  if (check(host_short_vec2, vcl_short_vec2, epsilon) != EXIT_SUCCESS)
2130  return EXIT_FAILURE;
2131 
2132 
2133  //
2134  // Now start running tests for vectors, ranges and slices:
2135  //
2136 
2137  std::cout << " ** vcl_v1 = vector, vcl_v2 = vector **" << std::endl;
2138  retval = test<NumericT>(epsilon,
2139  host_short_vec, host_short_vec2,
2140  vcl_short_vec, vcl_short_vec2);
2141  if (retval != EXIT_SUCCESS)
2142  return EXIT_FAILURE;
2143 
2144  std::cout << " ** vcl_v1 = vector, vcl_v2 = range **" << std::endl;
2145  retval = test<NumericT>(epsilon,
2146  host_short_vec, host_short_vec2,
2147  vcl_short_vec, vcl_range_vec2);
2148  if (retval != EXIT_SUCCESS)
2149  return EXIT_FAILURE;
2150 
2151  std::cout << " ** vcl_v1 = vector, vcl_v2 = slice **" << std::endl;
2152  retval = test<NumericT>(epsilon,
2153  host_short_vec, host_short_vec2,
2154  vcl_short_vec, vcl_slice_vec2);
2155  if (retval != EXIT_SUCCESS)
2156  return EXIT_FAILURE;
2157 
2159 
2160  std::cout << " ** vcl_v1 = range, vcl_v2 = vector **" << std::endl;
2161  retval = test<NumericT>(epsilon,
2162  host_short_vec, host_short_vec2,
2163  vcl_range_vec, vcl_short_vec2);
2164  if (retval != EXIT_SUCCESS)
2165  return EXIT_FAILURE;
2166 
2167  std::cout << " ** vcl_v1 = range, vcl_v2 = range **" << std::endl;
2168  retval = test<NumericT>(epsilon,
2169  host_short_vec, host_short_vec2,
2170  vcl_range_vec, vcl_range_vec2);
2171  if (retval != EXIT_SUCCESS)
2172  return EXIT_FAILURE;
2173 
2174  std::cout << " ** vcl_v1 = range, vcl_v2 = slice **" << std::endl;
2175  retval = test<NumericT>(epsilon,
2176  host_short_vec, host_short_vec2,
2177  vcl_range_vec, vcl_slice_vec2);
2178  if (retval != EXIT_SUCCESS)
2179  return EXIT_FAILURE;
2180 
2182 
2183  std::cout << " ** vcl_v1 = slice, vcl_v2 = vector **" << std::endl;
2184  retval = test<NumericT>(epsilon,
2185  host_short_vec, host_short_vec2,
2186  vcl_slice_vec, vcl_short_vec2);
2187  if (retval != EXIT_SUCCESS)
2188  return EXIT_FAILURE;
2189 
2190  std::cout << " ** vcl_v1 = slice, vcl_v2 = range **" << std::endl;
2191  retval = test<NumericT>(epsilon,
2192  host_short_vec, host_short_vec2,
2193  vcl_slice_vec, vcl_range_vec2);
2194  if (retval != EXIT_SUCCESS)
2195  return EXIT_FAILURE;
2196 
2197  std::cout << " ** vcl_v1 = slice, vcl_v2 = slice **" << std::endl;
2198  retval = test<NumericT>(epsilon,
2199  host_short_vec, host_short_vec2,
2200  vcl_slice_vec, vcl_slice_vec2);
2201  if (retval != EXIT_SUCCESS)
2202  return EXIT_FAILURE;
2203 
2204  return EXIT_SUCCESS;
2205 }
2206 
2207 
2208 //
2209 // -------------------------------------------------------------
2210 //
2211 int main()
2212 {
2213  std::cout << std::endl;
2214  std::cout << "----------------------------------------------" << std::endl;
2215  std::cout << "----------------------------------------------" << std::endl;
2216  std::cout << "## Test :: Vector" << std::endl;
2217  std::cout << "----------------------------------------------" << std::endl;
2218  std::cout << "----------------------------------------------" << std::endl;
2219  std::cout << std::endl;
2220 
2221  int retval = EXIT_SUCCESS;
2222 
2223  std::cout << std::endl;
2224  std::cout << "----------------------------------------------" << std::endl;
2225  std::cout << std::endl;
2226  {
2227  typedef float NumericT;
2228  NumericT epsilon = static_cast<NumericT>(1.0E-2);
2229  std::cout << "# Testing setup:" << std::endl;
2230  std::cout << " eps: " << epsilon << std::endl;
2231  std::cout << " numeric: float" << std::endl;
2232  retval = test<NumericT>(epsilon);
2233  if ( retval == EXIT_SUCCESS )
2234  std::cout << "# Test passed" << std::endl;
2235  else
2236  return retval;
2237  }
2238  std::cout << std::endl;
2239  std::cout << "----------------------------------------------" << std::endl;
2240  std::cout << std::endl;
2241  #ifdef VIENNACL_WITH_OPENCL
2243  #endif
2244  {
2245  {
2246  typedef double NumericT;
2247  NumericT epsilon = 1.0E-10;
2248  std::cout << "# Testing setup:" << std::endl;
2249  std::cout << " eps: " << epsilon << std::endl;
2250  std::cout << " numeric: double" << std::endl;
2251  retval = test<NumericT>(epsilon);
2252  if ( retval == EXIT_SUCCESS )
2253  std::cout << "# Test passed" << std::endl;
2254  else
2255  return retval;
2256  }
2257  std::cout << std::endl;
2258  std::cout << "----------------------------------------------" << std::endl;
2259  std::cout << std::endl;
2260  }
2261 
2262  std::cout << std::endl;
2263  std::cout << "------- Test completed --------" << std::endl;
2264  std::cout << std::endl;
2265 
2266 
2267  return retval;
2268 }
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_div > > element_div(vector_base< T > const &v1, vector_base< T > const &v2)
T norm_2(std::vector< T, A > const &v1)
Definition: norm_2.hpp:86
vcl_size_t index_norm_inf(vector_base< T > const &vec)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Definition: forwards.h:226
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
vector_proxy(NumericT *p_values, std::size_t start_idx, std::size_t increment, std::size_t num_elements)
void plane_rotation(vector_base< T > &vec1, vector_base< T > &vec2, T alpha, T beta)
Computes a plane rotation of two vectors.
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
Definition: memory.hpp:54
ScalarType diff(ScalarType const &s1, ScalarType const &s2)
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
Definition: inner_prod.hpp:89
#define GENERATE_UNARY_OP_TEST(FUNCNAME)
NumericT & operator[](std::size_t index)
int test(Epsilon const &epsilon, HostVectorType &host_v1, HostVectorType &host_v2, ViennaCLVectorType1 &vcl_v1, ViennaCLVectorType2 &vcl_v2)
viennacl::scalar< int > s2
viennacl::scalar< float > s1
T max(const T &lhs, const T &rhs)
Maximum.
Definition: util.hpp:59
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
Definition: backend.hpp:351
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
Generic interface for the l^1-norm. See viennacl/linalg/vector_operations.hpp for implementations...
viennacl::vector< float > v1
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
Class for representing non-strided subvectors of a bigger vector x.
Definition: forwards.h:433
std::size_t size() const
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
Definition: vector.hpp:827
int check(T1 const &t1, T2 const &t2, double epsilon)
void proxy_copy(vector_proxy< NumericT > const &host_vec, viennacl::vector_base< NumericT > &vcl_vec)
Class for representing strided subvectors of a bigger vector x.
Definition: forwards.h:436
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
Definition: device.hpp:956
NumericT const & operator[](std::size_t index) const
Proxy classes for vectors.
int main()
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value >::type swap(ScalarT1 &s1, ScalarT2 &s2)
Swaps the contents of two scalars, data is copied.
Represents a vector consisting of 1 at a given index and zeros otherwise.
Definition: vector_def.hpp:76
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
Definition: vector_def.hpp:87
NumericT max(std::vector< NumericT > const &v1)
Definition: maxmin.hpp:47
T norm_inf(std::vector< T, A > const &v1)
Definition: norm_inf.hpp:60
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
T norm_1(std::vector< T, A > const &v1)
Definition: norm_1.hpp:61
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector_def.hpp:118
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
Definition: forwards.h:423
float ScalarType
Definition: fft_1d.cpp:42
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_prod > > element_prod(vector_base< T > const &v1, vector_base< T > const &v2)
iterator end()
Returns an iterator pointing to the end of the vector (STL like)
Definition: vector.hpp:834
A slice class that refers to a strided set of indices, given by a start index, a stride (increment) and the number of entries.
Definition: forwards.h:428
A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-use...
Definition: forwards.h:232
Generic interface for the l^infty-norm. See viennacl/linalg/vector_operations.hpp for implementations...
NumericT min(std::vector< NumericT > const &v1)
Definition: maxmin.hpp:91
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)