// Strided view over a raw array of NumericT: element i of the view maps to
// values_[start_ + i * inc_], and size() reports the stored element count.
// NOTE(review): the class header and the member declarations are not visible
// in this excerpt.
46 template<
typename NumericT>
// Stores the base pointer, the offset of the first element, the stride
// between consecutive elements, and the number of elements.
50 vector_proxy(NumericT * p_values, std::size_t start_idx, std::size_t increment, std::size_t num_elements)
51 : values_(p_values), start_(start_idx), inc_(increment), size_(num_elements) {}
// Read-only element access; the index is used as-is, without a bounds check.
53 NumericT
const &
operator[](std::size_t index)
const {
return values_[start_ + index * inc_]; }
// Mutable element access using the same index mapping as the const overload.
54 NumericT &
operator[](std::size_t index) {
return values_[start_ + index * inc_]; }
// Returns the element count that was passed to the constructor.
56 std::size_t
size()
const {
return size_; }
// Copies host_vec element by element into a temporary std::vector<NumericT>
// of the same length.
// NOTE(review): the function signature and whatever consumes std_vec after
// the loop are not visible in this excerpt.
65 template<
typename NumericT>
// Staging buffer with one slot per element of host_vec.
68 std::vector<NumericT> std_vec(host_vec.
size());
// Element-wise copy through host_vec's operator[].
70 for (std::size_t i=0; i<host_vec.
size(); ++i)
71 std_vec[i] = host_vec[i];
// Writes the contents of a temporary std::vector<NumericT> back into host_vec,
// one element at a time.
// NOTE(review): the function signature and the step that fills std_vec
// (presumably from vcl_vec) are not visible in this excerpt - confirm.
76 template<
typename NumericT>
// Staging buffer sized from vcl_vec.
79 std::vector<NumericT> std_vec(vcl_vec.
size());
// The loop is bounded by host_vec.size() although std_vec was sized from
// vcl_vec.size().
// NOTE(review): this assumes both sizes agree - confirm against the callers.
83 for (std::size_t i=0; i<host_vec.
size(); ++i)
84 host_vec[i] = std_vec[i];
// Signed relative difference of two scalars: (s1 - s2) divided by the larger
// of |s1| and |s2|.
// NOTE(review): the signature and the value returned when s1 equals s2 are
// not visible in this excerpt.
91 template<
typename ScalarType>
// Only divide when the operands differ, so the 0/0 case is never evaluated.
95 if (std::fabs(s1 - s2) > 0 )
96 return (s1 - s2) /
std::max(std::fabs(s1), std::fabs(s2));
// Second variant of the scalar relative difference; the visible body is the
// same computation, (s1 - s2) scaled by max(|s1|, |s2|).
// NOTE(review): the signature (and therefore the operand types that
// distinguish this variant) and the fallback return are not visible here.
102 template<
typename ScalarType>
// Skip the division when the two values are identical.
106 if (std::fabs(s1 - s2) > 0 )
107 return (s1 - s2) /
std::max(std::fabs(s1), std::fabs(s2));
// Third variant of the scalar relative difference; again returns (s1 - s2)
// normalised by the larger magnitude of the two operands.
// NOTE(review): the signature and the return taken when the values match are
// not visible in this excerpt.
113 template<
typename ScalarType>
// The guard keeps the denominator non-zero: it is only reached when s1 != s2.
117 if (std::fabs(s1 - s2) > 0 )
118 return (s1 - s2) /
std::max(std::fabs(s1), std::fabs(s2));
// Vector variant: reduces element-wise relative differences between v1 and
// the host buffer v2_cpu to their maximum magnitude, accumulated in ret.
// NOTE(review): the signature, the step that fills v2_cpu (presumably a copy
// from vcl_vec), and the declaration and return of ret are not visible here.
124 template<
typename ScalarType,
typename ViennaCLVectorType>
// Host-side buffer with one slot per element of vcl_vec.
127 std::vector<ScalarType> v2_cpu(vcl_vec.size());
// Walk over v1 and overwrite v2_cpu[i] with the relative difference.
// NOTE(review): i is unsigned int; if v1.size() returns std::size_t this
// assumes the length fits in unsigned int.
131 for (
unsigned int i=0;i<v1.
size(); ++i)
// Divide only when at least one operand is non-zero, so the denominator
// max(|v2_cpu[i]|, |v1[i]|) cannot be zero.
133 if (
std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) ) > 0 )
134 v2_cpu[i] = std::fabs(v2_cpu[i] - v1[i]) /
std::max( std::fabs(v2_cpu[i]), std::fabs(v1[i]) );
// Reduce to the largest magnitude over all entries of v2_cpu.
140 for (std::size_t i=0; i<v2_cpu.size(); ++i)
141 ret =
std::max(ret, std::fabs(v2_cpu[i]));
// Compares t1 and t2 by taking the magnitude of diff(t1, t2) and, on the
// error path, prints the relative difference and marks the result as failed.
// NOTE(review): the condition guarding the error path (presumably a
// comparison of temp against epsilon) and the return statement are not
// visible in this excerpt.
146 template<
typename T1,
typename T2>
147 int check(T1
const & t1, T2
const & t2,
double epsilon)
// Status starts as success and is only changed on the error path below.
149 int retval = EXIT_SUCCESS;
// diff() may be signed, so compare on its absolute value.
151 double temp = std::fabs(
diff(t1, t2));
// Error path: report the offending relative difference and flag failure.
154 std::cout <<
"# Error! Relative difference: " << temp << std::endl;
155 retval = EXIT_FAILURE;
164 template<
typename NumericT,
typename Epsilon,
typename HostVectorType,
typename ViennaCLVectorType1,
typename ViennaCLVectorType2 >
165 int test(Epsilon
const& epsilon,
166 HostVectorType & host_v1, HostVectorType & host_v2,
167 ViennaCLVectorType1 & vcl_v1, ViennaCLVectorType2 & vcl_v2)
169 int retval = EXIT_SUCCESS;
171 NumericT cpu_result = 42.0;
177 std::cout <<
"Checking for zero_vector initializer..." << std::endl;
178 for (std::size_t i=0; i<host_v1.size(); ++i)
179 host_v1[i] = NumericT(0);
181 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
184 std::cout <<
"Checking for scalar_vector initializer..." << std::endl;
185 for (std::size_t i=0; i<host_v1.size(); ++i)
186 host_v1[i] = NumericT(cpu_result);
188 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
191 for (std::size_t i=0; i<host_v1.size(); ++i)
192 host_v1[i] = NumericT(gpu_result);
194 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
197 std::cout <<
"Checking for unit_vector initializer..." << std::endl;
198 for (std::size_t i=0; i<host_v1.size(); ++i)
199 host_v1[i] = NumericT(0);
200 host_v1[5] = NumericT(1);
202 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
206 for (std::size_t i=0; i<host_v1.size(); ++i)
208 host_v1[i] = NumericT(1.0) + random<NumericT>();
209 host_v2[i] = NumericT(1.0) + random<NumericT>();
215 std::cout <<
"Checking for successful copy..." << std::endl;
216 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
218 if (
check(host_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
226 std::cout <<
"Testing inner_prod..." << std::endl;
228 for (std::size_t i=0; i<host_v1.size(); ++i)
229 cpu_result += host_v1[i] * host_v2[i];
233 std::cout <<
"Reference: " << cpu_result << std::endl;
234 std::cout << cpu_result2 << std::endl;
235 std::cout << gpu_result << std::endl;
236 if (
check(cpu_result, cpu_result2, epsilon) != EXIT_SUCCESS)
238 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
242 for (std::size_t i=0; i<host_v1.size(); ++i)
243 cpu_result += (host_v1[i] + host_v2[i]) * (host_v2[i] - host_v1[i]);
247 std::cout <<
"Reference: " << cpu_result << std::endl;
248 std::cout << cpu_result3 << std::endl;
249 std::cout << gpu_result << std::endl;
250 if (
check(cpu_result, cpu_result3, epsilon) != EXIT_SUCCESS)
252 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
256 std::cout <<
"Testing norm_1..." << std::endl;
258 for (std::size_t i=0; i<host_v1.size(); ++i)
259 cpu_result += std::fabs(host_v1[i]);
262 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
265 gpu_result = 2 * cpu_result;
267 for (std::size_t i=0; i<host_v1.size(); ++i)
268 cpu_result += std::fabs(host_v1[i]);
269 gpu_result = cpu_result;
273 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
277 for (std::size_t i=0; i<host_v1.size(); ++i)
278 cpu_result += std::fabs(host_v1[i] + host_v2[i]);
279 gpu_result = cpu_result;
283 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
287 std::cout <<
"Testing norm_2..." << std::endl;
289 for (std::size_t i=0; i<host_v1.size(); ++i)
290 cpu_result += host_v1[i] * host_v1[i];
291 cpu_result = std::sqrt(cpu_result);
294 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
297 gpu_result = 2 * cpu_result;
299 for (std::size_t i=0; i<host_v1.size(); ++i)
300 cpu_result += host_v1[i] * host_v1[i];
301 gpu_result = std::sqrt(cpu_result);
304 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
308 for (std::size_t i=0; i<host_v1.size(); ++i)
309 cpu_result += (host_v1[i] + host_v2[i]) * (host_v1[i] + host_v2[i]);
310 gpu_result = std::sqrt(cpu_result);
313 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
317 std::cout <<
"Testing norm_inf..." << std::endl;
318 cpu_result = std::fabs(host_v1[0]);
319 for (std::size_t i=0; i<host_v1.size(); ++i)
320 cpu_result =
std::max(std::fabs(host_v1[i]), cpu_result);
323 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
326 gpu_result = 2 * cpu_result;
327 cpu_result = std::fabs(host_v1[0]);
328 for (std::size_t i=0; i<host_v1.size(); ++i)
329 cpu_result =
std::max(std::fabs(host_v1[i]), cpu_result);
330 gpu_result = cpu_result;
334 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
337 cpu_result = std::fabs(host_v1[0]);
338 for (std::size_t i=0; i<host_v1.size(); ++i)
339 cpu_result =
std::max(std::fabs(host_v1[i] + host_v2[i]), cpu_result);
340 gpu_result = cpu_result;
344 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
348 std::cout <<
"Testing index_norm_inf..." << std::endl;
349 std::size_t cpu_index = 0;
350 cpu_result = std::fabs(host_v1[0]);
351 for (std::size_t i=0; i<host_v1.size(); ++i)
353 if (std::fabs(host_v1[i]) > cpu_result)
355 cpu_result = std::fabs(host_v1[i]);
361 if (
check(static_cast<NumericT>(cpu_index), static_cast<NumericT>(gpu_index), epsilon) != EXIT_SUCCESS)
364 cpu_result = host_v1[cpu_index];
367 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
370 cpu_result = std::fabs(host_v1[0] + host_v2[0]);
371 for (std::size_t i=0; i<host_v1.size(); ++i)
373 if (std::fabs(host_v1[i] + host_v2[i]) > cpu_result)
375 cpu_result = std::fabs(host_v1[i] + host_v2[i]);
379 cpu_result = host_v1[cpu_index];
382 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
387 std::cout <<
"Testing max..." << std::endl;
388 cpu_result = host_v1[0];
389 for (std::size_t i=0; i<host_v1.size(); ++i)
390 cpu_result = std::max<NumericT>(cpu_result, host_v1[i]);
393 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
396 cpu_result = host_v1[0];
397 for (std::size_t i=0; i<host_v1.size(); ++i)
398 cpu_result = std::max<NumericT>(cpu_result, host_v1[i]);
399 gpu_result = cpu_result;
403 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
406 cpu_result = host_v1[0] + host_v2[0];
407 for (std::size_t i=0; i<host_v1.size(); ++i)
408 cpu_result = std::max<NumericT>(cpu_result, host_v1[i] + host_v2[i]);
409 gpu_result = cpu_result;
413 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
418 std::cout <<
"Testing min..." << std::endl;
419 cpu_result = host_v1[0];
420 for (std::size_t i=0; i<host_v1.size(); ++i)
421 cpu_result = std::min<NumericT>(cpu_result, host_v1[i]);
424 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
427 cpu_result = host_v1[0];
428 for (std::size_t i=0; i<host_v1.size(); ++i)
429 cpu_result = std::min<NumericT>(cpu_result, host_v1[i]);
430 gpu_result = cpu_result;
434 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
437 cpu_result = host_v1[0] + host_v2[0];
438 for (std::size_t i=0; i<host_v1.size(); ++i)
439 cpu_result = std::min<NumericT>(cpu_result, host_v1[i] + host_v2[i]);
440 gpu_result = cpu_result;
444 if (
check(cpu_result, gpu_result, epsilon) != EXIT_SUCCESS)
455 for (std::size_t i=0; i<host_v1.size(); ++i)
457 NumericT temp = NumericT(1.1) * host_v1[i] + NumericT(2.3) * host_v2[i];
458 host_v2[i] = - NumericT(2.3) * host_v1[i] + NumericT(1.1) * host_v2[i];
463 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
465 if (
check(host_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
470 std::cout <<
"Testing assignments..." << std::endl;
471 NumericT val =
static_cast<NumericT
>(1e-1);
472 for (
size_t i=0; i < host_v1.size(); ++i)
475 for (
size_t i=0; i < vcl_v1.size(); ++i)
478 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
481 std::cout <<
"Testing assignments via iterators..." << std::endl;
483 host_v1[2] =
static_cast<NumericT
>(1.9);
484 vcl_v1[2] =
static_cast<NumericT
>(1.9);
486 host_v1[2] =
static_cast<NumericT
>(1.5);
487 typename ViennaCLVectorType1::iterator vcl_v1_it = vcl_v1.begin();
490 *vcl_v1_it =
static_cast<NumericT
>(1.5);
492 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
498 for (std::size_t i=0; i < host_v1.size(); ++i)
500 host_v1[i] = NumericT(1.0) + random<NumericT>();
501 host_v2[i] = NumericT(3.1415) * host_v1[i];
506 std::cout <<
"Testing scaling with CPU scalar..." << std::endl;
507 NumericT alpha =
static_cast<NumericT
>(1.7182);
510 for (std::size_t i=0; i < host_v1.size(); ++i)
511 host_v1[i] *= NumericT(
long(alpha));
512 vcl_v1 *= long(alpha);
514 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
517 for (std::size_t i=0; i < host_v1.size(); ++i)
518 host_v1[i] *= NumericT(
float(alpha));
519 vcl_v1 *= float(alpha);
521 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
524 for (std::size_t i=0; i < host_v1.size(); ++i)
525 host_v1[i] *= NumericT(
double(alpha));
526 vcl_v1 *= double(alpha);
528 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
532 std::cout <<
"Testing scaling with GPU scalar..." << std::endl;
533 for (std::size_t i=0; i < host_v1.size(); ++i)
537 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
540 std::cout <<
"Testing scaling with scalar expression..." << std::endl;
542 for (std::size_t i=0; i < host_v1.size(); ++i)
543 cpu_result += host_v1[i] * host_v2[i];
544 for (std::size_t i=0; i < host_v1.size(); ++i)
545 host_v1[i] *= cpu_result;
548 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
551 NumericT beta =
static_cast<NumericT
>(1.4153);
554 std::cout <<
"Testing shrinking with CPU scalar..." << std::endl;
555 for (std::size_t i=0; i < host_v1.size(); ++i)
556 host_v1[i] /= NumericT(
long(beta));
557 vcl_v1 /= long(beta);
559 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
562 for (std::size_t i=0; i < host_v1.size(); ++i)
563 host_v1[i] /= NumericT(
float(beta));
564 vcl_v1 /= float(beta);
566 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
569 for (std::size_t i=0; i < host_v1.size(); ++i)
570 host_v1[i] /= NumericT(
double(beta));
571 vcl_v1 /= double(beta);
573 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
577 std::cout <<
"Testing shrinking with GPU scalar..." << std::endl;
578 for (std::size_t i=0; i < host_v1.size(); ++i)
582 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
590 for (
size_t i=0; i < host_v1.size(); ++i)
592 host_v1[i] = NumericT(1.0) + random<NumericT>();
593 host_v2[i] = NumericT(3.1415) * host_v1[i];
598 std::cout <<
"Testing add on vector..." << std::endl;
600 std::cout <<
"Checking for successful copy..." << std::endl;
601 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
603 if (
check(host_v2, vcl_v2, epsilon) != EXIT_SUCCESS)
606 for (
size_t i=0; i < host_v1.size(); ++i)
607 host_v1[i] = host_v1[i] + host_v2[i];
608 vcl_v1 = vcl_v1 + vcl_v2;
610 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
613 std::cout <<
"Testing add on vector with flipsign..." << std::endl;
614 for (
size_t i=0; i < host_v1.size(); ++i)
615 host_v1[i] = - host_v1[i] + host_v2[i];
616 vcl_v1 = - vcl_v1 + vcl_v2;
618 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
621 std::cout <<
"Testing inplace-add on vector..." << std::endl;
622 for (
size_t i=0; i < host_v1.size(); ++i)
623 host_v1[i] += host_v2[i];
626 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
629 std::cout <<
"Testing assignment to vector with vector multiplied by scalar expression..." << std::endl;
631 for (std::size_t i=0; i < host_v1.size(); ++i)
632 cpu_result += host_v1[i] * host_v2[i];
633 for (std::size_t i=0; i < host_v1.size(); ++i)
634 host_v1[i] = cpu_result * host_v2[i];
641 std::cout <<
"Testing sub on vector..." << std::endl;
642 for (std::size_t i=0; i < host_v1.size(); ++i)
643 host_v2[i] = NumericT(3.1415) * host_v1[i];
647 for (std::size_t i=0; i < host_v1.size(); ++i)
648 host_v1[i] = host_v1[i] - host_v2[i];
649 vcl_v1 = vcl_v1 - vcl_v2;
651 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
654 std::cout <<
"Testing inplace-sub on vector..." << std::endl;
655 for (std::size_t i=0; i < host_v1.size(); ++i)
656 host_v1[i] -= host_v2[i];
659 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
667 std::cout <<
"Testing multiply-add on vector with CPU scalar (right)..." << std::endl;
668 for (
size_t i=0; i < host_v1.size(); ++i)
670 host_v1[i] = NumericT(1.0) + random<NumericT>();
671 host_v2[i] = NumericT(3.1415) * host_v1[i];
676 for (std::size_t i=0; i < host_v1.size(); ++i)
677 host_v1[i] = host_v1[i] + host_v2[i] * NumericT(
float(alpha));
678 vcl_v1 = vcl_v1 + vcl_v2 * float(alpha);
680 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
683 for (std::size_t i=0; i < host_v1.size(); ++i)
684 host_v1[i] = host_v1[i] + host_v2[i] * NumericT(
double(alpha));
685 vcl_v1 = vcl_v1 + vcl_v2 * double(alpha);
687 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
691 std::cout <<
"Testing multiply-add on vector with CPU scalar (left)..." << std::endl;
692 for (std::size_t i=0; i < host_v1.size(); ++i)
693 host_v2[i] = NumericT(3.1415) * host_v1[i];
697 for (std::size_t i=0; i < host_v1.size(); ++i)
698 host_v1[i] = NumericT(
long(alpha)) * host_v1[i] + host_v2[i];
699 vcl_v1 = long(alpha) * vcl_v1 + vcl_v2;
701 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
704 for (std::size_t i=0; i < host_v1.size(); ++i)
705 host_v1[i] = NumericT(
float(alpha)) * host_v1[i] + host_v2[i];
706 vcl_v1 = float(alpha) * vcl_v1 + vcl_v2;
708 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
711 for (std::size_t i=0; i < host_v1.size(); ++i)
712 host_v1[i] = NumericT(
double(alpha)) * host_v1[i] + host_v2[i];
713 vcl_v1 = double(alpha) * vcl_v1 + vcl_v2;
715 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
719 std::cout <<
"Testing multiply-add on vector with CPU scalar (both)..." << std::endl;
720 for (std::size_t i=0; i < host_v1.size(); ++i)
721 host_v2[i] = NumericT(3.1415) * host_v1[i];
725 for (std::size_t i=0; i < host_v1.size(); ++i)
726 host_v1[i] = NumericT(
long(alpha)) * host_v1[i] + NumericT(
long(beta)) * host_v2[i];
727 vcl_v1 = long(alpha) * vcl_v1 + long(beta) * vcl_v2;
729 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
732 for (std::size_t i=0; i < host_v1.size(); ++i)
733 host_v1[i] = NumericT(
float(alpha)) * host_v1[i] + NumericT(
float(beta)) * host_v2[i];
734 vcl_v1 = float(alpha) * vcl_v1 + float(beta) * vcl_v2;
736 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
739 for (std::size_t i=0; i < host_v1.size(); ++i)
740 host_v1[i] = NumericT(
double(alpha)) * host_v1[i] + NumericT(
double(beta)) * host_v2[i];
741 vcl_v1 = double(alpha) * vcl_v1 + double(beta) * vcl_v2;
743 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
747 std::cout <<
"Testing inplace multiply-add on vector with CPU scalar..." << std::endl;
748 for (std::size_t i=0; i < host_v1.size(); ++i)
749 host_v2[i] = NumericT(3.1415) * host_v1[i];
753 for (std::size_t i=0; i < host_v1.size(); ++i)
754 host_v1[i] += host_v2[i] * NumericT(
long(alpha));
755 vcl_v1 += vcl_v2 * long(alpha);
757 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
760 for (std::size_t i=0; i < host_v1.size(); ++i)
761 host_v1[i] += host_v2[i] * NumericT(
float(alpha));
762 vcl_v1 += vcl_v2 * float(alpha);
764 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
767 for (std::size_t i=0; i < host_v1.size(); ++i)
768 host_v1[i] += NumericT(
double(alpha)) * host_v2[i];
769 vcl_v1 += double(alpha) * vcl_v2;
771 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
775 std::cout <<
"Testing multiply-add on vector with GPU scalar (right)..." << std::endl;
776 for (std::size_t i=0; i < host_v1.size(); ++i)
777 host_v2[i] = NumericT(3.1415) * host_v1[i];
781 for (std::size_t i=0; i < host_v1.size(); ++i)
782 host_v1[i] = host_v1[i] + alpha * host_v2[i];
783 vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2;
785 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
788 std::cout <<
"Testing multiply-add on vector with GPU scalar (left)..." << std::endl;
789 for (std::size_t i=0; i < host_v1.size(); ++i)
790 host_v2[i] = NumericT(3.1415) * host_v1[i];
794 for (std::size_t i=0; i < host_v1.size(); ++i)
795 host_v1[i] = host_v1[i] + alpha * host_v2[i];
796 vcl_v1 = vcl_v1 + gpu_alpha * vcl_v2;
798 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
801 std::cout <<
"Testing multiply-add on vector with GPU scalar (both)..." << std::endl;
802 for (std::size_t i=0; i < host_v1.size(); ++i)
803 host_v2[i] = NumericT(3.1415) * host_v1[i];
807 for (std::size_t i=0; i < host_v1.size(); ++i)
808 host_v1[i] = alpha * host_v1[i] + beta * host_v2[i];
809 vcl_v1 = gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
811 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
815 std::cout <<
"Testing inplace multiply-add on vector with GPU scalar (both, adding)..." << std::endl;
816 for (std::size_t i=0; i < host_v1.size(); ++i)
817 host_v2[i] = NumericT(3.1415) * host_v1[i];
821 for (std::size_t i=0; i < host_v1.size(); ++i)
822 host_v1[i] += alpha * host_v1[i] + beta * host_v2[i];
823 vcl_v1 += gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
825 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
828 std::cout <<
"Testing inplace multiply-add on vector with GPU scalar (both, subtracting)..." << std::endl;
829 for (std::size_t i=0; i < host_v1.size(); ++i)
830 host_v2[i] = NumericT(3.1415) * host_v1[i];
834 for (std::size_t i=0; i < host_v1.size(); ++i)
835 host_v1[i] += alpha * host_v1[i] - beta * host_v2[i];
836 vcl_v1 += gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
838 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
843 std::cout <<
"Testing inplace multiply-add on vector with GPU scalar..." << std::endl;
844 for (std::size_t i=0; i < host_v1.size(); ++i)
845 host_v2[i] = NumericT(3.1415) * host_v1[i];
849 for (std::size_t i=0; i < host_v1.size(); ++i)
850 host_v1[i] += alpha * host_v2[i];
851 vcl_v1 += gpu_alpha * vcl_v2;
853 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
860 std::cout <<
"Testing division-add on vector with CPU scalar (right)..." << std::endl;
861 for (
size_t i=0; i < host_v1.size(); ++i)
863 host_v1[i] = NumericT(1.0) + random<NumericT>();
864 host_v2[i] = NumericT(3.1415) * host_v1[i];
869 for (std::size_t i=0; i < host_v1.size(); ++i)
870 host_v1[i] = host_v1[i] + host_v2[i] / NumericT(
long(alpha));
871 vcl_v1 = vcl_v1 + vcl_v2 / long(alpha);
873 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
876 for (std::size_t i=0; i < host_v1.size(); ++i)
877 host_v1[i] = host_v1[i] + host_v2[i] / NumericT(
float(alpha));
878 vcl_v1 = vcl_v1 + vcl_v2 / float(alpha);
880 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
883 for (std::size_t i=0; i < host_v1.size(); ++i)
884 host_v1[i] = host_v1[i] + host_v2[i] / NumericT(
double(alpha));
885 vcl_v1 = vcl_v1 + vcl_v2 / double(alpha);
887 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
891 std::cout <<
"Testing division-add on vector with CPU scalar (left)..." << std::endl;
892 for (std::size_t i=0; i < host_v1.size(); ++i)
893 host_v2[i] = NumericT(3.1415) * host_v1[i];
897 for (std::size_t i=0; i < host_v1.size(); ++i)
898 host_v1[i] = host_v1[i] / NumericT(
float(alpha)) + host_v2[i];
899 vcl_v1 = vcl_v1 / float(alpha) + vcl_v2;
901 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
904 for (std::size_t i=0; i < host_v1.size(); ++i)
905 host_v1[i] = host_v1[i] / NumericT(
double(alpha)) + host_v2[i];
906 vcl_v1 = vcl_v1 / double(alpha) + vcl_v2;
908 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
912 std::cout <<
"Testing division-add on vector with CPU scalar (both)..." << std::endl;
913 for (std::size_t i=0; i < host_v1.size(); ++i)
914 host_v2[i] = NumericT(3.1415) * host_v1[i];
918 for (std::size_t i=0; i < host_v1.size(); ++i)
919 host_v1[i] = host_v1[i] / NumericT(
float(alpha)) + host_v2[i] / NumericT(
float(beta));
920 vcl_v1 = vcl_v1 / float(alpha) + vcl_v2 / float(beta);
922 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
925 for (std::size_t i=0; i < host_v1.size(); ++i)
926 host_v1[i] = host_v1[i] / NumericT(
double(alpha)) + host_v2[i] / NumericT(
double(beta));
927 vcl_v1 = vcl_v1 / double(alpha) + vcl_v2 / double(beta);
929 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
932 std::cout <<
"Testing division-multiply-add on vector with CPU scalar..." << std::endl;
933 for (std::size_t i=0; i < host_v1.size(); ++i)
934 host_v2[i] = NumericT(3.1415) * host_v1[i];
938 for (std::size_t i=0; i < host_v1.size(); ++i)
939 host_v1[i] = host_v1[i] / alpha + host_v2[i] * beta;
940 vcl_v1 = vcl_v1 / alpha + vcl_v2 * beta;
942 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
946 std::cout <<
"Testing multiply-division-add on vector with CPU scalar..." << std::endl;
947 for (std::size_t i=0; i < host_v1.size(); ++i)
948 host_v2[i] = NumericT(3.1415) * host_v1[i];
952 for (std::size_t i=0; i < host_v1.size(); ++i)
953 host_v1[i] = host_v1[i] * alpha + host_v2[i] / beta;
954 vcl_v1 = vcl_v1 * alpha + vcl_v2 / beta;
956 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
961 std::cout <<
"Testing inplace division-add on vector with CPU scalar..." << std::endl;
962 for (std::size_t i=0; i < host_v1.size(); ++i)
963 host_v2[i] = NumericT(3.1415) * host_v1[i];
967 for (std::size_t i=0; i < host_v1.size(); ++i)
968 host_v1[i] += host_v2[i] / alpha;
969 vcl_v1 += vcl_v2 / alpha;
971 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
975 std::cout <<
"Testing division-add on vector with GPU scalar (right)..." << std::endl;
976 for (std::size_t i=0; i < host_v1.size(); ++i)
977 host_v2[i] = NumericT(3.1415) * host_v1[i];
981 for (std::size_t i=0; i < host_v1.size(); ++i)
982 host_v1[i] = host_v1[i] + host_v2[i] / alpha;
983 vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha;
985 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
988 std::cout <<
"Testing division-add on vector with GPU scalar (left)..." << std::endl;
989 for (std::size_t i=0; i < host_v1.size(); ++i)
990 host_v2[i] = NumericT(3.1415) * host_v1[i];
994 for (std::size_t i=0; i < host_v1.size(); ++i)
995 host_v1[i] = host_v1[i] + host_v2[i] / alpha;
996 vcl_v1 = vcl_v1 + vcl_v2 / gpu_alpha;
998 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1001 std::cout <<
"Testing division-add on vector with GPU scalar (both)..." << std::endl;
1002 for (std::size_t i=0; i < host_v1.size(); ++i)
1003 host_v2[i] = NumericT(3.1415) * host_v1[i];
1007 for (std::size_t i=0; i < host_v1.size(); ++i)
1008 host_v1[i] = host_v1[i] / alpha + host_v2[i] / beta;
1009 vcl_v1 = vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
1011 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1012 return EXIT_FAILURE;
1015 std::cout <<
"Testing inplace division-add on vector with GPU scalar (both, adding)..." << std::endl;
1016 for (std::size_t i=0; i < host_v1.size(); ++i)
1017 host_v2[i] = NumericT(3.1415) * host_v1[i];
1021 for (std::size_t i=0; i < host_v1.size(); ++i)
1022 host_v1[i] += host_v1[i] / alpha + host_v2[i] / beta;
1023 vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
1025 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1026 return EXIT_FAILURE;
1028 std::cout <<
"Testing inplace division-add on vector with GPU scalar (both, subtracting)..." << std::endl;
1029 for (std::size_t i=0; i < host_v1.size(); ++i)
1030 host_v2[i] = NumericT(3.1415) * host_v1[i];
1034 for (std::size_t i=0; i < host_v1.size(); ++i)
1035 host_v1[i] += host_v1[i] / alpha - host_v2[i] / beta;
1036 vcl_v1 += vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
1038 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1039 return EXIT_FAILURE;
1041 std::cout <<
"Testing inplace division-multiply-add on vector with GPU scalar (adding)..." << std::endl;
1042 for (std::size_t i=0; i < host_v1.size(); ++i)
1043 host_v2[i] = NumericT(3.1415) * host_v1[i];
1047 for (std::size_t i=0; i < host_v1.size(); ++i)
1048 host_v1[i] += host_v1[i] / alpha + host_v2[i] * beta;
1049 vcl_v1 += vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta;
1051 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1052 return EXIT_FAILURE;
1054 std::cout <<
"Testing inplace multiply-division-add on vector with GPU scalar (subtracting)..." << std::endl;
1055 for (std::size_t i=0; i < host_v1.size(); ++i)
1056 host_v2[i] = NumericT(3.1415) * host_v1[i];
1060 for (std::size_t i=0; i < host_v1.size(); ++i)
1061 host_v1[i] += host_v1[i] * alpha - host_v2[i] / beta;
1062 vcl_v1 += vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
1064 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1065 return EXIT_FAILURE;
1069 std::cout <<
"Testing inplace division-add on vector with GPU scalar..." << std::endl;
1070 for (std::size_t i=0; i < host_v1.size(); ++i)
1071 host_v2[i] = NumericT(3.1415) * host_v1[i];
1075 for (std::size_t i=0; i < host_v1.size(); ++i)
1076 host_v1[i] += host_v2[i] * alpha;
1077 vcl_v1 += vcl_v2 * gpu_alpha;
1079 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1080 return EXIT_FAILURE;
1087 std::cout <<
"Testing multiply-subtract on vector with CPU scalar (right)..." << std::endl;
1088 for (
size_t i=0; i < host_v1.size(); ++i)
1090 host_v1[i] = NumericT(1.0) + random<NumericT>();
1091 host_v2[i] = NumericT(3.1415) * host_v1[i];
1096 for (std::size_t i=0; i < host_v1.size(); ++i)
1097 host_v1[i] = host_v1[i] - alpha * host_v2[i];
1098 vcl_v1 = vcl_v1 - alpha * vcl_v2;
1100 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1101 return EXIT_FAILURE;
1104 std::cout <<
"Testing multiply-subtract on vector with CPU scalar (left)..." << std::endl;
1105 for (std::size_t i=0; i < host_v1.size(); ++i)
1106 host_v2[i] = NumericT(3.1415) * host_v1[i];
1110 for (std::size_t i=0; i < host_v1.size(); ++i)
1111 host_v1[i] = alpha * host_v1[i] - host_v2[i];
1112 vcl_v1 = alpha * vcl_v1 - vcl_v2;
1114 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1115 return EXIT_FAILURE;
1117 std::cout <<
"Testing multiply-subtract on vector with CPU scalar (both)..." << std::endl;
1118 for (std::size_t i=0; i < host_v1.size(); ++i)
1119 host_v2[i] = NumericT(3.1415) * host_v1[i];
1123 for (std::size_t i=0; i < host_v1.size(); ++i)
1124 host_v1[i] = alpha * host_v1[i] - beta * host_v2[i];
1125 vcl_v1 = alpha * vcl_v1 - beta * vcl_v2;
1127 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1128 return EXIT_FAILURE;
1131 std::cout <<
"Testing inplace multiply-subtract on vector with CPU scalar..." << std::endl;
1132 for (std::size_t i=0; i < host_v1.size(); ++i)
1133 host_v2[i] = NumericT(3.1415) * host_v1[i];
1137 for (std::size_t i=0; i < host_v1.size(); ++i)
1138 host_v1[i] -= alpha * host_v2[i];
1139 vcl_v1 -= alpha * vcl_v2;
1141 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1142 return EXIT_FAILURE;
1145 std::cout <<
"Testing multiply-subtract on vector with GPU scalar (right)..." << std::endl;
1146 for (std::size_t i=0; i < host_v1.size(); ++i)
1147 host_v2[i] = NumericT(3.1415) * host_v1[i];
1151 for (std::size_t i=0; i < host_v1.size(); ++i)
1152 host_v1[i] = host_v1[i] - alpha * host_v2[i];
1153 vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2;
1155 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1156 return EXIT_FAILURE;
1158 std::cout <<
"Testing multiply-subtract on vector with GPU scalar (left)..." << std::endl;
1159 for (std::size_t i=0; i < host_v1.size(); ++i)
1160 host_v2[i] = NumericT(3.1415) * host_v1[i];
1164 for (std::size_t i=0; i < host_v1.size(); ++i)
1165 host_v1[i] = host_v1[i] - alpha * host_v2[i];
1166 vcl_v1 = vcl_v1 - gpu_alpha * vcl_v2;
1168 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1169 return EXIT_FAILURE;
1171 std::cout <<
"Testing multiply-subtract on vector with GPU scalar (both)..." << std::endl;
1172 for (std::size_t i=0; i < host_v1.size(); ++i)
1173 host_v2[i] = NumericT(3.1415) * host_v1[i];
1177 for (std::size_t i=0; i < host_v1.size(); ++i)
1178 host_v1[i] = alpha * host_v1[i] - beta * host_v2[i];
1179 vcl_v1 = gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
1181 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1182 return EXIT_FAILURE;
1184 std::cout <<
"Testing inplace multiply-subtract on vector with GPU scalar (both, adding)..." << std::endl;
1185 for (std::size_t i=0; i < host_v1.size(); ++i)
1186 host_v2[i] = NumericT(3.1415) * host_v1[i];
1190 for (std::size_t i=0; i < host_v1.size(); ++i)
1191 host_v1[i] -= alpha * host_v1[i] + beta * host_v2[i];
1192 vcl_v1 -= gpu_alpha * vcl_v1 + gpu_beta * vcl_v2;
1194 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1195 return EXIT_FAILURE;
1197 std::cout <<
"Testing inplace multiply-subtract on vector with GPU scalar (both, subtracting)..." << std::endl;
1198 for (std::size_t i=0; i < host_v1.size(); ++i)
1199 host_v2[i] = NumericT(3.1415) * host_v1[i];
1203 for (std::size_t i=0; i < host_v1.size(); ++i)
1204 host_v1[i] -= alpha * host_v1[i] - beta * host_v2[i];
1205 vcl_v1 -= gpu_alpha * vcl_v1 - gpu_beta * vcl_v2;
1207 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1208 return EXIT_FAILURE;
1211 std::cout <<
"Testing inplace multiply-subtract on vector with GPU scalar..." << std::endl;
1212 for (std::size_t i=0; i < host_v1.size(); ++i)
1213 host_v2[i] = NumericT(3.1415) * host_v1[i];
1217 for (std::size_t i=0; i < host_v1.size(); ++i)
1218 host_v1[i] -= alpha * host_v2[i];
1219 vcl_v1 -= gpu_alpha * vcl_v2;
1221 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1222 return EXIT_FAILURE;
1229 std::cout <<
"Testing division-subtract on vector with CPU scalar (right)..." << std::endl;
1230 for (
size_t i=0; i < host_v1.size(); ++i)
1232 host_v1[i] = NumericT(1.0) + random<NumericT>();
1233 host_v2[i] = NumericT(3.1415) * host_v1[i];
1238 for (std::size_t i=0; i < host_v1.size(); ++i)
1239 host_v1[i] = host_v1[i] - host_v2[i] / alpha;
1240 vcl_v1 = vcl_v1 - vcl_v2 / alpha;
1242 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1243 return EXIT_FAILURE;
1246 std::cout <<
"Testing division-subtract on vector with CPU scalar (left)..." << std::endl;
1247 for (std::size_t i=0; i < host_v1.size(); ++i)
1248 host_v2[i] = NumericT(3.1415) * host_v1[i];
1252 for (std::size_t i=0; i < host_v1.size(); ++i)
1253 host_v1[i] = host_v1[i] / alpha - host_v2[i];
1254 vcl_v1 = vcl_v1 / alpha - vcl_v2;
1256 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1257 return EXIT_FAILURE;
1259 std::cout <<
"Testing division-subtract on vector with CPU scalar (both)..." << std::endl;
1260 for (std::size_t i=0; i < host_v1.size(); ++i)
1261 host_v2[i] = NumericT(3.1415) * host_v1[i];
1265 for (std::size_t i=0; i < host_v1.size(); ++i)
1266 host_v1[i] = host_v1[i] / alpha - host_v2[i] / alpha;
1267 vcl_v1 = vcl_v1 / alpha - vcl_v2 / alpha;
1269 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1270 return EXIT_FAILURE;
1273 std::cout <<
"Testing inplace division-subtract on vector with CPU scalar..." << std::endl;
1274 for (std::size_t i=0; i < host_v1.size(); ++i)
1275 host_v2[i] = NumericT(3.1415) * host_v1[i];
1279 for (std::size_t i=0; i < host_v1.size(); ++i)
1280 host_v1[i] -= host_v2[i] / alpha;
1281 vcl_v1 -= vcl_v2 / alpha;
1283 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1284 return EXIT_FAILURE;
1286 std::cout <<
"Testing inplace division-subtract on vector with GPU scalar..." << std::endl;
1287 for (std::size_t i=0; i < host_v1.size(); ++i)
1288 host_v2[i] = NumericT(3.1415) * host_v1[i];
1292 for (std::size_t i=0; i < host_v1.size(); ++i)
1293 host_v1[i] -= host_v2[i] / alpha;
1294 vcl_v1 -= vcl_v2 / gpu_alpha;
1296 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1297 return EXIT_FAILURE;
1300 std::cout <<
"Testing division-subtract on vector with GPU scalar (right)..." << std::endl;
1301 for (std::size_t i=0; i < host_v1.size(); ++i)
1302 host_v2[i] = NumericT(3.1415) * host_v1[i];
1306 for (std::size_t i=0; i < host_v1.size(); ++i)
1307 host_v1[i] = host_v1[i] - host_v2[i] / alpha;
1308 vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha;
1310 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1311 return EXIT_FAILURE;
1313 std::cout <<
"Testing division-subtract on vector with GPU scalar (left)..." << std::endl;
1314 for (std::size_t i=0; i < host_v1.size(); ++i)
1315 host_v2[i] = NumericT(3.1415) * host_v1[i];
1319 for (std::size_t i=0; i < host_v1.size(); ++i)
1320 host_v1[i] = host_v1[i] - host_v2[i] / alpha;
1321 vcl_v1 = vcl_v1 - vcl_v2 / gpu_alpha;
1323 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1324 return EXIT_FAILURE;
1326 std::cout <<
"Testing division-subtract on vector with GPU scalar (both)..." << std::endl;
1327 for (std::size_t i=0; i < host_v1.size(); ++i)
1328 host_v2[i] = NumericT(3.1415) * host_v1[i];
1332 for (std::size_t i=0; i < host_v1.size(); ++i)
1333 host_v1[i] = host_v1[i] / alpha - host_v2[i] / beta;
1334 vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
1336 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1337 return EXIT_FAILURE;
1339 std::cout <<
"Testing inplace division-subtract on vector with GPU scalar (both, adding)..." << std::endl;
1340 for (std::size_t i=0; i < host_v1.size(); ++i)
1341 host_v2[i] = NumericT(3.1415) * host_v1[i];
1345 for (std::size_t i=0; i < host_v1.size(); ++i)
1346 host_v1[i] -= host_v1[i] / alpha + host_v2[i] / beta;
1347 vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 / gpu_beta;
1349 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1350 return EXIT_FAILURE;
1352 std::cout <<
"Testing inplace division-subtract on vector with GPU scalar (both, subtracting)..." << std::endl;
1353 for (std::size_t i=0; i < host_v1.size(); ++i)
1354 host_v2[i] = NumericT(3.1415) * host_v1[i];
1358 for (std::size_t i=0; i < host_v1.size(); ++i)
1359 host_v1[i] -= host_v1[i] / alpha - host_v2[i] / beta;
1360 vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 / gpu_beta;
1362 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1363 return EXIT_FAILURE;
1365 std::cout <<
"Testing multiply-division-subtract on vector with GPU scalar..." << std::endl;
1366 for (std::size_t i=0; i < host_v1.size(); ++i)
1367 host_v2[i] = NumericT(3.1415) * host_v1[i];
1371 for (std::size_t i=0; i < host_v1.size(); ++i)
1372 host_v1[i] = host_v1[i] * alpha - host_v2[i] / beta;
1373 vcl_v1 = vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
1375 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1376 return EXIT_FAILURE;
1378 std::cout <<
"Testing division-multiply-subtract on vector with GPU scalar..." << std::endl;
1379 for (std::size_t i=0; i < host_v1.size(); ++i)
1380 host_v2[i] = NumericT(3.1415) * host_v1[i];
1384 for (std::size_t i=0; i < host_v1.size(); ++i)
1385 host_v1[i] = host_v1[i] / alpha - host_v2[i] * beta;
1386 vcl_v1 = vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta;
1388 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1389 return EXIT_FAILURE;
1391 std::cout <<
"Testing inplace multiply-division-subtract on vector with GPU scalar (adding)..." << std::endl;
1392 for (std::size_t i=0; i < host_v1.size(); ++i)
1393 host_v2[i] = NumericT(3.1415) * host_v1[i];
1397 for (std::size_t i=0; i < host_v1.size(); ++i)
1398 host_v1[i] -= host_v1[i] * alpha + host_v2[i] / beta;
1399 vcl_v1 -= vcl_v1 * gpu_alpha + vcl_v2 / gpu_beta;
1401 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1402 return EXIT_FAILURE;
1404 std::cout <<
"Testing inplace division-multiply-subtract on vector with GPU scalar (adding)..." << std::endl;
1405 for (std::size_t i=0; i < host_v1.size(); ++i)
1406 host_v2[i] = NumericT(3.1415) * host_v1[i];
1410 for (std::size_t i=0; i < host_v1.size(); ++i)
1411 host_v1[i] -= host_v1[i] / alpha + host_v2[i] * beta;
1412 vcl_v1 -= vcl_v1 / gpu_alpha + vcl_v2 * gpu_beta;
1414 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1415 return EXIT_FAILURE;
1417 std::cout <<
"Testing inplace multiply-division-subtract on vector with GPU scalar (subtracting)..." << std::endl;
1418 for (std::size_t i=0; i < host_v1.size(); ++i)
1419 host_v2[i] = NumericT(3.1415) * host_v1[i];
1423 for (std::size_t i=0; i < host_v1.size(); ++i)
1424 host_v1[i] -= host_v1[i] * alpha - host_v2[i] / beta;
1425 vcl_v1 -= vcl_v1 * gpu_alpha - vcl_v2 / gpu_beta;
1427 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1428 return EXIT_FAILURE;
1430 std::cout <<
"Testing inplace division-multiply-subtract on vector with GPU scalar (subtracting)..." << std::endl;
1431 for (std::size_t i=0; i < host_v1.size(); ++i)
1432 host_v2[i] = NumericT(3.1415) * host_v1[i];
1436 for (std::size_t i=0; i < host_v1.size(); ++i)
1437 host_v1[i] -= host_v1[i] / alpha - host_v2[i] * beta;
1438 vcl_v1 -= vcl_v1 / gpu_alpha - vcl_v2 * gpu_beta;
1440 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1441 return EXIT_FAILURE;
1444 std::cout <<
"Testing inplace division-subtract on vector with GPU scalar..." << std::endl;
1445 for (std::size_t i=0; i < host_v1.size(); ++i)
1446 host_v2[i] = NumericT(3.1415) * host_v1[i];
1450 for (std::size_t i=0; i < host_v1.size(); ++i)
1451 host_v1[i] -= alpha * host_v2[i];
1452 vcl_v1 -= gpu_alpha * vcl_v2;
1454 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1455 return EXIT_FAILURE;
1462 for (std::size_t i=0; i < host_v1.size(); ++i)
1464 host_v1[i] = NumericT(1.0) + random<NumericT>();
1465 host_v2[i] = NumericT(3.1415) * host_v1[i];
1470 std::cout <<
"Testing three vector additions..." << std::endl;
1471 for (std::size_t i=0; i < host_v1.size(); ++i)
1472 host_v1[i] = host_v2[i] + host_v1[i] + host_v2[i];
1473 vcl_v1 = vcl_v2 + vcl_v1 + vcl_v2;
1475 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1476 return EXIT_FAILURE;
1479 for (std::size_t i=0; i < host_v1.size(); ++i)
1480 host_v2[i] = NumericT(3.1415) * host_v1[i];
1484 std::cout <<
"Testing complicated vector expression with CPU scalar..." << std::endl;
1485 for (std::size_t i=0; i < host_v1.size(); ++i)
1486 host_v1[i] = beta * (host_v1[i] - alpha * host_v2[i]);
1487 vcl_v1 = beta * (vcl_v1 - alpha * vcl_v2);
1489 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1490 return EXIT_FAILURE;
1492 std::cout <<
"Testing complicated vector expression with GPU scalar..." << std::endl;
1493 for (std::size_t i=0; i < host_v1.size(); ++i)
1494 host_v1[i] = beta * (host_v1[i] - alpha * host_v2[i]);
1495 vcl_v1 = gpu_beta * (vcl_v1 - gpu_alpha * vcl_v2);
1497 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1498 return EXIT_FAILURE;
1501 for (std::size_t i=0; i < host_v1.size(); ++i)
1502 host_v2[i] = NumericT(3.1415) * host_v1[i];
1506 std::cout <<
"Testing swap..." << std::endl;
1507 for (std::size_t i=0; i < host_v1.size(); ++i)
1509 NumericT temp = host_v1[i];
1510 host_v1[i] = host_v2[i];
1513 swap(vcl_v1, vcl_v2);
1515 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1516 return EXIT_FAILURE;
1519 for (std::size_t i=0; i<host_v1.size(); ++i)
1521 host_v1[i] = NumericT(1.0) + random<NumericT>();
1522 host_v2[i] = NumericT(5.0) + random<NumericT>();
1528 std::cout <<
"Testing unary operator-..." << std::endl;
1529 for (std::size_t i=0; i < host_v1.size(); ++i)
1530 host_v1[i] = - host_v2[i];
1533 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1534 return EXIT_FAILURE;
1537 std::cout <<
"Testing elementwise multiplication..." << std::endl;
1538 std::cout <<
" v1 = element_prod(v1, v2);" << std::endl;
1539 for (std::size_t i=0; i < host_v1.size(); ++i)
1540 host_v1[i] = host_v1[i] * host_v2[i];
1543 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1544 return EXIT_FAILURE;
1546 std::cout <<
" v1 += element_prod(v1, v2);" << std::endl;
1547 for (std::size_t i=0; i < host_v1.size(); ++i)
1548 host_v1[i] += host_v1[i] * host_v2[i];
1551 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1552 return EXIT_FAILURE;
1554 std::cout <<
" v1 -= element_prod(v1, v2);" << std::endl;
1555 for (std::size_t i=0; i < host_v1.size(); ++i)
1556 host_v1[i] -= host_v1[i] * host_v2[i];
1559 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1560 return EXIT_FAILURE;
1563 std::cout <<
" v1 = element_prod(v1 + v2, v2);" << std::endl;
1564 for (std::size_t i=0; i < host_v1.size(); ++i)
1565 host_v1[i] = (host_v1[i] + host_v2[i]) * host_v2[i];
1568 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1569 return EXIT_FAILURE;
1571 std::cout <<
" v1 += element_prod(v1 + v2, v2);" << std::endl;
1572 for (std::size_t i=0; i < host_v1.size(); ++i)
1573 host_v1[i] += (host_v1[i] + host_v2[i]) * host_v2[i];
1576 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1577 return EXIT_FAILURE;
1579 std::cout <<
" v1 -= element_prod(v1 + v2, v2);" << std::endl;
1580 for (std::size_t i=0; i < host_v1.size(); ++i)
1581 host_v1[i] -= (host_v1[i] + host_v2[i]) * host_v2[i];
1584 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1585 return EXIT_FAILURE;
1588 std::cout <<
" v1 = element_prod(v1, v2 + v1);" << std::endl;
1589 for (std::size_t i=0; i < host_v1.size(); ++i)
1590 host_v1[i] = host_v1[i] * (host_v2[i] + host_v1[i]);
1593 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1594 return EXIT_FAILURE;
1596 std::cout <<
" v1 += element_prod(v1, v2 + v1);" << std::endl;
1597 for (std::size_t i=0; i < host_v1.size(); ++i)
1598 host_v1[i] += host_v1[i] * (host_v2[i] + host_v1[i]);
1601 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1602 return EXIT_FAILURE;
1604 std::cout <<
" v1 -= element_prod(v1, v2 + v1);" << std::endl;
1605 for (std::size_t i=0; i < host_v1.size(); ++i)
1606 host_v1[i] -= host_v1[i] * (host_v2[i] + host_v1[i]);
1609 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1610 return EXIT_FAILURE;
1613 std::cout <<
" v1 = element_prod(v1 + v2, v2 + v1);" << std::endl;
1614 for (std::size_t i=0; i < host_v1.size(); ++i)
1615 host_v1[i] = (host_v1[i] + host_v2[i]) * (host_v2[i] + host_v1[i]);
1618 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1619 return EXIT_FAILURE;
1621 std::cout <<
" v1 += element_prod(v1 + v2, v2 + v1);" << std::endl;
1622 for (std::size_t i=0; i < host_v1.size(); ++i)
1623 host_v1[i] += (host_v1[i] + host_v2[i]) * (host_v2[i] + host_v1[i]);
1626 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1627 return EXIT_FAILURE;
1629 std::cout <<
" v1 -= element_prod(v1 + v2, v2 + v1);" << std::endl;
1630 for (std::size_t i=0; i < host_v1.size(); ++i)
1631 host_v1[i] -= (host_v1[i] + host_v2[i]) * (host_v2[i] + host_v1[i]);
1634 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1635 return EXIT_FAILURE;
1638 std::cout <<
"Testing elementwise division..." << std::endl;
1639 for (std::size_t i=0; i<host_v1.size(); ++i)
1641 host_v1[i] = NumericT(1.0) + random<NumericT>();
1642 host_v2[i] = NumericT(5.0) + random<NumericT>();
1648 for (std::size_t i=0; i < host_v1.size(); ++i)
1649 host_v1[i] = host_v1[i] / host_v2[i];
1652 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1653 return EXIT_FAILURE;
1655 for (std::size_t i=0; i < host_v1.size(); ++i)
1656 host_v1[i] += host_v1[i] / host_v2[i];
1659 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1660 return EXIT_FAILURE;
1662 for (std::size_t i=0; i < host_v1.size(); ++i)
1663 host_v1[i] -= host_v1[i] / host_v2[i];
1666 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1667 return EXIT_FAILURE;
1670 for (std::size_t i=0; i < host_v1.size(); ++i)
1671 host_v1[i] = (host_v1[i] + host_v2[i]) / host_v2[i];
1674 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1675 return EXIT_FAILURE;
1677 for (std::size_t i=0; i < host_v1.size(); ++i)
1678 host_v1[i] += (host_v1[i] + host_v2[i]) / host_v2[i];
1681 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1682 return EXIT_FAILURE;
1684 for (std::size_t i=0; i < host_v1.size(); ++i)
1685 host_v1[i] -= (host_v1[i] + host_v2[i]) / host_v2[i];
1688 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1689 return EXIT_FAILURE;
1692 for (std::size_t i=0; i < host_v1.size(); ++i)
1693 host_v1[i] = host_v1[i] / (host_v2[i] + host_v1[i]);
1696 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1697 return EXIT_FAILURE;
1699 for (std::size_t i=0; i < host_v1.size(); ++i)
1700 host_v1[i] += host_v1[i] / (host_v2[i] + host_v1[i]);
1703 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1704 return EXIT_FAILURE;
1706 for (std::size_t i=0; i < host_v1.size(); ++i)
1707 host_v1[i] -= host_v1[i] / (host_v2[i] + host_v1[i]);
1710 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1711 return EXIT_FAILURE;
1714 for (std::size_t i=0; i < host_v1.size(); ++i)
1715 host_v1[i] = (host_v1[i] + host_v2[i]) / (host_v2[i] + host_v1[i]);
1718 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1719 return EXIT_FAILURE;
1721 for (std::size_t i=0; i < host_v1.size(); ++i)
1722 host_v1[i] += (host_v1[i] + host_v2[i]) / (host_v2[i] + host_v1[i]);
1725 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1726 return EXIT_FAILURE;
1728 for (std::size_t i=0; i < host_v1.size(); ++i)
1729 host_v1[i] -= (host_v1[i] + host_v2[i]) / (host_v2[i] + host_v1[i]);
1732 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
1733 return EXIT_FAILURE;
1736 std::cout <<
"Testing elementwise power function..." << std::endl;
1737 for (std::size_t i=0; i<host_v1.size(); ++i)
1739 host_v1[i] = NumericT(1.1) + NumericT(0.5) * random<NumericT>();
1740 host_v2[i] = NumericT(1.1) + NumericT(0.5) * random<NumericT>();
1742 std::vector<NumericT> std_v3(host_v1.size());
1748 for (std::size_t i=0; i<host_v3.size(); ++i)
1749 host_v3[i] = std::pow(host_v1[i], host_v2[i]);
1750 vcl_v1 = viennacl::linalg::element_pow(vcl_v1, vcl_v2);
1752 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1754 std::cerr <<
"** Failure in v1 = pow(v1, v2);" << std::endl;
1755 return EXIT_FAILURE;
1759 for (std::size_t i=0; i<host_v3.size(); ++i)
1760 host_v3[i] = host_v1[i];
1761 for (std::size_t i=0; i<host_v3.size(); ++i)
1762 host_v3[i] += std::pow(host_v1[i], host_v2[i]);
1763 vcl_v1 += viennacl::linalg::element_pow(vcl_v1, vcl_v2);
1765 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1767 std::cerr <<
"** Failure in v1 += pow(v1, v2);" << std::endl;
1768 return EXIT_FAILURE;
1772 for (std::size_t i=0; i<host_v3.size(); ++i)
1773 host_v3[i] = host_v1[i];
1774 for (std::size_t i=0; i<host_v3.size(); ++i)
1775 host_v3[i] -= std::pow(host_v1[i], host_v2[i]);
1776 vcl_v1 -= viennacl::linalg::element_pow(vcl_v1, vcl_v2);
1778 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1780 std::cerr <<
"** Failure in v1 -= pow(v1, v2);" << std::endl;
1781 return EXIT_FAILURE;
1786 for (std::size_t i=0; i<host_v3.size(); ++i)
1787 host_v3[i] = host_v1[i];
1788 for (std::size_t i=0; i<host_v3.size(); ++i)
1789 host_v3[i] = std::pow(host_v1[i] + host_v2[i], host_v2[i]);
1790 vcl_v1 = viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2);
1792 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1794 std::cerr <<
"** Failure in v1 = pow(v1 + v2, v2);" << std::endl;
1795 return EXIT_FAILURE;
1799 for (std::size_t i=0; i<host_v3.size(); ++i)
1800 host_v3[i] = host_v1[i];
1801 for (std::size_t i=0; i<host_v3.size(); ++i)
1802 host_v3[i] += std::pow(host_v1[i] + host_v2[i], host_v2[i]);
1803 vcl_v1 += viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2);
1805 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1807 std::cerr <<
"** Failure in v1 += pow(v1 + v2, v2);" << std::endl;
1808 return EXIT_FAILURE;
1812 for (std::size_t i=0; i<host_v3.size(); ++i)
1813 host_v3[i] = host_v1[i];
1814 for (std::size_t i=0; i<host_v3.size(); ++i)
1815 host_v3[i] -= std::pow(host_v1[i] + host_v2[i], host_v2[i]);
1816 vcl_v1 -= viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2);
1818 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1820 std::cerr <<
"** Failure in v1 -= pow(v1 + v2, v2);" << std::endl;
1821 return EXIT_FAILURE;
1826 for (std::size_t i=0; i<host_v3.size(); ++i)
1827 host_v3[i] = host_v1[i];
1828 for (std::size_t i=0; i<host_v3.size(); ++i)
1829 host_v3[i] = std::pow(host_v1[i], host_v2[i] + host_v1[i]);
1830 vcl_v1 = viennacl::linalg::element_pow(vcl_v1, vcl_v2 + vcl_v1);
1832 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1834 std::cerr <<
"** Failure in v1 = pow(v1, v2 + v1);" << std::endl;
1835 return EXIT_FAILURE;
1839 for (std::size_t i=0; i<host_v3.size(); ++i)
1840 host_v3[i] = host_v1[i];
1841 for (std::size_t i=0; i<host_v3.size(); ++i)
1842 host_v3[i] += std::pow(host_v1[i], host_v2[i] + host_v1[i]);
1843 vcl_v1 += viennacl::linalg::element_pow(vcl_v1, vcl_v2 + vcl_v1);
1845 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1847 std::cerr <<
"** Failure in v1 += pow(v1, v2 + v1);" << std::endl;
1848 return EXIT_FAILURE;
1852 for (std::size_t i=0; i<host_v3.size(); ++i)
1853 host_v3[i] = host_v1[i];
1854 for (std::size_t i=0; i<host_v3.size(); ++i)
1855 host_v3[i] -= std::pow(host_v1[i], host_v2[i] + host_v1[i]);
1856 vcl_v1 -= viennacl::linalg::element_pow(vcl_v1, vcl_v2 + vcl_v1);
1858 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1860 std::cerr <<
"** Failure in v1 -= pow(v1, v2 + v1);" << std::endl;
1861 return EXIT_FAILURE;
1866 for (std::size_t i=0; i<host_v3.size(); ++i)
1867 host_v3[i] = host_v1[i];
1868 for (std::size_t i=0; i<host_v3.size(); ++i)
1869 host_v3[i] = std::pow(host_v1[i] + host_v2[i], host_v2[i] + host_v1[i]);
1870 vcl_v1 = viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1872 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1874 std::cerr <<
"** Failure in v1 = pow(v1 + v2, v2 + v1);" << std::endl;
1875 return EXIT_FAILURE;
1879 for (std::size_t i=0; i<host_v3.size(); ++i)
1880 host_v3[i] = host_v1[i];
1881 for (std::size_t i=0; i<host_v3.size(); ++i)
1882 host_v3[i] += std::pow(host_v1[i] + host_v2[i], host_v2[i] + host_v1[i]);
1883 vcl_v1 += viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1885 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1887 std::cerr <<
"** Failure in v1 += pow(v1 + v2, v2 + v1);" << std::endl;
1888 return EXIT_FAILURE;
1892 for (std::size_t i=0; i<host_v3.size(); ++i)
1893 host_v3[i] = host_v1[i];
1894 for (std::size_t i=0; i<host_v3.size(); ++i)
1895 host_v3[i] -= std::pow(host_v1[i] + host_v2[i], host_v2[i] + host_v1[i]);
1896 vcl_v1 -= viennacl::linalg::element_pow(vcl_v1 + vcl_v2, vcl_v2 + vcl_v1);
1898 if (
check(host_v3, vcl_v1, epsilon) != EXIT_SUCCESS)
1900 std::cerr <<
"** Failure in v1 -= pow(v1 + v2, v2 + v1);" << std::endl;
1901 return EXIT_FAILURE;
1904 std::cout <<
"Testing unary elementwise operations..." << std::endl;
1905 for (
size_t i=0; i < host_v1.size(); ++i)
1906 host_v1[i] = random<NumericT>() / NumericT(4);
// GENERATE_UNARY_OP_TEST(FUNCNAME): expands to a block of checks for one unary
// elementwise function. It relies on host_v1, host_v2, vcl_v1, vcl_v2, epsilon
// and NumericT being in scope at the point of expansion.
//
// Steps performed by the expansion:
//   1. host_v2 = 3.1415 * host_v1, then both host vectors are pushed to the
//      ViennaCL vectors with proxy_copy().
//   2. Six comparisons of std::FUNCNAME on the host against
//      viennacl::linalg::element_FUNCNAME on the ViennaCL side:
//        v1  = f(v2)       v1  = f(v1 + v2)
//        v1 += f(v1)       v1 += f(v1 + v2)
//        v1 -= f(v2)       v1 -= f(v1 + v2)
//   3. If check() reports a mismatch, a message naming the failing expression
//      is printed and EXIT_FAILURE is returned from the enclosing function.
//
// NOTE(review): the third comparison applies FUNCNAME to v1 on both sides, but
// its failure message prints "(v2)". Either the message or the operand looks
// like a copy-paste slip - confirm which one was intended.
// NOTE(review): this listing omits the brace-only continuation lines of the
// macro, so the block structure around each 'if' is not visible here.
1908 #define GENERATE_UNARY_OP_TEST(FUNCNAME) \
1909 for (std::size_t i=0; i<host_v1.size(); ++i) \
1910 host_v2[i] = NumericT(3.1415) * host_v1[i]; \
1911 proxy_copy(host_v1, vcl_v1); \
1912 proxy_copy(host_v2, vcl_v2); \
1914 for (std::size_t i=0; i<host_v1.size(); ++i) \
1915 host_v1[i] = std::FUNCNAME(host_v2[i]); \
1916 vcl_v1 = viennacl::linalg::element_##FUNCNAME(vcl_v2); \
1918 if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
1920 std::cout << "Failure at v1 = " << #FUNCNAME << "(v2)" << std::endl; \
1921 return EXIT_FAILURE; \
1924 for (std::size_t i=0; i<host_v1.size(); ++i) \
1925 host_v1[i] = std::FUNCNAME(host_v1[i] + host_v2[i]); \
1926 vcl_v1 = viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
1928 if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
1930 std::cout << "Failure at v1 = " << #FUNCNAME << "(v1 + v2)" << std::endl; \
1931 return EXIT_FAILURE; \
1934 for (std::size_t i=0; i<host_v1.size(); ++i) \
1935 host_v1[i] += std::FUNCNAME(host_v1[i]); \
1936 vcl_v1 += viennacl::linalg::element_##FUNCNAME(vcl_v1); \
1938 if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
1940 std::cout << "Failure at v1 += " << #FUNCNAME << "(v2)" << std::endl; \
1941 return EXIT_FAILURE; \
1944 for (std::size_t i=0; i<host_v1.size(); ++i) \
1945 host_v1[i] += std::FUNCNAME(host_v1[i] + host_v2[i]); \
1946 vcl_v1 += viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
1948 if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
1950 std::cout << "Failure at v1 += " << #FUNCNAME << "(v1 + v2)" << std::endl; \
1951 return EXIT_FAILURE; \
1954 for (std::size_t i=0; i<host_v1.size(); ++i) \
1955 host_v1[i] -= std::FUNCNAME(host_v2[i]); \
1956 vcl_v1 -= viennacl::linalg::element_##FUNCNAME(vcl_v2); \
1958 if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
1960 std::cout << "Failure at v1 -= " << #FUNCNAME << "(v2)" << std::endl; \
1961 return EXIT_FAILURE; \
1964 for (std::size_t i=0; i<host_v1.size(); ++i) \
1965 host_v1[i] -= std::FUNCNAME(host_v1[i] + host_v2[i]); \
1966 vcl_v1 -= viennacl::linalg::element_##FUNCNAME(vcl_v1 + vcl_v2); \
1968 if (check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS) \
1970 std::cout << "Failure at v1 -= " << #FUNCNAME << "(v1 + v2)" << std::endl; \
1971 return EXIT_FAILURE; \
1976 for (std::size_t i=0; i < host_v1.size(); ++i)
1977 host_v1[i] = random<NumericT>() / NumericT(4);
1992 for (std::size_t i=0; i<host_v1.size(); ++i)
1993 host_v2[i] = NumericT(3.1415) * host_v1[i];
1997 std::cout <<
"Testing another complicated vector expression with CPU scalars..." << std::endl;
1998 for (std::size_t i=0; i<host_v1.size(); ++i)
1999 host_v1[i] = host_v2[i] / alpha + beta * (host_v1[i] - alpha*host_v2[i]);
2000 vcl_v1 = vcl_v2 / alpha + beta * (vcl_v1 - alpha*vcl_v2);
2002 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
2003 return EXIT_FAILURE;
2005 std::cout <<
"Testing another complicated vector expression with GPU scalars..." << std::endl;
2006 for (std::size_t i=0; i<host_v1.size(); ++i)
2007 host_v2[i] = NumericT(3.1415) * host_v1[i];
2011 for (std::size_t i=0; i<host_v1.size(); ++i)
2012 host_v1[i] = host_v2[i] / alpha + beta * (host_v1[i] - alpha*host_v2[i]);
2013 vcl_v1 = vcl_v2 / gpu_alpha + gpu_beta * (vcl_v1 - gpu_alpha*vcl_v2);
2015 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
2016 return EXIT_FAILURE;
2019 std::cout <<
"Testing lenghty sum of scaled vectors..." << std::endl;
2020 for (std::size_t i=0; i<host_v1.size(); ++i)
2021 host_v2[i] = NumericT(3.1415) * host_v1[i];
2025 for (std::size_t i=0; i<host_v1.size(); ++i)
2026 host_v1[i] = host_v2[i] / alpha + beta * host_v1[i] - alpha * host_v2[i] + beta * host_v1[i] - alpha * host_v1[i];
2027 vcl_v1 = vcl_v2 / gpu_alpha + gpu_beta * vcl_v1 - alpha * vcl_v2 + beta * vcl_v1 - alpha * vcl_v1;
2029 if (
check(host_v1, vcl_v1, epsilon) != EXIT_SUCCESS)
2030 return EXIT_FAILURE;
// Test driver for one numeric type NumericT and tolerance type Epsilon.
// It fills two full-length std::vector buffers, derives range and slice
// sub-vectors from the full vectors, verifies that the sub-vectors were
// created correctly, and then runs the five-argument test() on every
// vector / range / slice pairing. It returns EXIT_FAILURE as soon as any
// check or pairing fails and EXIT_SUCCESS otherwise.
//
// NOTE(review): this listing omits several original lines - the function
// signature (presumably 'int test(Epsilon const & epsilon)', matching the call
// test<NumericT>(epsilon) further down), the braces, and the declarations of
// the vcl_* and host_* objects. The comments below describe only statements
// that are visible here.
2037 template<
typename NumericT,
typename Epsilon >
2040 int retval = EXIT_SUCCESS;
// Length of the full vectors; every sub-vector extent below is a quarter of it.
2041 std::size_t
size = 24656;
2043 std::cout <<
"Running tests for vector of size " << size << std::endl;
// Host-side reference data: both buffers hold values of the form 1.0 + random<NumericT>().
2048 std::vector<NumericT> std_full_vec(size);
2049 std::vector<NumericT> std_full_vec2(std_full_vec.size());
2051 for (std::size_t i=0; i<std_full_vec.size(); ++i)
2053 std_full_vec[i] = NumericT(1.0) + random<NumericT>();
2054 std_full_vec2[i] = NumericT(1.0) + random<NumericT>();
// Range bounds: r1 spans the second quarter of the first buffer,
// r2 spans the third quarter of the second buffer.
2057 std::size_t r1_start = std_full_vec.size() / 4;
2058 std::size_t r1_stop = 2 * std_full_vec.size() / 4;
2059 std::size_t r2_start = 2 * std_full_vec2.size() / 4;
2060 std::size_t r2_stop = 3 * std_full_vec2.size() / 4;
// Slice parameters (start, increment, element count): s1 starts at size/4 with
// increment 3, s2 starts at 2*size/4 with increment 2; both hold size/4 elements.
2064 std::size_t s1_start = std_full_vec.size() / 4;
2065 std::size_t s1_inc = 3;
2066 std::size_t s1_size = std_full_vec.size() / 4;
2067 std::size_t s2_start = 2 * std_full_vec2.size() / 4;
2068 std::size_t s2_inc = 2;
2069 std::size_t s2_size = std_full_vec2.size() / 4;
// Copy the contents of std_full_vec2 into vcl_full_vec2.
2080 viennacl::copy(std_full_vec2.begin(), std_full_vec2.end(), vcl_full_vec2.begin());
// ViennaCL-side ranges using the same quarter bounds as r1 / r2 above.
2082 viennacl::range vcl_r1( vcl_full_vec.size() / 4, 2 * vcl_full_vec.size() / 4);
2083 viennacl::range vcl_r2(2 * vcl_full_vec2.size() / 4, 3 * vcl_full_vec2.size() / 4);
// Element-wise copies of the host range proxies into plain std::vector buffers.
2091 std::vector<NumericT> std_short_vec(host_range_vec.
size());
2092 for (std::size_t i=0; i<std_short_vec.size(); ++i)
2093 std_short_vec[i] = host_range_vec[i];
2096 std::vector<NumericT> std_short_vec2(host_range_vec2.
size());
2097 for (std::size_t i=0; i<std_short_vec2.size(); ++i)
2098 std_short_vec2[i] = host_range_vec2[i];
2101 std::cout <<
"Testing creation of vectors from range..." << std::endl;
2102 if (
check(host_short_vec, vcl_short_vec, epsilon) != EXIT_SUCCESS)
2103 return EXIT_FAILURE;
2104 if (
check(host_short_vec2, vcl_short_vec2, epsilon) != EXIT_SUCCESS)
2105 return EXIT_FAILURE;
// ViennaCL-side slices built from (start, increment, size); the values match
// s1_* / s2_* above.
2108 viennacl::slice vcl_s1( vcl_full_vec.size() / 4, 3, vcl_full_vec.size() / 4);
2109 viennacl::slice vcl_s2(2 * vcl_full_vec2.size() / 4, 2, vcl_full_vec2.size() / 4);
// Same element-wise copy as for the ranges, now from the host slice proxies.
// NOTE(review): std_short_vec / std_short_vec2 are declared a second time here;
// presumably each group lives in its own brace scope that this listing drops.
2116 std::vector<NumericT> std_short_vec(host_slice_vec.
size());
2117 for (std::size_t i=0; i<std_short_vec.size(); ++i)
2118 std_short_vec[i] = host_slice_vec[i];
2121 std::vector<NumericT> std_short_vec2(host_slice_vec2.
size());
2122 for (std::size_t i=0; i<std_short_vec2.size(); ++i)
2123 std_short_vec2[i] = host_slice_vec2[i];
2126 std::cout <<
"Testing creation of vectors from slice..." << std::endl;
2127 if (
check(host_short_vec, vcl_short_vec, epsilon) != EXIT_SUCCESS)
2128 return EXIT_FAILURE;
2129 if (
check(host_short_vec2, vcl_short_vec2, epsilon) != EXIT_SUCCESS)
2130 return EXIT_FAILURE;
// Run the operation tests for all 3 x 3 operand combinations. The first
// ViennaCL operand cycles vector -> range -> slice in the outer grouping, the
// second one in the inner grouping; the host reference vectors stay the same.
2137 std::cout <<
" ** vcl_v1 = vector, vcl_v2 = vector **" << std::endl;
2138 retval = test<NumericT>(epsilon,
2139 host_short_vec, host_short_vec2,
2140 vcl_short_vec, vcl_short_vec2);
2141 if (retval != EXIT_SUCCESS)
2142 return EXIT_FAILURE;
2144 std::cout <<
" ** vcl_v1 = vector, vcl_v2 = range **" << std::endl;
2145 retval = test<NumericT>(epsilon,
2146 host_short_vec, host_short_vec2,
2147 vcl_short_vec, vcl_range_vec2);
2148 if (retval != EXIT_SUCCESS)
2149 return EXIT_FAILURE;
2151 std::cout <<
" ** vcl_v1 = vector, vcl_v2 = slice **" << std::endl;
2152 retval = test<NumericT>(epsilon,
2153 host_short_vec, host_short_vec2,
2154 vcl_short_vec, vcl_slice_vec2);
2155 if (retval != EXIT_SUCCESS)
2156 return EXIT_FAILURE;
// First operand is a range.
2160 std::cout <<
" ** vcl_v1 = range, vcl_v2 = vector **" << std::endl;
2161 retval = test<NumericT>(epsilon,
2162 host_short_vec, host_short_vec2,
2163 vcl_range_vec, vcl_short_vec2);
2164 if (retval != EXIT_SUCCESS)
2165 return EXIT_FAILURE;
2167 std::cout <<
" ** vcl_v1 = range, vcl_v2 = range **" << std::endl;
2168 retval = test<NumericT>(epsilon,
2169 host_short_vec, host_short_vec2,
2170 vcl_range_vec, vcl_range_vec2);
2171 if (retval != EXIT_SUCCESS)
2172 return EXIT_FAILURE;
2174 std::cout <<
" ** vcl_v1 = range, vcl_v2 = slice **" << std::endl;
2175 retval = test<NumericT>(epsilon,
2176 host_short_vec, host_short_vec2,
2177 vcl_range_vec, vcl_slice_vec2);
2178 if (retval != EXIT_SUCCESS)
2179 return EXIT_FAILURE;
// First operand is a slice.
2183 std::cout <<
" ** vcl_v1 = slice, vcl_v2 = vector **" << std::endl;
2184 retval = test<NumericT>(epsilon,
2185 host_short_vec, host_short_vec2,
2186 vcl_slice_vec, vcl_short_vec2);
2187 if (retval != EXIT_SUCCESS)
2188 return EXIT_FAILURE;
2190 std::cout <<
" ** vcl_v1 = slice, vcl_v2 = range **" << std::endl;
2191 retval = test<NumericT>(epsilon,
2192 host_short_vec, host_short_vec2,
2193 vcl_slice_vec, vcl_range_vec2);
2194 if (retval != EXIT_SUCCESS)
2195 return EXIT_FAILURE;
2197 std::cout <<
" ** vcl_v1 = slice, vcl_v2 = slice **" << std::endl;
2198 retval = test<NumericT>(epsilon,
2199 host_short_vec, host_short_vec2,
2200 vcl_slice_vec, vcl_slice_vec2);
2201 if (retval != EXIT_SUCCESS)
2202 return EXIT_FAILURE;
// All nine combinations passed.
2204 return EXIT_SUCCESS;
// Body of the program entry point (presumably main(); the function header and
// the braces are not part of this listing). It prints a banner, runs the
// single-argument test<NumericT>() for float and for double, and prints a
// closing message.
// NOTE(review): NumericT and epsilon are declared twice below; presumably each
// precision run sits in its own brace scope that this listing does not show.
2213 std::cout << std::endl;
2214 std::cout <<
"----------------------------------------------" << std::endl;
2215 std::cout <<
"----------------------------------------------" << std::endl;
2216 std::cout <<
"## Test :: Vector" << std::endl;
2217 std::cout <<
"----------------------------------------------" << std::endl;
2218 std::cout <<
"----------------------------------------------" << std::endl;
2219 std::cout << std::endl;
2221 int retval = EXIT_SUCCESS;
2223 std::cout << std::endl;
2224 std::cout <<
"----------------------------------------------" << std::endl;
2225 std::cout << std::endl;
// Single-precision run with a tolerance of 1.0E-2.
2227 typedef float NumericT;
2228 NumericT epsilon =
static_cast<NumericT
>(1.0E-2);
2229 std::cout <<
"# Testing setup:" << std::endl;
2230 std::cout <<
" eps: " << epsilon << std::endl;
2231 std::cout <<
" numeric: float" << std::endl;
2232 retval = test<NumericT>(epsilon);
// NOTE(review): only the success branch is visible; the handling of a failed
// run (presumably an early return with EXIT_FAILURE) is not in this listing.
2233 if ( retval == EXIT_SUCCESS )
2234 std::cout <<
"# Test passed" << std::endl;
2238 std::cout << std::endl;
2239 std::cout <<
"----------------------------------------------" << std::endl;
2240 std::cout << std::endl;
// NOTE(review): the statement guarded by this #ifdef and the matching #endif
// are not visible here. The tooltips at the end of the listing mention
// current_device() and double_support(), so presumably the double-precision
// run is skipped on OpenCL devices without double support - confirm against
// the full source.
2241 #ifdef VIENNACL_WITH_OPENCL
// Double-precision run with a tolerance of 1.0E-10.
2246 typedef double NumericT;
2247 NumericT epsilon = 1.0E-10;
2248 std::cout <<
"# Testing setup:" << std::endl;
2249 std::cout <<
" eps: " << epsilon << std::endl;
2250 std::cout <<
" numeric: double" << std::endl;
2251 retval = test<NumericT>(epsilon);
2252 if ( retval == EXIT_SUCCESS )
2253 std::cout <<
"# Test passed" << std::endl;
2257 std::cout << std::endl;
2258 std::cout <<
"----------------------------------------------" << std::endl;
2259 std::cout << std::endl;
2262 std::cout << std::endl;
2263 std::cout <<
"------- Test completed --------" << std::endl;
2264 std::cout << std::endl;
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_div > > element_div(vector_base< T > const &v1, vector_base< T > const &v2)
T norm_2(std::vector< T, A > const &v1)
vcl_size_t index_norm_inf(vector_base< T > const &vec)
Computes the index of the first entry that is equal to the supremum-norm in modulus.
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Generic interface for the l^2-norm. See viennacl/linalg/vector_operations.hpp for implementations...
vector_proxy(NumericT *p_values, std::size_t start_idx, std::size_t increment, std::size_t num_elements)
void plane_rotation(vector_base< T > &vec1, vector_base< T > &vec2, T alpha, T beta)
Computes a plane rotation of two vectors.
void finish()
Synchronizes the execution. finish() will only return after all compute kernels (CUDA, OpenCL) have completed.
ScalarType diff(ScalarType const &s1, ScalarType const &s2)
viennacl::enable_if< viennacl::is_stl< typename viennacl::traits::tag_of< VectorT1 >::type >::value, typename VectorT1::value_type >::type inner_prod(VectorT1 const &v1, VectorT2 const &v2)
#define GENERATE_UNARY_OP_TEST(FUNCNAME)
NumericT & operator[](std::size_t index)
int test(Epsilon const &epsilon, HostVectorType &host_v1, HostVectorType &host_v2, ViennaCLVectorType1 &vcl_v1, ViennaCLVectorType2 &vcl_v2)
viennacl::scalar< int > s2
viennacl::scalar< float > s1
T max(const T &lhs, const T &rhs)
Maximum.
viennacl::ocl::device const & current_device()
Convenience function for returning the active device in the current context.
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
Generic interface for the l^1-norm. See viennacl/linalg/vector_operations.hpp for implementations...
viennacl::vector< float > v1
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Class for representing non-strided subvectors of a bigger vector x.
iterator begin()
Returns an iterator pointing to the beginning of the vector (STL like)
int check(T1 const &t1, T2 const &t2, double epsilon)
void proxy_copy(vector_proxy< NumericT > const &host_vec, viennacl::vector_base< NumericT > &vcl_vec)
Class for representing strided subvectors of a bigger vector x.
bool double_support() const
ViennaCL convenience function: Returns true if the device supports double precision.
NumericT const & operator[](std::size_t index) const
Proxy classes for vectors.
viennacl::enable_if< viennacl::is_scalar< ScalarT1 >::value &&viennacl::is_scalar< ScalarT2 >::value >::type swap(ScalarT1 &s1, ScalarT2 &s2)
Swaps the contents of two scalars; the data is copied.
Represents a vector consisting of 1 at a given index and zeros otherwise.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
NumericT max(std::vector< NumericT > const &v1)
T norm_inf(std::vector< T, A > const &v1)
void copy(std::vector< NumericT > &cpu_vec, circulant_matrix< NumericT, AlignmentV > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
T norm_1(std::vector< T, A > const &v1)
size_type size() const
Returns the length of the vector (cf. std::vector)
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
viennacl::vector_expression< const vector_base< T >, const vector_base< T >, op_element_binary< op_prod > > element_prod(vector_base< T > const &v1, vector_base< T > const &v2)
iterator end()
Returns an iterator pointing to the end of the vector (STL-like).
A slice class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-use...
Generic interface for the l^infty-norm. See viennacl/linalg/vector_operations.hpp for implementations.
NumericT min(std::vector< NumericT > const &v1)
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)