ViennaCL - The Vienna Computing Library
1.5.2
|
#ifndef VIENNACL_LINALG_CUDA_SCALAR_OPERATIONS_HPP_
#define VIENNACL_LINALG_CUDA_SCALAR_OPERATIONS_HPP_

/* =========================================================================
   Copyright (c) 2010-2014, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/** @file
    @brief CUDA kernels and host-side launchers for operations on single scalars
           (scaling, scaled sums, swap).
*/

#include "viennacl/forwards.h"
#include "viennacl/tools/tools.hpp"
#include "viennacl/meta/predicate.hpp"
#include "viennacl/meta/enable_if.hpp"
#include "viennacl/traits/size.hpp"
#include "viennacl/traits/start.hpp"
#include "viennacl/traits/stride.hpp"
#include "viennacl/linalg/cuda/common.hpp"

// includes CUDA
#include <cuda_runtime.h>


namespace viennacl
{
namespace linalg
{
namespace cuda
{

namespace detail
{

}

/////////////////// as /////////////////////////////

/** @brief Kernel computing *s1 = *s2 * alpha, where alpha is read through a pointer on the device.
*
* The kernel does no thread indexing, so every launched thread performs the same single write.
*
* @param s1        Destination of the result (dereferenced on the device)
* @param fac2      Pointer to the scaling factor alpha (dereferenced on the device)
* @param options2  Bit field modifying alpha: bit 0 set -> alpha is negated, bit 1 set -> alpha is replaced by 1/alpha.
*                  The negation is applied before the reciprocal.
* @param s2        Pointer to the value to be scaled (dereferenced on the device)
*/
template <typename T>
__global__ void as_kernel(T * s1, const T * fac2, unsigned int options2, const T * s2)
{
  T alpha = *fac2;
  if (options2 & (1 << 0))   // bit 0: flip sign
    alpha = -alpha;
  if (options2 & (1 << 1))   // bit 1: use reciprocal
    alpha = ((T)(1)) / alpha;

  *s1 = *s2 * alpha;
}

/** @brief Kernel computing *s1 = *s2 * alpha, where alpha is passed by value as a kernel argument.
*
* The kernel does no thread indexing, so every launched thread performs the same single write.
*
* @param s1        Destination of the result (dereferenced on the device)
* @param fac2      The scaling factor alpha, passed by value
* @param options2  Bit field modifying alpha: bit 0 set -> alpha is negated, bit 1 set -> alpha is replaced by 1/alpha.
*                  The negation is applied before the reciprocal.
* @param s2        Pointer to the value to be scaled (dereferenced on the device)
*/
template <typename T>
__global__ void as_kernel(T * s1, T fac2, unsigned int options2, const T * s2)
{
  T alpha = fac2;
  if (options2 & (1 << 0))   // bit 0: flip sign
    alpha = -alpha;
  if (options2 & (1 << 1))   // bit 1: use reciprocal
    alpha = ((T)(1)) / alpha;

  *s1 = *s2 * alpha;
}

/** @brief Launches as_kernel with a single thread to compute s1 = s2 * alpha (alpha optionally negated and/or inverted).
*
* @param s1                Scalar receiving the result
* @param s2                Scalar to be scaled
* @param alpha             Scaling factor; either a host (CPU) scalar or a scalar accepted by detail::cuda_arg()
* @param len_alpha         Forwarded unchanged to detail::make_options()
* @param reciprocal_alpha  Forwarded to detail::make_options(); presumably requests 1/alpha (kernel option bit 1) - confirm in common.hpp
* @param flip_sign_alpha   Forwarded to detail::make_options(); presumably requests -alpha (kernel option bit 0) - confirm in common.hpp
*/
template <typename S1,
          typename S2, typename ScalarType1>
typename viennacl::enable_if< viennacl::is_scalar<S1>::value
                              && viennacl::is_scalar<S2>::value
                              && viennacl::is_any_scalar<ScalarType1>::value
                            >::type
as(S1 & s1,
   S2 const & s2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
{
  typedef typename viennacl::result_of::cpu_value_type<S1>::type        value_type;

  unsigned int options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);

  // Host-side copy of alpha converted to value_type; only filled if alpha is a CPU scalar.
  value_type temporary_alpha = 0;
  if (viennacl::is_cpu_scalar<ScalarType1>::value)
    temporary_alpha = alpha;

  // NOTE(review): detail::arg_reference() presumably picks temporary_alpha for CPU scalars and alpha itself otherwise,
  // which in turn selects the by-value or by-pointer as_kernel overload - confirm in common.hpp.
  as_kernel<<<1, 1>>>(detail::cuda_arg<value_type>(s1),
                      detail::cuda_arg<value_type>(detail::arg_reference(alpha, temporary_alpha)),
                      options_alpha,
                      detail::cuda_arg<value_type>(s2));
  VIENNACL_CUDA_LAST_ERROR_CHECK("as_kernel");
}

//////////////////// asbs ////////////////////////////

/** @brief Kernel computing *s1 = *s2 * alpha + *s3 * beta; alpha and beta are both read through pointers (both on GPU).
*
* For both option fields: bit 0 set -> factor is negated, bit 1 set -> factor is replaced by its reciprocal
* (negation is applied first). No thread indexing is used, so every launched thread performs the same write.
*
* @param s1        Destination of the result
* @param fac2      Pointer to alpha
* @param options2  Option bits for alpha
* @param s2        Pointer to the first summand
* @param fac3      Pointer to beta
* @param options3  Option bits for beta
* @param s3        Pointer to the second summand
*/
template <typename T>
__global__ void asbs_kernel(T * s1,
                            const T * fac2, unsigned int options2, const T * s2,
                            const T * fac3, unsigned int options3, const T * s3)
{
  T alpha = *fac2;
  if (options2 & (1 << 0))
    alpha = -alpha;
  if (options2 & (1 << 1))
    alpha = ((T)(1)) / alpha;

  T beta = *fac3;
  if (options3 & (1 << 0))
    beta = -beta;
  if (options3 & (1 << 1))
    beta = ((T)(1)) / beta;

  *s1 = *s2 * alpha + *s3 * beta;
}

/** @brief Kernel computing *s1 = *s2 * alpha + *s3 * beta; alpha is passed by value (CPU), beta is read through a pointer (GPU).
*
* Option bits as for the other asbs_kernel overloads: bit 0 -> negate, bit 1 -> reciprocal.
*/
template <typename T>
__global__ void asbs_kernel(T * s1,
                            T fac2,         unsigned int options2, const T * s2,
                            const T * fac3, unsigned int options3, const T * s3)
{
  T alpha = fac2;
  if (options2 & (1 << 0))
    alpha = -alpha;
  if (options2 & (1 << 1))
    alpha = ((T)(1)) / alpha;

  T beta = *fac3;
  if (options3 & (1 << 0))
    beta = -beta;
  if (options3 & (1 << 1))
    beta = ((T)(1)) / beta;

  *s1 = *s2 * alpha + *s3 * beta;
}

/** @brief Kernel computing *s1 = *s2 * alpha + *s3 * beta; alpha is read through a pointer (GPU), beta is passed by value (CPU).
*
* Option bits as for the other asbs_kernel overloads: bit 0 -> negate, bit 1 -> reciprocal.
*/
template <typename T>
__global__ void asbs_kernel(T * s1,
                            const T * fac2, unsigned int options2, const T * s2,
                            T fac3,         unsigned int options3, const T * s3)
{
  T alpha = *fac2;
  if (options2 & (1 << 0))
    alpha = -alpha;
  if (options2 & (1 << 1))
    alpha = ((T)(1)) / alpha;

  T beta = fac3;
  if (options3 & (1 << 0))
    beta = -beta;
  if (options3 & (1 << 1))
    beta = ((T)(1)) / beta;

  *s1 = *s2 * alpha + *s3 * beta;
}

/** @brief Kernel computing *s1 = *s2 * alpha + *s3 * beta; alpha and beta are both passed by value (both on CPU).
*
* Option bits as for the other asbs_kernel overloads: bit 0 -> negate, bit 1 -> reciprocal.
*/
template <typename T>
__global__ void asbs_kernel(T * s1,
                            T fac2, unsigned int options2, const T * s2,
                            T fac3, unsigned int options3, const T * s3)
{
  T alpha = fac2;
  if (options2 & (1 << 0))
    alpha = -alpha;
  if (options2 & (1 << 1))
    alpha = ((T)(1)) / alpha;

  T beta = fac3;
  if (options3 & (1 << 0))
    beta = -beta;
  if (options3 & (1 << 1))
    beta = ((T)(1)) / beta;

  *s1 = *s2 * alpha + *s3 * beta;
}


/** @brief Launches asbs_kernel with a single thread to compute s1 = s2 * alpha + s3 * beta.
*
* @param s1                Scalar receiving the result
* @param s2                First summand
* @param alpha             Factor for s2; either a host (CPU) scalar or a scalar accepted by detail::cuda_arg()
* @param len_alpha         Forwarded unchanged to detail::make_options()
* @param reciprocal_alpha  Forwarded to detail::make_options(); presumably requests 1/alpha - confirm in common.hpp
* @param flip_sign_alpha   Forwarded to detail::make_options(); presumably requests -alpha - confirm in common.hpp
* @param s3                Second summand
* @param beta              Factor for s3; either a host (CPU) scalar or a scalar accepted by detail::cuda_arg()
* @param len_beta          Forwarded unchanged to detail::make_options()
* @param reciprocal_beta   Forwarded to detail::make_options(); presumably requests 1/beta - confirm in common.hpp
* @param flip_sign_beta    Forwarded to detail::make_options(); presumably requests -beta - confirm in common.hpp
*/
template <typename S1,
          typename S2, typename ScalarType1,
          typename S3, typename ScalarType2>
typename viennacl::enable_if< viennacl::is_scalar<S1>::value
                              && viennacl::is_scalar<S2>::value
                              && viennacl::is_scalar<S3>::value
                              && viennacl::is_any_scalar<ScalarType1>::value
                              && viennacl::is_any_scalar<ScalarType2>::value
                            >::type
asbs(S1 & s1,
     S2 const & s2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
     S3 const & s3, ScalarType2 const & beta,  vcl_size_t len_beta,  bool reciprocal_beta,  bool flip_sign_beta)
{
  typedef typename viennacl::result_of::cpu_value_type<S1>::type        value_type;

  unsigned int options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
  unsigned int options_beta  = detail::make_options(len_beta,  reciprocal_beta,  flip_sign_beta);

  // Host-side copies converted to value_type; each is only filled if the respective factor is a CPU scalar.
  value_type temporary_alpha = 0;
  if (viennacl::is_cpu_scalar<ScalarType1>::value)
    temporary_alpha = alpha;

  value_type temporary_beta = 0;
  if (viennacl::is_cpu_scalar<ScalarType2>::value)
    temporary_beta = beta;

  // The argument types produced for alpha and beta select one of the four asbs_kernel overloads.
  asbs_kernel<<<1, 1>>>(detail::cuda_arg<value_type>(s1),
                        detail::cuda_arg<value_type>(detail::arg_reference(alpha, temporary_alpha)),
                        options_alpha,
                        detail::cuda_arg<value_type>(s2),
                        detail::cuda_arg<value_type>(detail::arg_reference(beta, temporary_beta)),
                        options_beta,
                        detail::cuda_arg<value_type>(s3) );
  VIENNACL_CUDA_LAST_ERROR_CHECK("asbs_kernel");
}

//////////////////// asbs_s ////////////////////

/** @brief Kernel computing *s1 += *s2 * alpha + *s3 * beta; alpha and beta are both read through pointers (both on GPU).
*
* For both option fields: bit 0 set -> factor is negated, bit 1 set -> factor is replaced by its reciprocal
* (negation is applied first). No thread indexing is used, so the kernel is only meaningful for a single-thread launch:
* with more threads the accumulation into *s1 would race.
*
* @param s1        Value that is read and updated in place
* @param fac2      Pointer to alpha
* @param options2  Option bits for alpha
* @param s2        Pointer to the first summand
* @param fac3      Pointer to beta
* @param options3  Option bits for beta
* @param s3        Pointer to the second summand
*/
template <typename T>
__global__ void asbs_s_kernel(T * s1,
                              const T * fac2, unsigned int options2, const T * s2,
                              const T * fac3, unsigned int options3, const T * s3)
{
  T alpha = *fac2;
  if (options2 & (1 << 0))
    alpha = -alpha;
  if (options2 & (1 << 1))
    alpha = ((T)(1)) / alpha;

  T beta = *fac3;
  if (options3 & (1 << 0))
    beta = -beta;
  if (options3 & (1 << 1))
    beta = ((T)(1)) / beta;

  *s1 += *s2 * alpha + *s3 * beta;
}

/** @brief Kernel computing *s1 += *s2 * alpha + *s3 * beta; alpha is passed by value (CPU), beta is read through a pointer (GPU).
*
* Option bits as for the other asbs_s_kernel overloads: bit 0 -> negate, bit 1 -> reciprocal.
*/
template <typename T>
__global__ void asbs_s_kernel(T * s1,
                              T fac2,         unsigned int options2, const T * s2,
                              const T * fac3, unsigned int options3, const T * s3)
{
  T alpha = fac2;
  if (options2 & (1 << 0))
    alpha = -alpha;
  if (options2 & (1 << 1))
    alpha = ((T)(1)) / alpha;

  T beta = *fac3;
  if (options3 & (1 << 0))
    beta = -beta;
  if (options3 & (1 << 1))
    beta = ((T)(1)) / beta;

  *s1 += *s2 * alpha + *s3 * beta;
}

/** @brief Kernel computing *s1 += *s2 * alpha + *s3 * beta; alpha is read through a pointer (GPU), beta is passed by value (CPU).
*
* Option bits as for the other asbs_s_kernel overloads: bit 0 -> negate, bit 1 -> reciprocal.
*/
template <typename T>
__global__ void asbs_s_kernel(T * s1,
                              const T * fac2, unsigned int options2, const T * s2,
                              T fac3,         unsigned int options3, const T * s3)
{
  T alpha = *fac2;
  if (options2 & (1 << 0))
    alpha = -alpha;
  if (options2 & (1 << 1))
    alpha = ((T)(1)) / alpha;

  T beta = fac3;
  if (options3 & (1 << 0))
    beta = -beta;
  if (options3 & (1 << 1))
    beta = ((T)(1)) / beta;

  *s1 += *s2 * alpha + *s3 * beta;
}

/** @brief Kernel computing *s1 += *s2 * alpha + *s3 * beta; alpha and beta are both passed by value (both on CPU).
*
* Option bits as for the other asbs_s_kernel overloads: bit 0 -> negate, bit 1 -> reciprocal.
*/
template <typename T>
__global__ void asbs_s_kernel(T * s1,
                              T fac2, unsigned int options2, const T * s2,
                              T fac3, unsigned int options3, const T * s3)
{
  T alpha = fac2;
  if (options2 & (1 << 0))
    alpha = -alpha;
  if (options2 & (1 << 1))
    alpha = ((T)(1)) / alpha;

  T beta = fac3;
  if (options3 & (1 << 0))
    beta = -beta;
  if (options3 & (1 << 1))
    beta = ((T)(1)) / beta;

  *s1 += *s2 * alpha + *s3 * beta;
}


/** @brief Launches asbs_s_kernel with a single thread to compute s1 += s2 * alpha + s3 * beta.
*
* Like as() and asbs(), this routine writes nothing to standard output.
*
* @param s1                Scalar that is updated in place
* @param s2                First summand
* @param alpha             Factor for s2; either a host (CPU) scalar or a scalar accepted by detail::cuda_arg()
* @param len_alpha         Forwarded unchanged to detail::make_options()
* @param reciprocal_alpha  Forwarded to detail::make_options(); presumably requests 1/alpha - confirm in common.hpp
* @param flip_sign_alpha   Forwarded to detail::make_options(); presumably requests -alpha - confirm in common.hpp
* @param s3                Second summand
* @param beta              Factor for s3; either a host (CPU) scalar or a scalar accepted by detail::cuda_arg()
* @param len_beta          Forwarded unchanged to detail::make_options()
* @param reciprocal_beta   Forwarded to detail::make_options(); presumably requests 1/beta - confirm in common.hpp
* @param flip_sign_beta    Forwarded to detail::make_options(); presumably requests -beta - confirm in common.hpp
*/
template <typename S1,
          typename S2, typename ScalarType1,
          typename S3, typename ScalarType2>
typename viennacl::enable_if< viennacl::is_scalar<S1>::value
                              && viennacl::is_scalar<S2>::value
                              && viennacl::is_scalar<S3>::value
                              && viennacl::is_any_scalar<ScalarType1>::value
                              && viennacl::is_any_scalar<ScalarType2>::value
                            >::type
asbs_s(S1 & s1,
       S2 const & s2, ScalarType1 const & alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha,
       S3 const & s3, ScalarType2 const & beta,  vcl_size_t len_beta,  bool reciprocal_beta,  bool flip_sign_beta)
{
  typedef typename viennacl::result_of::cpu_value_type<S1>::type        value_type;

  unsigned int options_alpha = detail::make_options(len_alpha, reciprocal_alpha, flip_sign_alpha);
  unsigned int options_beta  = detail::make_options(len_beta,  reciprocal_beta,  flip_sign_beta);

  // Host-side copies converted to value_type; each is only filled if the respective factor is a CPU scalar.
  value_type temporary_alpha = 0;
  if (viennacl::is_cpu_scalar<ScalarType1>::value)
    temporary_alpha = alpha;

  value_type temporary_beta = 0;
  if (viennacl::is_cpu_scalar<ScalarType2>::value)
    temporary_beta = beta;

  // The argument types produced for alpha and beta select one of the four asbs_s_kernel overloads.
  asbs_s_kernel<<<1, 1>>>(detail::cuda_arg<value_type>(s1),
                          detail::cuda_arg<value_type>(detail::arg_reference(alpha, temporary_alpha)),
                          options_alpha,
                          detail::cuda_arg<value_type>(s2),
                          detail::cuda_arg<value_type>(detail::arg_reference(beta, temporary_beta)),
                          options_beta,
                          detail::cuda_arg<value_type>(s3) );
  VIENNACL_CUDA_LAST_ERROR_CHECK("asbs_s_kernel");
}

///////////////// swap //////////////////

/** @brief Kernel exchanging the two values *s1 and *s2 via a temporary.
*
* No thread indexing is used, so the kernel is only meaningful for a single-thread launch.
*
* @param s1  Pointer to the first value (dereferenced on the device)
* @param s2  Pointer to the second value (dereferenced on the device)
*/
template <typename T>
__global__ void scalar_swap_kernel(T * s1, T * s2)
{
  T tmp = *s2;
  *s2 = *s1;
  *s1 = tmp;
}

/** @brief Swaps the contents of two scalars by launching scalar_swap_kernel with a single thread.
*
* Both arguments are passed to the kernel as pointers to the CPU value type of S1.
*
* @param s1  The first scalar
* @param s2  The second scalar
*/
template <typename S1, typename S2>
typename viennacl::enable_if< viennacl::is_scalar<S1>::value
                              && viennacl::is_scalar<S2>::value
                            >::type
swap(S1 & s1, S2 & s2)
{
  typedef typename viennacl::result_of::cpu_value_type<S1>::type        value_type;

  scalar_swap_kernel<<<1, 1>>>(detail::cuda_arg<value_type>(s1), detail::cuda_arg<value_type>(s2));
  // Check the launch like every other launcher in this file does, so a failed launch is not silently ignored.
  VIENNACL_CUDA_LAST_ERROR_CHECK("scalar_swap_kernel");
}



} //namespace cuda
} //namespace linalg
} //namespace viennacl


#endif