// ViennaCL - The Vienna Computing Library, version 1.5.2
00001 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_SCALAR_HPP 00002 #define VIENNACL_LINALG_OPENCL_KERNELS_SCALAR_HPP 00003 00004 #include "viennacl/tools/tools.hpp" 00005 #include "viennacl/ocl/kernel.hpp" 00006 #include "viennacl/ocl/platform.hpp" 00007 #include "viennacl/ocl/utils.hpp" 00008 00011 namespace viennacl 00012 { 00013 namespace linalg 00014 { 00015 namespace opencl 00016 { 00017 namespace kernels 00018 { 00019 00021 00023 enum asbs_scalar_type 00024 { 00025 VIENNACL_ASBS_NONE = 0, // scalar does not exist/contribute 00026 VIENNACL_ASBS_CPU, 00027 VIENNACL_ASBS_GPU 00028 }; 00029 00031 struct asbs_config 00032 { 00033 asbs_config() : with_stride_and_range(true), a(VIENNACL_ASBS_CPU), b(VIENNACL_ASBS_NONE) {} 00034 00035 bool with_stride_and_range; 00036 std::string assign_op; 00037 asbs_scalar_type a; 00038 asbs_scalar_type b; 00039 }; 00040 00041 // just returns the assignment string 00042 template <typename StringType> 00043 void generate_asbs_impl3(StringType & source, char sign_a, char sign_b, asbs_config const & cfg, bool mult_alpha, bool mult_beta) 00044 { 00045 source.append(" *s1 "); source.append(cfg.assign_op); source.append(1, sign_a); source.append(" *s2 "); 00046 if (mult_alpha) 00047 source.append("* alpha "); 00048 else 00049 source.append("/ alpha "); 00050 if (cfg.b != VIENNACL_ASBS_NONE) 00051 { 00052 source.append(1, sign_b); source.append(" *s3 "); 00053 if (mult_beta) 00054 source.append("* beta"); 00055 else 00056 source.append("/ beta"); 00057 } 00058 source.append("; \n"); 00059 } 00060 00061 template <typename StringType> 00062 void generate_asbs_impl2(StringType & source, char sign_a, char sign_b, asbs_config const & cfg) 00063 { 00064 source.append(" if (options2 & (1 << 1)) { \n"); 00065 if (cfg.b != VIENNACL_ASBS_NONE) 00066 { 00067 source.append(" if (options3 & (1 << 1)) \n"); 00068 generate_asbs_impl3(source, sign_a, sign_b, cfg, false, false); 00069 source.append(" else \n"); 00070 generate_asbs_impl3(source, sign_a, sign_b, 
cfg, false, true); 00071 } 00072 else 00073 generate_asbs_impl3(source, sign_a, sign_b, cfg, false, true); 00074 source.append(" } else { \n"); 00075 if (cfg.b != VIENNACL_ASBS_NONE) 00076 { 00077 source.append(" if (options3 & (1 << 1)) \n"); 00078 generate_asbs_impl3(source, sign_a, sign_b, cfg, true, false); 00079 source.append(" else \n"); 00080 generate_asbs_impl3(source, sign_a, sign_b, cfg, true, true); 00081 } 00082 else 00083 generate_asbs_impl3(source, sign_a, sign_b, cfg, true, true); 00084 source.append(" } \n"); 00085 00086 } 00087 00088 template <typename StringType> 00089 void generate_asbs_impl(StringType & source, std::string const & numeric_string, asbs_config const & cfg) 00090 { 00091 source.append("__kernel void as"); 00092 if (cfg.b != VIENNACL_ASBS_NONE) 00093 source.append("bs"); 00094 if (cfg.assign_op != "=") 00095 source.append("_s"); 00096 00097 if (cfg.a == VIENNACL_ASBS_CPU) 00098 source.append("_cpu"); 00099 else if (cfg.a == VIENNACL_ASBS_GPU) 00100 source.append("_gpu"); 00101 00102 if (cfg.b == VIENNACL_ASBS_CPU) 00103 source.append("_cpu"); 00104 else if (cfg.b == VIENNACL_ASBS_GPU) 00105 source.append("_gpu"); 00106 source.append("( \n"); 00107 source.append(" __global "); source.append(numeric_string); source.append(" * s1, \n"); 00108 source.append(" \n"); 00109 if (cfg.a == VIENNACL_ASBS_CPU) 00110 { 00111 source.append(" "); source.append(numeric_string); source.append(" fac2, \n"); 00112 } 00113 else if (cfg.a == VIENNACL_ASBS_GPU) 00114 { 00115 source.append(" __global "); source.append(numeric_string); source.append(" * fac2, \n"); 00116 } 00117 source.append(" unsigned int options2, \n"); // 0: no action, 1: flip sign, 2: take inverse, 3: flip sign and take inverse 00118 source.append(" __global const "); source.append(numeric_string); source.append(" * s2"); 00119 00120 if (cfg.b != VIENNACL_ASBS_NONE) 00121 { 00122 source.append(", \n\n"); 00123 if (cfg.b == VIENNACL_ASBS_CPU) 00124 { 00125 source.append(" "); 
source.append(numeric_string); source.append(" fac3, \n"); 00126 } 00127 else if (cfg.b == VIENNACL_ASBS_GPU) 00128 { 00129 source.append(" __global "); source.append(numeric_string); source.append(" * fac3, \n"); 00130 } 00131 source.append(" unsigned int options3, \n"); // 0: no action, 1: flip sign, 2: take inverse, 3: flip sign and take inverse 00132 source.append(" __global const "); source.append(numeric_string); source.append(" * s3"); 00133 } 00134 source.append(") \n{ \n"); 00135 00136 if (cfg.a == VIENNACL_ASBS_CPU) 00137 { 00138 source.append(" "); source.append(numeric_string); source.append(" alpha = fac2; \n"); 00139 } 00140 else if (cfg.a == VIENNACL_ASBS_GPU) 00141 { 00142 source.append(" "); source.append(numeric_string); source.append(" alpha = fac2[0]; \n"); 00143 } 00144 source.append(" \n"); 00145 00146 if (cfg.b == VIENNACL_ASBS_CPU) 00147 { 00148 source.append(" "); source.append(numeric_string); source.append(" beta = fac3; \n"); 00149 } 00150 else if (cfg.b == VIENNACL_ASBS_GPU) 00151 { 00152 source.append(" "); source.append(numeric_string); source.append(" beta = fac3[0]; \n"); 00153 } 00154 00155 source.append(" if (options2 & (1 << 0)) { \n"); 00156 if (cfg.b != VIENNACL_ASBS_NONE) 00157 { 00158 source.append(" if (options3 & (1 << 0)) { \n"); 00159 generate_asbs_impl2(source, '-', '-', cfg); 00160 source.append(" } else { \n"); 00161 generate_asbs_impl2(source, '-', '+', cfg); 00162 source.append(" } \n"); 00163 } 00164 else 00165 generate_asbs_impl2(source, '-', '+', cfg); 00166 source.append(" } else { \n"); 00167 if (cfg.b != VIENNACL_ASBS_NONE) 00168 { 00169 source.append(" if (options3 & (1 << 0)) { \n"); 00170 generate_asbs_impl2(source, '+', '-', cfg); 00171 source.append(" } else { \n"); 00172 generate_asbs_impl2(source, '+', '+', cfg); 00173 source.append(" } \n"); 00174 } 00175 else 00176 generate_asbs_impl2(source, '+', '+', cfg); 00177 00178 source.append(" } \n"); 00179 source.append("} \n"); 00180 } 00181 00182 template 
<typename StringType> 00183 void generate_asbs(StringType & source, std::string const & numeric_string) 00184 { 00185 asbs_config cfg; 00186 cfg.assign_op = "="; 00187 cfg.with_stride_and_range = true; 00188 00189 // as 00190 cfg.b = VIENNACL_ASBS_NONE; cfg.a = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg); 00191 cfg.b = VIENNACL_ASBS_NONE; cfg.a = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg); 00192 00193 // asbs 00194 cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg); 00195 cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg); 00196 cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg); 00197 cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg); 00198 00199 // asbs 00200 cfg.assign_op = "+="; 00201 00202 cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg); 00203 cfg.a = VIENNACL_ASBS_CPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg); 00204 cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_CPU; generate_asbs_impl(source, numeric_string, cfg); 00205 cfg.a = VIENNACL_ASBS_GPU; cfg.b = VIENNACL_ASBS_GPU; generate_asbs_impl(source, numeric_string, cfg); 00206 } 00207 00208 template <typename StringType> 00209 void generate_scalar_swap(StringType & source, std::string const & numeric_string) 00210 { 00211 source.append("__kernel void swap( \n"); 00212 source.append(" __global "); source.append(numeric_string); source.append(" * s1, \n"); 00213 source.append(" __global "); source.append(numeric_string); source.append(" * s2) \n"); 00214 source.append("{ \n"); 00215 source.append(" "); source.append(numeric_string); source.append(" tmp = *s2; \n"); 00216 source.append(" *s2 = *s1; \n"); 00217 source.append(" *s1 = tmp; \n"); 00218 source.append("} \n"); 
00219 } 00220 00222 00223 // main kernel class 00225 template <class TYPE> 00226 struct scalar 00227 { 00228 static std::string program_name() 00229 { 00230 return viennacl::ocl::type_to_string<TYPE>::apply() + "_scalar"; 00231 } 00232 00233 static void init(viennacl::ocl::context & ctx) 00234 { 00235 viennacl::ocl::DOUBLE_PRECISION_CHECKER<TYPE>::apply(ctx); 00236 std::string numeric_string = viennacl::ocl::type_to_string<TYPE>::apply(); 00237 00238 static std::map<cl_context, bool> init_done; 00239 if (!init_done[ctx.handle().get()]) 00240 { 00241 std::string source; 00242 source.reserve(8192); 00243 00244 viennacl::ocl::append_double_precision_pragma<TYPE>(ctx, source); 00245 00246 // fully parametrized kernels: 00247 generate_asbs(source, numeric_string); 00248 generate_scalar_swap(source, numeric_string); 00249 00250 00251 std::string prog_name = program_name(); 00252 #ifdef VIENNACL_BUILD_INFO 00253 std::cout << "Creating program " << prog_name << std::endl; 00254 #endif 00255 ctx.add_program(source, prog_name); 00256 init_done[ctx.handle().get()] = true; 00257 } //if 00258 } //init 00259 }; 00260 00261 } // namespace kernels 00262 } // namespace opencl 00263 } // namespace linalg 00264 } // namespace viennacl 00265 #endif 00266