ViennaCL - The Vienna Computing Library
1.5.2
|
00001 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_FFT_HPP 00002 #define VIENNACL_LINALG_OPENCL_KERNELS_FFT_HPP 00003 00004 #include "viennacl/tools/tools.hpp" 00005 #include "viennacl/ocl/kernel.hpp" 00006 #include "viennacl/ocl/platform.hpp" 00007 #include "viennacl/ocl/utils.hpp" 00008 00011 namespace viennacl 00012 { 00013 namespace linalg 00014 { 00015 namespace opencl 00016 { 00017 namespace kernels 00018 { 00019 00021 00022 00023 // Postprocessing phase of Bluestein algorithm 00024 template <typename StringType> 00025 void generate_fft_bluestein_post(StringType & source, std::string const & numeric_string) 00026 { 00027 source.append("__kernel void bluestein_post(__global "); source.append(numeric_string); source.append("2 *Z, \n"); 00028 source.append(" __global "); source.append(numeric_string); source.append("2 *out, \n"); 00029 source.append(" unsigned int size) \n"); 00030 source.append("{ \n"); 00031 source.append(" unsigned int glb_id = get_global_id(0); \n"); 00032 source.append(" unsigned int glb_sz = get_global_size(0); \n"); 00033 00034 source.append(" unsigned int double_size = size << 1; \n"); 00035 source.append(" "); source.append(numeric_string); source.append(" sn_a, cs_a; \n"); 00036 source.append(" const "); source.append(numeric_string); source.append(" NUM_PI = 3.14159265358979323846; \n"); 00037 00038 source.append(" for(unsigned int i = glb_id; i < size; i += glb_sz) { \n"); 00039 source.append(" unsigned int rm = i * i % (double_size); \n"); 00040 source.append(" "); source.append(numeric_string); source.append(" angle = ("); source.append(numeric_string); source.append(")rm / size * (-NUM_PI); \n"); 00041 00042 source.append(" sn_a = sincos(angle, &cs_a); \n"); 00043 00044 source.append(" "); source.append(numeric_string); source.append("2 b_i = ("); source.append(numeric_string); source.append("2)(cs_a, sn_a); \n"); 00045 source.append(" out[i] = ("); source.append(numeric_string); source.append("2)(Z[i].x * b_i.x - Z[i].y * b_i.y, Z[i].x * b_i.y + Z[i].y * b_i.x); \n"); 00046 source.append(" } \n"); 00047 source.append("} \n"); 00048 } 00049 00050 // Preprocessing phase of Bluestein algorithm 00051 template <typename StringType> 00052 void generate_fft_bluestein_pre(StringType & source, std::string const & numeric_string) 00053 { 00054 source.append("__kernel void bluestein_pre(__global "); source.append(numeric_string); source.append("2 *input, \n"); 00055 source.append(" __global "); source.append(numeric_string); source.append("2 *A, \n"); 00056 source.append(" __global "); source.append(numeric_string); source.append("2 *B, \n"); 00057 source.append(" unsigned int size, \n"); 00058 source.append(" unsigned int ext_size \n"); 00059 source.append(" ) { \n"); 00060 source.append(" unsigned int glb_id = get_global_id(0); \n"); 00061 source.append(" unsigned int glb_sz = get_global_size(0); \n"); 00062 00063 source.append(" unsigned int double_size = size << 1; \n"); 00064 00065 source.append(" "); source.append(numeric_string); source.append(" sn_a, cs_a; \n"); 00066 source.append(" const "); source.append(numeric_string); source.append(" NUM_PI = 3.14159265358979323846; \n"); 00067 00068 source.append(" for(unsigned int i = glb_id; i < size; i += glb_sz) { \n"); 00069 source.append(" unsigned int rm = i * i % (double_size); \n"); 00070 source.append(" "); source.append(numeric_string); source.append(" angle = ("); source.append(numeric_string); source.append(")rm / size * NUM_PI; \n"); 00071 00072 source.append(" sn_a = sincos(-angle, &cs_a); \n"); 00073 00074 source.append(" "); source.append(numeric_string); source.append("2 a_i = ("); source.append(numeric_string); source.append("2)(cs_a, sn_a); \n"); 00075 source.append(" "); source.append(numeric_string); source.append("2 b_i = ("); source.append(numeric_string); source.append("2)(cs_a, -sn_a); \n"); 00076 00077 source.append(" A[i] = ("); source.append(numeric_string); source.append("2)(input[i].x * a_i.x - input[i].y * a_i.y, input[i].x * a_i.y + input[i].y * a_i.x); \n"); 00078 source.append(" B[i] = b_i; \n"); 00079 00080 // very bad instruction, to be fixed 00081 source.append(" if(i) \n"); 00082 source.append(" B[ext_size - i] = b_i; \n"); 00083 source.append(" } \n"); 00084 source.append("} \n"); 00085 } 00086 00088 template <typename StringType> 00089 void generate_fft_complex_to_real(StringType & source, std::string const & numeric_string) 00090 { 00091 source.append("__kernel void complex_to_real(__global "); source.append(numeric_string); source.append("2 *in, \n"); 00092 source.append(" __global "); source.append(numeric_string); source.append(" *out, \n"); 00093 source.append(" unsigned int size) { \n"); 00094 source.append(" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) \n"); 00095 source.append(" out[i] = in[i].x; \n"); 00096 source.append("} \n"); 00097 } 00098 00100 template <typename StringType> 00101 void generate_fft_div_vec_scalar(StringType & source, std::string const & numeric_string) 00102 { 00103 source.append("__kernel void fft_div_vec_scalar(__global "); source.append(numeric_string); source.append("2 *input1, \n"); 00104 source.append(" unsigned int size, \n"); 00105 source.append(" "); source.append(numeric_string); source.append(" factor) { \n"); 00106 source.append(" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) \n"); 00107 source.append(" input1[i] /= factor; \n"); 00108 source.append("} \n"); 00109 } 00110 00112 template <typename StringType> 00113 void generate_fft_mult_vec(StringType & source, std::string const & numeric_string) 00114 { 00115 source.append("__kernel void fft_mult_vec(__global const "); source.append(numeric_string); source.append("2 *input1, \n"); 00116 source.append(" __global const "); source.append(numeric_string); source.append("2 *input2, \n"); 00117 source.append(" __global "); source.append(numeric_string); source.append("2 *output, \n"); 00118 source.append(" unsigned int size) { \n"); 00119 source.append(" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n"); 00120 source.append(" "); source.append(numeric_string); source.append("2 in1 = input1[i]; \n"); 00121 source.append(" "); source.append(numeric_string); source.append("2 in2 = input2[i]; \n"); 00122 00123 source.append(" output[i] = ("); source.append(numeric_string); source.append("2)(in1.x * in2.x - in1.y * in2.y, in1.x * in2.y + in1.y * in2.x); \n"); 00124 source.append(" } \n"); 00125 source.append("} \n"); 00126 } 00127 00129 template <typename StringType> 00130 void generate_fft_real_to_complex(StringType & source, std::string const & numeric_string) 00131 { 00132 source.append("__kernel void real_to_complex(__global "); source.append(numeric_string); source.append(" *in, \n"); 00133 source.append(" __global "); source.append(numeric_string); source.append("2 *out, \n"); 00134 source.append(" unsigned int size) { \n"); 00135 source.append(" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n"); 00136 source.append(" "); source.append(numeric_string); source.append("2 val = 0; \n"); 00137 source.append(" val.x = in[i]; \n"); 00138 source.append(" out[i] = val; \n"); 00139 source.append(" } \n"); 00140 source.append("} \n"); 00141 } 00142 00144 template <typename StringType> 00145 void generate_fft_reverse_inplace(StringType & source, std::string const & numeric_string) 00146 { 00147 source.append("__kernel void reverse_inplace(__global "); source.append(numeric_string); source.append(" *vec, uint size) { \n"); 00148 source.append(" for(uint i = get_global_id(0); i < (size >> 1); i+=get_global_size(0)) { \n"); 00149 source.append(" "); source.append(numeric_string); source.append(" val1 = vec[i]; \n"); 00150 source.append(" "); source.append(numeric_string); source.append(" val2 = vec[size - i - 1]; \n"); 00151 00152 source.append(" vec[i] = val2; \n"); 00153 source.append(" vec[size - i - 1] = val1; \n"); 00154 source.append(" } \n"); 00155 source.append("} \n"); 00156 } 00157 00159 template <typename StringType> 00160 void generate_fft_transpose(StringType & source, std::string const & numeric_string) 00161 { 00162 source.append("__kernel void transpose(__global "); source.append(numeric_string); source.append("2 *input, \n"); 00163 source.append(" __global "); source.append(numeric_string); source.append("2 *output, \n"); 00164 source.append(" unsigned int row_num, \n"); 00165 source.append(" unsigned int col_num) { \n"); 00166 source.append(" unsigned int size = row_num * col_num; \n"); 00167 source.append(" for(unsigned int i = get_global_id(0); i < size; i+= get_global_size(0)) { \n"); 00168 source.append(" unsigned int row = i / col_num; \n"); 00169 source.append(" unsigned int col = i - row*col_num; \n"); 00170 00171 source.append(" unsigned int new_pos = col * row_num + row; \n"); 00172 00173 source.append(" output[new_pos] = input[i]; \n"); 00174 source.append(" } \n"); 00175 source.append("} \n"); 00176 } 00177 00179 template <typename StringType> 00180 void generate_fft_transpose_inplace(StringType & source, std::string const & numeric_string) 00181 { 00182 source.append("__kernel void transpose_inplace(__global "); source.append(numeric_string); source.append("2* input, \n"); 00183 source.append(" unsigned int row_num, \n"); 00184 source.append(" unsigned int col_num) { \n"); 00185 source.append(" unsigned int size = row_num * col_num; \n"); 00186 source.append(" for(unsigned int i = get_global_id(0); i < size; i+= get_global_size(0)) { \n"); 00187 source.append(" unsigned int row = i / col_num; \n"); 00188 source.append(" unsigned int col = i - row*col_num; \n"); 00189 00190 source.append(" unsigned int new_pos = col * row_num + row; \n"); 00191 00192 source.append(" if(i < new_pos) { \n"); 00193 source.append(" "); source.append(numeric_string); source.append("2 val = input[i]; \n"); 00194 source.append(" input[i] = input[new_pos]; \n"); 00195 source.append(" input[new_pos] = val; \n"); 00196 source.append(" } \n"); 00197 source.append(" } \n"); 00198 source.append("} \n"); 00199 } 00200 00202 template <typename StringType> 00203 void generate_fft_vandermonde_prod(StringType & source, std::string const & numeric_string) 00204 { 00205 source.append("__kernel void vandermonde_prod(__global "); source.append(numeric_string); source.append(" *vander, \n"); 00206 source.append(" __global "); source.append(numeric_string); source.append(" *vector, \n"); 00207 source.append(" __global "); source.append(numeric_string); source.append(" *result, \n"); 00208 source.append(" uint size) { \n"); 00209 source.append(" for(uint i = get_global_id(0); i < size; i+= get_global_size(0)) { \n"); 00210 source.append(" "); source.append(numeric_string); source.append(" mul = vander[i]; \n"); 00211 source.append(" "); source.append(numeric_string); source.append(" pwr = 1; \n"); 00212 source.append(" "); source.append(numeric_string); source.append(" val = 0; \n"); 00213 00214 source.append(" for(uint j = 0; j < size; j++) { \n"); 00215 source.append(" val = val + pwr * vector[j]; \n"); 00216 source.append(" pwr *= mul; \n"); 00217 source.append(" } \n"); 00218 00219 source.append(" result[i] = val; \n"); 00220 source.append(" } \n"); 00221 source.append("} \n"); 00222 } 00223 00225 template <typename StringType> 00226 void generate_fft_zero2(StringType & source, std::string const & numeric_string) 00227 { 00228 source.append("__kernel void zero2(__global "); source.append(numeric_string); source.append("2 *input1, \n"); 00229 source.append(" __global "); source.append(numeric_string); source.append("2 *input2, \n"); 00230 source.append(" unsigned int size) { \n"); 00231 source.append(" for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n"); 00232 source.append(" input1[i] = 0; \n"); 00233 source.append(" input2[i] = 0; \n"); 00234 source.append(" } \n"); 00235 source.append("} \n"); 00236 } 00237 00239 00240 // main kernel class 00242 template <class NumericT> 00243 struct fft 00244 { 00245 static std::string program_name() 00246 { 00247 return viennacl::ocl::type_to_string<NumericT>::apply() + "_fft"; 00248 } 00249 00250 static void init(viennacl::ocl::context & ctx) 00251 { 00252 viennacl::ocl::DOUBLE_PRECISION_CHECKER<NumericT>::apply(ctx); 00253 std::string numeric_string = viennacl::ocl::type_to_string<NumericT>::apply(); 00254 00255 static std::map<cl_context, bool> init_done; 00256 if (!init_done[ctx.handle().get()]) 00257 { 00258 std::string source; 00259 source.reserve(8192); 00260 00261 viennacl::ocl::append_double_precision_pragma<NumericT>(ctx, source); 00262 00263 // unary operations 00264 if (numeric_string == "float" || numeric_string == "double") 00265 { 00266 generate_fft_bluestein_post(source, numeric_string); 00267 generate_fft_bluestein_pre(source, numeric_string); 00268 generate_fft_complex_to_real(source, numeric_string); 00269 generate_fft_div_vec_scalar(source, numeric_string); 00270 generate_fft_mult_vec(source, numeric_string); 00271 generate_fft_real_to_complex(source, numeric_string); 00272 generate_fft_reverse_inplace(source, numeric_string); 00273 generate_fft_transpose(source, numeric_string); 00274 generate_fft_transpose_inplace(source, numeric_string); 00275 generate_fft_vandermonde_prod(source, numeric_string); 00276 generate_fft_zero2(source, numeric_string); 00277 } 00278 00279 std::string prog_name = program_name(); 00280 #ifdef VIENNACL_BUILD_INFO 00281 std::cout << "Creating program " << prog_name << std::endl; 00282 #endif 00283 ctx.add_program(source, prog_name); 00284 init_done[ctx.handle().get()] = true; 00285 } //if 00286 } //init 00287 }; 00288 00289 } // namespace kernels 00290 } // namespace opencl 00291 } // namespace linalg 00292 } // namespace viennacl 00293 #endif 00294