ViennaCL - The Vienna Computing Library
1.5.2
|
00001 #ifndef VIENNACL_GENERATOR_GENERATE_HPP 00002 #define VIENNACL_GENERATOR_GENERATE_HPP 00003 00004 /* ========================================================================= 00005 Copyright (c) 2010-2014, Institute for Microelectronics, 00006 Institute for Analysis and Scientific Computing, 00007 TU Wien. 00008 Portions of this software are copyright by UChicago Argonne, LLC. 00009 00010 ----------------- 00011 ViennaCL - The Vienna Computing Library 00012 ----------------- 00013 00014 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 00015 00016 (A list of authors and contributors can be found in the PDF manual) 00017 00018 License: MIT (X11), see file LICENSE in the base directory 00019 ============================================================================= */ 00020 00021 00026 #include <cstring> 00027 #include <vector> 00028 #include <typeinfo> 00029 00030 #include "viennacl/scheduler/forwards.h" 00031 #include "viennacl/generator/forwards.h" 00032 00033 #include "viennacl/generator/profiles.hpp" 00034 #include "viennacl/generator/statement_representation_functor.hpp" 00035 #include "viennacl/generator/set_arguments_functor.hpp" 00036 #include "viennacl/generator/map_functor.hpp" 00037 00038 #include "viennacl/tools/tools.hpp" 00039 00040 namespace viennacl{ 00041 00042 namespace generator{ 00043 00047 class code_generator{ 00048 public: 00050 typedef std::pair<expression_type, vcl_size_t> forced_profile_key_type; 00051 private: 00052 typedef std::pair<expression_descriptor, generator::profile_base::statements_type> representation_node_type; 00053 typedef std::vector<representation_node_type> statements_type; 00054 typedef std::map<forced_profile_key_type, tools::shared_ptr<profile_base> > forced_profiles_type; 00055 00060 static bool is_flow_transposed(viennacl::scheduler::statement const & statement, viennacl::scheduler::statement_node const & root_node){ 00061 viennacl::scheduler::statement::container_type const & expr = statement.array(); 00062 if(root_node.op.type==viennacl::scheduler::OPERATION_UNARY_TRANS_TYPE) 00063 return root_node.lhs.subtype==viennacl::scheduler::DENSE_ROW_MATRIX_TYPE; 00064 else{ 00065 bool res = root_node.lhs.subtype==viennacl::scheduler::DENSE_COL_MATRIX_TYPE || root_node.rhs.subtype==viennacl::scheduler::DENSE_COL_MATRIX_TYPE; 00066 if(root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) 00067 res = res || is_lhs_flow_transposed(statement, expr[root_node.lhs.node_index]); 00068 if(root_node.rhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) 00069 res = res || is_lhs_flow_transposed(statement, expr[root_node.rhs.node_index]); 00070 return res; 00071 } 00072 } 00073 00075 static bool is_lhs_flow_transposed(viennacl::scheduler::statement const & statement, viennacl::scheduler::statement_node const & root_node){ 00076 scheduler::statement::container_type const & expr = statement.array(); 00077 if(root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) 00078 return is_flow_transposed(statement, expr[root_node.lhs.node_index]); 00079 else 00080 return root_node.lhs.subtype==viennacl::scheduler::DENSE_COL_MATRIX_TYPE; 00081 } 00082 00084 static bool is_rhs_flow_transposed(viennacl::scheduler::statement const & statement, viennacl::scheduler::statement_node const & root_node){ 00085 viennacl::scheduler::statement::container_type const & expr = statement.array(); 00086 if(root_node.rhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) 00087 return is_flow_transposed(statement, expr[root_node.rhs.node_index]); 00088 else 00089 return root_node.rhs.subtype==viennacl::scheduler::DENSE_COL_MATRIX_TYPE; 00090 } 00091 00093 static void fill_expression_descriptor_scalar(viennacl::scheduler::statement const & statement, viennacl::scheduler::statement_node const & root_node, expression_descriptor & descriptor){ 00094 viennacl::scheduler::statement::container_type const & expr = statement.array(); 00095 bool is_invalid = (root_node.op.type == viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE) 00096 || (descriptor.type_family==SCALAR_REDUCE_FAMILY && root_node.op.type == viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE); 00097 if(is_invalid){ 00098 descriptor.type_family = INVALID_EXPRESSION_FAMILY; 00099 descriptor.type = INVALID_EXPRESSION_TYPE; 00100 } 00101 else if(root_node.op.type==viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE){ 00102 descriptor.type_family = SCALAR_REDUCE_FAMILY; 00103 descriptor.type = SCALAR_REDUCE_TYPE; 00104 } 00105 if(descriptor.type_family!=INVALID_EXPRESSION_FAMILY && root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) 00106 fill_expression_descriptor_scalar(statement, expr[root_node.lhs.node_index],descriptor); 00107 if(descriptor.type_family!=INVALID_EXPRESSION_FAMILY && root_node.rhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) 00108 fill_expression_descriptor_scalar(statement, expr[root_node.rhs.node_index],descriptor); 00109 } 00110 00112 static void fill_expression_descriptor_vector(viennacl::scheduler::statement const & statement, viennacl::scheduler::statement_node const & root_node, expression_descriptor & descriptor){ 00113 viennacl::scheduler::statement::container_type const & expr = statement.array(); 00114 bool is_invalid = (root_node.op.type == viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE) 00115 || (root_node.op.type == viennacl::scheduler::OPERATION_BINARY_MAT_MAT_PROD_TYPE) 00116 || (descriptor.type_family==VECTOR_REDUCE_FAMILY && root_node.op.type == viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE); 00117 if(is_invalid){ 00118 descriptor.type_family=INVALID_EXPRESSION_FAMILY; 00119 descriptor.type=INVALID_EXPRESSION_TYPE; 00120 } 00121 else if(root_node.op.type==viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE){ 00122 descriptor.type_family=VECTOR_REDUCE_FAMILY; 00123 if(is_lhs_flow_transposed(statement,root_node)) 00124 descriptor.type=VECTOR_REDUCE_Tx_TYPE; 00125 else 00126 descriptor.type=VECTOR_REDUCE_Nx_TYPE; 00127 } 00128 if(descriptor.type_family!=INVALID_EXPRESSION_FAMILY && root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) 00129 fill_expression_descriptor_vector(statement, expr[root_node.lhs.node_index],descriptor); 00130 if(descriptor.type_family!=INVALID_EXPRESSION_FAMILY && root_node.rhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) 00131 fill_expression_descriptor_vector(statement, expr[root_node.rhs.node_index],descriptor); 00132 } 00133 00135 static void fill_expression_descriptor_matrix(viennacl::scheduler::statement const & statement, viennacl::scheduler::statement_node const & root_node, expression_descriptor & descriptor){ 00136 viennacl::scheduler::statement::container_type const & expr = statement.array(); 00137 bool is_invalid = (root_node.op.type == viennacl::scheduler::OPERATION_BINARY_INNER_PROD_TYPE) 00138 || (root_node.op.type == viennacl::scheduler::OPERATION_BINARY_MAT_VEC_PROD_TYPE) 00139 || (descriptor.type_family==MATRIX_PRODUCT_FAMILY && root_node.op.type == viennacl::scheduler::OPERATION_BINARY_MAT_MAT_PROD_TYPE); 00140 if(is_invalid){ 00141 descriptor.type_family=INVALID_EXPRESSION_FAMILY; 00142 descriptor.type=INVALID_EXPRESSION_TYPE; 00143 } 00144 else if(root_node.op.type==viennacl::scheduler::OPERATION_BINARY_MAT_MAT_PROD_TYPE){ 00145 descriptor.type_family=MATRIX_PRODUCT_FAMILY; 00146 bool lhs_trans = is_lhs_flow_transposed(statement,root_node); 00147 bool rhs_trans = is_rhs_flow_transposed(statement,root_node); 00148 if(!lhs_trans && !rhs_trans) 00149 descriptor.type=MATRIX_PRODUCT_NN_TYPE; 00150 else if(lhs_trans && !rhs_trans) 00151 descriptor.type=MATRIX_PRODUCT_TN_TYPE; 00152 else if(!lhs_trans && rhs_trans) 00153 descriptor.type=MATRIX_PRODUCT_NT_TYPE; 00154 else if(lhs_trans && rhs_trans) 00155 descriptor.type=MATRIX_PRODUCT_TT_TYPE; 00156 00157 } 00158 if(descriptor.type_family!=INVALID_EXPRESSION_FAMILY && root_node.lhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) 00159 fill_expression_descriptor_matrix(statement, expr[root_node.lhs.node_index],descriptor); 00160 if(descriptor.type_family!=INVALID_EXPRESSION_FAMILY && root_node.rhs.type_family==viennacl::scheduler::COMPOSITE_OPERATION_FAMILY) 00161 fill_expression_descriptor_matrix(statement, expr[root_node.rhs.node_index],descriptor); 00162 } 00163 00165 void fill_descriptor(viennacl::scheduler::statement const & statement, viennacl::scheduler::statement_node const & root_node, expression_descriptor & descriptor){ 00166 viennacl::scheduler::statement_node_type_family lhs_family = root_node.lhs.type_family; 00167 descriptor.scalartype_size = utils::call_on_element(root_node.lhs, utils::scalartype_size_fun()); 00168 if(lhs_family==viennacl::scheduler::VECTOR_TYPE_FAMILY){ 00169 descriptor.type_family = VECTOR_SAXPY_FAMILY; 00170 descriptor.type = VECTOR_SAXPY_TYPE; 00171 fill_expression_descriptor_vector(statement,root_node,descriptor); 00172 } 00173 else if(lhs_family==viennacl::scheduler::MATRIX_TYPE_FAMILY){ 00174 descriptor.type_family = MATRIX_SAXPY_FAMILY; 00175 descriptor.type = MATRIX_SAXPY_TYPE; 00176 fill_expression_descriptor_matrix(statement,root_node,descriptor); 00177 } 00178 else if(lhs_family==viennacl::scheduler::SCALAR_TYPE_FAMILY){ 00179 descriptor.type_family = SCALAR_SAXPY_FAMILY; 00180 descriptor.type = SCALAR_SAXPY_TYPE; 00181 fill_expression_descriptor_scalar(statement,root_node,descriptor); 00182 } 00183 } 00184 00189 template<class StatementsType> 00190 void set_expression_arguments(profile_base const & profile, unsigned int device_offset, StatementsType const & statements, unsigned int & kernel_id, viennacl::ocl::program & p, std::list<viennacl::ocl::kernel *> & kernels) const { 00191 for(vcl_size_t i = 0 ; i < profile.num_kernels() ; ++i){ 00192 //add kernel name 00193 char str[32]; 00194 std::sprintf(str,"kernel_%d_%d",device_offset,kernel_id); 00195 viennacl::ocl::kernel & kernel = p.get_kernel(str); 00196 kernels.push_back(&kernel); 00197 unsigned int current_arg = 0; 00198 //Configure ND Range and enqueue arguments 00199 profile.configure_range_enqueue_arguments(i, statements, kernel, current_arg); 00200 std::set<void *> memory; 00201 for(typename StatementsType::const_iterator it = statements.begin() ; it != statements.end() ; ++it){ 00202 detail::traverse(it->first, it->second, detail::set_arguments_functor(memory,current_arg,kernel)); 00203 } 00204 ++kernel_id; 00205 } 00206 } 00207 00209 profile_base const & get_profile(viennacl::ocl::device const & device, expression_descriptor const & descriptor) const { 00210 forced_profiles_type::const_iterator it = forced_profiles_.find(std::make_pair(descriptor.type, descriptor.scalartype_size)); 00211 if(it != forced_profiles_.end()) 00212 return *it->second; 00213 return *profiles::get(device,descriptor); 00214 } 00215 00216 public: 00217 00219 code_generator(viennacl::ocl::context const & ctx = viennacl::ocl::current_context()) : ctx_(ctx){ 00220 statements_.reserve(16); 00221 } 00222 00224 template<class T> 00225 void force_profile(forced_profile_key_type key, T const & t){ 00226 forced_profiles_.insert(std::pair<forced_profile_key_type, tools::shared_ptr<profile_base> >(key, tools::shared_ptr<profile_base>(new T(t)))); 00227 } 00228 00232 bool add(scheduler::statement const & statement, scheduler::statement_node const & root_node) { 00233 expression_descriptor descriptor; 00234 fill_descriptor(statement, root_node, descriptor); 00235 if(descriptor.type_family==INVALID_EXPRESSION_FAMILY) 00236 return false; 00237 if(statements_.empty()) 00238 statements_.push_back(std::make_pair(descriptor,profile_base::statements_type(1,std::make_pair(statement, root_node)))); 00239 else 00240 if(statements_.back().first == descriptor) 00241 statements_.back().second.push_back(std::make_pair(statement, root_node)); 00242 else 00243 statements_.push_back(std::make_pair(descriptor,profile_base::statements_type(1,std::make_pair(statement, root_node)))); 00244 return true; 00245 } 00246 00248 void configure_program(viennacl::ocl::program & p, std::list<viennacl::ocl::kernel *> & kernels) const { 00249 unsigned int kernel_id = 0; 00250 std::vector<viennacl::ocl::device>::const_iterator found = std::find(ctx_.devices().begin(),ctx_.devices().end(),ctx_.current_device()); 00251 for(statements_type::const_iterator it = statements_.begin() ; it != statements_.end() ; ++it) 00252 set_expression_arguments(get_profile(ctx_.current_device(), it->first), static_cast<unsigned int>(std::distance(ctx_.devices().begin(), found)), it->second, kernel_id, p, kernels); 00253 } 00254 00256 void make_program_name(char * program_name) const { 00257 unsigned int current_arg = 0; 00258 void* memory[64] = {NULL}; 00259 for(statements_type::const_iterator it = statements_.begin() ; it != statements_.end() ; ++it){ 00260 for(profile_base::statements_type::const_iterator iit = it->second.begin() ; iit != it->second.end() ; ++iit){ 00261 detail::traverse(iit->first, iit->second, detail::statement_representation_functor(memory, current_arg, program_name)); 00262 } 00263 } 00264 *program_name='\0'; 00265 } 00266 00268 std::string make_opencl_program_string() const { 00269 utils::kernel_generation_stream stream; 00270 00271 //Headers generation 00272 stream << "#if defined(cl_khr_fp64)\n"; 00273 stream << "# pragma OPENCL EXTENSION cl_khr_fp64: enable\n"; 00274 stream << "#elif defined(cl_amd_fp64)\n"; 00275 stream << "# pragma OPENCL EXTENSION cl_amd_fp64: enable\n"; 00276 stream << "#endif\n"; 00277 stream << std::endl; 00278 00279 vcl_size_t device_offset =0; 00280 for(std::vector<viennacl::ocl::device>::const_iterator it = ctx_.devices().begin() ; it != ctx_.devices().end() ; ++it) 00281 for(statements_type::const_iterator iit = statements_.begin() ; iit != statements_.end() ; ++iit) 00282 get_profile(*it,iit->first)(stream,device_offset++,iit->second); 00283 00284 return stream.str(); 00285 } 00286 00291 std::string make_cuda_program_string() const { 00292 //Creates OpenCL string with #ifdef and attributes 00293 utils::kernel_generation_stream stream; 00294 vcl_size_t device_offset =0; 00295 for(std::vector<viennacl::ocl::device>::const_iterator it = ctx_.devices().begin() ; it != ctx_.devices().end() ; ++it) 00296 for(statements_type::const_iterator iit = statements_.begin() ; iit != statements_.end() ; ++iit) 00297 get_profile(*it,iit->first)(stream,device_offset++,iit->second); 00298 std::string res = stream.str(); 00299 00300 viennacl::tools::find_and_replace(res,"__attribute__","//__attribute__"); 00301 00302 //Pointer 00303 viennacl::tools::find_and_replace(res, "__global float*", "float*"); 00304 viennacl::tools::find_and_replace(res, "__local float*", "float*"); 00305 00306 viennacl::tools::find_and_replace(res, "__global double*", "double*"); 00307 viennacl::tools::find_and_replace(res, "__local double*", "double*"); 00308 00309 //Qualifiers 00310 viennacl::tools::find_and_replace(res,"__global","__device__"); 00311 viennacl::tools::find_and_replace(res,"__kernel","__global__"); 00312 viennacl::tools::find_and_replace(res,"__constant","__constant__"); 00313 viennacl::tools::find_and_replace(res,"__local","__shared__"); 00314 00315 //Indexing 00316 viennacl::tools::find_and_replace(res,"get_num_groups(0)","gridDim.x"); 00317 viennacl::tools::find_and_replace(res,"get_num_groups(1)","gridDim.y"); 00318 00319 viennacl::tools::find_and_replace(res,"get_local_size(0)","blockDim.x"); 00320 viennacl::tools::find_and_replace(res,"get_local_size(1)","blockDim.y"); 00321 00322 viennacl::tools::find_and_replace(res,"get_group_id(0)","blockIdx.x"); 00323 viennacl::tools::find_and_replace(res,"get_group_id(1)","blockIdx.y"); 00324 00325 viennacl::tools::find_and_replace(res,"get_local_id(0)","threadIdx.x"); 00326 viennacl::tools::find_and_replace(res,"get_local_id(1)","threadIdx.y"); 00327 00328 viennacl::tools::find_and_replace(res,"get_global_id(0)","(blockIdx.x*blockDim.x + threadIdx.x)"); 00329 viennacl::tools::find_and_replace(res,"get_global_id(1)","(blockIdx.y*blockDim.y + threadIdx.y)"); 00330 00331 //Synchronization 00332 viennacl::tools::find_and_replace(res,"barrier(CLK_LOCAL_MEM_FENCE)","__syncthreads()"); 00333 viennacl::tools::find_and_replace(res,"barrier(CLK_GLOBAL_MEM_FENCE)","__syncthreads()"); 00334 00335 00336 return res; 00337 } 00338 00339 private: 00340 statements_type statements_; 00341 viennacl::ocl::context const & ctx_; 00342 forced_profiles_type forced_profiles_; 00343 }; 00344 00351 inline viennacl::ocl::program & get_configured_program(viennacl::generator::code_generator const & generator, std::list<viennacl::ocl::kernel*> & kernels, bool force_recompilation = false){ 00352 char* program_name = new char[256]; 00353 generator.make_program_name(program_name); 00354 if(force_recompilation) 00355 viennacl::ocl::current_context().delete_program(program_name); 00356 if(!viennacl::ocl::current_context().has_program(program_name)){ 00357 std::string source_code = generator.make_opencl_program_string(); 00358 #ifdef VIENNACL_DEBUG_BUILD 00359 std::cout << "Building " << program_name << "..." << std::endl; 00360 std::cout << source_code << std::endl; 00361 #endif 00362 viennacl::ocl::current_context().add_program(source_code, program_name); 00363 } 00364 viennacl::ocl::program & p = viennacl::ocl::current_context().get_program(program_name); 00365 generator.configure_program(p, kernels); 00366 delete[] program_name; 00367 00368 return p; 00369 } 00370 00372 inline void enqueue(viennacl::generator::code_generator const & generator, bool force_recompilation = false){ 00373 std::list<viennacl::ocl::kernel*> kernels; 00374 get_configured_program(generator, kernels, force_recompilation); 00375 for(std::list<viennacl::ocl::kernel*>::iterator it = kernels.begin() ; it != kernels.end() ; ++it){ 00376 viennacl::ocl::enqueue(**it, (*it)->context().get_queue()); 00377 } 00378 } 00379 00381 inline std::string get_opencl_program_string(viennacl::scheduler::statement const & s){ 00382 generator::code_generator gen; 00383 gen.add(s,s.array()[0]); 00384 return gen.make_opencl_program_string(); 00385 } 00386 00388 inline std::string get_cuda_device_code(viennacl::scheduler::statement const & s){ 00389 generator::code_generator gen; 00390 gen.add(s, s.array()[0]); 00391 return gen.make_cuda_program_string(); 00392 } 00393 00395 inline void generate_enqueue_statement(viennacl::scheduler::statement const & s, scheduler::statement_node const & root_node){ 00396 generator::code_generator gen; 00397 gen.add(s,root_node); 00398 viennacl::generator::enqueue(gen); 00399 } 00400 00402 inline void generate_enqueue_statement(viennacl::scheduler::statement const & s){ 00403 generate_enqueue_statement(s, s.array()[0]); 00404 } 00405 00406 } 00407 } 00408 #endif