1 #ifndef VIENNACL_DEVICE_SPECIFIC_EXECUTION_HANDLER_HPP
2 #define VIENNACL_DEVICE_SPECIFIC_EXECUTION_HANDLER_HPP
35 namespace device_specific
41 typedef std::map< std::string, tools::shared_ptr<template_base> >
container_type;
44 std::string append_prefix(std::string
const & str)
/** @brief Builds the OpenCL source line that enables a given extension.
 *
 * @param ext  Name of the OpenCL extension to enable.
 * @return     "#pragma OPENCL EXTENSION <ext> : enable\n" if ext is longer
 *             than one character; otherwise a single newline, so that the
 *             result can always be added to a program source unconditionally.
 */
std::string define_extension(std::string const & ext)
{
  // Names of length 0 or 1 are treated as "no extension": emit an empty line.
  if (ext.length() > 1)
    return std::string("#pragma OPENCL EXTENSION " + ext + " : enable\n");

  return std::string("\n");
}
55 void init_program_compiler(std::string
const & name,
bool force_recompilation)
57 lazy_programs_.push_back(lazy_program_compiler(&ctx_, name, force_recompilation));
64 lazy_programs_.reserve(2);
65 init_program_compiler(program_name_base +
"_0", force_recompilation);
66 init_program_compiler(program_name_base +
"_1", force_recompilation);
71 if (kernels_.insert(container_type::value_type(key, T.
clone())).second)
73 std::vector<std::string> sources = kernels_.at(key)->generate(append_prefix(key), statements, device_);
74 assert(sources.size()<=2);
75 for (
unsigned int i = 0; i < sources.size(); ++i)
76 lazy_programs_[i].
add(sources[i]);
82 return kernels_.at(key).get();
88 template_pointer->enqueue(append_prefix(key), lazy_programs_, statements);
95 std::vector<std::string> program_names_;
96 std::vector<lazy_program_compiler> lazy_programs_;
Manages an OpenCL context and provides the respective convenience functions for creating buffers, etc.
A class representing a compute device (e.g. a GPU)
void add(std::string const &key, template_base const &T, statements_container const &statements)
Implementation of a shared pointer class (cf. std::shared_ptr, boost::shared_ptr). Will be used until C++11 is widely available.
virtual tools::shared_ptr< template_base > clone() const =0
execution_handler(std::string const &program_name_base, viennacl::ocl::context &ctx, viennacl::ocl::device const &device, bool force_recompilation=false)
Helper for compiling a program lazily.
template_base * template_of(std::string const &key)
std::string double_support_extension() const
ViennaCL convenience function: Returns the device extension which enables double precision (usually cl_khr_fp64).
void execute(container_type::key_type const &key, statements_container const &statements)
std::map< std::string, tools::shared_ptr< template_base > > container_type