ViennaCL - The Vienna Computing Library  1.6.2
Free open-source GPU-accelerated linear algebra and solver library.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
execution_handler.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_DEVICE_SPECIFIC_EXECUTION_HANDLER_HPP
2 #define VIENNACL_DEVICE_SPECIFIC_EXECUTION_HANDLER_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 
26 #include <map>
27 
29 
32 
33 namespace viennacl
34 {
35 namespace device_specific
36 {
37 
39 {
40 public:
41  typedef std::map< std::string, tools::shared_ptr<template_base> > container_type;
42 
43 private:
/** @brief Returns 'str' with a single leading underscore prepended.
*
* @param str  The text to prefix.
*/
std::string append_prefix(std::string const & str)
{
  std::string prefixed("_");
  prefixed += str;
  return prefixed;
}
48 
/** @brief Builds the OpenCL source line which enables the extension named by 'ext'.
*
* @param ext  Name of the extension to enable.
* @return The "#pragma OPENCL EXTENSION <ext> : enable" line (newline-terminated) if 'ext' has more than one character, a lone newline otherwise.
*/
std::string define_extension(std::string const & ext)
{
  // Note: On devices without double precision support, 'ext' is an empty string.
  if (ext.length() <= 1)
    return std::string("\n");

  return std::string("#pragma OPENCL EXTENSION " + ext + " : enable\n");
}
54 
55  void init_program_compiler(std::string const & name, bool force_recompilation)
56  {
57  lazy_programs_.push_back(lazy_program_compiler(&ctx_, name, force_recompilation));
58  lazy_programs_.back().add(define_extension(device_.double_support_extension()));
59  }
60 
61 public:
62  execution_handler(std::string const & program_name_base, viennacl::ocl::context & ctx, viennacl::ocl::device const & device, bool force_recompilation = false) : ctx_(ctx), device_(device), program_names_(2)
63  {
64  lazy_programs_.reserve(2);
65  init_program_compiler(program_name_base + "_0", force_recompilation);
66  init_program_compiler(program_name_base + "_1", force_recompilation);
67  }
68 
69  void add(std::string const & key, template_base const & T, statements_container const & statements)
70  {
71  if (kernels_.insert(container_type::value_type(key, T.clone())).second)
72  {
73  std::vector<std::string> sources = kernels_.at(key)->generate(append_prefix(key), statements, device_);
74  assert(sources.size()<=2);
75  for (unsigned int i = 0; i < sources.size(); ++i)
76  lazy_programs_[i].add(sources[i]);
77  }
78  }
79 
80  template_base * template_of(std::string const & key)
81  {
82  return kernels_.at(key).get();
83  }
84 
85  void execute(container_type::key_type const & key, statements_container const & statements)
86  {
87  tools::shared_ptr<template_base> & template_pointer = kernels_.at(key);
88  template_pointer->enqueue(append_prefix(key), lazy_programs_, statements);
89  }
90 
91 private:
93  viennacl::ocl::device const & device_;
94  container_type kernels_;
95  std::vector<std::string> program_names_;
96  std::vector<lazy_program_compiler> lazy_programs_;
97 };
98 
99 }
100 }
101 #endif
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:54
A class representing a compute device (e.g. a GPU)
Definition: device.hpp:49
void add(std::string const &key, template_base const &T, statements_container const &statements)
Implementation of a shared pointer class (cf. std::shared_ptr, boost::shared_ptr). Will be used until C++11 is widely available.
virtual tools::shared_ptr< template_base > clone() const =0
execution_handler(std::string const &program_name_base, viennacl::ocl::context &ctx, viennacl::ocl::device const &device, bool force_recompilation=false)
Helper for compiling a program lazily.
template_base * template_of(std::string const &key)
A shared pointer class similar to boost::shared_ptr. Reimplemented in order to avoid a Boost dependency.
Definition: shared_ptr.hpp:83
std::string double_support_extension() const
ViennaCL convenience function: Returns the device extension which enables double precision (usually cl_khr_fp64).
Definition: device.hpp:967
void execute(container_type::key_type const &key, statements_container const &statements)
std::map< std::string, tools::shared_ptr< template_base > > container_type