1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_VECTOR_HPP
2 #define VIENNACL_LINALG_OPENCL_KERNELS_VECTOR_HPP
/** @brief Fragment that registers a reduction kernel with `handler` under the key `prefix`.
*
* NOTE(review): the function signature and the body of the loop are not part of this listing.
* The names used below (handler, prefix, parameters, vector_num) line up with the argument
* list of the generate_inner_prod_impl(...) calls further down in this file, so this is
* presumably that helper -- confirm against the full header.
*/
32 template<
typename NumericT,
typename ScalarT>
36 namespace ds = device_specific;
// Statement list; it is wrapped into a statements_container when handed to the handler below.
37 ds::statements_container::data_type statements;
// Counts i from 0 up to (excluding) vector_num; what each iteration does is not visible here.
38 for (
unsigned int i = 0; i < vector_num; ++i)
// Adds a reduction_template built from `parameters`; the statements are marked INDEPENDENT.
40 handler.add(prefix, ds::reduction_template(parameters), ds::statements_container(statements,ds::statements_container::INDEPENDENT));
47 template<
typename NumericT>
/** @brief Fragment that registers all 16 flag combinations of the avbv statement with `handler`.
*
* Every call adds a vector_axpy_template built from `parameters` under the key
* prefix + "ABCD". Each digit is '1' exactly when the corresponding bool passed to
* scheduler::preset::avbv is true:
*   A -> flip_a, B -> reciprocal_a (the two bools following scalar `a`)
*   C -> flip_b, D -> reciprocal_b (the two bools following scalar `b`)
* The flag names are taken from the avbv signature; presumably they request a sign flip and a
* reciprocal of the respective scalar -- confirm in the scheduler preset.
*
* NOTE(review): the function signature is not part of this listing. The names used below match
* the argument list of the generate_avbv_impl2(...) calls further down in this file, so this is
* presumably that helper -- confirm against the full header.
*/
52 template<
typename ScalarT1,
typename ScalarT2>
57 namespace ds = device_specific;
// Suffixes "xx00": both flags of b are false, all four combinations of the flags of a.
58 handler.
add(prefix +
"0000", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
false,
false, z, b,
false,
false));
59 handler.
add(prefix +
"1000", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
true,
false, z, b,
false,
false));
60 handler.
add(prefix +
"0100", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
false,
true, z, b,
false,
false));
61 handler.
add(prefix +
"1100", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
true,
true, z, b,
false,
false));
// Suffixes "xx10": flip_b is true, reciprocal_b is false.
64 handler.
add(prefix +
"0010", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
false,
false, z, b,
true,
false));
65 handler.
add(prefix +
"1010", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
true,
false, z, b,
true,
false));
66 handler.
add(prefix +
"0110", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
false,
true, z, b,
true,
false));
67 handler.
add(prefix +
"1110", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
true,
true, z, b,
true,
false));
// Suffixes "xx01": flip_b is false, reciprocal_b is true.
69 handler.
add(prefix +
"0001", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
false,
false, z, b,
false,
true));
70 handler.
add(prefix +
"1001", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
true,
false, z, b,
false,
true));
71 handler.
add(prefix +
"0101", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
false,
true, z, b,
false,
true));
72 handler.
add(prefix +
"1101", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
true,
true, z, b,
false,
true));
// Suffixes "xx11": both flags of b are true.
74 handler.
add(prefix +
"0011", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
false,
false, z, b,
true,
true));
75 handler.
add(prefix +
"1011", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
true,
false, z, b,
true,
true));
76 handler.
add(prefix +
"0111", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
false,
true, z, b,
true,
true));
77 handler.
add(prefix +
"1111", ds::vector_axpy_template(parameters),
scheduler::preset::avbv(ASSIGN_OP, x, y, a,
true,
true, z, b,
true,
true));
/** @brief Fragment that forwards to generate_avbv_impl2 once per scalar-argument combination.
*
* Each call extends `prefix` with a tag and selects which scalars are passed on:
*   "hv_"   -> ha, no second operand      "dv_"   -> da, no second operand
*   "hvhv_" -> ha and hb                  "dvhv_" -> da and hb
*   "hvdv_" -> ha and db                  "dvdv_" -> da and db
* NOTE(review): 'h'/'d' presumably stand for host and device scalars -- confirm against the
* parameter types in the full header. The function signature is not part of this listing; the
* names used match the generate_avbv_impl(...) calls further down in this file.
*/
81 template<
typename ScalarT>
// Single-operand variants: the second vector and its scalar are passed as typed NULL pointers.
87 generate_avbv_impl2(handler, prefix +
"hv_", parameters, ASSIGN_OP, x, y, ha, (
viennacl::vector<NumericT>*)NULL, (NumericT*)NULL);
88 generate_avbv_impl2(handler, prefix +
"dv_", parameters, ASSIGN_OP, x, y, da, (
viennacl::vector<NumericT>*)NULL, (NumericT*)NULL);
// Two-operand variants: every pairing of {ha, da} for the first scalar with {hb, db} for the second.
91 generate_avbv_impl2(handler, prefix +
"hvhv_", parameters, ASSIGN_OP, x, y, ha, z, hb);
92 generate_avbv_impl2(handler, prefix +
"dvhv_", parameters, ASSIGN_OP, x, y, da, z, hb);
93 generate_avbv_impl2(handler, prefix +
"hvdv_", parameters, ASSIGN_OP, x, y, ha, z, db);
94 generate_avbv_impl2(handler, prefix +
"dvdv_", parameters, ASSIGN_OP, x, y, da, z, db);
// Fragment of an execution_handler(...) accessor: looks up (and, in lines not shown in this
// listing, presumably creates) the handler stored for `h`, then returns a reference to it.
// NOTE(review): `h` is used as the cl_context key of the map; where it, `device`, and the
// operands x, y, z, ha, da, hb, db are defined is not visible here.
// Function-local static: one map shared by all calls, holding one execution_handler per cl_context.
100 static std::map<cl_context, device_specific::execution_handler> handlers_map;
// Taken only when no handler is stored for h yet; the code between lines 102 and 109 of the
// original (braces and, presumably, the insertion into the map) is not part of this listing.
102 if (handlers_map.find(h) == handlers_map.end())
106 namespace ds = viennacl::device_specific;
// std::map::at throws std::out_of_range when the key is absent.
109 ds::execution_handler & handler = handlers_map.at(h);
// Template parameters for NumericT on `device`, fetched from the built-in database.
120 ds::vector_axpy_template::parameters_type
vector_axpy_params = ds::builtin_database::vector_axpy_params<NumericT>(device);
121 ds::reduction_template::parameters_type
reduction_params = ds::builtin_database::reduction_params<NumericT>(device);
// Registers the avbv kernel family twice: under "assign_" for plain assignment and under
// "ip_add_" for in-place addition.
123 generate_avbv_impl(handler,
"assign_", vector_axpy_params,
scheduler::OPERATION_BINARY_ASSIGN_TYPE, &x, &y, &ha, &da, &z, &hb, &db);
124 generate_avbv_impl(handler,
"ip_add_", vector_axpy_params,
scheduler::OPERATION_BINARY_INPLACE_ADD_TYPE, &x, &y, &ha, &da, &z, &hb, &db);
// Inner product for a single vector pair (vector count argument is 1).
130 generate_inner_prod_impl(handler,
"inner_prod", reduction_params, 1, &x, &y, &da);
// Guard on is_floating_point<NumericT>; the guarded statements are not part of this listing.
133 bool is_float_or_double = is_floating_point<NumericT>::value;
134 if (is_float_or_double)
142 return handlers_map.at(h);
// Fragment of an execution_handler(...) accessor that registers the "inner_prod_N" reduction
// kernels for N = 1, 2, 3, 4 and 8 and returns the handler stored for `h`.
// NOTE(review): the enclosing declaration, the definition of `h`, `device`, x, y and da, and the
// code between the `if` and the handler lookup are not part of this listing.
148 template<
typename NumericT>
// Function-local static: one map shared by all calls, holding one execution_handler per cl_context.
154 static std::map<cl_context, device_specific::execution_handler> handlers_map;
// Taken only when no handler is stored for h yet (guarded statements not visible here).
156 if (handlers_map.find(h) == handlers_map.end())
160 namespace ds = viennacl::device_specific;
164 ds::execution_handler & handler = handlers_map.at(h);
// Reduction parameters for NumericT on `device`, fetched from the built-in database.
166 ds::reduction_template::parameters_type
reduction_params = ds::builtin_database::reduction_params<NumericT>(device);
// The number in each key equals the vector count passed as the fourth argument.
174 generate_inner_prod_impl(handler,
"inner_prod_1", reduction_params, 1, &x, &y, &da);
175 generate_inner_prod_impl(handler,
"inner_prod_2", reduction_params, 2, &x, &y, &da);
176 generate_inner_prod_impl(handler,
"inner_prod_3", reduction_params, 3, &x, &y, &da);
177 generate_inner_prod_impl(handler,
"inner_prod_4", reduction_params, 4, &x, &y, &da);
178 generate_inner_prod_impl(handler,
"inner_prod_8", reduction_params, 8, &x, &y, &da);
180 return handlers_map.at(h);
// Fragment of an execution_handler(...) accessor for element-wise operations: defines two
// short-lived helper macros for registering unary and binary element operations and returns
// the handler stored for `h`.
// NOTE(review): the enclosing declaration, the definitions of `h`, `device`, `numeric_string`,
// x, y and z, and the macro invocations themselves are not part of this listing.
186 template<
typename NumericT>
// Function-local static: one map shared by all calls, holding one execution_handler per cl_context.
193 static std::map<cl_context, device_specific::execution_handler> handlers_map;
// Taken only when no handler is stored for h yet (guarded statements not visible here).
195 if (handlers_map.find(h) == handlers_map.end())
199 namespace ds = viennacl::device_specific;
200 using namespace scheduler;
206 ds::execution_handler & handler = handlers_map.at(h);
// Template parameters for NumericT on `device`, fetched from the built-in database.
207 ds::vector_axpy_template::parameters_type
vector_axpy_params = ds::builtin_database::vector_axpy_params<NumericT>(device);
// Shorthand: adds a vector_axpy_template for the unary element operation OPTYPE on (&x, &y),
// keyed by operator_string(OPTYPE).
214 #define VIENNACL_ADD_UNARY(OPTYPE) handler.add(operator_string(OPTYPE), ds::vector_axpy_template(vector_axpy_params),scheduler::preset::unary_element_op(&x, &y, OPTYPE))
// Guard for NumericT named "float" or "double"; the guarded registrations are not visible here.
215 if (numeric_string ==
"float" || numeric_string ==
"double")
// The macro is removed again once the unary registrations are done.
238 #undef VIENNACL_ADD_UNARY
// Shorthand: same as above for binary element operations on (&x, &y, &z).
241 #define VIENNACL_ADD_BINARY(OPTYPE) handler.add(operator_string(OPTYPE), ds::vector_axpy_template(vector_axpy_params),scheduler::preset::binary_element_op(&x, &y, &z, OPTYPE))
// Guard for NumericT named "float" or "double"; the guarded registrations are not visible here.
244 if (numeric_string ==
"float" || numeric_string ==
"double")
248 #undef VIENNACL_ADD_BINARY
251 return handlers_map.at(h);
viennacl::ocl::device const & current_device() const
Returns the current device.
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
statement inner_prod(ScalarT const *s, vector_base< NumericT > const *x, vector_base< NumericT > const *y)
Some helper routines for reading/writing/printing scheduler expressions.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
reduction_template::parameters_type const & reduction_params(ocl::device const &device)
Provides OpenCL-related utilities.
A class representing a compute device (e.g. a GPU)
void add(std::string const &key, template_base const &T, statements_container const &statements)
statement max(scalar< NumericT > const *s, vector_base< NumericT > const *x)
scheduler::statement avbv(scheduler::operation_node_type ASSIGN_OP, NumericT const *x, NumericT const *y, ScalarT1 const *a, bool flip_a, bool reciprocal_a, NumericT const *z, ScalarT2 const *b, bool flip_b, bool reciprocal_b)
statement norm_2(scalar< NumericT > const *s, vector_base< NumericT > const *x)
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
statement sum(scalar< NumericT > const *s, vector_base< NumericT > const *x)
const viennacl::ocl::handle< cl_context > & handle() const
Returns the context handle.
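Main kernel class for generating OpenCL kernels for operations on/with viennacl::vector<> without involving matrices, multiple inner products, or element-wise operations other than addition or subtraction.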
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also suitable for CUDA and OpenMP.
statement min(scalar< NumericT > const *s, vector_base< NumericT > const *x)
static void apply(viennacl::ocl::context const &)
const OCL_TYPE & get() const
Class for representing non-strided subvectors of a bigger vector x.
vector_axpy_template::parameters_type const & vector_axpy_params(ocl::device const &device)
statement index_norm_inf(scalar< NumericT > const *s, vector_base< NumericT > const *x)
statement norm_1(scalar< NumericT > const *s, vector_base< NumericT > const *x)
statement norm_inf(scalar< NumericT > const *s, vector_base< NumericT > const *x)
Provides the datastructures for dealing with a single statement such as 'x = y + z;'.
Proxy classes for vectors.
operation_node_type
Enumeration for identifying the possible operations.
Main kernel class for generating OpenCL kernels for elementwise operations other than addition and subtraction on/with viennacl::vector<>.
Representation of an OpenCL kernel in ViennaCL.
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initializer for viennacl::vector, vector_range, or vector_slice only.
device_specific::statements_container swap(NumericT const *x, NumericT const *y)
A range class that refers to an interval [start, stop), where 'start' is included, and 'stop' is excluded.
static device_specific::execution_handler & execution_handler(viennacl::ocl::context &ctx)
#define VIENNACL_ADD_UNARY(OPTYPE)
scheduler::statement assign_cpu(vector_base< NumericT > const *x, implicit_vector_base< NumericT > const *y)
device_specific::statements_container plane_rotation(vector_base< NumericT > const *x, vector_base< NumericT > const *y, NumericT const *a, NumericT const *b)
const char * operator_string(scheduler::operation_node_type type)
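Main kernel class for generating OpenCL kernels for operations on/with viennacl::vector<> without involving matrices, multiple inner products, or element-wise operations other than addition or subtraction.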
Helper class for converting a type to its string representation.
#define VIENNACL_ADD_BINARY(OPTYPE)
Helper for handling fallbacks, lazy compilation, input-dependent kernels, etc.
reduction_parameters parameters_type