ViennaCL - The Vienna Computing Library
1.5.2
|
00001 #ifndef VIENNACL_LINALG_CUDA_MISC_OPERATIONS_HPP_ 00002 #define VIENNACL_LINALG_CUDA_MISC_OPERATIONS_HPP_ 00003 00004 /* ========================================================================= 00005 Copyright (c) 2010-2014, Institute for Microelectronics, 00006 Institute for Analysis and Scientific Computing, 00007 TU Wien. 00008 Portions of this software are copyright by UChicago Argonne, LLC. 00009 00010 ----------------- 00011 ViennaCL - The Vienna Computing Library 00012 ----------------- 00013 00014 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 00015 00016 (A list of authors and contributors can be found in the PDF manual) 00017 00018 License: MIT (X11), see file LICENSE in the base directory 00019 ============================================================================= */ 00020 00025 #include "viennacl/forwards.h" 00026 #include "viennacl/scalar.hpp" 00027 #include "viennacl/vector.hpp" 00028 #include "viennacl/tools/tools.hpp" 00029 #include "viennacl/linalg/cuda/common.hpp" 00030 00031 00032 namespace viennacl 00033 { 00034 namespace linalg 00035 { 00036 namespace cuda 00037 { 00038 00039 namespace detail 00040 { 00041 00042 template <typename T> 00043 __global__ void level_scheduling_substitute_kernel( 00044 const unsigned int * row_index_array, 00045 const unsigned int * row_indices, 00046 const unsigned int * column_indices, 00047 const T * elements, 00048 T * vec, 00049 unsigned int size) 00050 { 00051 for (unsigned int row = blockDim.x * blockIdx.x + threadIdx.x; 00052 row < size; 00053 row += gridDim.x * blockDim.x) 00054 { 00055 unsigned int eq_row = row_index_array[row]; 00056 T vec_entry = vec[eq_row]; 00057 unsigned int row_end = row_indices[row+1]; 00058 00059 for (unsigned int j = row_indices[row]; j < row_end; ++j) 00060 vec_entry -= vec[column_indices[j]] * elements[j]; 00061 00062 vec[eq_row] = vec_entry; 00063 } 00064 } 00065 00066 00067 00068 template <typename ScalarType> 00069 void level_scheduling_substitute(vector<ScalarType> & vec, 00070 viennacl::backend::mem_handle const & row_index_array, 00071 viennacl::backend::mem_handle const & row_buffer, 00072 viennacl::backend::mem_handle const & col_buffer, 00073 viennacl::backend::mem_handle const & element_buffer, 00074 vcl_size_t num_rows 00075 ) 00076 { 00077 level_scheduling_substitute_kernel<<<128, 128>>>(detail::cuda_arg<unsigned int>(row_index_array.cuda_handle()), 00078 detail::cuda_arg<unsigned int>(row_buffer.cuda_handle()), 00079 detail::cuda_arg<unsigned int>(col_buffer.cuda_handle()), 00080 detail::cuda_arg<ScalarType>(element_buffer.cuda_handle()), 00081 detail::cuda_arg<ScalarType>(vec), 00082 static_cast<unsigned int>(num_rows) 00083 ); 00084 } 00085 00086 } 00087 00088 } // namespace cuda 00089 } //namespace linalg 00090 } //namespace viennacl 00091 00092 00093 #endif