ViennaCL - The Vienna Computing Library
1.5.2
|
00001 #ifndef VIENNACL_LINALG_SVD_HPP 00002 #define VIENNACL_LINALG_SVD_HPP 00003 00004 /* ========================================================================= 00005 Copyright (c) 2010-2014, Institute for Microelectronics, 00006 Institute for Analysis and Scientific Computing, 00007 TU Wien. 00008 Portions of this software are copyright by UChicago Argonne, LLC. 00009 00010 ----------------- 00011 ViennaCL - The Vienna Computing Library 00012 ----------------- 00013 00014 Project Head: Karl Rupp rupp@iue.tuwien.ac.at 00015 00016 (A list of authors and contributors can be found in the PDF manual) 00017 00018 License: MIT (X11), see file LICENSE in the base directory 00019 ============================================================================= */ 00020 00028 // Note: Boost.uBLAS is required at the moment 00029 #include <boost/numeric/ublas/vector.hpp> 00030 #include <boost/numeric/ublas/matrix.hpp> 00031 00032 00033 #include <cmath> 00034 00035 #include "viennacl/matrix.hpp" 00036 #include "viennacl/linalg/opencl/kernels/svd.hpp" 00037 #include "viennacl/linalg/qr-method-common.hpp" 00038 00039 namespace viennacl 00040 { 00041 namespace linalg 00042 { 00043 00044 namespace detail 00045 { 00046 00047 template<typename MatrixType, typename VectorType> 00048 void givens_prev(MatrixType & matrix, 00049 VectorType & tmp1, 00050 VectorType & tmp2, 00051 int n, 00052 int l, 00053 int k 00054 ) 00055 { 00056 typedef typename MatrixType::value_type ScalarType; 00057 typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type CPU_ScalarType; 00058 00059 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(matrix).context()); 00060 viennacl::ocl::kernel & kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::svd<CPU_ScalarType>::program_name(), SVD_GIVENS_PREV_KERNEL); 00061 00062 kernel.global_work_size(0, viennacl::tools::align_to_multiple<vcl_size_t>(viennacl::traits::size1(matrix), 256)); 00063 kernel.local_work_size(0, 256); 00064 00065 viennacl::ocl::enqueue(kernel( 00066 matrix, 00067 tmp1, 00068 tmp2, 00069 static_cast<cl_uint>(n), 00070 static_cast<cl_uint>(matrix.internal_size1()), 00071 static_cast<cl_uint>(l + 1), 00072 static_cast<cl_uint>(k + 1) 00073 )); 00074 } 00075 00076 00077 template<typename MatrixType, typename VectorType> 00078 void change_signs(MatrixType& matrix, VectorType& signs, int n) 00079 { 00080 typedef typename MatrixType::value_type ScalarType; 00081 typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type CPU_ScalarType; 00082 00083 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(matrix).context()); 00084 viennacl::ocl::kernel & kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::svd<CPU_ScalarType>::program_name(), SVD_INVERSE_SIGNS_KERNEL); 00085 00086 kernel.global_work_size(0, viennacl::tools::align_to_multiple<vcl_size_t>(viennacl::traits::size1(matrix), 16)); 00087 kernel.global_work_size(1, viennacl::tools::align_to_multiple<vcl_size_t>(viennacl::traits::size2(matrix), 16)); 00088 00089 kernel.local_work_size(0, 16); 00090 kernel.local_work_size(1, 16); 00091 00092 viennacl::ocl::enqueue(kernel( 00093 matrix, 00094 signs, 00095 static_cast<cl_uint>(n), 00096 static_cast<cl_uint>(matrix.internal_size1()) 00097 )); 00098 } 00099 00100 template<typename MatrixType, typename CPU_VectorType> 00101 void svd_qr_shift(MatrixType & vcl_u, 00102 MatrixType & vcl_v, 00103 CPU_VectorType & q, 00104 CPU_VectorType & e) 00105 { 00106 typedef typename MatrixType::value_type ScalarType; 00107 typedef typename viennacl::result_of::cpu_value_type<ScalarType>::type CPU_ScalarType; 00108 00109 int n = static_cast<int>(q.size()); 00110 int m = static_cast<int>(vcl_u.size1()); 00111 00112 detail::transpose(vcl_u); 00113 detail::transpose(vcl_v); 00114 00115 std::vector<CPU_ScalarType> signs_v(n, 1); 00116 std::vector<CPU_ScalarType> cs1(n), ss1(n), cs2(n), ss2(n); 00117 00118 viennacl::vector<CPU_ScalarType> tmp1(n), tmp2(n); 00119 00120 bool goto_test_conv = false; 00121 00122 for (int k = n - 1; k >= 0; k--) 00123 { 00124 // std::cout << "K = " << k << std::endl; 00125 00126 vcl_size_t iter = 0; 00127 for (iter = 0; iter < detail::ITER_MAX; iter++) 00128 { 00129 // test for split 00130 int l; 00131 for (l = k; l >= 0; l--) 00132 { 00133 goto_test_conv = false; 00134 if (std::fabs(e[l]) <= detail::EPS) 00135 { 00136 // set it 00137 goto_test_conv = true; 00138 break; 00139 } 00140 00141 if (std::fabs(q[l - 1]) <= detail::EPS) 00142 { 00143 // goto 00144 break; 00145 } 00146 } 00147 00148 if (!goto_test_conv) 00149 { 00150 CPU_ScalarType c = 0.0; 00151 CPU_ScalarType s = 1.0; 00152 00153 //int l1 = l - 1; 00154 //int l2 = k; 00155 00156 for (int i = l; i <= k; i++) 00157 { 00158 CPU_ScalarType f = s * e[i]; 00159 e[i] = c * e[i]; 00160 00161 if (std::fabs(f) <= detail::EPS) 00162 { 00163 //l2 = i - 1; 00164 break; 00165 } 00166 00167 CPU_ScalarType g = q[i]; 00168 CPU_ScalarType h = detail::pythag(f, g); 00169 q[i] = h; 00170 c = g / h; 00171 s = -f / h; 00172 00173 cs1[i] = c; 00174 ss1[i] = s; 00175 } 00176 00177 // std::cout << "Hitted!" << l1 << " " << l2 << "\n"; 00178 00179 // for(int i = l; i <= l2; i++) 00180 // { 00181 // for (int j = 0; j < m; j++) 00182 // { 00183 // CPU_ScalarType y = u(j, l1); 00184 // CPU_ScalarType z = u(j, i); 00185 // u(j, l1) = y * cs1[i] + z * ss1[i]; 00186 // u(j, i) = -y * ss1[i] + z * cs1[i]; 00187 // } 00188 // } 00189 } 00190 00191 CPU_ScalarType z = q[k]; 00192 00193 if (l == k) 00194 { 00195 if (z < 0) 00196 { 00197 q[k] = -z; 00198 00199 signs_v[k] *= -1; 00200 } 00201 00202 break; 00203 } 00204 00205 if (iter >= detail::ITER_MAX - 1) 00206 break; 00207 00208 CPU_ScalarType x = q[l]; 00209 CPU_ScalarType y = q[k - 1]; 00210 CPU_ScalarType g = e[k - 1]; 00211 CPU_ScalarType h = e[k]; 00212 CPU_ScalarType f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2 * h * y); 00213 00214 g = detail::pythag<CPU_ScalarType>(f, 1); 00215 00216 if (f < 0) { 00217 f = ((x - z) * (x + z) + h * (y / (f - g) - h)) / x; 00218 } else { 00219 f = ((x - z) * (x + z) + h * (y / (f + g) - h)) / x; 00220 } 00221 00222 CPU_ScalarType c = 1; 00223 CPU_ScalarType s = 1; 00224 00225 for (vcl_size_t i = l + 1; i <= static_cast<vcl_size_t>(k); i++) 00226 { 00227 g = e[i]; 00228 y = q[i]; 00229 h = s * g; 00230 g = c * g; 00231 CPU_ScalarType z = detail::pythag(f, h); 00232 e[i - 1] = z; 00233 c = f / z; 00234 s = h / z; 00235 f = x * c + g * s; 00236 g = -x * s + g * c; 00237 h = y * s; 00238 y = y * c; 00239 00240 cs1[i] = c; 00241 ss1[i] = s; 00242 00243 z = detail::pythag(f, h); 00244 q[i - 1] = z; 00245 c = f / z; 00246 s = h / z; 00247 f = c * g + s * y; 00248 x = -s * g + c * y; 00249 00250 cs2[i] = c; 00251 ss2[i] = s; 00252 } 00253 00254 { 00255 viennacl::copy(cs1, tmp1); 00256 viennacl::copy(ss1, tmp2); 00257 00258 givens_prev(vcl_v, tmp1, tmp2, n, l, k); 00259 } 00260 00261 { 00262 viennacl::copy(cs2, tmp1); 00263 viennacl::copy(ss2, tmp2); 00264 00265 givens_prev(vcl_u, tmp1, tmp2, m, l, k); 00266 } 00267 00268 e[l] = 0.0; 00269 e[k] = f; 00270 q[k] = x; 00271 } 00272 00273 } 00274 00275 00276 viennacl::copy(signs_v, tmp1); 00277 change_signs(vcl_v, tmp1, n); 00278 00279 // transpose singular matrices again 00280 detail::transpose(vcl_u); 00281 detail::transpose(vcl_v); 00282 } 00283 00284 00285 /*template <typename SCALARTYPE, unsigned int ALIGNMENT> 00286 bool householder_c(viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT> & A, 00287 viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT> & Q, 00288 viennacl::vector<SCALARTYPE, ALIGNMENT> & D, 00289 vcl_size_t start) 00290 { 00291 00292 vcl_size_t row_start = start; 00293 vcl_size_t col_start = start; 00294 00295 if(row_start + 1 >= A.size1()) 00296 return false; 00297 00298 std::vector<SCALARTYPE> tmp(A.size1(), 0); 00299 00300 copy_vec(A, D, row_start, col_start, true); 00301 fast_copy(D.begin(), D.begin() + (A.size1() - row_start), tmp.begin() + row_start); 00302 00303 detail::householder_vector(tmp, row_start); 00304 00305 fast_copy(tmp, D); 00306 00307 viennacl::ocl::kernel & kernel = viennacl::ocl::get_kernel(viennacl::linalg::opencl::kernels::svd<SCALARTYPE>::program_name(), SVD_HOUSEHOLDER_COL_KERNEL); 00308 00309 //kernel.global_work_size(0, A.size1() << 1); 00310 00311 viennacl::ocl::enqueue(kernel( 00312 A, 00313 Q, 00314 D, 00315 static_cast<cl_uint>(row_start), 00316 static_cast<cl_uint>(col_start), 00317 static_cast<cl_uint>(A.size1()), 00318 static_cast<cl_uint>(A.size2()), 00319 static_cast<cl_uint>(A.internal_size2()), 00320 static_cast<cl_uint>(Q.internal_size2()), 00321 viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(SCALARTYPE))) 00322 )); 00323 00324 return true; 00325 }*/ 00326 00327 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00328 bool householder_c(viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT>& A, 00329 viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT>& Q, 00330 viennacl::vector<SCALARTYPE, ALIGNMENT>& D, 00331 vcl_size_t row_start, vcl_size_t col_start) 00332 { 00333 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context()); 00334 00335 if(row_start + 1 >= A.size1()) 00336 return false; 00337 00338 prepare_householder_vector(A, D, A.size1(), row_start, col_start, row_start, true); 00339 00340 { 00341 viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::svd<SCALARTYPE>::program_name(), SVD_HOUSEHOLDER_UPDATE_A_LEFT_KERNEL); 00342 00343 viennacl::ocl::enqueue(kernel( 00344 A, 00345 D, 00346 static_cast<cl_uint>(row_start), 00347 static_cast<cl_uint>(col_start), 00348 static_cast<cl_uint>(A.size1()), 00349 static_cast<cl_uint>(A.size2()), 00350 static_cast<cl_uint>(A.internal_size2()), 00351 viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(SCALARTYPE))) 00352 )); 00353 } 00354 00355 { 00356 viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::svd<SCALARTYPE>::program_name(), SVD_HOUSEHOLDER_UPDATE_QL_KERNEL); 00357 00358 viennacl::ocl::enqueue(kernel( 00359 Q, 00360 D, 00361 static_cast<cl_uint>(A.size1()), 00362 static_cast<cl_uint>(A.size2()), 00363 static_cast<cl_uint>(Q.internal_size2()), 00364 viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(SCALARTYPE))) 00365 )); 00366 } 00367 00368 return true; 00369 } 00370 00371 /* 00372 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00373 bool householder_r(viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT>& A, 00374 viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT>& Q, 00375 viennacl::vector<SCALARTYPE, ALIGNMENT>& S, 00376 vcl_size_t start) 00377 { 00378 00379 vcl_size_t row_start = start; 00380 vcl_size_t col_start = start + 1; 00381 00382 if(col_start + 1 >= A.size2()) 00383 return false; 00384 00385 std::vector<SCALARTYPE> tmp(A.size2(), 0); 00386 00387 copy_vec(A, S, row_start, col_start, false); 00388 fast_copy(S.begin(), 00389 S.begin() + (A.size2() - col_start), 00390 tmp.begin() + col_start); 00391 00392 detail::householder_vector(tmp, col_start); 00393 fast_copy(tmp, S); 00394 00395 viennacl::ocl::kernel& kernel = viennacl::ocl::get_kernel(viennacl::linalg::opencl::kernels::svd<SCALARTYPE>::program_name(), SVD_HOUSEHOLDER_ROW_KERNEL); 00396 00397 viennacl::ocl::enqueue(kernel( 00398 A, 00399 Q, 00400 S, 00401 static_cast<cl_uint>(row_start), 00402 static_cast<cl_uint>(col_start), 00403 static_cast<cl_uint>(A.size1()), 00404 static_cast<cl_uint>(A.size2()), 00405 static_cast<cl_uint>(A.internal_size2()), 00406 static_cast<cl_uint>(Q.internal_size2()), 00407 viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(SCALARTYPE))) 00408 )); 00409 return true; 00410 } */ 00411 00412 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00413 bool householder_r(viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT> & A, 00414 viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT> & Q, 00415 viennacl::vector<SCALARTYPE, ALIGNMENT>& D, 00416 vcl_size_t row_start, vcl_size_t col_start) 00417 { 00418 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context()); 00419 00420 if(col_start + 1 >= A.size2()) 00421 return false; 00422 00423 prepare_householder_vector(A, D, A.size2(), row_start, col_start, col_start, false); 00424 00425 { 00426 viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::svd<SCALARTYPE>::program_name(), SVD_HOUSEHOLDER_UPDATE_A_RIGHT_KERNEL); 00427 00428 viennacl::ocl::enqueue(kernel( 00429 A, 00430 D, 00431 static_cast<cl_uint>(row_start), 00432 static_cast<cl_uint>(col_start), 00433 static_cast<cl_uint>(A.size1()), 00434 static_cast<cl_uint>(A.size2()), 00435 static_cast<cl_uint>(A.internal_size2()), 00436 viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(SCALARTYPE))) 00437 )); 00438 } 00439 00440 { 00441 viennacl::ocl::kernel& kernel = ctx.get_kernel(viennacl::linalg::opencl::kernels::svd<SCALARTYPE>::program_name(), SVD_HOUSEHOLDER_UPDATE_QR_KERNEL); 00442 00443 viennacl::ocl::enqueue(kernel( 00444 Q, 00445 D, 00446 static_cast<cl_uint>(A.size1()), 00447 static_cast<cl_uint>(A.size2()), 00448 static_cast<cl_uint>(Q.internal_size2()), 00449 viennacl::ocl::local_mem(static_cast<cl_uint>(128 * sizeof(SCALARTYPE))) 00450 )); 00451 } 00452 00453 return true; 00454 } 00455 00456 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00457 void bidiag(viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT> & Ai, 00458 viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT> & QL, 00459 viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT> & QR) 00460 { 00461 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(QL).context()); 00462 00463 vcl_size_t row_num = Ai.size1(); 00464 vcl_size_t col_num = Ai.size2(); 00465 00466 vcl_size_t to = std::min(row_num, col_num); 00467 vcl_size_t big_to = std::max(row_num, col_num); 00468 00469 //for storing householder vector 00470 viennacl::vector<SCALARTYPE, ALIGNMENT> hh_vector(big_to); 00471 00472 QL = viennacl::identity_matrix<SCALARTYPE>(QL.size1(), ctx); 00473 QR = viennacl::identity_matrix<SCALARTYPE>(QR.size1(), ctx); 00474 00475 for(vcl_size_t i = 0; i < to; i++) 00476 { 00477 householder_c(Ai, QL, hh_vector, i, i); 00478 householder_r(Ai, QR, hh_vector, i, i+1); 00479 } 00480 } 00481 00482 } // namespace detail 00483 00484 00491 template <typename SCALARTYPE, unsigned int ALIGNMENT> 00492 void svd(viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT> & A, 00493 viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT> & QL, 00494 viennacl::matrix<SCALARTYPE, row_major, ALIGNMENT> & QR) 00495 { 00496 viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(A).context()); 00497 viennacl::linalg::opencl::kernels::svd<SCALARTYPE>::init(ctx); 00498 00499 vcl_size_t row_num = A.size1(); 00500 vcl_size_t col_num = A.size2(); 00501 00502 vcl_size_t to = std::min(row_num, col_num); 00503 00504 00505 //viennacl::vector<SCALARTYPE, ALIGNMENT> d(to); 00506 //viennacl::vector<SCALARTYPE, ALIGNMENT> s(to + 1); 00507 00508 // first stage 00509 detail::bidiag(A, QL, QR); 00510 00511 // second stage 00512 //std::vector<SCALARTYPE> dh(to, 0); 00513 //std::vector<SCALARTYPE> sh(to + 1, 0); 00514 boost::numeric::ublas::vector<SCALARTYPE> dh = boost::numeric::ublas::scalar_vector<SCALARTYPE>(to, 0); 00515 boost::numeric::ublas::vector<SCALARTYPE> sh = boost::numeric::ublas::scalar_vector<SCALARTYPE>(to + 1, 0); 00516 00517 detail::bidiag_pack(A, dh, sh); 00518 00519 detail::svd_qr_shift( QL, QR, dh, sh); 00520 00521 // Write resulting diagonal matrix with singular values to A: 00522 boost::numeric::ublas::matrix<SCALARTYPE> h_Sigma(row_num, col_num); 00523 h_Sigma.clear(); 00524 00525 for (vcl_size_t i = 0; i < to; i++) 00526 h_Sigma(i, i) = dh[i]; 00527 00528 copy(h_Sigma, A); 00529 } 00530 } 00531 } 00532 #endif