Actual source code: dsnhep.c

slepc-3.13.1 2020-04-12
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2020, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */

 11: #include <slepc/private/dsimpl.h>
 12: #include <slepcblaslapack.h>

 14: PetscErrorCode DSAllocate_NHEP(DS ds,PetscInt ld)
 15: {

 19:   DSAllocateMat_Private(ds,DS_MAT_A);
 20:   DSAllocateMat_Private(ds,DS_MAT_Q);
 21:   PetscFree(ds->perm);
 22:   PetscMalloc1(ld,&ds->perm);
 23:   PetscLogObjectMemory((PetscObject)ds,ld*sizeof(PetscInt));
 24:   return(0);
 25: }

 27: PetscErrorCode DSView_NHEP(DS ds,PetscViewer viewer)
 28: {
 29:   PetscErrorCode    ierr;
 30:   PetscViewerFormat format;

 33:   PetscViewerGetFormat(viewer,&format);
 34:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) return(0);
 35:   DSViewMat(ds,viewer,DS_MAT_A);
 36:   if (ds->state>DS_STATE_INTERMEDIATE) { DSViewMat(ds,viewer,DS_MAT_Q); }
 37:   if (ds->mat[DS_MAT_X]) { DSViewMat(ds,viewer,DS_MAT_X); }
 38:   if (ds->mat[DS_MAT_Y]) { DSViewMat(ds,viewer,DS_MAT_Y); }
 39:   return(0);
 40: }

 42: static PetscErrorCode DSVectors_NHEP_Refined_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
 43: {
 45:   PetscInt       i,j;
 46:   PetscBLASInt   info,ld,n,n1,lwork,inc=1;
 47:   PetscScalar    sdummy,done=1.0,zero=0.0;
 48:   PetscReal      *sigma;
 49:   PetscBool      iscomplex = PETSC_FALSE;
 50:   PetscScalar    *A = ds->mat[DS_MAT_A];
 51:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
 52:   PetscScalar    *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
 53:   PetscScalar    *W;

 56:   if (left) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented for left vectors");
 57:   PetscBLASIntCast(ds->n,&n);
 58:   PetscBLASIntCast(ds->ld,&ld);
 59:   n1 = n+1;
 60:   if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
 61:   if (iscomplex) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complex eigenvalues yet");
 62:   DSAllocateWork_Private(ds,5*ld,6*ld,0);
 63:   DSAllocateMat_Private(ds,DS_MAT_W);
 64:   W = ds->mat[DS_MAT_W];
 65:   lwork = 5*ld;
 66:   sigma = ds->rwork+5*ld;

 68:   /* build A-w*I in W */
 69:   for (j=0;j<n;j++)
 70:     for (i=0;i<=n;i++)
 71:       W[i+j*ld] = A[i+j*ld];
 72:   for (i=0;i<n;i++)
 73:     W[i+i*ld] -= A[(*k)+(*k)*ld];

 75:   /* compute SVD of W */
 76: #if !defined(PETSC_USE_COMPLEX)
 77:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,&info));
 78: #else
 79:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,ds->rwork,&info));
 80: #endif
 81:   SlepcCheckLapackInfo("gesvd",info);

 83:   /* the smallest singular value is the new error estimate */
 84:   if (rnorm) *rnorm = sigma[n-1];

 86:   /* update vector with right singular vector associated to smallest singular value,
 87:      accumulating the transformation matrix Q */
 88:   PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,W+n-1,&ld,&zero,X+(*k)*ld,&inc));
 89:   return(0);
 90: }

 92: static PetscErrorCode DSVectors_NHEP_Refined_All(DS ds,PetscBool left)
 93: {
 95:   PetscInt       i;

 98:   for (i=0;i<ds->n;i++) {
 99:     DSVectors_NHEP_Refined_Some(ds,&i,NULL,left);
100:   }
101:   return(0);
102: }

104: static PetscErrorCode DSVectors_NHEP_Eigen_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
105: {
107:   PetscInt       i;
108:   PetscBLASInt   mm=1,mout,info,ld,n,*select,inc = 1;
109:   PetscScalar    tmp,done=1.0,zero=0.0;
110:   PetscReal      norm;
111:   PetscBool      iscomplex = PETSC_FALSE;
112:   PetscScalar    *A = ds->mat[DS_MAT_A];
113:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
114:   PetscScalar    *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
115:   PetscScalar    *Y;

118:   PetscBLASIntCast(ds->n,&n);
119:   PetscBLASIntCast(ds->ld,&ld);
120:   DSAllocateWork_Private(ds,0,0,ld);
121:   select = ds->iwork;
122:   for (i=0;i<n;i++) select[i] = (PetscBLASInt)PETSC_FALSE;

124:   /* compute k-th eigenvector Y of A */
125:   Y = X+(*k)*ld;
126:   select[*k] = (PetscBLASInt)PETSC_TRUE;
127: #if !defined(PETSC_USE_COMPLEX)
128:   if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
129:   mm = iscomplex? 2: 1;
130:   if (iscomplex) select[(*k)+1] = (PetscBLASInt)PETSC_TRUE;
131:   DSAllocateWork_Private(ds,3*ld,0,0);
132:   PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,&info));
133: #else
134:   DSAllocateWork_Private(ds,2*ld,ld,0);
135:   PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,ds->rwork,&info));
136: #endif
137:   SlepcCheckLapackInfo("trevc",info);
138:   if (mout != mm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Inconsistent arguments");

140:   /* accumulate and normalize eigenvectors */
141:   if (ds->state>=DS_STATE_CONDENSED) {
142:     PetscArraycpy(ds->work,Y,mout*ld);
143:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work,&inc,&zero,Y,&inc));
144: #if !defined(PETSC_USE_COMPLEX)
145:     if (iscomplex) PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work+ld,&inc,&zero,Y+ld,&inc));
146: #endif
147:     norm = BLASnrm2_(&n,Y,&inc);
148: #if !defined(PETSC_USE_COMPLEX)
149:     if (iscomplex) {
150:       tmp = BLASnrm2_(&n,Y+ld,&inc);
151:       norm = SlepcAbsEigenvalue(norm,tmp);
152:     }
153: #endif
154:     tmp = 1.0 / norm;
155:     PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y,&inc));
156: #if !defined(PETSC_USE_COMPLEX)
157:     if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y+ld,&inc));
158: #endif
159:   }

161:   /* set output arguments */
162:   if (iscomplex) (*k)++;
163:   if (rnorm) {
164:     if (iscomplex) *rnorm = SlepcAbsEigenvalue(Y[n-1],Y[n-1+ld]);
165:     else *rnorm = PetscAbsScalar(Y[n-1]);
166:   }
167:   return(0);
168: }

170: static PetscErrorCode DSVectors_NHEP_Eigen_All(DS ds,PetscBool left)
171: {
173:   PetscInt       i;
174:   PetscBLASInt   n,ld,mout,info,inc = 1;
175:   PetscBool      iscomplex;
176:   PetscScalar    *X,*Y,*Z,*A = ds->mat[DS_MAT_A],tmp;
177:   PetscReal      norm;
178:   const char     *side,*back;

181:   PetscBLASIntCast(ds->n,&n);
182:   PetscBLASIntCast(ds->ld,&ld);
183:   if (left) {
184:     X = NULL;
185:     Y = ds->mat[DS_MAT_Y];
186:     side = "L";
187:   } else {
188:     X = ds->mat[DS_MAT_X];
189:     Y = NULL;
190:     side = "R";
191:   }
192:   Z = left? Y: X;
193:   if (ds->state>=DS_STATE_CONDENSED) {
194:     /* DSSolve() has been called, backtransform with matrix Q */
195:     back = "B";
196:     PetscArraycpy(Z,ds->mat[DS_MAT_Q],ld*ld);
197:   } else back = "A";
198: #if !defined(PETSC_USE_COMPLEX)
199:   DSAllocateWork_Private(ds,3*ld,0,0);
200:   PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,&info));
201: #else
202:   DSAllocateWork_Private(ds,2*ld,ld,0);
203:   PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,ds->rwork,&info));
204: #endif
205:   SlepcCheckLapackInfo("trevc",info);

207:   /* normalize eigenvectors */
208:   for (i=0;i<n;i++) {
209:     iscomplex = (i<n-1 && A[i+1+i*ld]!=0.0)? PETSC_TRUE: PETSC_FALSE;
210:     norm = BLASnrm2_(&n,Z+i*ld,&inc);
211: #if !defined(PETSC_USE_COMPLEX)
212:     if (iscomplex) {
213:       tmp = BLASnrm2_(&n,Z+(i+1)*ld,&inc);
214:       norm = SlepcAbsEigenvalue(norm,tmp);
215:     }
216: #endif
217:     tmp = 1.0 / norm;
218:     PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Z+i*ld,&inc));
219: #if !defined(PETSC_USE_COMPLEX)
220:     if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Z+(i+1)*ld,&inc));
221: #endif
222:     if (iscomplex) i++;
223:   }
224:   return(0);
225: }

227: PetscErrorCode DSVectors_NHEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
228: {

232:   switch (mat) {
233:     case DS_MAT_X:
234:       if (ds->refined) {
235:         if (!ds->extrarow) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Refined vectors require activating the extra row");
236:         if (j) {
237:           DSVectors_NHEP_Refined_Some(ds,j,rnorm,PETSC_FALSE);
238:         } else {
239:           DSVectors_NHEP_Refined_All(ds,PETSC_FALSE);
240:         }
241:       } else {
242:         if (j) {
243:           DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_FALSE);
244:         } else {
245:           DSVectors_NHEP_Eigen_All(ds,PETSC_FALSE);
246:         }
247:       }
248:       break;
249:     case DS_MAT_Y:
250:       if (ds->refined) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
251:       if (j) {
252:         DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_TRUE);
253:       } else {
254:         DSVectors_NHEP_Eigen_All(ds,PETSC_TRUE);
255:       }
256:       break;
257:     case DS_MAT_U:
258:     case DS_MAT_VT:
259:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
260:       break;
261:     default:
262:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
263:   }
264:   if (ds->state < DS_STATE_CONDENSED) {
265:     DSSetState(ds,DS_STATE_CONDENSED);
266:   }
267:   return(0);
268: }

270: static PetscErrorCode DSSort_NHEP_Arbitrary(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
271: {
273:   PetscInt       i;
274:   PetscBLASInt   info,n,ld,mout,lwork,*selection;
275:   PetscScalar    *T = ds->mat[DS_MAT_A],*Q = ds->mat[DS_MAT_Q],*work;
276: #if !defined(PETSC_USE_COMPLEX)
277:   PetscBLASInt   *iwork,liwork;
278: #endif

281:   if (!k) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_WRONG,"Must supply argument k");
282:   PetscBLASIntCast(ds->n,&n);
283:   PetscBLASIntCast(ds->ld,&ld);
284: #if !defined(PETSC_USE_COMPLEX)
285:   lwork = n;
286:   liwork = 1;
287:   DSAllocateWork_Private(ds,lwork,0,liwork+n);
288:   work = ds->work;
289:   lwork = ds->lwork;
290:   selection = ds->iwork;
291:   iwork = ds->iwork + n;
292:   liwork = ds->liwork - n;
293: #else
294:   lwork = 1;
295:   DSAllocateWork_Private(ds,lwork,0,n);
296:   work = ds->work;
297:   selection = ds->iwork;
298: #endif
299:   /* Compute the selected eigenvalue to be in the leading position */
300:   DSSortEigenvalues_Private(ds,rr,ri,ds->perm,PETSC_FALSE);
301:   PetscArrayzero(selection,n);
302:   for (i=0;i<*k;i++) selection[ds->perm[i]] = 1;
303: #if !defined(PETSC_USE_COMPLEX)
304:   PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,wi,&mout,NULL,NULL,work,&lwork,iwork,&liwork,&info));
305: #else
306:   PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,&mout,NULL,NULL,work,&lwork,&info));
307: #endif
308:   SlepcCheckLapackInfo("trsen",info);
309:   *k = mout;
310:   return(0);
311: }

313: static PetscErrorCode DSSort_NHEP_Total(DS ds,PetscScalar *wr,PetscScalar *wi)
314: {
316:   PetscScalar    re;
317:   PetscInt       i,j,pos,result;
318:   PetscBLASInt   ifst,ilst,info,n,ld;
319:   PetscScalar    *T = ds->mat[DS_MAT_A];
320:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
321: #if !defined(PETSC_USE_COMPLEX)
322:   PetscScalar    *work,im;
323: #endif

326:   PetscBLASIntCast(ds->n,&n);
327:   PetscBLASIntCast(ds->ld,&ld);
328: #if !defined(PETSC_USE_COMPLEX)
329:   DSAllocateWork_Private(ds,ld,0,0);
330:   work = ds->work;
331: #endif
332:   /* selection sort */
333:   for (i=ds->l;i<n-1;i++) {
334:     re = wr[i];
335: #if !defined(PETSC_USE_COMPLEX)
336:     im = wi[i];
337: #endif
338:     pos = 0;
339:     j=i+1; /* j points to the next eigenvalue */
340: #if !defined(PETSC_USE_COMPLEX)
341:     if (im != 0) j=i+2;
342: #endif
343:     /* find minimum eigenvalue */
344:     for (;j<n;j++) {
345: #if !defined(PETSC_USE_COMPLEX)
346:       SlepcSCCompare(ds->sc,re,im,wr[j],wi[j],&result);
347: #else
348:       SlepcSCCompare(ds->sc,re,0.0,wr[j],0.0,&result);
349: #endif
350:       if (result > 0) {
351:         re = wr[j];
352: #if !defined(PETSC_USE_COMPLEX)
353:         im = wi[j];
354: #endif
355:         pos = j;
356:       }
357: #if !defined(PETSC_USE_COMPLEX)
358:       if (wi[j] != 0) j++;
359: #endif
360:     }
361:     if (pos) {
362:       /* interchange blocks */
363:       PetscBLASIntCast(pos+1,&ifst);
364:       PetscBLASIntCast(i+1,&ilst);
365: #if !defined(PETSC_USE_COMPLEX)
366:       PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info));
367: #else
368:       PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info));
369: #endif
370:       SlepcCheckLapackInfo("trexc",info);
371:       /* recover original eigenvalues from T matrix */
372:       for (j=i;j<n;j++) {
373:         wr[j] = T[j+j*ld];
374: #if !defined(PETSC_USE_COMPLEX)
375:         if (j<n-1 && T[j+1+j*ld] != 0.0) {
376:           /* complex conjugate eigenvalue */
377:           wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) *
378:                   PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld]));
379:           wr[j+1] = wr[j];
380:           wi[j+1] = -wi[j];
381:           j++;
382:         } else {
383:           wi[j] = 0.0;
384:         }
385: #endif
386:       }
387:     }
388: #if !defined(PETSC_USE_COMPLEX)
389:     if (wi[i] != 0) i++;
390: #endif
391:   }
392:   return(0);
393: }

395: PetscErrorCode DSSort_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
396: {

400:   if (!rr || wr == rr) {
401:     DSSort_NHEP_Total(ds,wr,wi);
402:   } else {
403:     DSSort_NHEP_Arbitrary(ds,wr,wi,rr,ri,k);
404:   }
405:   return(0);
406: }

408: static PetscErrorCode DSSortWithPermutation_NHEP(DS ds,PetscInt *perm,PetscScalar *wr,PetscScalar *wi)
409: {
411:   PetscInt       i,j,pos,inc=1;
412:   PetscBLASInt   ifst,ilst,info,n,ld;
413:   PetscScalar    *T = ds->mat[DS_MAT_A];
414:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
415: #if !defined(PETSC_USE_COMPLEX)
416:   PetscScalar    *work;
417: #endif

420:   PetscBLASIntCast(ds->n,&n);
421:   PetscBLASIntCast(ds->ld,&ld);
422: #if !defined(PETSC_USE_COMPLEX)
423:   DSAllocateWork_Private(ds,ld,0,0);
424:   work = ds->work;
425: #endif
426:   for (i=ds->l;i<n-1;i++) {
427:     pos = perm[i];
428: #if !defined(PETSC_USE_COMPLEX)
429:     inc = (pos<n-1 && T[pos+1+pos*ld] != 0.0)? 2: 1;
430: #endif
431:     if (pos!=i) {
432: #if !defined(PETSC_USE_COMPLEX)
433:       if ((T[pos+(pos-1)*ld] != 0.0 && perm[i+1]!=pos-1) || (T[pos+1+pos*ld] != 0.0 && perm[i+1]!=pos+1))
434:  SETERRQ1(PETSC_COMM_SELF,1,"Invalid permutation due to a 2x2 block at position %D",pos);
435: #endif

437:       /* interchange blocks */
438:       PetscBLASIntCast(pos+1,&ifst);
439:       PetscBLASIntCast(i+1,&ilst);
440: #if !defined(PETSC_USE_COMPLEX)
441:       PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info));
442: #else
443:       PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info));
444: #endif
445:       SlepcCheckLapackInfo("trexc",info);
446:       for (j=i+1;j<n;j++) {
447:         if (perm[j]>=i && perm[j]<pos) perm[j]+=inc;
448:       }
449:       perm[i] = i;
450:       if (inc==2) perm[i+1] = i+1;
451:     }
452:     if (inc==2) i++;
453:   }
454:   /* recover original eigenvalues from T matrix */
455:   for (j=ds->l;j<n;j++) {
456:     wr[j] = T[j+j*ld];
457: #if !defined(PETSC_USE_COMPLEX)
458:     if (j<n-1 && T[j+1+j*ld] != 0.0) {
459:       /* complex conjugate eigenvalue */
460:       wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) * PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld]));
461:       wr[j+1] = wr[j];
462:       wi[j+1] = -wi[j];
463:       j++;
464:     } else {
465:       wi[j] = 0.0;
466:     }
467: #endif
468:   }
469:   return(0);
470: }

472: PetscErrorCode DSUpdateExtraRow_NHEP(DS ds)
473: {
475:   PetscInt       i;
476:   PetscBLASInt   n,ld,incx=1;
477:   PetscScalar    *A,*Q,*x,*y,one=1.0,zero=0.0;

480:   PetscBLASIntCast(ds->n,&n);
481:   PetscBLASIntCast(ds->ld,&ld);
482:   A  = ds->mat[DS_MAT_A];
483:   Q  = ds->mat[DS_MAT_Q];
484:   DSAllocateWork_Private(ds,2*ld,0,0);
485:   x = ds->work;
486:   y = ds->work+ld;
487:   for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
488:   PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
489:   for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
490:   ds->k = n;
491:   return(0);
492: }

494: /*
495:    Reduce a matrix A to upper Hessenberg form Q'*A*Q, where Q is an orthogonal matrix.
496:    The result overwrites A. Matrix A has the form

498:          [ S | * ]
499:      A = [-------]
500:          [ R | H ]

502:   where S is an upper (quasi-)triangular matrix of order k, H is an upper Hessenberg
503:   matrix of order n-k, and R is all zeros except the first row (the arrow).
504:   The algorithm uses elementary reflectors to annihilate entries in the arrow, and
505:   then proceeds upwards.
506:   If ilo>1, then it is assumed that the first ilo-1 entries of the arrow are zero, and
507:   hence the first ilo-1 rows and columns of Q are set to the identity matrix.

509:   Required workspace is 2*n.
510: */
511: static PetscErrorCode ArrowHessenberg(PetscBLASInt n,PetscBLASInt k,PetscBLASInt ilo,PetscScalar *A,PetscBLASInt lda,PetscScalar *Q,PetscBLASInt ldq,PetscScalar *work)
512: {
513:   PetscBLASInt i,j,n0,m,inc=1,incn=-1;
514:   PetscScalar  t,*v=work,*w=work+n,tau,tauc;

517:   m = n-ilo+1;
518:   for (i=k;i>ilo;i--) {
519:     for (j=0;j<=i-ilo;j++) v[j] = A[i+(i-j-1)*lda];  /* _larfg does not allow negative inc, so use buffer */
520:     n0 = i-ilo+1;
521:     PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0,v,v+1,&inc,&tau));
522:     for (j=1;j<=i-ilo;j++) v[j] = PetscConj(v[j]);
523:     tauc = PetscConj(tau);
524:     A[i+(i-1)*lda] = v[0];
525:     v[0] = 1.0;
526:     PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&i,&n0,v,&incn,&tauc,A+(ilo-1)*lda,&lda,w));
527:     for (j=1;j<=i-ilo;j++) A[i+(i-j-1)*lda] = 0.0;
528:     PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,v,&incn,&tau,A+ilo-1+(ilo-1)*lda,&lda,w));
529:     PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,v,&incn,&tau,Q+ilo-1+(ilo-1)*ldq,&ldq,w));
530:   }
531:   /* trivial in-place transposition of Q */
532:   for (j=ilo-1;j<k;j++) {
533:     for (i=j;i<k;i++) {
534:       t = Q[i+j*ldq];
535:       if (i!=j) Q[i+j*ldq] = PetscConj(Q[j+i*ldq]);
536:       Q[j+i*ldq] = PetscConj(t);
537:     }
538:   }
539:   return(0);
540: }

542: PetscErrorCode DSSolve_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi)
543: {
545:   PetscScalar    *work,*tau;
546:   PetscInt       i,j;
547:   PetscBLASInt   ilo,lwork,info,n,k,ld;
548:   PetscScalar    *A = ds->mat[DS_MAT_A];
549:   PetscScalar    *Q = ds->mat[DS_MAT_Q];

552: #if !defined(PETSC_USE_COMPLEX)
554: #endif
555:   PetscBLASIntCast(ds->n,&n);
556:   PetscBLASIntCast(ds->ld,&ld);
557:   PetscBLASIntCast(ds->l+1,&ilo);
558:   PetscBLASIntCast(ds->k,&k);
559:   DSAllocateWork_Private(ds,ld+6*ld,0,0);
560:   tau  = ds->work;
561:   work = ds->work+ld;
562:   lwork = 6*ld;

564:   /* initialize orthogonal matrix */
565:   PetscArrayzero(Q,ld*ld);
566:   for (i=0;i<n;i++) Q[i+i*ld] = 1.0;
567:   if (n==1) { /* quick return */
568:     wr[0] = A[0];
569:     wi[0] = 0.0;
570:     return(0);
571:   }

573:   /* reduce to upper Hessenberg form */
574:   if (ds->state<DS_STATE_INTERMEDIATE) {
575:     if (PETSC_FALSE && k>0) {
576:       ArrowHessenberg(n,k,ilo,A,ld,Q,ld,work);
577:     } else {
578:       PetscStackCallBLAS("LAPACKgehrd",LAPACKgehrd_(&n,&ilo,&n,A,&ld,tau,work,&lwork,&info));
579:       SlepcCheckLapackInfo("gehrd",info);
580:       for (j=0;j<n-1;j++) {
581:         for (i=j+2;i<n;i++) {
582:           Q[i+j*ld] = A[i+j*ld];
583:           A[i+j*ld] = 0.0;
584:         }
585:       }
586:       PetscStackCallBLAS("LAPACKorghr",LAPACKorghr_(&n,&ilo,&n,Q,&ld,tau,work,&lwork,&info));
587:       SlepcCheckLapackInfo("orghr",info);
588:     }
589:   }

591:   /* compute the (real) Schur form */
592: #if !defined(PETSC_USE_COMPLEX)
593:   PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,wi,Q,&ld,work,&lwork,&info));
594:   for (j=0;j<ds->l;j++) {
595:     if (j==n-1 || A[j+1+j*ld] == 0.0) {
596:       /* real eigenvalue */
597:       wr[j] = A[j+j*ld];
598:       wi[j] = 0.0;
599:     } else {
600:       /* complex eigenvalue */
601:       wr[j] = A[j+j*ld];
602:       wr[j+1] = A[j+j*ld];
603:       wi[j] = PetscSqrtReal(PetscAbsReal(A[j+1+j*ld])) *
604:               PetscSqrtReal(PetscAbsReal(A[j+(j+1)*ld]));
605:       wi[j+1] = -wi[j];
606:       j++;
607:     }
608:   }
609: #else
610:   PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,Q,&ld,work,&lwork,&info));
611:   if (wi) for (i=ds->l;i<n;i++) wi[i] = 0.0;
612: #endif
613:   SlepcCheckLapackInfo("hseqr",info);
614:   return(0);
615: }

617: PetscErrorCode DSSynchronize_NHEP(DS ds,PetscScalar eigr[],PetscScalar eigi[])
618: {
620:   PetscInt       ld=ds->ld,l=ds->l,k;
621:   PetscMPIInt    n,rank,off=0,size,ldn;

624:   k = (ds->n-l)*ld;
625:   if (ds->state>DS_STATE_RAW) k += (ds->n-l)*ld;
626:   if (eigr) k += ds->n-l;
627:   if (eigi) k += ds->n-l;
628:   DSAllocateWork_Private(ds,k,0,0);
629:   PetscMPIIntCast(k*sizeof(PetscScalar),&size);
630:   PetscMPIIntCast(ds->n-l,&n);
631:   PetscMPIIntCast(ld*(ds->n-l),&ldn);
632:   MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank);
633:   if (!rank) {
634:     MPI_Pack(ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
635:     if (ds->state>DS_STATE_RAW) {
636:       MPI_Pack(ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
637:     }
638:     if (eigr) {
639:       MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
640:     }
641:     if (eigi) {
642:       MPI_Pack(eigi+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
643:     }
644:   }
645:   MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds));
646:   if (rank) {
647:     MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
648:     if (ds->state>DS_STATE_RAW) {
649:       MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
650:     }
651:     if (eigr) {
652:       MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
653:     }
654:     if (eigi) {
655:       MPI_Unpack(ds->work,size,&off,eigi+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
656:     }
657:   }
658:   return(0);
659: }

661: PetscErrorCode DSTruncate_NHEP(DS ds,PetscInt n)
662: {
663:   PetscInt    i,newn,ld=ds->ld,l=ds->l;
664:   PetscScalar *A;

667:   if (ds->state==DS_STATE_CONDENSED) ds->t = ds->n;
668:   A = ds->mat[DS_MAT_A];
669:   /* be careful not to break a diagonal 2x2 block */
670:   if (A[n+(n-1)*ld]==0.0) newn = n;
671:   else {
672:     if (n<ds->n-1) newn = n+1;
673:     else newn = n-1;
674:   }
675:   if (ds->extrarow && ds->k==ds->n) {
676:     /* copy entries of extra row to the new position, then clean last row */
677:     for (i=l;i<newn;i++) A[newn+i*ld] = A[ds->n+i*ld];
678:     for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
679:   }
680:   ds->k = 0;
681:   ds->n = newn;
682:   return(0);
683: }

685: PetscErrorCode DSCond_NHEP(DS ds,PetscReal *cond)
686: {
688:   PetscScalar    *work;
689:   PetscReal      *rwork;
690:   PetscBLASInt   *ipiv;
691:   PetscBLASInt   lwork,info,n,ld;
692:   PetscReal      hn,hin;
693:   PetscScalar    *A;

696:   PetscBLASIntCast(ds->n,&n);
697:   PetscBLASIntCast(ds->ld,&ld);
698:   lwork = 8*ld;
699:   DSAllocateWork_Private(ds,lwork,ld,ld);
700:   work  = ds->work;
701:   rwork = ds->rwork;
702:   ipiv  = ds->iwork;

704:   /* use workspace matrix W to avoid overwriting A */
705:   DSAllocateMat_Private(ds,DS_MAT_W);
706:   A = ds->mat[DS_MAT_W];
707:   PetscArraycpy(A,ds->mat[DS_MAT_A],ds->ld*ds->ld);

709:   /* norm of A */
710:   if (ds->state<DS_STATE_INTERMEDIATE) hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);
711:   else hn = LAPACKlanhs_("I",&n,A,&ld,rwork);

713:   /* norm of inv(A) */
714:   PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
715:   SlepcCheckLapackInfo("getrf",info);
716:   PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
717:   SlepcCheckLapackInfo("getri",info);
718:   hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);

720:   *cond = hn*hin;
721:   return(0);
722: }

724: PetscErrorCode DSTranslateHarmonic_NHEP(DS ds,PetscScalar tau,PetscReal beta,PetscBool recover,PetscScalar *gin,PetscReal *gamma)
725: {
727:   PetscInt       i,j;
728:   PetscBLASInt   *ipiv,info,n,ld,one=1,ncol;
729:   PetscScalar    *A,*B,*Q,*g=gin,*ghat;
730:   PetscScalar    done=1.0,dmone=-1.0,dzero=0.0;
731:   PetscReal      gnorm;

734:   PetscBLASIntCast(ds->n,&n);
735:   PetscBLASIntCast(ds->ld,&ld);
736:   A  = ds->mat[DS_MAT_A];

738:   if (!recover) {

740:     DSAllocateWork_Private(ds,0,0,ld);
741:     ipiv = ds->iwork;
742:     if (!g) {
743:       DSAllocateWork_Private(ds,ld,0,0);
744:       g = ds->work;
745:     }
746:     /* use workspace matrix W to factor A-tau*eye(n) */
747:     DSAllocateMat_Private(ds,DS_MAT_W);
748:     B = ds->mat[DS_MAT_W];
749:     PetscArraycpy(B,A,ld*ld);

751:     /* Vector g initialy stores b = beta*e_n^T */
752:     PetscArrayzero(g,n);
753:     g[n-1] = beta;

755:     /* g = (A-tau*eye(n))'\b */
756:     for (i=0;i<n;i++) B[i+i*ld] -= tau;
757:     PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,B,&ld,ipiv,&info));
758:     SlepcCheckLapackInfo("getrf",info);
759:     PetscLogFlops(2.0*n*n*n/3.0);
760:     PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C",&n,&one,B,&ld,ipiv,g,&ld,&info));
761:     SlepcCheckLapackInfo("getrs",info);
762:     PetscLogFlops(2.0*n*n-n);

764:     /* A = A + g*b' */
765:     for (i=0;i<n;i++) A[i+(n-1)*ld] += g[i]*beta;

767:   } else { /* recover */

769:     DSAllocateWork_Private(ds,ld,0,0);
770:     ghat = ds->work;
771:     Q    = ds->mat[DS_MAT_Q];

773:     /* g^ = -Q(:,idx)'*g */
774:     PetscBLASIntCast(ds->l+ds->k,&ncol);
775:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&ncol,&dmone,Q,&ld,g,&one,&dzero,ghat,&one));

777:     /* A = A + g^*b' */
778:     for (i=0;i<ds->l+ds->k;i++)
779:       for (j=ds->l;j<ds->l+ds->k;j++)
780:         A[i+j*ld] += ghat[i]*Q[n-1+j*ld]*beta;

782:     /* g~ = (I-Q(:,idx)*Q(:,idx)')*g = g+Q(:,idx)*g^ */
783:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&ncol,&done,Q,&ld,ghat,&one,&done,g,&one));
784:   }

786:   /* Compute gamma factor */
787:   if (gamma) {
788:     gnorm = 0.0;
789:     for (i=0;i<n;i++) gnorm = gnorm + PetscRealPart(g[i]*PetscConj(g[i]));
790:     *gamma = PetscSqrtReal(1.0+gnorm);
791:   }
792:   return(0);
793: }

795: SLEPC_EXTERN PetscErrorCode DSCreate_NHEP(DS ds)
796: {
798:   ds->ops->allocate      = DSAllocate_NHEP;
799:   ds->ops->view          = DSView_NHEP;
800:   ds->ops->vectors       = DSVectors_NHEP;
801:   ds->ops->solve[0]      = DSSolve_NHEP;
802:   ds->ops->sort          = DSSort_NHEP;
803:   ds->ops->sortperm      = DSSortWithPermutation_NHEP;
804:   ds->ops->synchronize   = DSSynchronize_NHEP;
805:   ds->ops->truncate      = DSTruncate_NHEP;
806:   ds->ops->update        = DSUpdateExtraRow_NHEP;
807:   ds->ops->cond          = DSCond_NHEP;
808:   ds->ops->transharm     = DSTranslateHarmonic_NHEP;
809:   return(0);
810: }