Actual source code: dsghiep.c
slepc-3.13.1 2020-04-12
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2020, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/dsimpl.h>
12: #include <slepcblaslapack.h>
14: PetscErrorCode DSAllocate_GHIEP(DS ds,PetscInt ld)
15: {
19: DSAllocateMat_Private(ds,DS_MAT_A);
20: DSAllocateMat_Private(ds,DS_MAT_B);
21: DSAllocateMat_Private(ds,DS_MAT_Q);
22: DSAllocateMatReal_Private(ds,DS_MAT_T);
23: DSAllocateMatReal_Private(ds,DS_MAT_D);
24: PetscFree(ds->perm);
25: PetscMalloc1(ld,&ds->perm);
26: PetscLogObjectMemory((PetscObject)ds,ld*sizeof(PetscInt));
27: return(0);
28: }
30: PetscErrorCode DSSwitchFormat_GHIEP(DS ds,PetscBool tocompact)
31: {
33: PetscReal *T,*S;
34: PetscScalar *A,*B;
35: PetscInt i,n,ld;
38: A = ds->mat[DS_MAT_A];
39: B = ds->mat[DS_MAT_B];
40: T = ds->rmat[DS_MAT_T];
41: S = ds->rmat[DS_MAT_D];
42: n = ds->n;
43: ld = ds->ld;
44: if (tocompact) { /* switch from dense (arrow) to compact storage */
45: PetscArrayzero(T,3*ld);
46: PetscArrayzero(S,ld);
47: for (i=0;i<n-1;i++) {
48: T[i] = PetscRealPart(A[i+i*ld]);
49: T[ld+i] = PetscRealPart(A[i+1+i*ld]);
50: S[i] = PetscRealPart(B[i+i*ld]);
51: }
52: T[n-1] = PetscRealPart(A[n-1+(n-1)*ld]);
53: S[n-1] = PetscRealPart(B[n-1+(n-1)*ld]);
54: for (i=ds->l;i<ds->k;i++) T[2*ld+i] = PetscRealPart(A[ds->k+i*ld]);
55: } else { /* switch from compact (arrow) to dense storage */
56: PetscArrayzero(A,ld*ld);
57: PetscArrayzero(B,ld*ld);
58: for (i=0;i<n-1;i++) {
59: A[i+i*ld] = T[i];
60: A[i+1+i*ld] = T[ld+i];
61: A[i+(i+1)*ld] = T[ld+i];
62: B[i+i*ld] = S[i];
63: }
64: A[n-1+(n-1)*ld] = T[n-1];
65: B[n-1+(n-1)*ld] = S[n-1];
66: for (i=ds->l;i<ds->k;i++) {
67: A[ds->k+i*ld] = T[2*ld+i];
68: A[i+ds->k*ld] = T[2*ld+i];
69: }
70: }
71: return(0);
72: }
74: PetscErrorCode DSView_GHIEP(DS ds,PetscViewer viewer)
75: {
76: PetscErrorCode ierr;
77: PetscViewerFormat format;
78: PetscInt i,j;
79: PetscReal value;
80: const char *methodname[] = {
81: "QR + Inverse Iteration",
82: "HZ method",
83: "QR"
84: };
85: const int nmeth=sizeof(methodname)/sizeof(methodname[0]);
88: PetscViewerGetFormat(viewer,&format);
89: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
90: if (ds->method<nmeth) {
91: PetscViewerASCIIPrintf(viewer,"solving the problem with: %s\n",methodname[ds->method]);
92: }
93: return(0);
94: }
95: if (ds->compact) {
96: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
97: if (format == PETSC_VIEWER_ASCII_MATLAB) {
98: PetscViewerASCIIPrintf(viewer,"%% Size = %D %D\n",ds->n,ds->n);
99: PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",3*ds->n);
100: PetscViewerASCIIPrintf(viewer,"zzz = [\n");
101: for (i=0;i<ds->n;i++) {
102: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,i+1,(double)*(ds->rmat[DS_MAT_T]+i));
103: }
104: for (i=0;i<ds->n-1;i++) {
105: if (*(ds->rmat[DS_MAT_T]+ds->ld+i) !=0 && i!=ds->k-1) {
106: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+2,i+1,(double)*(ds->rmat[DS_MAT_T]+ds->ld+i));
107: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,i+2,(double)*(ds->rmat[DS_MAT_T]+ds->ld+i));
108: }
109: }
110: for (i = ds->l;i<ds->k;i++) {
111: if (*(ds->rmat[DS_MAT_T]+2*ds->ld+i)) {
112: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",ds->k+1,i+1,(double)*(ds->rmat[DS_MAT_T]+2*ds->ld+i));
113: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,ds->k+1,(double)*(ds->rmat[DS_MAT_T]+2*ds->ld+i));
114: }
115: }
116: PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_A]);
118: PetscViewerASCIIPrintf(viewer,"%% Size = %D %D\n",ds->n,ds->n);
119: PetscViewerASCIIPrintf(viewer,"omega = zeros(%D,3);\n",3*ds->n);
120: PetscViewerASCIIPrintf(viewer,"omega = [\n");
121: for (i=0;i<ds->n;i++) {
122: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,i+1,(double)*(ds->rmat[DS_MAT_D]+i));
123: }
124: PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(omega);\n",DSMatName[DS_MAT_B]);
126: } else {
127: PetscViewerASCIIPrintf(viewer,"T\n");
128: for (i=0;i<ds->n;i++) {
129: for (j=0;j<ds->n;j++) {
130: if (i==j) value = *(ds->rmat[DS_MAT_T]+i);
131: else if (i==j+1 || j==i+1) value = *(ds->rmat[DS_MAT_T]+ds->ld+PetscMin(i,j));
132: else if ((i<ds->k && j==ds->k) || (i==ds->k && j<ds->k)) value = *(ds->rmat[DS_MAT_T]+2*ds->ld+PetscMin(i,j));
133: else value = 0.0;
134: PetscViewerASCIIPrintf(viewer," %18.16e ",(double)value);
135: }
136: PetscViewerASCIIPrintf(viewer,"\n");
137: }
138: PetscViewerASCIIPrintf(viewer,"omega\n");
139: for (i=0;i<ds->n;i++) {
140: for (j=0;j<ds->n;j++) {
141: if (i==j) value = *(ds->rmat[DS_MAT_D]+i);
142: else value = 0.0;
143: PetscViewerASCIIPrintf(viewer," %18.16e ",(double)value);
144: }
145: PetscViewerASCIIPrintf(viewer,"\n");
146: }
147: }
148: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
149: PetscViewerFlush(viewer);
150: } else {
151: DSViewMat(ds,viewer,DS_MAT_A);
152: DSViewMat(ds,viewer,DS_MAT_B);
153: }
154: if (ds->state>DS_STATE_INTERMEDIATE) { DSViewMat(ds,viewer,DS_MAT_Q); }
155: return(0);
156: }
158: static PetscErrorCode DSVectors_GHIEP_Eigen_Some(DS ds,PetscInt *idx,PetscReal *rnorm)
159: {
161: PetscReal b[4],M[4],d1,d2,s1,s2,e;
162: PetscReal scal1,scal2,wr1,wr2,wi,ep,norm;
163: PetscScalar *Q,*X,Y[4],alpha,zeroS = 0.0;
164: PetscInt k;
165: PetscBLASInt two = 2,n_,ld,one=1;
166: #if !defined(PETSC_USE_COMPLEX)
167: PetscBLASInt four=4;
168: #endif
171: X = ds->mat[DS_MAT_X];
172: Q = ds->mat[DS_MAT_Q];
173: k = *idx;
174: PetscBLASIntCast(ds->n,&n_);
175: PetscBLASIntCast(ds->ld,&ld);
176: if (k < ds->n-1) e = (ds->compact)?*(ds->rmat[DS_MAT_T]+ld+k):PetscRealPart(*(ds->mat[DS_MAT_A]+(k+1)+ld*k));
177: else e = 0.0;
178: if (e == 0.0) { /* Real */
179: if (ds->state>=DS_STATE_CONDENSED) {
180: PetscArraycpy(X+k*ld,Q+k*ld,ld);
181: } else {
182: PetscArrayzero(X+k*ds->ld,ds->ld);
183: X[k+k*ds->ld] = 1.0;
184: }
185: if (rnorm) *rnorm = PetscAbsScalar(X[ds->n-1+k*ld]);
186: } else { /* 2x2 block */
187: if (ds->compact) {
188: s1 = *(ds->rmat[DS_MAT_D]+k);
189: d1 = *(ds->rmat[DS_MAT_T]+k);
190: s2 = *(ds->rmat[DS_MAT_D]+k+1);
191: d2 = *(ds->rmat[DS_MAT_T]+k+1);
192: } else {
193: s1 = PetscRealPart(*(ds->mat[DS_MAT_B]+k*ld+k));
194: d1 = PetscRealPart(*(ds->mat[DS_MAT_A]+k+k*ld));
195: s2 = PetscRealPart(*(ds->mat[DS_MAT_B]+(k+1)*ld+k+1));
196: d2 = PetscRealPart(*(ds->mat[DS_MAT_A]+k+1+(k+1)*ld));
197: }
198: M[0] = d1; M[1] = e; M[2] = e; M[3]= d2;
199: b[0] = s1; b[1] = 0.0; b[2] = 0.0; b[3] = s2;
200: ep = LAPACKlamch_("S");
201: /* Compute eigenvalues of the block */
202: PetscStackCallBLAS("LAPACKlag2",LAPACKlag2_(M,&two,b,&two,&ep,&scal1,&scal2,&wr1,&wr2,&wi));
203: if (wi==0.0) SETERRQ(PETSC_COMM_SELF,1,"Real block in DSVectors_GHIEP");
204: else { /* Complex eigenvalues */
205: if (scal1<ep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Nearly infinite eigenvalue");
206: wr1 /= scal1;
207: wi /= scal1;
208: #if !defined(PETSC_USE_COMPLEX)
209: if (SlepcAbs(s1*d1-wr1,wi)<SlepcAbs(s2*d2-wr1,wi)) {
210: Y[0] = wr1-s2*d2; Y[1] = s2*e; Y[2] = wi; Y[3] = 0.0;
211: } else {
212: Y[0] = s1*e; Y[1] = wr1-s1*d1; Y[2] = 0.0; Y[3] = wi;
213: }
214: norm = BLASnrm2_(&four,Y,&one);
215: norm = 1.0/norm;
216: if (ds->state >= DS_STATE_CONDENSED) {
217: alpha = norm;
218: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&two,&two,&alpha,ds->mat[DS_MAT_Q]+k*ld,&ld,Y,&two,&zeroS,X+k*ld,&ld));
219: if (rnorm) *rnorm = SlepcAbsEigenvalue(X[ds->n-1+k*ld],X[ds->n-1+(k+1)*ld]);
220: } else {
221: PetscArrayzero(X+k*ld,2*ld);
222: X[k*ld+k] = Y[0]*norm;
223: X[k*ld+k+1] = Y[1]*norm;
224: X[(k+1)*ld+k] = Y[2]*norm;
225: X[(k+1)*ld+k+1] = Y[3]*norm;
226: }
227: #else
228: if (SlepcAbs(s1*d1-wr1,wi)<SlepcAbs(s2*d2-wr1,wi)) {
229: Y[0] = PetscCMPLX(wr1-s2*d2,wi);
230: Y[1] = s2*e;
231: } else {
232: Y[0] = s1*e;
233: Y[1] = PetscCMPLX(wr1-s1*d1,wi);
234: }
235: norm = BLASnrm2_(&two,Y,&one);
236: norm = 1.0/norm;
237: if (ds->state >= DS_STATE_CONDENSED) {
238: alpha = norm;
239: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&two,&alpha,ds->mat[DS_MAT_Q]+k*ld,&ld,Y,&one,&zeroS,X+k*ld,&one));
240: if (rnorm) *rnorm = PetscAbsScalar(X[ds->n-1+k*ld]);
241: } else {
242: PetscArrayzero(X+k*ld,2*ld);
243: X[k*ld+k] = Y[0]*norm;
244: X[k*ld+k+1] = Y[1]*norm;
245: }
246: X[(k+1)*ld+k] = PetscConj(X[k*ld+k]);
247: X[(k+1)*ld+k+1] = PetscConj(X[k*ld+k+1]);
248: #endif
249: (*idx)++;
250: }
251: }
252: return(0);
253: }
255: PetscErrorCode DSVectors_GHIEP(DS ds,DSMatType mat,PetscInt *k,PetscReal *rnorm)
256: {
257: PetscInt i;
258: PetscReal e;
262: switch (mat) {
263: case DS_MAT_X:
264: case DS_MAT_Y:
265: if (k) {
266: DSVectors_GHIEP_Eigen_Some(ds,k,rnorm);
267: } else {
268: for (i=0; i<ds->n; i++) {
269: e = (ds->compact)?*(ds->rmat[DS_MAT_T]+ds->ld+i):PetscRealPart(*(ds->mat[DS_MAT_A]+(i+1)+ds->ld*i));
270: if (e == 0.0) { /* real */
271: if (ds->state >= DS_STATE_CONDENSED) {
272: PetscArraycpy(ds->mat[mat]+i*ds->ld,ds->mat[DS_MAT_Q]+i*ds->ld,ds->ld);
273: } else {
274: PetscArrayzero(ds->mat[mat]+i*ds->ld,ds->ld);
275: *(ds->mat[mat]+i+i*ds->ld) = 1.0;
276: }
277: } else {
278: DSVectors_GHIEP_Eigen_Some(ds,&i,rnorm);
279: }
280: }
281: }
282: break;
283: case DS_MAT_U:
284: case DS_MAT_VT:
285: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
286: break;
287: default:
288: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
289: }
290: return(0);
291: }
293: /*
294: Extract the eigenvalues contained in the block-diagonal of the indefinite problem.
295: Only the index range n0..n1 is processed.
296: */
297: PetscErrorCode DSGHIEPComplexEigs(DS ds,PetscInt n0,PetscInt n1,PetscScalar *wr,PetscScalar *wi)
298: {
299: PetscInt k,ld;
300: PetscBLASInt two=2;
301: PetscScalar *A,*B;
302: PetscReal *D,*T;
303: PetscReal b[4],M[4],d1,d2,s1,s2,e;
304: PetscReal scal1,scal2,ep,wr1,wr2,wi1;
307: ld = ds->ld;
308: A = ds->mat[DS_MAT_A];
309: B = ds->mat[DS_MAT_B];
310: D = ds->rmat[DS_MAT_D];
311: T = ds->rmat[DS_MAT_T];
312: for (k=n0;k<n1;k++) {
313: if (k < n1-1) e = (ds->compact)?T[ld+k]:PetscRealPart(A[(k+1)+ld*k]);
314: else e = 0.0;
315: if (e==0.0) { /* real eigenvalue */
316: wr[k] = (ds->compact)?T[k]/D[k]:A[k+k*ld]/B[k+k*ld];
317: #if !defined(PETSC_USE_COMPLEX)
318: wi[k] = 0.0 ;
319: #endif
320: } else { /* diagonal block */
321: if (ds->compact) {
322: s1 = D[k];
323: d1 = T[k];
324: s2 = D[k+1];
325: d2 = T[k+1];
326: } else {
327: s1 = PetscRealPart(B[k*ld+k]);
328: d1 = PetscRealPart(A[k+k*ld]);
329: s2 = PetscRealPart(B[(k+1)*ld+k+1]);
330: d2 = PetscRealPart(A[k+1+(k+1)*ld]);
331: }
332: M[0] = d1; M[1] = e; M[2] = e; M[3]= d2;
333: b[0] = s1; b[1] = 0.0; b[2] = 0.0; b[3] = s2;
334: ep = LAPACKlamch_("S");
335: /* Compute eigenvalues of the block */
336: PetscStackCallBLAS("LAPACKlag2",LAPACKlag2_(M,&two,b,&two,&ep,&scal1,&scal2,&wr1,&wr2,&wi1));
337: if (scal1<ep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Nearly infinite eigenvalue");
338: if (wi1==0.0) { /* Real eigenvalues */
339: if (scal2<ep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Nearly infinite eigenvalue");
340: wr[k] = wr1/scal1; wr[k+1] = wr2/scal2;
341: #if !defined(PETSC_USE_COMPLEX)
342: wi[k] = wi[k+1] = 0.0;
343: #endif
344: } else { /* Complex eigenvalues */
345: #if !defined(PETSC_USE_COMPLEX)
346: wr[k] = wr1/scal1;
347: wr[k+1] = wr[k];
348: wi[k] = wi1/scal1;
349: wi[k+1] = -wi[k];
350: #else
351: wr[k] = PetscCMPLX(wr1,wi1)/scal1;
352: wr[k+1] = PetscConj(wr[k]);
353: #endif
354: }
355: k++;
356: }
357: }
358: #if defined(PETSC_USE_COMPLEX)
359: if (wi) {
360: for (k=n0;k<n1;k++) wi[k] = 0.0;
361: }
362: #endif
363: return(0);
364: }
366: PetscErrorCode DSSort_GHIEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
367: {
369: PetscInt n,i,*perm;
370: PetscReal *d,*e,*s;
373: #if !defined(PETSC_USE_COMPLEX)
375: #endif
376: n = ds->n;
377: d = ds->rmat[DS_MAT_T];
378: e = d + ds->ld;
379: s = ds->rmat[DS_MAT_D];
380: DSAllocateWork_Private(ds,ds->ld,ds->ld,0);
381: perm = ds->perm;
382: if (!rr) {
383: rr = wr;
384: ri = wi;
385: }
386: DSSortEigenvalues_Private(ds,rr,ri,perm,PETSC_TRUE);
387: if (!ds->compact) { DSSwitchFormat_GHIEP(ds,PETSC_TRUE); }
388: PetscArraycpy(ds->work,wr,n);
389: for (i=ds->l;i<n;i++) wr[i] = *(ds->work+perm[i]);
390: #if !defined(PETSC_USE_COMPLEX)
391: PetscArraycpy(ds->work,wi,n);
392: for (i=ds->l;i<n;i++) wi[i] = *(ds->work+perm[i]);
393: #endif
394: PetscArraycpy(ds->rwork,s,n);
395: for (i=ds->l;i<n;i++) s[i] = *(ds->rwork+perm[i]);
396: PetscArraycpy(ds->rwork,d,n);
397: for (i=ds->l;i<n;i++) d[i] = *(ds->rwork+perm[i]);
398: PetscArraycpy(ds->rwork,e,n-1);
399: PetscArrayzero(e+ds->l,n-1-ds->l);
400: for (i=ds->l;i<n-1;i++) {
401: if (perm[i]<n-1) e[i] = *(ds->rwork+perm[i]);
402: }
403: if (!ds->compact) { DSSwitchFormat_GHIEP(ds,PETSC_FALSE); }
404: DSPermuteColumns_Private(ds,ds->l,n,DS_MAT_Q,perm);
405: return(0);
406: }
408: /*
409: Get eigenvectors with inverse iteration.
410: The system matrix is in Hessenberg form.
411: */
412: PetscErrorCode DSGHIEPInverseIteration(DS ds,PetscScalar *wr,PetscScalar *wi)
413: {
415: PetscInt i,off;
416: PetscBLASInt *select,*infoC,ld,n1,mout,info;
417: PetscScalar *A,*B,*H,*X;
418: PetscReal *s,*d,*e;
419: #if defined(PETSC_USE_COMPLEX)
420: PetscInt j;
421: #endif
424: PetscBLASIntCast(ds->ld,&ld);
425: PetscBLASIntCast(ds->n-ds->l,&n1);
426: DSAllocateWork_Private(ds,ld*ld+2*ld,ld,2*ld);
427: DSAllocateMat_Private(ds,DS_MAT_W);
428: A = ds->mat[DS_MAT_A];
429: B = ds->mat[DS_MAT_B];
430: H = ds->mat[DS_MAT_W];
431: s = ds->rmat[DS_MAT_D];
432: d = ds->rmat[DS_MAT_T];
433: e = d + ld;
434: select = ds->iwork;
435: infoC = ds->iwork + ld;
436: off = ds->l+ds->l*ld;
437: if (ds->compact) {
438: H[off] = d[ds->l]*s[ds->l];
439: H[off+ld] = e[ds->l]*s[ds->l];
440: for (i=ds->l+1;i<ds->n-1;i++) {
441: H[i+(i-1)*ld] = e[i-1]*s[i];
442: H[i+i*ld] = d[i]*s[i];
443: H[i+(i+1)*ld] = e[i]*s[i];
444: }
445: H[ds->n-1+(ds->n-2)*ld] = e[ds->n-2]*s[ds->n-1];
446: H[ds->n-1+(ds->n-1)*ld] = d[ds->n-1]*s[ds->n-1];
447: } else {
448: s[ds->l] = PetscRealPart(B[off]);
449: H[off] = A[off]*s[ds->l];
450: H[off+ld] = A[off+ld]*s[ds->l];
451: for (i=ds->l+1;i<ds->n-1;i++) {
452: s[i] = PetscRealPart(B[i+i*ld]);
453: H[i+(i-1)*ld] = A[i+(i-1)*ld]*s[i];
454: H[i+i*ld] = A[i+i*ld]*s[i];
455: H[i+(i+1)*ld] = A[i+(i+1)*ld]*s[i];
456: }
457: s[ds->n-1] = PetscRealPart(B[ds->n-1+(ds->n-1)*ld]);
458: H[ds->n-1+(ds->n-2)*ld] = A[ds->n-1+(ds->n-2)*ld]*s[ds->n-1];
459: H[ds->n-1+(ds->n-1)*ld] = A[ds->n-1+(ds->n-1)*ld]*s[ds->n-1];
460: }
461: DSAllocateMat_Private(ds,DS_MAT_X);
462: X = ds->mat[DS_MAT_X];
463: for (i=0;i<n1;i++) select[i] = 1;
464: #if !defined(PETSC_USE_COMPLEX)
465: PetscStackCallBLAS("LAPACKhsein",LAPACKhsein_("R","N","N",select,&n1,H+off,&ld,wr+ds->l,wi+ds->l,NULL,&ld,X+off,&ld,&n1,&mout,ds->work,NULL,infoC,&info));
466: #else
467: PetscStackCallBLAS("LAPACKhsein",LAPACKhsein_("R","N","N",select,&n1,H+off,&ld,wr+ds->l,NULL,&ld,X+off,&ld,&n1,&mout,ds->work,ds->rwork,NULL,infoC,&info));
469: /* Separate real and imaginary part of complex eigenvectors */
470: for (j=ds->l;j<ds->n;j++) {
471: if (PetscAbsReal(PetscImaginaryPart(wr[j])) > PetscAbsScalar(wr[j])*PETSC_SQRT_MACHINE_EPSILON) {
472: for (i=ds->l;i<ds->n;i++) {
473: X[i+(j+1)*ds->ld] = PetscImaginaryPart(X[i+j*ds->ld]);
474: X[i+j*ds->ld] = PetscRealPart(X[i+j*ds->ld]);
475: }
476: j++;
477: }
478: }
479: #endif
480: SlepcCheckLapackInfo("hsein",info);
481: DSGHIEPOrthogEigenv(ds,DS_MAT_X,wr,wi,PETSC_TRUE);
482: return(0);
483: }
485: /*
486: Undo 2x2 blocks that have real eigenvalues.
487: */
488: PetscErrorCode DSGHIEPRealBlocks(DS ds)
489: {
491: PetscInt i;
492: PetscReal e,d1,d2,s1,s2,ss1,ss2,t,dd,ss;
493: PetscReal maxy,ep,scal1,scal2,snorm;
494: PetscReal *T,*D,b[4],M[4],wr1,wr2,wi;
495: PetscScalar *A,*B,Y[4],oneS = 1.0,zeroS = 0.0;
496: PetscBLASInt m,two=2,ld;
497: PetscBool isreal;
500: PetscBLASIntCast(ds->ld,&ld);
501: PetscBLASIntCast(ds->n-ds->l,&m);
502: A = ds->mat[DS_MAT_A];
503: B = ds->mat[DS_MAT_B];
504: T = ds->rmat[DS_MAT_T];
505: D = ds->rmat[DS_MAT_D];
506: DSAllocateWork_Private(ds,2*m,0,0);
507: for (i=ds->l;i<ds->n-1;i++) {
508: e = (ds->compact)?T[ld+i]:PetscRealPart(A[(i+1)+ld*i]);
509: if (e != 0.0) { /* 2x2 block */
510: if (ds->compact) {
511: s1 = D[i];
512: d1 = T[i];
513: s2 = D[i+1];
514: d2 = T[i+1];
515: } else {
516: s1 = PetscRealPart(B[i*ld+i]);
517: d1 = PetscRealPart(A[i*ld+i]);
518: s2 = PetscRealPart(B[(i+1)*ld+i+1]);
519: d2 = PetscRealPart(A[(i+1)*ld+i+1]);
520: }
521: isreal = PETSC_FALSE;
522: if (s1==s2) { /* apply a Jacobi rotation to compute the eigendecomposition */
523: dd = d1-d2;
524: if (2*PetscAbsReal(e) <= dd) {
525: t = 2*e/dd;
526: t = t/(1 + PetscSqrtReal(1+t*t));
527: } else {
528: t = dd/(2*e);
529: ss = (t>=0)?1.0:-1.0;
530: t = ss/(PetscAbsReal(t)+PetscSqrtReal(1+t*t));
531: }
532: Y[0] = 1/PetscSqrtReal(1 + t*t); Y[3] = Y[0]; /* c */
533: Y[1] = Y[0]*t; Y[2] = -Y[1]; /* s */
534: wr1 = d1+t*e; wr2 = d2-t*e;
535: ss1 = s1; ss2 = s2;
536: isreal = PETSC_TRUE;
537: } else {
538: ss1 = 1.0; ss2 = 1.0,
539: M[0] = d1; M[1] = e; M[2] = e; M[3]= d2;
540: b[0] = s1; b[1] = 0.0; b[2] = 0.0; b[3] = s2;
541: ep = LAPACKlamch_("S");
543: /* Compute eigenvalues of the block */
544: PetscStackCallBLAS("LAPACKlag2",LAPACKlag2_(M,&two,b,&two,&ep,&scal1,&scal2,&wr1,&wr2,&wi));
545: if (wi==0.0) { /* Real eigenvalues */
546: isreal = PETSC_TRUE;
547: if (scal1<ep||scal2<ep) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Nearly infinite eigenvalue");
548: wr1 /= scal1;
549: wr2 /= scal2;
550: if (PetscAbsReal(s1*d1-wr1)<PetscAbsReal(s2*d2-wr1)) {
551: Y[0] = wr1-s2*d2;
552: Y[1] = s2*e;
553: } else {
554: Y[0] = s1*e;
555: Y[1] = wr1-s1*d1;
556: }
557: /* normalize with a signature*/
558: maxy = PetscMax(PetscAbsScalar(Y[0]),PetscAbsScalar(Y[1]));
559: scal1 = PetscRealPart(Y[0])/maxy;
560: scal2 = PetscRealPart(Y[1])/maxy;
561: snorm = scal1*scal1*s1 + scal2*scal2*s2;
562: if (snorm<0) { ss1 = -1.0; snorm = -snorm; }
563: snorm = maxy*PetscSqrtReal(snorm);
564: Y[0] = Y[0]/snorm;
565: Y[1] = Y[1]/snorm;
566: if (PetscAbsReal(s1*d1-wr2)<PetscAbsReal(s2*d2-wr2)) {
567: Y[2] = wr2-s2*d2;
568: Y[3] = s2*e;
569: } else {
570: Y[2] = s1*e;
571: Y[3] = wr2-s1*d1;
572: }
573: maxy = PetscMax(PetscAbsScalar(Y[2]),PetscAbsScalar(Y[3]));
574: scal1 = PetscRealPart(Y[2])/maxy;
575: scal2 = PetscRealPart(Y[3])/maxy;
576: snorm = scal1*scal1*s1 + scal2*scal2*s2;
577: if (snorm<0) { ss2 = -1.0; snorm = -snorm; }
578: snorm = maxy*PetscSqrtReal(snorm); Y[2] = Y[2]/snorm; Y[3] = Y[3]/snorm;
579: }
580: wr1 *= ss1; wr2 *= ss2;
581: }
582: if (isreal) {
583: if (ds->compact) {
584: D[i] = ss1;
585: T[i] = wr1;
586: D[i+1] = ss2;
587: T[i+1] = wr2;
588: T[ld+i] = 0.0;
589: } else {
590: B[i*ld+i] = ss1;
591: A[i*ld+i] = wr1;
592: B[(i+1)*ld+i+1] = ss2;
593: A[(i+1)*ld+i+1] = wr2;
594: A[(i+1)+ld*i] = 0.0;
595: A[i+ld*(i+1)] = 0.0;
596: }
597: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&m,&two,&two,&oneS,ds->mat[DS_MAT_Q]+ds->l+i*ld,&ld,Y,&two,&zeroS,ds->work,&m));
598: PetscArraycpy(ds->mat[DS_MAT_Q]+ds->l+i*ld,ds->work,m);
599: PetscArraycpy(ds->mat[DS_MAT_Q]+ds->l+(i+1)*ld,ds->work+m,m);
600: }
601: i++;
602: }
603: }
604: return(0);
605: }
607: PetscErrorCode DSSolve_GHIEP_QR_II(DS ds,PetscScalar *wr,PetscScalar *wi)
608: {
610: PetscInt i,off;
611: PetscBLASInt n1,ld,one,info,lwork;
612: PetscScalar *H,*A,*B,*Q;
613: PetscReal *d,*e,*s;
614: #if defined(PETSC_USE_COMPLEX)
615: PetscInt j;
616: #endif
619: #if !defined(PETSC_USE_COMPLEX)
621: #endif
622: one = 1;
623: PetscBLASIntCast(ds->n-ds->l,&n1);
624: PetscBLASIntCast(ds->ld,&ld);
625: off = ds->l + ds->l*ld;
626: A = ds->mat[DS_MAT_A];
627: B = ds->mat[DS_MAT_B];
628: Q = ds->mat[DS_MAT_Q];
629: d = ds->rmat[DS_MAT_T];
630: e = ds->rmat[DS_MAT_T] + ld;
631: s = ds->rmat[DS_MAT_D];
632: DSAllocateWork_Private(ds,ld*ld,2*ld,ld*2);
633: lwork = ld*ld;
635: /* Quick return if possible */
636: if (n1 == 1) {
637: for (i=0;i<=ds->l;i++) Q[i+i*ld] = 1.0;
638: DSGHIEPComplexEigs(ds,0,ds->l,wr,wi);
639: if (!ds->compact) {
640: d[ds->l] = PetscRealPart(A[off]);
641: s[ds->l] = PetscRealPart(B[off]);
642: }
643: wr[ds->l] = d[ds->l]/s[ds->l];
644: if (wi) wi[ds->l] = 0.0;
645: return(0);
646: }
647: /* Reduce to pseudotriadiagonal form */
648: DSIntermediate_GHIEP(ds);
650: /* Compute Eigenvalues (QR)*/
651: DSAllocateMat_Private(ds,DS_MAT_W);
652: H = ds->mat[DS_MAT_W];
653: if (ds->compact) {
654: H[off] = d[ds->l]*s[ds->l];
655: H[off+ld] = e[ds->l]*s[ds->l];
656: for (i=ds->l+1;i<ds->n-1;i++) {
657: H[i+(i-1)*ld] = e[i-1]*s[i];
658: H[i+i*ld] = d[i]*s[i];
659: H[i+(i+1)*ld] = e[i]*s[i];
660: }
661: H[ds->n-1+(ds->n-2)*ld] = e[ds->n-2]*s[ds->n-1];
662: H[ds->n-1+(ds->n-1)*ld] = d[ds->n-1]*s[ds->n-1];
663: } else {
664: s[ds->l] = PetscRealPart(B[off]);
665: H[off] = A[off]*s[ds->l];
666: H[off+ld] = A[off+ld]*s[ds->l];
667: for (i=ds->l+1;i<ds->n-1;i++) {
668: s[i] = PetscRealPart(B[i+i*ld]);
669: H[i+(i-1)*ld] = A[i+(i-1)*ld]*s[i];
670: H[i+i*ld] = A[i+i*ld]*s[i];
671: H[i+(i+1)*ld] = A[i+(i+1)*ld]*s[i];
672: }
673: s[ds->n-1] = PetscRealPart(B[ds->n-1+(ds->n-1)*ld]);
674: H[ds->n-1+(ds->n-2)*ld] = A[ds->n-1+(ds->n-2)*ld]*s[ds->n-1];
675: H[ds->n-1+(ds->n-1)*ld] = A[ds->n-1+(ds->n-1)*ld]*s[ds->n-1];
676: }
678: #if !defined(PETSC_USE_COMPLEX)
679: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("E","N",&n1,&one,&n1,H+off,&ld,wr+ds->l,wi+ds->l,NULL,&ld,ds->work,&lwork,&info));
680: #else
681: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("E","N",&n1,&one,&n1,H+off,&ld,wr+ds->l,NULL,&ld,ds->work,&lwork,&info));
682: for (i=ds->l;i<ds->n;i++) if (PetscAbsReal(PetscImaginaryPart(wr[i]))<10*PETSC_MACHINE_EPSILON) wr[i] = PetscRealPart(wr[i]);
683: /* Sort to have consecutive conjugate pairs */
684: for (i=ds->l;i<ds->n;i++) {
685: j=i+1;
686: while (j<ds->n && (PetscAbsScalar(wr[i]-PetscConj(wr[j]))>PetscAbsScalar(wr[i])*PETSC_SQRT_MACHINE_EPSILON)) j++;
687: if (j==ds->n) {
688: if (PetscAbsReal(PetscImaginaryPart(wr[i]))<PetscAbsScalar(wr[i])*PETSC_SQRT_MACHINE_EPSILON) wr[i]=PetscRealPart(wr[i]);
689: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Found complex without conjugate pair");
690: } else { /* complex eigenvalue */
691: wr[j] = wr[i+1];
692: if (PetscImaginaryPart(wr[i])<0) wr[i] = PetscConj(wr[i]);
693: wr[i+1] = PetscConj(wr[i]);
694: i++;
695: }
696: }
697: #endif
698: SlepcCheckLapackInfo("hseqr",info);
699: /* Compute Eigenvectors with Inverse Iteration */
700: DSGHIEPInverseIteration(ds,wr,wi);
702: /* Recover eigenvalues from diagonal */
703: DSGHIEPComplexEigs(ds,0,ds->l,wr,wi);
704: #if defined(PETSC_USE_COMPLEX)
705: if (wi) {
706: for (i=ds->l;i<ds->n;i++) wi[i] = 0.0;
707: }
708: #endif
709: return(0);
710: }
712: PetscErrorCode DSSolve_GHIEP_QR(DS ds,PetscScalar *wr,PetscScalar *wi)
713: {
715: PetscInt i,j,off,nwu=0,n,lw,lwr,nwru=0;
716: PetscBLASInt n_,ld,info,lwork,ilo,ihi;
717: PetscScalar *H,*A,*B,*Q,*X;
718: PetscReal *d,*s,*scale,nrm,*rcde,*rcdv;
719: #if defined(PETSC_USE_COMPLEX)
720: PetscInt k;
721: #endif
724: #if !defined(PETSC_USE_COMPLEX)
726: #endif
727: n = ds->n-ds->l;
728: PetscBLASIntCast(n,&n_);
729: PetscBLASIntCast(ds->ld,&ld);
730: off = ds->l + ds->l*ld;
731: A = ds->mat[DS_MAT_A];
732: B = ds->mat[DS_MAT_B];
733: Q = ds->mat[DS_MAT_Q];
734: d = ds->rmat[DS_MAT_T];
735: s = ds->rmat[DS_MAT_D];
736: lw = 14*ld+ld*ld;
737: lwr = 7*ld;
738: DSAllocateWork_Private(ds,lw,lwr,0);
739: scale = ds->rwork+nwru;
740: nwru += ld;
741: rcde = ds->rwork+nwru;
742: nwru += ld;
743: rcdv = ds->rwork+nwru;
744: nwru += ld;
745: /* Quick return if possible */
746: if (n_ == 1) {
747: for (i=0;i<=ds->l;i++) Q[i+i*ld] = 1.0;
748: DSGHIEPComplexEigs(ds,0,ds->l,wr,wi);
749: if (!ds->compact) {
750: d[ds->l] = PetscRealPart(A[off]);
751: s[ds->l] = PetscRealPart(B[off]);
752: }
753: wr[ds->l] = d[ds->l]/s[ds->l];
754: if (wi) wi[ds->l] = 0.0;
755: return(0);
756: }
758: /* Form pseudo-symmetric matrix */
759: H = ds->work+nwu;
760: nwu += n*n;
761: PetscArrayzero(H,n*n);
762: if (ds->compact) {
763: for (i=0;i<n-1;i++) {
764: H[i+i*n] = s[ds->l+i]*d[ds->l+i];
765: H[i+1+i*n] = s[ds->l+i+1]*d[ld+ds->l+i];
766: H[i+(i+1)*n] = s[ds->l+i]*d[ld+ds->l+i];
767: }
768: H[n-1+(n-1)*n] = s[ds->l+n-1]*d[ds->l+n-1];
769: for (i=0;i<ds->k-ds->l;i++) {
770: H[ds->k-ds->l+i*n] = s[ds->k]*d[2*ld+ds->l+i];
771: H[i+(ds->k-ds->l)*n] = s[i+ds->l]*d[2*ld+ds->l+i];
772: }
773: } else {
774: for (j=0;j<n;j++) {
775: for (i=0;i<n;i++) H[i+j*n] = B[off+i+i*ld]*A[off+i+j*ld];
776: }
777: }
779: /* Compute eigenpairs */
780: PetscBLASIntCast(lw-nwu,&lwork);
781: DSAllocateMat_Private(ds,DS_MAT_X);
782: X = ds->mat[DS_MAT_X];
783: #if !defined(PETSC_USE_COMPLEX)
784: PetscStackCallBLAS("LAPACKgeevx",LAPACKgeevx_("B","N","V","N",&n_,H,&n_,wr+ds->l,wi+ds->l,NULL,&ld,X+off,&ld,&ilo,&ihi,scale,&nrm,rcde,rcdv,ds->work+nwu,&lwork,NULL,&info));
785: #else
786: PetscStackCallBLAS("LAPACKgeevx",LAPACKgeevx_("B","N","V","N",&n_,H,&n_,wr+ds->l,NULL,&ld,X+off,&ld,&ilo,&ihi,scale,&nrm,rcde,rcdv,ds->work+nwu,&lwork,ds->rwork+nwru,&info));
788: /* Sort to have consecutive conjugate pairs
789: Separate real and imaginary part of complex eigenvectors*/
790: for (i=ds->l;i<ds->n;i++) {
791: j=i+1;
792: while (j<ds->n && (PetscAbsScalar(wr[i]-PetscConj(wr[j]))>PetscAbsScalar(wr[i])*PETSC_SQRT_MACHINE_EPSILON)) j++;
793: if (j==ds->n) {
794: if (PetscAbsReal(PetscImaginaryPart(wr[i]))<PetscAbsScalar(wr[i])*PETSC_SQRT_MACHINE_EPSILON) {
795: wr[i]=PetscRealPart(wr[i]); /* real eigenvalue */
796: for (k=ds->l;k<ds->n;k++) {
797: X[k+i*ds->ld] = PetscRealPart(X[k+i*ds->ld]);
798: }
799: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Found complex without conjugate pair");
800: } else { /* complex eigenvalue */
801: if (j!=i+1) {
802: wr[j] = wr[i+1];
803: PetscArraycpy(X+j*ds->ld,X+(i+1)*ds->ld,ds->ld);
804: }
805: if (PetscImaginaryPart(wr[i])<0) {
806: wr[i] = PetscConj(wr[i]);
807: for (k=ds->l;k<ds->n;k++) {
808: X[k+(i+1)*ds->ld] = -PetscImaginaryPart(X[k+i*ds->ld]);
809: X[k+i*ds->ld] = PetscRealPart(X[k+i*ds->ld]);
810: }
811: } else {
812: for (k=ds->l;k<ds->n;k++) {
813: X[k+(i+1)*ds->ld] = PetscImaginaryPart(X[k+i*ds->ld]);
814: X[k+i*ds->ld] = PetscRealPart(X[k+i*ds->ld]);
815: }
816: }
817: wr[i+1] = PetscConj(wr[i]);
818: i++;
819: }
820: }
821: #endif
822: SlepcCheckLapackInfo("geevx",info);
824: /* Compute real s-orthonormal basis */
825: DSGHIEPOrthogEigenv(ds,DS_MAT_X,wr,wi,PETSC_FALSE);
827: /* Recover eigenvalues from diagonal */
828: DSGHIEPComplexEigs(ds,0,ds->l,wr,wi);
829: #if defined(PETSC_USE_COMPLEX)
830: if (wi) {
831: for (i=ds->l;i<ds->n;i++) wi[i] = 0.0;
832: }
833: #endif
834: return(0);
835: }
837: PetscErrorCode DSSynchronize_GHIEP(DS ds,PetscScalar eigr[],PetscScalar eigi[])
838: {
840: PetscInt ld=ds->ld,l=ds->l,k=0,kr=0;
841: PetscMPIInt n,rank,off=0,size,ldn,ld3,ld_;
844: if (ds->compact) kr = 4*ld;
845: else k = 2*(ds->n-l)*ld;
846: if (ds->state>DS_STATE_RAW) k += (ds->n-l)*ld;
847: if (eigr) k += (ds->n-l);
848: if (eigi) k += (ds->n-l);
849: DSAllocateWork_Private(ds,k+kr,0,0);
850: PetscMPIIntCast(k*sizeof(PetscScalar)+kr*sizeof(PetscReal),&size);
851: PetscMPIIntCast(ds->n-l,&n);
852: PetscMPIIntCast(ld*(ds->n-l),&ldn);
853: PetscMPIIntCast(ld*3,&ld3);
854: PetscMPIIntCast(ld,&ld_);
855: MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank);
856: if (!rank) {
857: if (ds->compact) {
858: MPI_Pack(ds->rmat[DS_MAT_T],ld3,MPIU_REAL,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
859: MPI_Pack(ds->rmat[DS_MAT_D],ld_,MPIU_REAL,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
860: } else {
861: MPI_Pack(ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
862: MPI_Pack(ds->mat[DS_MAT_B]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
863: }
864: if (ds->state>DS_STATE_RAW) {
865: MPI_Pack(ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
866: }
867: if (eigr) {
868: MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
869: }
870: if (eigi) {
871: MPI_Pack(eigi+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
872: }
873: }
874: MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds));
875: if (rank) {
876: if (ds->compact) {
877: MPI_Unpack(ds->work,size,&off,ds->rmat[DS_MAT_T],ld3,MPIU_REAL,PetscObjectComm((PetscObject)ds));
878: MPI_Unpack(ds->work,size,&off,ds->rmat[DS_MAT_D],ld_,MPIU_REAL,PetscObjectComm((PetscObject)ds));
879: } else {
880: MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
881: MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_B]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
882: }
883: if (ds->state>DS_STATE_RAW) {
884: MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
885: }
886: if (eigr) {
887: MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
888: }
889: if (eigi) {
890: MPI_Unpack(ds->work,size,&off,eigi+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
891: }
892: }
893: return(0);
894: }
896: PetscErrorCode DSHermitian_GHIEP(DS ds,DSMatType m,PetscBool *flg)
897: {
899: if (m==DS_MAT_A || m==DS_MAT_B) *flg = PETSC_TRUE;
900: else *flg = PETSC_FALSE;
901: return(0);
902: }
904: SLEPC_EXTERN PetscErrorCode DSCreate_GHIEP(DS ds)
905: {
907: ds->ops->allocate = DSAllocate_GHIEP;
908: ds->ops->view = DSView_GHIEP;
909: ds->ops->vectors = DSVectors_GHIEP;
910: ds->ops->solve[0] = DSSolve_GHIEP_QR_II;
911: ds->ops->solve[1] = DSSolve_GHIEP_HZ;
912: ds->ops->solve[2] = DSSolve_GHIEP_QR;
913: ds->ops->sort = DSSort_GHIEP;
914: ds->ops->synchronize = DSSynchronize_GHIEP;
915: ds->ops->hermitian = DSHermitian_GHIEP;
916: return(0);
917: }