PetscInt main(PetscInt argc,char **args) { Mat A,A_dense,B; Vec *evecs; PetscBool flg,TestZHEEV=PETSC_TRUE,TestZHEEVX=PETSC_FALSE,TestZHEGV=PETSC_FALSE,TestZHEGVX=PETSC_FALSE; PetscErrorCode ierr; PetscBool isSymmetric; PetscScalar sigma,*arrayA,*arrayB,*evecs_array=NULL,*work; PetscReal *evals,*rwork; PetscMPIInt size; PetscInt m,i,j,nevs,il,iu,cklvl=2; PetscReal vl,vu,abstol=1.e-8; PetscBLASInt *iwork,*ifail,lwork,lierr,bn; PetscReal tols[2]; PetscInt nzeros[2],nz; PetscReal ratio; PetscScalar v,none = -1.0,sigma2,pfive = 0.5,*xa; PetscRandom rctx; PetscReal h2,sigma1 = 100.0; PetscInt dim,Ii,J,Istart,Iend,n = 6,its,use_random,one=1; PetscInitialize(&argc,&args,(char*)0,help); #if !defined(PETSC_USE_COMPLEX) SETERRQ(PETSC_COMM_WORLD,1,"This example requires complex numbers"); #endif ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); if (size != 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This is a uniprocessor example only!"); ierr = PetscOptionsHasName(NULL, "-test_zheevx", &flg);CHKERRQ(ierr); if (flg) { TestZHEEV = PETSC_FALSE; TestZHEEVX = PETSC_TRUE; } ierr = PetscOptionsHasName(NULL, "-test_zhegv", &flg);CHKERRQ(ierr); if (flg) { TestZHEEV = PETSC_FALSE; TestZHEGV = PETSC_TRUE; } ierr = PetscOptionsHasName(NULL, "-test_zhegvx", &flg);CHKERRQ(ierr); if (flg) { TestZHEEV = PETSC_FALSE; TestZHEGVX = PETSC_TRUE; } ierr = PetscOptionsGetReal(NULL,"-sigma1",&sigma1,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,"-n",&n,NULL);CHKERRQ(ierr); dim = n*n; ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr); ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,dim,dim);CHKERRQ(ierr); ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr); ierr = MatSetFromOptions(A);CHKERRQ(ierr); ierr = PetscOptionsHasName(NULL,"-norandom",&flg);CHKERRQ(ierr); if (flg) use_random = 0; else use_random = 1; if (use_random) { ierr = PetscRandomCreate(PETSC_COMM_SELF,&rctx);CHKERRQ(ierr); ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr); ierr = PetscRandomSetInterval(rctx,0.0,PETSC_i);CHKERRQ(ierr); } else { sigma2 = 10.0*PETSC_i; } h2 = 1.0/((n+1)*(n+1)); for (Ii=0; Ii<dim; Ii++) { v = -1.0; i = Ii/n; j = Ii - i*n; if (i>0) { J = Ii-n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr); } if (i<n-1) { J = Ii+n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr); } if (j>0) { J = Ii-1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr); } if (j<n-1) { J = Ii+1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr); } if (use_random) {ierr = PetscRandomGetValue(rctx,&sigma2);CHKERRQ(ierr);} v = 4.0 - sigma1*h2; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,ADD_VALUES);CHKERRQ(ierr); } /* make A complex Hermitian */ v = sigma2*h2; Ii = 0; J = 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr); v = -sigma2*h2; ierr = MatSetValues(A,1,&J,1,&Ii,&v,ADD_VALUES);CHKERRQ(ierr); if (use_random) {ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);} ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); m = n = dim; /* Check whether A is symmetric */ ierr = PetscOptionsHasName(NULL, "-check_symmetry", &flg);CHKERRQ(ierr); if (flg) { Mat Trans; ierr = MatTranspose(A,MAT_INITIAL_MATRIX, &Trans); ierr = MatEqual(A, Trans, &isSymmetric); if (!isSymmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"A must be symmetric"); ierr = MatDestroy(&Trans);CHKERRQ(ierr); } /* Convert aij matrix to MatSeqDense for LAPACK */ ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr); if (flg) { ierr = MatDuplicate(A,MAT_COPY_VALUES,&A_dense);CHKERRQ(ierr); } else { ierr = MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&A_dense);CHKERRQ(ierr); } ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,dim,dim);CHKERRQ(ierr); ierr = MatSetType(B,MATSEQDENSE);CHKERRQ(ierr); ierr = MatSetFromOptions(B);CHKERRQ(ierr); v = 1.0; for (Ii=0; Ii<dim; Ii++) { ierr = MatSetValues(B,1,&Ii,1,&Ii,&v,ADD_VALUES);CHKERRQ(ierr); } /* Solve standard eigenvalue problem: A*x = lambda*x */ /*===================================================*/ ierr = PetscBLASIntCast(2*n,&lwork);CHKERRQ(ierr); ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr); ierr = PetscMalloc(n*sizeof(PetscReal),&evals);CHKERRQ(ierr); ierr = PetscMalloc(lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); ierr = MatDenseGetArray(A_dense,&arrayA);CHKERRQ(ierr); if (TestZHEEV) { /* test zheev() */ printf(" LAPACKsyev: compute all %d eigensolutions...\n",m); ierr = PetscMalloc((3*n-2)*sizeof(PetscReal),&rwork);CHKERRQ(ierr); LAPACKsyev_("V","U",&bn,arrayA,&bn,evals,work,&lwork,rwork,&lierr); ierr = PetscFree(rwork);CHKERRQ(ierr); evecs_array = arrayA; nevs = m; il =1; iu=m; } if (TestZHEEVX) { il = 1; ierr = PetscBLASIntCast((0.2*m),&iu);CHKERRQ(ierr); printf(" LAPACKsyevx: compute %d to %d-th eigensolutions...\n",il,iu); ierr = PetscMalloc((m*n+1)*sizeof(PetscScalar),&evecs_array);CHKERRQ(ierr); ierr = PetscMalloc((7*n+1)*sizeof(PetscReal),&rwork);CHKERRQ(ierr); ierr = PetscMalloc((5*n+1)*sizeof(PetscBLASInt),&iwork);CHKERRQ(ierr); ierr = PetscMalloc((n+1)*sizeof(PetscBLASInt),&ifail);CHKERRQ(ierr); /* in the case "I", vl and vu are not referenced */ vl = 0.0; vu = 8.0; LAPACKsyevx_("V","I","U",&bn,arrayA,&bn,&vl,&vu,&il,&iu,&abstol,&nevs,evals,evecs_array,&n,work,&lwork,rwork,iwork,ifail,&lierr); ierr = PetscFree(iwork);CHKERRQ(ierr); ierr = PetscFree(ifail);CHKERRQ(ierr); ierr = PetscFree(rwork);CHKERRQ(ierr); } if (TestZHEGV) { printf(" LAPACKsygv: compute all %d eigensolutions...\n",m); ierr = PetscMalloc((3*n+1)*sizeof(PetscReal),&rwork);CHKERRQ(ierr); ierr = MatDenseGetArray(B,&arrayB);CHKERRQ(ierr); LAPACKsygv_(&one,"V","U",&bn,arrayA,&bn,arrayB,&bn,evals,work,&lwork,rwork,&lierr); evecs_array = arrayA; nevs = m; il = 1; iu=m; ierr = MatDenseRestoreArray(B,&arrayB);CHKERRQ(ierr); ierr = PetscFree(rwork);CHKERRQ(ierr); } if (TestZHEGVX) { il = 1; ierr = PetscBLASIntCast((0.2*m),&iu);CHKERRQ(ierr); printf(" LAPACKsygv: compute %d to %d-th eigensolutions...\n",il,iu); ierr = PetscMalloc((m*n+1)*sizeof(PetscScalar),&evecs_array);CHKERRQ(ierr); ierr = PetscMalloc((6*n+1)*sizeof(PetscBLASInt),&iwork);CHKERRQ(ierr); ifail = iwork + 5*n; ierr = PetscMalloc((7*n+1)*sizeof(PetscReal),&rwork);CHKERRQ(ierr); ierr = MatDenseGetArray(B,&arrayB);CHKERRQ(ierr); vl = 0.0; vu = 8.0; LAPACKsygvx_(&one,"V","I","U",&bn,arrayA,&bn,arrayB,&bn,&vl,&vu,&il,&iu,&abstol,&nevs,evals,evecs_array,&n,work,&lwork,rwork,iwork,ifail,&lierr); ierr = MatDenseRestoreArray(B,&arrayB);CHKERRQ(ierr); ierr = PetscFree(iwork);CHKERRQ(ierr); ierr = PetscFree(rwork);CHKERRQ(ierr); } ierr = MatDenseRestoreArray(A_dense,&arrayA);CHKERRQ(ierr); if (nevs <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED, "nev=%d, no eigensolution has found", nevs); /* View evals */ ierr = PetscOptionsHasName(NULL, "-eig_view", &flg);CHKERRQ(ierr); if (flg) { printf(" %d evals: \n",nevs); for (i=0; i<nevs; i++) printf("%d %G\n",i+il,evals[i]); } /* Check residuals and orthogonality */ ierr = PetscMalloc((nevs+1)*sizeof(Vec),&evecs);CHKERRQ(ierr); for (i=0; i<nevs; i++) { ierr = VecCreate(PETSC_COMM_SELF,&evecs[i]);CHKERRQ(ierr); ierr = VecSetSizes(evecs[i],PETSC_DECIDE,n);CHKERRQ(ierr); ierr = VecSetFromOptions(evecs[i]);CHKERRQ(ierr); ierr = VecPlaceArray(evecs[i],evecs_array+i*n);CHKERRQ(ierr); } tols[0] = 1.e-8; tols[1] = 1.e-8; ierr = CkEigenSolutions(cklvl,A,il-1,iu-1,evals,evecs,tols);CHKERRQ(ierr); for (i=0; i<nevs; i++) { ierr = VecDestroy(&evecs[i]);CHKERRQ(ierr);} ierr = PetscFree(evecs);CHKERRQ(ierr); /* Free work space. */ if (TestZHEEVX || TestZHEGVX) { ierr = PetscFree(evecs_array);CHKERRQ(ierr); } ierr = PetscFree(evals);CHKERRQ(ierr); ierr = PetscFree(work);CHKERRQ(ierr); ierr = MatDestroy(&A_dense);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = MatDestroy(&B);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
int getQ1(Mat *pA){ int i,j; PetscInt m,n; PetscBLASInt M,N,K,lda,ldwork,info; PetscScalar *tau,*work; PetscInt worksize; PetscErrorCode ierr; ierr = MatGetSize(*pA,&m,&n);CHKERRQ(ierr); PetscBLASIntCast(m,&M); PetscBLASIntCast(n,&N); worksize=m; PetscBLASIntCast(worksize,&ldwork); PetscMalloc1(m, &tau);//worksize,&work); PetscMalloc1(worksize,&work); K = N; /*full rank*/ lda = M ; //N - row domain M - col domain //ierr = PetscPrintf (PETSC_COMM_SELF,"L72\n");CHKERRQ(ierr); //ierr = MatView(*pA,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); //ierr = PetscPrintf (PETSC_COMM_SELF,"L74\n");CHKERRQ(ierr); PetscScalar *v;//[4]={0}; PetscInt *Is; //[4]={0,1,2,3}; PetscInt nC;// = 4; PetscReal arr[10*4]; for(i=0; i<10; i++){ ierr = MatGetRow(*pA,i,&nC,(const PetscInt **)&Is,(const PetscScalar **)&v); CHKERRQ(ierr); //ierr = PetscPrintf (PETSC_COMM_SELF,"get row %d %d\n", i, nC);CHKERRQ(ierr); for(j=0; j<nC; j++){ arr[i+10*Is[j]]=v[j]; } ierr = MatRestoreRow(*pA,i,&nC,(const PetscInt**)&Is,(const PetscScalar**)&v); CHKERRQ(ierr); } //ierr = PetscPrintf (PETSC_COMM_SELF,"new L72\n");CHKERRQ(ierr); //ierr = MatView(*pA,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); //ierr = PetscPrintf (PETSC_COMM_SELF,"new L74\n");CHKERRQ(ierr); //for(i=0;i<40; i++){ // ierr = PetscPrintf (PETSC_COMM_SELF,"arr[%d] = %f\n",i,arr[i]);CHKERRQ(ierr); //} /* Do QR */ PetscFPTrapPush(PETSC_FP_TRAP_OFF); LAPACKgeqrf_(&M,&N,arr,&lda,tau,work,&ldwork,&info); PetscFPTrapPop(); if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"xGEQRF error"); /*Extract an explicit representation of Q */ //PetscMemcpy(Q,A,mstride*n*sizeof(PetscScalar)); LAPACKungqr_(&M,&N,&K,arr,&lda,tau,work,&ldwork,&info); if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"xORGQR/xUNGQR error"); //ierr = PetscPrintf (PETSC_COMM_SELF,"\nWe store Q1 in arr:\n");CHKERRQ(ierr); //for(i=0;i<40; i++){ // ierr = PetscPrintf (PETSC_COMM_SELF,"arr[%d] = %f\n",i,arr[i]);CHKERRQ(ierr); //} for (i=0; i<10; i++) { for(j=0; j<4; j++) { ierr = MatSetValues(*pA,1,&i,1,&j,&arr[i+j*10],INSERT_VALUES);CHKERRQ(ierr); } } ierr = MatAssemblyBegin(*pA,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(*pA,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); //ierr = PetscPrintf (PETSC_COMM_SELF,"\nThe Q1 we are going to return is\n");CHKERRQ(ierr); //ierr = MatView(*pA,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); //ierr = PetscFree(arr);CHKERRQ(ierr); return 0; }
PetscErrorCode DSFunction_EXP_NHEP_PADE(DS ds) { #if defined(PETSC_MISSING_LAPACK_GESV) || defined(SLEPC_MISSING_LAPACK_LANGE) PetscFunctionBegin; SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESV/LANGE - Lapack routines are unavailable"); #else PetscErrorCode ierr; PetscBLASInt n,ld,ld2,*ipiv,info,inc=1; PetscInt j,k,odd; const PetscInt p=MAX_PADE; PetscReal c[MAX_PADE+1],s; PetscScalar scale,mone=-1.0,one=1.0,two=2.0,zero=0.0; PetscScalar *A,*A2,*Q,*P,*W,*aux; PetscFunctionBegin; ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr); ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr); ld2 = ld*ld; ierr = DSAllocateWork_Private(ds,0,ld,ld);CHKERRQ(ierr); ipiv = ds->iwork; if (!ds->mat[DS_MAT_W]) { ierr = DSAllocateMat_Private(ds,DS_MAT_W);CHKERRQ(ierr); } if (!ds->mat[DS_MAT_Z]) { ierr = DSAllocateMat_Private(ds,DS_MAT_Z);CHKERRQ(ierr); } A = ds->mat[DS_MAT_A]; A2 = ds->mat[DS_MAT_Z]; Q = ds->mat[DS_MAT_Q]; P = ds->mat[DS_MAT_F]; W = ds->mat[DS_MAT_W]; /* Pade' coefficients */ c[0] = 1.0; for (k=1;k<=p;k++) { c[k] = c[k-1]*(p+1-k)/(k*(2*p+1-k)); } /* Scaling */ s = LAPACKlange_("I",&n,&n,A,&ld,ds->rwork); if (s>0.5) { s = PetscMax(0,(int)(PetscLogReal(s)/PetscLogReal(2.0)) + 2); scale = PetscPowReal(2.0,(-1)*s); PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&scale,A,&inc)); } /* Horner evaluation */ PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,A,&ld,A,&ld,&zero,A2,&ld)); ierr = PetscMemzero(Q,ld*ld*sizeof(PetscScalar));CHKERRQ(ierr); ierr = PetscMemzero(P,ld*ld*sizeof(PetscScalar));CHKERRQ(ierr); for (j=0;j<n;j++) { Q[j+j*ld] = c[p]; P[j+j*ld] = c[p-1]; } odd = 1; for (k=p-1;k>0;k--) { if (odd==1) { PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,A2,&ld,&zero,W,&ld)); aux = Q; Q = W; W = aux; for (j=0;j<n;j++) Q[j+j*ld] = Q[j+j*ld] + c[k-1]; } else { PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,A2,&ld,&zero,W,&ld)); aux = P; P = W; W = aux; for (j=0;j<n;j++) P[j+j*ld] = P[j+j*ld] + c[k-1]; } odd = 1-odd; } if (odd==1) { PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,A,&ld,&zero,W,&ld)); aux = Q; Q = W; W = aux; PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&ld2,&mone,P,&inc,Q,&inc)); PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Q,&ld,ipiv,P,&ld,&info)); PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&two,P,&inc)); for (j=0;j<n;j++) P[j+j*ld] = P[j+j*ld] + 1.0; PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&mone,P,&inc)); } else { PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,A,&ld,&zero,W,&ld)); aux = P; P = W; W = aux; PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&ld2,&mone,P,&inc,Q,&inc)); PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Q,&ld,ipiv,P,&ld,&info)); PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&two,P,&inc)); for (j=0;j<n;j++) P[j+j*ld] = P[j+j*ld] + 1.0; } for (k=1;k<=s;k++) { PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,P,&ld,&zero,W,&ld)); ierr = PetscMemcpy(P,W,ld2*sizeof(PetscScalar));CHKERRQ(ierr); } if (P!=ds->mat[DS_MAT_F]) { ierr = PetscMemcpy(ds->mat[DS_MAT_F],P,ld2*sizeof(PetscScalar));CHKERRQ(ierr); } PetscFunctionReturn(0); #endif }
PetscErrorCode KSPComputeEigenvalues_GMRES(KSP ksp,PetscInt nmax,PetscReal *r,PetscReal *c,PetscInt *neig) { #if defined(PETSC_HAVE_ESSL) KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; PetscErrorCode ierr; PetscInt n = gmres->it + 1,N = gmres->max_k + 1; PetscInt i,*perm; PetscScalar *R = gmres->Rsvd; PetscScalar *cwork = R + N*N,sdummy; PetscReal *work,*realpart = gmres->Dsvd ; PetscBLASInt zero = 0,bn,bN,idummy,lwork; PetscFunctionBegin; bn = PetscBLASIntCast(n); bN = PetscBLASIntCast(N); idummy = -1; /* unused */ lwork = PetscBLASIntCast(5*N); if (nmax < n) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues"); *neig = n; if (!n) { PetscFunctionReturn(0); } /* copy R matrix to work space */ ierr = PetscMemcpy(R,gmres->hes_origin,N*N*sizeof(PetscScalar)); CHKERRQ(ierr); /* compute eigenvalues */ /* for ESSL version need really cwork of length N (complex), 2N (real); already at least 5N of space has been allocated */ ierr = PetscMalloc(lwork*sizeof(PetscReal),&work); CHKERRQ(ierr); ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF); CHKERRQ(ierr); LAPACKgeev_(&zero,R,&bN,cwork,&sdummy,&idummy,&idummy,&bn,work,&lwork); ierr = PetscFPTrapPop(); CHKERRQ(ierr); ierr = PetscFree(work); CHKERRQ(ierr); /* For now we stick with the convention of storing the real and imaginary components of evalues separately. But is this what we really want? */ ierr = PetscMalloc(n*sizeof(PetscInt),&perm); CHKERRQ(ierr); #if !defined(PETSC_USE_COMPLEX) for (i=0; i<n; i++) { realpart[i] = cwork[2*i]; perm[i] = i; } ierr = PetscSortRealWithPermutation(n,realpart,perm); CHKERRQ(ierr); for (i=0; i<n; i++) { r[i] = cwork[2*perm[i]]; c[i] = cwork[2*perm[i]+1]; } #else for (i=0; i<n; i++) { realpart[i] = PetscRealPart(cwork[i]); perm[i] = i; } ierr = PetscSortRealWithPermutation(n,realpart,perm); CHKERRQ(ierr); for (i=0; i<n; i++) { r[i] = PetscRealPart(cwork[perm[i]]); c[i] = PetscImaginaryPart(cwork[perm[i]]); } #endif ierr = PetscFree(perm); CHKERRQ(ierr); #elif defined(PETSC_MISSING_LAPACK_GEEV) PetscFunctionBegin; SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values."); #elif !defined(PETSC_USE_COMPLEX) KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; PetscErrorCode ierr; PetscInt n = gmres->it + 1,N = gmres->max_k + 1,i,*perm; PetscBLASInt bn, bN, lwork, idummy, lierr; PetscScalar *R = gmres->Rsvd,*work = R + N*N; PetscScalar *realpart = gmres->Dsvd,*imagpart = realpart + N,sdummy; PetscFunctionBegin; bn = PetscBLASIntCast(n); bN = PetscBLASIntCast(N); lwork = PetscBLASIntCast(5*N); idummy = PetscBLASIntCast(N); if (nmax < n) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues"); *neig = n; if (!n) { PetscFunctionReturn(0); } /* copy R matrix to work space */ ierr = PetscMemcpy(R,gmres->hes_origin,N*N*sizeof(PetscScalar)); CHKERRQ(ierr); /* compute eigenvalues */ ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF); CHKERRQ(ierr); LAPACKgeev_("N","N",&bn,R,&bN,realpart,imagpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr); if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr); ierr = PetscFPTrapPop(); CHKERRQ(ierr); ierr = PetscMalloc(n*sizeof(PetscInt),&perm); CHKERRQ(ierr); for (i=0; i<n; i++) { perm[i] = i; } ierr = PetscSortRealWithPermutation(n,realpart,perm); CHKERRQ(ierr); for (i=0; i<n; i++) { r[i] = realpart[perm[i]]; c[i] = imagpart[perm[i]]; } ierr = PetscFree(perm); CHKERRQ(ierr); #else KSP_GMRES *gmres = (KSP_GMRES*)ksp->data; PetscErrorCode ierr; PetscInt n = gmres->it + 1,N = gmres->max_k + 1,i,*perm; PetscScalar *R = gmres->Rsvd,*work = R + N*N,*eigs = work + 5*N,sdummy; PetscBLASInt bn,bN,lwork,idummy,lierr; PetscFunctionBegin; bn = PetscBLASIntCast(n); bN = PetscBLASIntCast(N); lwork = PetscBLASIntCast(5*N); idummy = PetscBLASIntCast(N); if (nmax < n) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues"); *neig = n; if (!n) { PetscFunctionReturn(0); } /* copy R matrix to work space */ ierr = PetscMemcpy(R,gmres->hes_origin,N*N*sizeof(PetscScalar)); CHKERRQ(ierr); /* compute eigenvalues */ ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF); CHKERRQ(ierr); LAPACKgeev_("N","N",&bn,R,&bN,eigs,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,gmres->Dsvd,&lierr); if (lierr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine"); ierr = PetscFPTrapPop(); CHKERRQ(ierr); ierr = PetscMalloc(n*sizeof(PetscInt),&perm); CHKERRQ(ierr); for (i=0; i<n; i++) { perm[i] = i; } for (i=0; i<n; i++) { r[i] = PetscRealPart(eigs[i]); } ierr = PetscSortRealWithPermutation(n,r,perm); CHKERRQ(ierr); for (i=0; i<n; i++) { r[i] = PetscRealPart(eigs[perm[i]]); c[i] = PetscImaginaryPart(eigs[perm[i]]); } ierr = PetscFree(perm); CHKERRQ(ierr); #endif PetscFunctionReturn(0); }
PetscErrorCode KSPAGMRESRodvec(KSP ksp, PetscInt nvec, PetscScalar *In, Vec Out) { KSP_AGMRES *agmres = (KSP_AGMRES*) ksp->data; MPI_Comm comm; PetscScalar *Qloc = agmres->Qloc; PetscScalar *sgn = agmres->sgn; PetscScalar *tloc = agmres->tloc; PetscMPIInt rank = agmres->rank; PetscMPIInt First = agmres->First, Last = agmres->Last; PetscMPIInt Iright = agmres->Iright, Ileft = agmres->Ileft; PetscScalar *y, *zloc; PetscErrorCode ierr; PetscInt nloc,d, len, i, j; PetscBLASInt bnvec,pas,blen; PetscInt dpt; PetscReal c, s, rho, zp, zq, yd, tt; MPI_Status status; PetscFunctionBegin; ierr = PetscBLASIntCast(nvec,&bnvec);CHKERRQ(ierr); ierr = PetscObjectGetComm((PetscObject)ksp,&comm);CHKERRQ(ierr); pas = 1; ierr = VecGetLocalSize(VEC_V(0), &nloc);CHKERRQ(ierr); ierr = PetscMalloc1(nvec, &y);CHKERRQ(ierr); ierr = PetscMemcpy(y, In, nvec*sizeof(PetscScalar));CHKERRQ(ierr); ierr = VecGetArray(Out, &zloc);CHKERRQ(ierr); if (rank == Last) { for (i = 0; i < nvec; i++) y[i] = sgn[i] * y[i]; } for (i = 0; i < nloc; i++) zloc[i] = 0.0; if (agmres->size == 1) PetscStackCallBLAS("BLAScopy",BLAScopy_(&bnvec, y, &pas, &(zloc[0]), &pas)); else { for (d = nvec - 1; d >= 0; d--) { if (rank == First) { ierr = MPI_Recv(&(zloc[d]), 1, MPIU_SCALAR, Iright, agmres->tag, comm, &status);CHKERRQ(ierr); } else { for (j = nvec - 1; j >= d + 1; j--) { i = j - d; ierr = KSPAGMRESRoddecGivens(&c, &s, &(Qloc[j * nloc + i]), 0);CHKERRQ(ierr); zp = zloc[i-1]; zq = zloc[i]; zloc[i-1] = c * zp + s * zq; zloc[i] = -s * zp + c * zq; } ierr = KSPAGMRESRoddecGivens(&c, &s, &(Qloc[d * nloc]), 0);CHKERRQ(ierr); if (rank == Last) { zp = y[d]; zq = zloc[0]; y[d] = c * zp + s * zq; zloc[0] = -s * zp + c * zq; ierr = MPI_Send(&(y[d]), 1, MPIU_SCALAR, Ileft, agmres->tag, comm);CHKERRQ(ierr); } else { ierr = MPI_Recv(&yd, 1, MPIU_SCALAR, Iright, agmres->tag, comm, &status);CHKERRQ(ierr); zp = yd; zq = zloc[0]; yd = c * zp + s * zq; zloc[0] = -s * zp + c * zq; ierr = MPI_Send(&yd, 1, MPIU_SCALAR, Ileft, agmres->tag, comm);CHKERRQ(ierr); } } } } for (j = nvec - 1; j >= 0; j--) { dpt = j * nloc + j; if (tloc[j] != 0.0) { len = nloc - j; ierr = PetscBLASIntCast(len,&blen);CHKERRQ(ierr); rho = Qloc[dpt]; Qloc[dpt] = 1.0; tt = tloc[j] * (BLASdot_(&blen, &(Qloc[dpt]), &pas, &(zloc[j]), &pas)); PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&blen, &tt, &(Qloc[dpt]), &pas, &(zloc[j]), &pas)); Qloc[dpt] = rho; } } ierr = VecRestoreArray(Out, &zloc);CHKERRQ(ierr); ierr = PetscFree(y);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode DSTranslateHarmonic_NHEP(DS ds,PetscScalar tau,PetscReal beta,PetscBool recover,PetscScalar *gin,PetscReal *gamma) { #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRS) PetscFunctionBegin; SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRS - Lapack routines are unavailable"); #else PetscErrorCode ierr; PetscInt i,j; PetscBLASInt *ipiv,info,n,ld,one=1,ncol; PetscScalar *A,*B,*Q,*g=gin,*ghat; PetscScalar done=1.0,dmone=-1.0,dzero=0.0; PetscReal gnorm; PetscFunctionBegin; ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr); ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr); A = ds->mat[DS_MAT_A]; if (!recover) { ierr = DSAllocateWork_Private(ds,0,0,ld);CHKERRQ(ierr); ipiv = ds->iwork; if (!g) { ierr = DSAllocateWork_Private(ds,ld,0,0);CHKERRQ(ierr); g = ds->work; } /* use workspace matrix W to factor A-tau*eye(n) */ ierr = DSAllocateMat_Private(ds,DS_MAT_W);CHKERRQ(ierr); B = ds->mat[DS_MAT_W]; ierr = PetscMemcpy(B,A,sizeof(PetscScalar)*ld*ld);CHKERRQ(ierr); /* Vector g initialy stores b = beta*e_n^T */ ierr = PetscMemzero(g,n*sizeof(PetscScalar));CHKERRQ(ierr); g[n-1] = beta; /* g = (A-tau*eye(n))'\b */ for (i=0;i<n;i++) B[i+i*ld] -= tau; PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,B,&ld,ipiv,&info)); if (info<0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Bad argument to LU factorization"); if (info>0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Bad LU factorization"); ierr = PetscLogFlops(2.0*n*n*n/3.0);CHKERRQ(ierr); PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C",&n,&one,B,&ld,ipiv,g,&ld,&info)); if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve"); ierr = PetscLogFlops(2.0*n*n-n);CHKERRQ(ierr); /* A = A + g*b' */ for (i=0;i<n;i++) A[i+(n-1)*ld] += g[i]*beta; } else { /* recover */ PetscValidPointer(g,6); ierr = DSAllocateWork_Private(ds,ld,0,0);CHKERRQ(ierr); ghat = ds->work; Q = ds->mat[DS_MAT_Q]; /* g^ = -Q(:,idx)'*g */ ierr = PetscBLASIntCast(ds->l+ds->k,&ncol);CHKERRQ(ierr); PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&ncol,&dmone,Q,&ld,g,&one,&dzero,ghat,&one)); /* A = A + g^*b' */ for (i=0;i<ds->l+ds->k;i++) for (j=ds->l;j<ds->l+ds->k;j++) A[i+j*ld] += ghat[i]*Q[n-1+j*ld]*beta; /* g~ = (I-Q(:,idx)*Q(:,idx)')*g = g+Q(:,idx)*g^ */ PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&ncol,&done,Q,&ld,ghat,&one,&done,g,&one)); } /* Compute gamma factor */ if (gamma) { gnorm = 0.0; for (i=0;i<n;i++) gnorm = gnorm + PetscRealPart(g[i]*PetscConj(g[i])); *gamma = PetscSqrtReal(1.0+gnorm); } PetscFunctionReturn(0); #endif }
PetscErrorCode DSVectors_NHEP_Eigen_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left) { #if defined(SLEPC_MISSING_LAPACK_TREVC) PetscFunctionBegin; SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable"); #else PetscErrorCode ierr; PetscInt i; PetscBLASInt mm=1,mout,info,ld,n,inc = 1; PetscScalar tmp,done=1.0,zero=0.0; PetscReal norm; PetscBool iscomplex = PETSC_FALSE; PetscBLASInt *select; PetscScalar *A = ds->mat[DS_MAT_A]; PetscScalar *Q = ds->mat[DS_MAT_Q]; PetscScalar *X = ds->mat[left?DS_MAT_Y:DS_MAT_X]; PetscScalar *Y; PetscFunctionBegin; ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr); ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr); ierr = DSAllocateWork_Private(ds,0,0,ld);CHKERRQ(ierr); select = ds->iwork; for (i=0;i<n;i++) select[i] = (PetscBLASInt)PETSC_FALSE; /* Compute k-th eigenvector Y of A */ Y = X+(*k)*ld; select[*k] = (PetscBLASInt)PETSC_TRUE; #if !defined(PETSC_USE_COMPLEX) if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE; mm = iscomplex? 2: 1; if (iscomplex) select[(*k)+1] = (PetscBLASInt)PETSC_TRUE; ierr = DSAllocateWork_Private(ds,3*ld,0,0);CHKERRQ(ierr); PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,&info)); #else ierr = DSAllocateWork_Private(ds,2*ld,ld,0);CHKERRQ(ierr); PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,ds->rwork,&info)); #endif if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTREVC %d",info); if (mout != mm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Inconsistent arguments"); /* accumulate and normalize eigenvectors */ if (ds->state>=DS_STATE_CONDENSED) { ierr = PetscMemcpy(ds->work,Y,mout*ld*sizeof(PetscScalar));CHKERRQ(ierr); PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work,&inc,&zero,Y,&inc)); #if !defined(PETSC_USE_COMPLEX) if (iscomplex) PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work+ld,&inc,&zero,Y+ld,&inc)); #endif norm = BLASnrm2_(&n,Y,&inc); #if !defined(PETSC_USE_COMPLEX) if (iscomplex) { tmp = BLASnrm2_(&n,Y+ld,&inc); norm = SlepcAbsEigenvalue(norm,tmp); } #endif tmp = 1.0 / norm; PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y,&inc)); #if !defined(PETSC_USE_COMPLEX) if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y+ld,&inc)); #endif } /* set output arguments */ if (iscomplex) (*k)++; if (rnorm) { if (iscomplex) *rnorm = SlepcAbsEigenvalue(Y[n-1],Y[n-1+ld]); else *rnorm = PetscAbsScalar(Y[n-1]); } PetscFunctionReturn(0); #endif }
static PetscErrorCode PCSetUp_SVD(PC pc) { #if defined(PETSC_MISSING_LAPACK_GESVD) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable\nNot able to provide singular value estimates."); #else PC_SVD *jac = (PC_SVD*)pc->data; PetscErrorCode ierr; PetscScalar *a,*u,*v,*d,*work; PetscBLASInt nb,lwork; PetscInt i,n; PetscMPIInt size; PetscFunctionBegin; ierr = MatDestroy(&jac->A);CHKERRQ(ierr); ierr = MPI_Comm_size(((PetscObject)pc->pmat)->comm,&size);CHKERRQ(ierr); if (size > 1) { Mat redmat; PetscInt M; ierr = MatGetSize(pc->pmat,&M,NULL);CHKERRQ(ierr); ierr = MatGetRedundantMatrix(pc->pmat,size,PETSC_COMM_SELF,M,MAT_INITIAL_MATRIX,&redmat);CHKERRQ(ierr); ierr = MatConvert(redmat,MATSEQDENSE,MAT_INITIAL_MATRIX,&jac->A);CHKERRQ(ierr); ierr = MatDestroy(&redmat);CHKERRQ(ierr); } else { ierr = MatConvert(pc->pmat,MATSEQDENSE,MAT_INITIAL_MATRIX,&jac->A);CHKERRQ(ierr); } if (!jac->diag) { /* assume square matrices */ ierr = MatGetVecs(jac->A,&jac->diag,&jac->work);CHKERRQ(ierr); } if (!jac->U) { ierr = MatDuplicate(jac->A,MAT_DO_NOT_COPY_VALUES,&jac->U);CHKERRQ(ierr); ierr = MatDuplicate(jac->A,MAT_DO_NOT_COPY_VALUES,&jac->Vt);CHKERRQ(ierr); } ierr = MatGetSize(pc->pmat,&n,NULL);CHKERRQ(ierr); ierr = PetscBLASIntCast(n,&nb);CHKERRQ(ierr); lwork = 5*nb; ierr = PetscMalloc(lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); ierr = MatDenseGetArray(jac->A,&a);CHKERRQ(ierr); ierr = MatDenseGetArray(jac->U,&u);CHKERRQ(ierr); ierr = MatDenseGetArray(jac->Vt,&v);CHKERRQ(ierr); ierr = VecGetArray(jac->diag,&d);CHKERRQ(ierr); #if !defined(PETSC_USE_COMPLEX) { PetscBLASInt lierr; ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&nb,&nb,a,&nb,d,u,&nb,v,&nb,work,&lwork,&lierr)); if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"gesv() error %d",lierr); ierr = PetscFPTrapPop();CHKERRQ(ierr); } #else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for complex"); #endif ierr = MatDenseRestoreArray(jac->A,&a);CHKERRQ(ierr); ierr = MatDenseRestoreArray(jac->U,&u);CHKERRQ(ierr); ierr = MatDenseRestoreArray(jac->Vt,&v);CHKERRQ(ierr); for (i=n-1; i>=0; i--) if (PetscRealPart(d[i]) > jac->zerosing) break; jac->nzero = n-1-i; if (jac->monitor) { ierr = PetscViewerASCIIAddTab(jac->monitor,((PetscObject)pc)->tablevel);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(jac->monitor," SVD: condition number %14.12e, %D of %D singular values are (nearly) zero\n",(double)PetscRealPart(d[0]/d[n-1]),jac->nzero,n);CHKERRQ(ierr); if (n >= 10) { /* print 5 smallest and 5 largest */ ierr = PetscViewerASCIIPrintf(jac->monitor," SVD: smallest singular values: %14.12e %14.12e %14.12e %14.12e %14.12e\n",(double)PetscRealPart(d[n-1]),(double)PetscRealPart(d[n-2]),(double)PetscRealPart(d[n-3]),(double)PetscRealPart(d[n-4]),(double)PetscRealPart(d[n-5]));CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(jac->monitor," SVD: largest singular values : %14.12e %14.12e %14.12e %14.12e %14.12e\n",(double)PetscRealPart(d[4]),(double)PetscRealPart(d[3]),(double)PetscRealPart(d[2]),(double)PetscRealPart(d[1]),(double)PetscRealPart(d[0]));CHKERRQ(ierr); } else { /* print all singular values */ char buf[256],*p; size_t left = sizeof(buf),used; PetscInt thisline; for (p=buf,i=n-1,thisline=1; i>=0; i--,thisline++) { ierr = PetscSNPrintfCount(p,left," %14.12e",&used,(double)PetscRealPart(d[i]));CHKERRQ(ierr); left -= used; p += used; if (thisline > 4 || i==0) { ierr = PetscViewerASCIIPrintf(jac->monitor," SVD: singular values:%s\n",buf);CHKERRQ(ierr); p = buf; thisline = 0; } } } ierr = PetscViewerASCIISubtractTab(jac->monitor,((PetscObject)pc)->tablevel);CHKERRQ(ierr); } ierr = PetscInfo2(pc,"Largest and smallest singular values %14.12e %14.12e\n",(double)PetscRealPart(d[0]),(double)PetscRealPart(d[n-1]));CHKERRQ(ierr); for (i=0; i<n-jac->nzero; i++) d[i] = 1.0/d[i]; for (; i<n; i++) d[i] = 0.0; if (jac->essrank > 0) for (i=0; i<n-jac->nzero-jac->essrank; i++) d[i] = 0.0; /* Skip all but essrank eigenvalues */ ierr = PetscInfo1(pc,"Number of zero or nearly singular values %D\n",jac->nzero);CHKERRQ(ierr); ierr = VecRestoreArray(jac->diag,&d);CHKERRQ(ierr); #if defined(foo) { PetscViewer viewer; ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,"joe",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); ierr = MatView(jac->A,viewer);CHKERRQ(ierr); ierr = MatView(jac->U,viewer);CHKERRQ(ierr); ierr = MatView(jac->Vt,viewer);CHKERRQ(ierr); ierr = VecView(jac->diag,viewer);CHKERRQ(ierr); ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); } #endif ierr = PetscFree(work);CHKERRQ(ierr); PetscFunctionReturn(0); #endif }
PetscErrorCode SNESNGMRESFormCombinedSolution_Private(SNES snes,PetscInt ivec,PetscInt l,Vec XM,Vec FM,PetscReal fMnorm,Vec X,Vec XA,Vec FA) { SNES_NGMRES *ngmres = (SNES_NGMRES*) snes->data; PetscInt i,j; Vec *Fdot = ngmres->Fdot; Vec *Xdot = ngmres->Xdot; PetscScalar *beta = ngmres->beta; PetscScalar *xi = ngmres->xi; PetscScalar alph_total = 0.; PetscErrorCode ierr; PetscReal nu; Vec Y = snes->work[2]; PetscBool changed_y,changed_w; PetscFunctionBegin; nu = fMnorm*fMnorm; /* construct the right hand side and xi factors */ if (l > 0) { ierr = VecMDotBegin(FM,l,Fdot,xi);CHKERRQ(ierr); ierr = VecMDotBegin(Fdot[ivec],l,Fdot,beta);CHKERRQ(ierr); ierr = VecMDotEnd(FM,l,Fdot,xi);CHKERRQ(ierr); ierr = VecMDotEnd(Fdot[ivec],l,Fdot,beta);CHKERRQ(ierr); for (i = 0; i < l; i++) { Q(i,ivec) = beta[i]; Q(ivec,i) = beta[i]; } } else { Q(0,0) = ngmres->fnorms[ivec]*ngmres->fnorms[ivec]; } for (i = 0; i < l; i++) beta[i] = nu - xi[i]; /* construct h */ for (j = 0; j < l; j++) { for (i = 0; i < l; i++) { H(i,j) = Q(i,j)-xi[i]-xi[j]+nu; } } if (l == 1) { /* simply set alpha[0] = beta[0] / H[0, 0] */ if (H(0,0) != 0.) beta[0] = beta[0]/H(0,0); else beta[0] = 0.; } else { #if defined(PETSC_MISSING_LAPACK_GELSS) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_SUP,"NGMRES with LS requires the LAPACK GELSS routine."); #else ierr = PetscBLASIntCast(l,&ngmres->m);CHKERRQ(ierr); ierr = PetscBLASIntCast(l,&ngmres->n);CHKERRQ(ierr); ngmres->info = 0; ngmres->rcond = -1.; ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); #if defined(PETSC_USE_COMPLEX) PetscStackCallBLAS("LAPACKgelss",LAPACKgelss_(&ngmres->m,&ngmres->n,&ngmres->nrhs,ngmres->h,&ngmres->lda,ngmres->beta,&ngmres->ldb,ngmres->s,&ngmres->rcond,&ngmres->rank,ngmres->work,&ngmres->lwork,ngmres->rwork,&ngmres->info)); #else PetscStackCallBLAS("LAPACKgelss",LAPACKgelss_(&ngmres->m,&ngmres->n,&ngmres->nrhs,ngmres->h,&ngmres->lda,ngmres->beta,&ngmres->ldb,ngmres->s,&ngmres->rcond,&ngmres->rank,ngmres->work,&ngmres->lwork,&ngmres->info)); #endif ierr = PetscFPTrapPop();CHKERRQ(ierr); if (ngmres->info < 0) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"Bad argument to GELSS"); if (ngmres->info > 0) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"SVD failed to converge"); #endif } for (i=0; i<l; i++) { if (PetscIsInfOrNanScalar(beta[i])) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"SVD generated inconsistent output"); } alph_total = 0.; for (i = 0; i < l; i++) alph_total += beta[i]; ierr = VecCopy(XM,XA);CHKERRQ(ierr); ierr = VecScale(XA,1.-alph_total);CHKERRQ(ierr); ierr = VecMAXPY(XA,l,beta,Xdot);CHKERRQ(ierr); /* check the validity of the step */ ierr = VecCopy(XA,Y);CHKERRQ(ierr); ierr = VecAXPY(Y,-1.0,X);CHKERRQ(ierr); ierr = SNESLineSearchPostCheck(snes->linesearch,X,Y,XA,&changed_y,&changed_w);CHKERRQ(ierr); if (!ngmres->approxfunc) { if (snes->pc && snes->pcside == PC_LEFT) { ierr = SNESApplyNPC(snes,XA,NULL,FA);CHKERRQ(ierr); } else { ierr =SNESComputeFunction(snes,XA,FA);CHKERRQ(ierr); } } else { ierr = VecCopy(FM,FA);CHKERRQ(ierr); ierr = VecScale(FA,1.-alph_total);CHKERRQ(ierr); ierr = VecMAXPY(FA,l,beta,Fdot);CHKERRQ(ierr); } PetscFunctionReturn(0); }
static PetscErrorCode KSPAGMRESBuildSoln(KSP ksp,PetscInt it) { KSP_AGMRES *agmres = (KSP_AGMRES*)ksp->data; PetscErrorCode ierr; PetscInt max_k = agmres->max_k; /* Size of the non-augmented Krylov basis */ PetscInt i, j; PetscInt r = agmres->r; /* current number of augmented eigenvectors */ PetscBLASInt KspSize; PetscBLASInt lC; PetscBLASInt N; PetscBLASInt ldH = N + 1; PetscBLASInt lwork; PetscBLASInt info, nrhs = 1; PetscFunctionBegin; ierr = PetscBLASIntCast(KSPSIZE,&KspSize);CHKERRQ(ierr); ierr = PetscBLASIntCast(4 * (KspSize+1),&lwork);CHKERRQ(ierr); ierr = PetscBLASIntCast(KspSize+1,&lC);CHKERRQ(ierr); ierr = PetscBLASIntCast(MAXKSPSIZE + 1,&N);CHKERRQ(ierr); ierr = PetscBLASIntCast(N + 1,&ldH);CHKERRQ(ierr); /* Save a copy of the Hessenberg matrix */ for (j = 0; j < N-1; j++) { for (i = 0; i < N; i++) { *HS(i,j) = *H(i,j); } } /* QR factorize the Hessenberg matrix */ #if defined(PETSC_MISSING_LAPACK_GEQRF) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEQRF - Lapack routine is unavailable."); #else PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&lC, &KspSize, agmres->hh_origin, &ldH, agmres->tau, agmres->work, &lwork, &info)); if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGEQRF INFO=%d", info); #endif /* Update the right hand side of the least square problem */ ierr = PetscMemzero(agmres->nrs, N*sizeof(PetscScalar));CHKERRQ(ierr); agmres->nrs[0] = ksp->rnorm; #if defined(PETSC_MISSING_LAPACK_ORMQR) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEQRF - Lapack routine is unavailable."); #else PetscStackCallBLAS("LAPACKormqr",LAPACKormqr_("L", "T", &lC, &nrhs, &KspSize, agmres->hh_origin, &ldH, agmres->tau, agmres->nrs, &N, agmres->work, &lwork, &info)); if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XORMQR INFO=%d",info); #endif ksp->rnorm = PetscAbsScalar(agmres->nrs[KspSize]); /* solve the least-square problem */ #if defined(PETSC_MISSING_LAPACK_TRTRS) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"TRTRS - Lapack routine is unavailable."); #else PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U", "N", "N", &KspSize, &nrhs, agmres->hh_origin, &ldH, agmres->nrs, &N, &info)); if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XTRTRS INFO=%d",info); #endif /* Accumulate the correction to the solution of the preconditioned problem in VEC_TMP */ ierr = VecZeroEntries(VEC_TMP);CHKERRQ(ierr); ierr = VecMAXPY(VEC_TMP, max_k, agmres->nrs, &VEC_V(0));CHKERRQ(ierr); if (!agmres->DeflPrecond) { ierr = VecMAXPY(VEC_TMP, r, &agmres->nrs[max_k], agmres->U);CHKERRQ(ierr); } if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) { ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_TMP_MATOP);CHKERRQ(ierr); ierr = VecCopy(VEC_TMP_MATOP, VEC_TMP);CHKERRQ(ierr); } ierr = KSPUnwindPreconditioner(ksp, VEC_TMP, VEC_TMP_MATOP);CHKERRQ(ierr); /* add the solution to the previous one */ ierr = VecAXPY(ksp->vec_sol, 1.0, VEC_TMP);CHKERRQ(ierr); PetscFunctionReturn(0); }
int main(int argc, char **args) { Mat A, L; AppCtx ctx; PetscViewer viewer; PetscErrorCode ierr; ierr = PetscInitialize(&argc, &args, (char *) 0, help);CHKERRQ(ierr); ierr = ProcessOptions(&ctx);CHKERRQ(ierr); /* Load matrix */ ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD, ctx.matFilename, FILE_MODE_READ, &viewer);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD, &A);CHKERRQ(ierr); ierr = MatLoad(A, viewer);CHKERRQ(ierr); ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); /* Make graph Laplacian from matrix */ ierr = MatLaplacian(A, 1.0e-12, &L);CHKERRQ(ierr); /* Check Laplacian */ PetscReal norm; Vec x, y; ierr = MatGetVecs(L, &x, NULL);CHKERRQ(ierr); ierr = VecDuplicate(x, &y);CHKERRQ(ierr); ierr = VecSet(x, 1.0);CHKERRQ(ierr); ierr = MatMult(L, x, y);CHKERRQ(ierr); ierr = VecNorm(y, NORM_INFINITY, &norm);CHKERRQ(ierr); if (norm > 1.0e-10) SETERRQ(PetscObjectComm((PetscObject) y), PETSC_ERR_PLIB, "Invalid graph Laplacian"); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&y);CHKERRQ(ierr); /* Compute Fiedler vector, and perhaps more vectors */ Mat LD; PetscScalar *a, *realpart, *imagpart, *eigvec, *work, sdummy; PetscBLASInt bn, bN, lwork, lierr, idummy; PetscInt n, i; ierr = MatConvert(L, MATDENSE, MAT_INITIAL_MATRIX, &LD);CHKERRQ(ierr); ierr = MatGetLocalSize(LD, &n, NULL);CHKERRQ(ierr); ierr = MatDenseGetArray(LD, &a);CHKERRQ(ierr); ierr = PetscBLASIntCast(n, &bn);CHKERRQ(ierr); ierr = PetscBLASIntCast(n, &bN);CHKERRQ(ierr); ierr = PetscBLASIntCast(5*n,&lwork);CHKERRQ(ierr); ierr = PetscBLASIntCast(1,&idummy);CHKERRQ(ierr); ierr = PetscMalloc4(n,PetscScalar,&realpart,n,PetscScalar,&imagpart,n*n,PetscScalar,&eigvec,lwork,PetscScalar,&work);CHKERRQ(ierr); ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); PetscStackCall("LAPACKgeev", LAPACKgeev_("N","V",&bn,a,&bN,realpart,imagpart,&sdummy,&idummy,eigvec,&bN,work,&lwork,&lierr)); if (lierr) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in LAPACK routine %d", (int) lierr); ierr = PetscFPTrapPop();CHKERRQ(ierr); PetscReal *r, *c; PetscInt *perm; ierr = PetscMalloc3(n,PetscInt,&perm,n,PetscReal,&r,n,PetscReal,&c);CHKERRQ(ierr); for (i = 0; i < n; ++i) perm[i] = i; ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); for (i = 0; i < n; ++i) { r[i] = realpart[perm[i]]; c[i] = imagpart[perm[i]]; } for (i = 0; i < n; ++i) { realpart[i] = r[i]; imagpart[i] = c[i]; } /* Output spectrum */ if (ctx.showSpectrum) { ierr = PetscPrintf(PETSC_COMM_SELF, "Spectrum\n");CHKERRQ(ierr); for (i = 0; i < n; ++i) {ierr = PetscPrintf(PETSC_COMM_SELF, "%d: Real %g Imag %g\n", i, realpart[i], imagpart[i]);CHKERRQ(ierr);} } /* Check lowest eigenvalue and eigenvector */ PetscInt evInd = perm[0]; if ((realpart[0] > 1.0e-12) || (imagpart[0] > 1.0e-12)) SETERRQ(PetscObjectComm((PetscObject) L), PETSC_ERR_PLIB, "Graph Laplacian must have lowest eigenvalue 0"); for (i = 0; i < n; ++i) { if (fabs(eigvec[evInd*n+i] - eigvec[evInd*n+0]) > 1.0e-10) SETERRQ3(PetscObjectComm((PetscObject) L), PETSC_ERR_PLIB, "Graph Laplacian must have constant lowest eigenvector ev_%d %g != ev_0 %g", i, eigvec[evInd*n+i], eigvec[evInd*n+0]); } /* Output Fiedler vector */ evInd = perm[1]; if (ctx.showFiedler) { ierr = PetscPrintf(PETSC_COMM_SELF, "Fiedler vector, Re{ev} %g\n", realpart[1]);CHKERRQ(ierr); for (i = 0; i < n; ++i) {ierr = PetscPrintf(PETSC_COMM_SELF, "%d: %g\n", i, eigvec[evInd*n+i]);CHKERRQ(ierr);} } /* Construct Fiedler partition */ IS fIS, fIS2; PetscInt *fperm, *fperm2, pos, neg, posSize = 0; ierr = PetscMalloc(n * sizeof(PetscInt), &fperm);CHKERRQ(ierr); for (i = 0; i < n; ++i) { if (eigvec[evInd*n+i] > 0.0) ++posSize; } ierr = PetscMalloc(n * sizeof(PetscInt), &fperm2);CHKERRQ(ierr); for (i = 0; i < n; ++i) fperm[i] = i; ierr = PetscSortRealWithPermutation(n, &eigvec[evInd*n], fperm);CHKERRQ(ierr); for (i = 0; i < n; ++i) fperm2[n-1-i] = fperm[i]; for (i = 0, pos = 0, neg = posSize; i < n; ++i) { if (eigvec[evInd*n+i] > 0.0) fperm[pos++] = i; else fperm[neg++] = i; } ierr = ISCreateGeneral(PetscObjectComm((PetscObject) L), n, fperm, PETSC_OWN_POINTER, &fIS);CHKERRQ(ierr); ierr = ISSetPermutation(fIS);CHKERRQ(ierr); ierr = ISCreateGeneral(PetscObjectComm((PetscObject) L), n, fperm2, PETSC_OWN_POINTER, &fIS2);CHKERRQ(ierr); ierr = ISSetPermutation(fIS2);CHKERRQ(ierr); ierr = PetscFree3(perm,r,c);CHKERRQ(ierr); ierr = PetscFree4(realpart,imagpart,eigvec,work);CHKERRQ(ierr); ierr = MatDenseRestoreArray(LD, &a);CHKERRQ(ierr); ierr = MatDestroy(&LD);CHKERRQ(ierr); ierr = MatDestroy(&L);CHKERRQ(ierr); /* Permute matrix */ Mat AR, AR2; ierr = MatPermute(A, fIS, fIS, &AR);CHKERRQ(ierr); ierr = MatView(A, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = MatView(AR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = ISDestroy(&fIS);CHKERRQ(ierr); ierr = MatPermute(A, fIS2, fIS2, &AR2);CHKERRQ(ierr); ierr = MatView(AR2, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = ISDestroy(&fIS2);CHKERRQ(ierr); ierr = MatDestroy(&AR);CHKERRQ(ierr); AR = AR2; /* Extract blocks and reorder */ Mat AP, AN, APR, ANR; IS ispos, isneg, rpermpos, cpermpos, rpermneg, cpermneg; PetscInt bw, bwr; ierr = ISCreateStride(PETSC_COMM_SELF, posSize, 0, 1, &ispos);CHKERRQ(ierr); ierr = ISCreateStride(PETSC_COMM_SELF, n - posSize, posSize, 1, &isneg);CHKERRQ(ierr); ierr = MatGetSubMatrix(AR, ispos, ispos, MAT_INITIAL_MATRIX, &AP);CHKERRQ(ierr); ierr = MatGetSubMatrix(AR, isneg, isneg, MAT_INITIAL_MATRIX, &AN);CHKERRQ(ierr); ierr = ISDestroy(&ispos);CHKERRQ(ierr); ierr = ISDestroy(&isneg);CHKERRQ(ierr); ierr = MatGetOrdering(AP, ctx.matOrdtype, &rpermpos, &cpermpos);CHKERRQ(ierr); ierr = MatGetOrdering(AN, ctx.matOrdtype, &rpermneg, &cpermneg);CHKERRQ(ierr); ierr = MatPermute(AP, rpermpos, cpermpos, &APR);CHKERRQ(ierr); ierr = MatComputeBandwidth(AP, 0.0, &bw);CHKERRQ(ierr); ierr = MatComputeBandwidth(APR, 0.0, &bwr);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "Reduced positive bandwidth from %d to %d\n", bw, bwr);CHKERRQ(ierr); ierr = MatPermute(AN, rpermneg, cpermneg, &ANR);CHKERRQ(ierr); ierr = MatComputeBandwidth(AN, 0.0, &bw);CHKERRQ(ierr); ierr = MatComputeBandwidth(ANR, 0.0, &bwr);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "Reduced negative bandwidth from %d to %d\n", bw, bwr);CHKERRQ(ierr); ierr = MatView(AP, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = MatView(APR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = MatView(AN, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = MatView(ANR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); /* Reorder original matrix */ Mat ARR; IS rperm, cperm; PetscInt *idx; const PetscInt *cidx; ierr = PetscMalloc(n * sizeof(PetscInt), &idx);CHKERRQ(ierr); ierr = ISGetIndices(rpermpos, &cidx);CHKERRQ(ierr); for (i = 0; i < posSize; ++i) idx[i] = cidx[i]; ierr = ISRestoreIndices(rpermpos, &cidx);CHKERRQ(ierr); ierr = ISGetIndices(rpermneg, &cidx);CHKERRQ(ierr); for (i = posSize; i < n; ++i) idx[i] = cidx[i-posSize] + posSize; ierr = ISRestoreIndices(rpermneg, &cidx);CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, &rperm);CHKERRQ(ierr); ierr = ISSetPermutation(rperm);CHKERRQ(ierr); ierr = PetscMalloc(n * sizeof(PetscInt), &idx);CHKERRQ(ierr); ierr = ISGetIndices(cpermpos, &cidx);CHKERRQ(ierr); for (i = 0; i < posSize; ++i) idx[i] = cidx[i]; ierr = ISRestoreIndices(cpermpos, &cidx);CHKERRQ(ierr); ierr = ISGetIndices(cpermneg, &cidx);CHKERRQ(ierr); for (i = posSize; i < n; ++i) idx[i] = cidx[i-posSize] + posSize; ierr = ISRestoreIndices(cpermneg, &cidx);CHKERRQ(ierr); ierr = ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, &cperm);CHKERRQ(ierr); ierr = ISSetPermutation(cperm);CHKERRQ(ierr); ierr = MatPermute(AR, rperm, cperm, &ARR);CHKERRQ(ierr); ierr = MatView(ARR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = ISDestroy(&rperm);CHKERRQ(ierr); ierr = ISDestroy(&cperm);CHKERRQ(ierr); ierr = ISDestroy(&rpermpos);CHKERRQ(ierr); ierr = ISDestroy(&cpermpos);CHKERRQ(ierr); ierr = ISDestroy(&rpermneg);CHKERRQ(ierr); ierr = ISDestroy(&cpermneg);CHKERRQ(ierr); ierr = MatDestroy(&AP);CHKERRQ(ierr); ierr = MatDestroy(&AN);CHKERRQ(ierr); ierr = MatDestroy(&APR);CHKERRQ(ierr); ierr = MatDestroy(&ANR);CHKERRQ(ierr); /* Compare bands */ Mat B, BR; ierr = MatCreateSubMatrixBanded(A, 50, 0.95, &B);CHKERRQ(ierr); ierr = MatCreateSubMatrixBanded(ARR, 50, 0.95, &BR);CHKERRQ(ierr); ierr = MatView(B, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = MatView(BR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr); ierr = MatDestroy(&B);CHKERRQ(ierr); ierr = MatDestroy(&BR);CHKERRQ(ierr); /* Cleanup */ ierr = MatDestroy(&ARR);CHKERRQ(ierr); ierr = MatDestroy(&AR);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }
static PetscErrorCode gs_gop_vec_pairwise_plus( gs_id *gs, PetscScalar *in_vals, PetscInt step) { PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2; PetscInt *iptr, *msg_list, *msg_size, **msg_nodes; PetscInt *pw, *list, *size, **nodes; MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out; MPI_Status status; PetscBLASInt i1 = 1,dstep; PetscErrorCode ierr; PetscFunctionBegin; /* strip and load s */ msg_list =list = gs->pair_list; msg_size =size = gs->msg_sizes; msg_nodes=nodes = gs->node_list; iptr=pw = gs->pw_elm_list; dptr1=dptr3 = gs->pw_vals; msg_ids_in = ids_in = gs->msg_ids_in; msg_ids_out = ids_out = gs->msg_ids_out; dptr2 = gs->out; in1=in2 = gs->in; /* post the receives */ /* msg_nodes=nodes; */ do { /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the second one *list and do list++ afterwards */ ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->gs_comm, msg_ids_in);CHKERRQ(ierr); list++;msg_ids_in++; in1 += *size++ *step; } while (*++msg_nodes); msg_nodes=nodes; /* load gs values into in out gs buffers */ while (*iptr >= 0) { rvec_copy(dptr3,in_vals + *iptr*step,step); dptr3+=step; iptr++; } /* load out buffers and post the sends */ while ((iptr = *msg_nodes++)) { dptr3 = dptr2; while (*iptr >= 0) { rvec_copy(dptr2,dptr1 + *iptr*step,step); dptr2+=step; iptr++; } ierr = MPI_Isend(dptr3, *msg_size *step, MPIU_SCALAR, *msg_list, MSGTAG1+my_id, gs->gs_comm, msg_ids_out);CHKERRQ(ierr); msg_size++; msg_list++;msg_ids_out++; } /* tree */ if (gs->max_left_over) {gs_gop_vec_tree_plus(gs,in_vals,step);} /* process the received data */ msg_nodes=nodes; while ((iptr = *nodes++)){ PetscScalar d1 = 1.0; /* Should I check the return value of MPI_Wait() or status? */ /* Can this loop be replaced by a call to MPI_Waitall()? */ ierr = MPI_Wait(ids_in, &status);CHKERRQ(ierr); ids_in++; while (*iptr >= 0) { dstep = PetscBLASIntCast(step); BLASaxpy_(&dstep,&d1,in2,&i1,dptr1 + *iptr*step,&i1); in2+=step; iptr++; } } /* replace vals */ while (*pw >= 0) { rvec_copy(in_vals + *pw*step,dptr1,step); dptr1+=step; pw++; } /* clear isend message handles */ /* This changed for clarity though it could be the same */ while (*msg_nodes++) /* Should I check the return value of MPI_Wait() or status? */ /* Can this loop be replaced by a call to MPI_Waitall()? */ {ierr = MPI_Wait(ids_out, &status);CHKERRQ(ierr);ids_out++;} PetscFunctionReturn(0); }
static PetscErrorCode KSPSolve_BCGSL(KSP ksp) { KSP_BCGSL *bcgsl = (KSP_BCGSL *) ksp->data; PetscScalar alpha, beta, omega, sigma; PetscScalar rho0, rho1; PetscReal kappa0, kappaA, kappa1; PetscReal ghat, epsilon, abstol; PetscReal zeta, zeta0, rnmax_computed, rnmax_true, nrm0; PetscTruth bUpdateX; PetscTruth bBombed = PETSC_FALSE; PetscInt maxit; PetscInt h, i, j, k, vi, ell; PetscBLASInt ldMZ,bierr; PetscErrorCode ierr; PetscFunctionBegin; if (ksp->normtype == KSP_NORM_NATURAL) SETERRQ(PETSC_ERR_SUP,"Cannot use natural norm with KSPBCGSL"); if (ksp->normtype == KSP_NORM_PRECONDITIONED && ksp->pc_side != PC_LEFT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type unpreconditioned for right preconditioning and KSPBCGSL"); if (ksp->normtype == KSP_NORM_UNPRECONDITIONED && ksp->pc_side != PC_RIGHT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type preconditioned for left preconditioning and KSPBCGSL"); /* set up temporary vectors */ vi = 0; ell = bcgsl->ell; bcgsl->vB = ksp->work[vi]; vi++; bcgsl->vRt = ksp->work[vi]; vi++; bcgsl->vTm = ksp->work[vi]; vi++; bcgsl->vvR = ksp->work+vi; vi += ell+1; bcgsl->vvU = ksp->work+vi; vi += ell+1; bcgsl->vXr = ksp->work[vi]; vi++; ldMZ = PetscBLASIntCast(ell+1); /* Prime the iterative solver */ ierr = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs); CHKERRQ(ierr); ierr = VecNorm(VVR[0], NORM_2, &zeta0); CHKERRQ(ierr); rnmax_computed = zeta0; rnmax_true = zeta0; ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP); CHKERRQ(ierr); if (ksp->reason) { ierr = PetscObjectTakeAccess(ksp); CHKERRQ(ierr); ksp->its = 0; ksp->rnorm = zeta0; ierr = PetscObjectGrantAccess(ksp); CHKERRQ(ierr); PetscFunctionReturn(0); } ierr = VecSet(VVU[0],0.0); CHKERRQ(ierr); alpha = 0.; rho0 = omega = 1; if (bcgsl->delta>0.0) { ierr = VecCopy(VX, VXR); CHKERRQ(ierr); ierr = VecSet(VX,0.0); CHKERRQ(ierr); ierr = VecCopy(VVR[0], VB); CHKERRQ(ierr); } else { ierr = VecCopy(ksp->vec_rhs, VB); CHKERRQ(ierr); } /* Life goes on */ ierr = VecCopy(VVR[0], VRT); CHKERRQ(ierr); zeta = zeta0; ierr = KSPGetTolerances(ksp, &epsilon, &abstol, PETSC_NULL, &maxit); CHKERRQ(ierr); for (k=0; k<maxit; k += bcgsl->ell) { ksp->its = k; ksp->rnorm = zeta; KSPLogResidualHistory(ksp, zeta); KSPMonitor(ksp, ksp->its, zeta); ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP); CHKERRQ(ierr); if (ksp->reason) break; /* BiCG part */ rho0 = -omega*rho0; nrm0 = zeta; for (j=0; j<bcgsl->ell; j++) { /* rho1 <- r_j' * r_tilde */ ierr = VecDot(VVR[j], VRT, &rho1); CHKERRQ(ierr); if (rho1 == 0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG; bBombed = PETSC_TRUE; break; } beta = alpha*(rho1/rho0); rho0 = rho1; for (i=0; i<=j; i++) { /* u_i <- r_i - beta*u_i */ ierr = VecAYPX(VVU[i], -beta, VVR[i]); CHKERRQ(ierr); } /* u_{j+1} <- inv(K)*A*u_j */ ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM); CHKERRQ(ierr); ierr = VecDot(VVU[j+1], VRT, &sigma); CHKERRQ(ierr); if (sigma == 0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG; bBombed = PETSC_TRUE; break; } alpha = rho1/sigma; /* x <- x + alpha*u_0 */ ierr = VecAXPY(VX, alpha, VVU[0]); CHKERRQ(ierr); for (i=0; i<=j; i++) { /* r_i <- r_i - alpha*u_{i+1} */ ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]); CHKERRQ(ierr); } /* r_{j+1} <- inv(K)*A*r_j */ ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM); CHKERRQ(ierr); ierr = VecNorm(VVR[0], NORM_2, &nrm0); CHKERRQ(ierr); if (bcgsl->delta>0.0) { if (rnmax_computed<nrm0) rnmax_computed = nrm0; if (rnmax_true<nrm0) rnmax_true = nrm0; } /* NEW: check for early exit */ ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP); CHKERRQ(ierr); if (ksp->reason) { ierr = PetscObjectTakeAccess(ksp); CHKERRQ(ierr); ksp->its = k+j; ksp->rnorm = nrm0; ierr = PetscObjectGrantAccess(ksp); CHKERRQ(ierr); break; } } if (bBombed==PETSC_TRUE) break; /* Polynomial part */ for(i = 0; i <= bcgsl->ell; ++i) { ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]); CHKERRQ(ierr); } /* Symmetrize MZa */ for(i = 0; i <= bcgsl->ell; ++i) { for(j = i+1; j <= bcgsl->ell; ++j) { MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]); } } /* Copy MZa to MZb */ ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar)); CHKERRQ(ierr); if (!bcgsl->bConvex || bcgsl->ell==1) { PetscBLASInt ione = 1,bell = PetscBLASIntCast(bcgsl->ell); AY0c[0] = -1; LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr); if (ierr!=0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; bBombed = PETSC_TRUE; break; } ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar)); CHKERRQ(ierr); LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr); } else { PetscBLASInt ione = 1; PetscScalar aone = 1.0, azero = 0.0; PetscBLASInt neqs = PetscBLASIntCast(bcgsl->ell-1); LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr); if (ierr!=0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; bBombed = PETSC_TRUE; break; } ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar)); CHKERRQ(ierr); LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr); AY0c[0] = -1; AY0c[bcgsl->ell] = 0.; ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar)); CHKERRQ(ierr); LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr); AYlc[0] = 0.; AYlc[bcgsl->ell] = -1; BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione); kappa0 = BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione); /* round-off can cause negative kappa's */ if (kappa0<0) kappa0 = -kappa0; kappa0 = sqrt(kappa0); kappaA = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione); BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione); kappa1 = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione); if (kappa1<0) kappa1 = -kappa1; kappa1 = sqrt(kappa1); if (kappa0!=0.0 && kappa1!=0.0) { if (kappaA<0.7*kappa0*kappa1) { ghat = (kappaA<0.0) ? -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1; } else { ghat = kappaA/(kappa1*kappa1); } for (i=0; i<=bcgsl->ell; i++) { AY0c[i] = AY0c[i] - ghat* AYlc[i]; } } } omega = AY0c[bcgsl->ell]; for (h=bcgsl->ell; h>0 && omega==0.0; h--) { omega = AY0c[h]; } if (omega==0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; break; } ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR); CHKERRQ(ierr); for (i=1; i<=bcgsl->ell; i++) { AY0c[i] *= -1.0; } ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1); CHKERRQ(ierr); ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1); CHKERRQ(ierr); for (i=1; i<=bcgsl->ell; i++) { AY0c[i] *= -1.0; } ierr = VecNorm(VVR[0], NORM_2, &zeta); CHKERRQ(ierr); /* Accurate Update */ if (bcgsl->delta>0.0) { if (rnmax_computed<zeta) rnmax_computed = zeta; if (rnmax_true<zeta) rnmax_true = zeta; bUpdateX = (PetscTruth) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed); if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) { /* r0 <- b-inv(K)*A*X */ ierr = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM); CHKERRQ(ierr); ierr = VecAYPX(VVR[0], -1.0, VB); CHKERRQ(ierr); rnmax_true = zeta; if (bUpdateX) { ierr = VecAXPY(VXR,1.0,VX); CHKERRQ(ierr); ierr = VecSet(VX,0.0); CHKERRQ(ierr); ierr = VecCopy(VVR[0], VB); CHKERRQ(ierr); rnmax_computed = zeta; } } } } if (bcgsl->delta>0.0) { ierr = VecAXPY(VX,1.0,VXR); CHKERRQ(ierr); } ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP); CHKERRQ(ierr); if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; PetscFunctionReturn(0); }
/**************************************xxt.c***********************************/ static PetscInt xxt_generate(xxt_ADT xxt_handle) { PetscInt i,j,k,idex; PetscInt dim, col; PetscScalar *u, *uu, *v, *z, *w, alpha, alpha_w; PetscInt *segs; PetscInt op[] = {GL_ADD,0}; PetscInt off, len; PetscScalar *x_ptr; PetscInt *iptr, flag; PetscInt start =0, end, work; PetscInt op2[] = {GL_MIN,0}; PCTFS_gs_ADT PCTFS_gs_handle; PetscInt *nsep, *lnsep, *fo; PetscInt a_n =xxt_handle->mvi->n; PetscInt a_m =xxt_handle->mvi->m; PetscInt *a_local2global=xxt_handle->mvi->local2global; PetscInt level; PetscInt xxt_nnz=0, xxt_max_nnz=0; PetscInt n, m; PetscInt *col_sz, *col_indices, *stages; PetscScalar **col_vals, *x; PetscInt n_global; PetscInt xxt_zero_nnz =0; PetscInt xxt_zero_nnz_0=0; PetscBLASInt i1 = 1,dlen; PetscScalar dm1 = -1.0; PetscErrorCode ierr; n = xxt_handle->mvi->n; nsep = xxt_handle->info->nsep; lnsep = xxt_handle->info->lnsep; fo = xxt_handle->info->fo; end = lnsep[0]; level = xxt_handle->level; PCTFS_gs_handle = xxt_handle->mvi->PCTFS_gs_handle; /* is there a null space? */ /* LATER add in ability to detect null space by checking alpha */ for (i=0, j=0; i<=level; i++) j+=nsep[i]; m = j-xxt_handle->ns; if (m!=j) { ierr = PetscPrintf(PETSC_COMM_WORLD,"xxt_generate() :: null space exists %D %D %D\n",m,j,xxt_handle->ns);CHKERRQ(ierr); } /* get and initialize storage for x local */ /* note that x local is nxm and stored by columns */ col_sz = (PetscInt*) malloc(m*sizeof(PetscInt)); col_indices = (PetscInt*) malloc((2*m+1)*sizeof(PetscInt)); col_vals = (PetscScalar**) malloc(m*sizeof(PetscScalar*)); for (i=j=0; i<m; i++, j+=2) { col_indices[j]=col_indices[j+1]=col_sz[i]=-1; col_vals[i] = NULL; } col_indices[j]=-1; /* size of separators for each sub-hc working from bottom of tree to top */ /* this looks like nsep[]=segments */ stages = (PetscInt*) malloc((level+1)*sizeof(PetscInt)); segs = (PetscInt*) malloc((level+1)*sizeof(PetscInt)); PCTFS_ivec_zero(stages,level+1); PCTFS_ivec_copy(segs,nsep,level+1); for (i=0; i<level; i++) segs[i+1] += segs[i]; stages[0] = segs[0]; /* temporary vectors */ u = (PetscScalar*) malloc(n*sizeof(PetscScalar)); z = (PetscScalar*) malloc(n*sizeof(PetscScalar)); v = (PetscScalar*) malloc(a_m*sizeof(PetscScalar)); uu = (PetscScalar*) malloc(m*sizeof(PetscScalar)); w = (PetscScalar*) malloc(m*sizeof(PetscScalar)); /* extra nnz due to replication of vertices across separators */ for (i=1, j=0; i<=level; i++) j+=nsep[i]; /* storage for sparse x values */ n_global = xxt_handle->info->n_global; xxt_max_nnz = (PetscInt)(2.5*PetscPowReal(1.0*n_global,1.6667) + j*n/2)/PCTFS_num_nodes; x = (PetscScalar*) malloc(xxt_max_nnz*sizeof(PetscScalar)); xxt_nnz = 0; /* LATER - can embed next sep to fire in gs */ /* time to make the donuts - generate X factor */ for (dim=i=j=0; i<m; i++) { /* time to move to the next level? */ while (i==segs[dim]) { if (dim==level) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"dim about to exceed level\n"); stages[dim++]=i; end +=lnsep[dim]; } stages[dim]=i; /* which column are we firing? */ /* i.e. set v_l */ /* use new seps and do global min across hc to determine which one to fire */ (start<end) ? (col=fo[start]) : (col=INT_MAX); PCTFS_giop_hc(&col,&work,1,op2,dim); /* shouldn't need this */ if (col==INT_MAX) { ierr = PetscInfo(0,"hey ... col==INT_MAX??\n");CHKERRQ(ierr); continue; } /* do I own it? I should */ PCTFS_rvec_zero(v,a_m); if (col==fo[start]) { start++; idex=PCTFS_ivec_linear_search(col, a_local2global, a_n); if (idex!=-1) { v[idex] = 1.0; j++; } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"NOT FOUND!\n"); } else { idex=PCTFS_ivec_linear_search(col, a_local2global, a_m); if (idex!=-1) v[idex] = 1.0; } /* perform u = A.v_l */ PCTFS_rvec_zero(u,n); do_matvec(xxt_handle->mvi,v,u); /* uu = X^T.u_l (local portion) */ /* technically only need to zero out first i entries */ /* later turn this into an XXT_solve call ? */ PCTFS_rvec_zero(uu,m); x_ptr=x; iptr = col_indices; for (k=0; k<i; k++) { off = *iptr++; len = *iptr++; ierr = PetscBLASIntCast(len,&dlen);CHKERRQ(ierr); PetscStackCallBLAS("BLASdot",uu[k] = BLASdot_(&dlen,u+off,&i1,x_ptr,&i1)); x_ptr+=len; } /* uu = X^T.u_l (comm portion) */ PCTFS_ssgl_radd (uu, w, dim, stages); /* z = X.uu */ PCTFS_rvec_zero(z,n); x_ptr=x; iptr = col_indices; for (k=0; k<i; k++) { off = *iptr++; len = *iptr++; ierr = PetscBLASIntCast(len,&dlen);CHKERRQ(ierr); PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dlen,&uu[k],x_ptr,&i1,z+off,&i1)); x_ptr+=len; } /* compute v_l = v_l - z */ PCTFS_rvec_zero(v+a_n,a_m-a_n); ierr = PetscBLASIntCast(n,&dlen);CHKERRQ(ierr); PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dlen,&dm1,z,&i1,v,&i1)); /* compute u_l = A.v_l */ if (a_n!=a_m) PCTFS_gs_gop_hc(PCTFS_gs_handle,v,"+\0",dim); PCTFS_rvec_zero(u,n); do_matvec(xxt_handle->mvi,v,u); /* compute sqrt(alpha) = sqrt(v_l^T.u_l) - local portion */ ierr = PetscBLASIntCast(n,&dlen);CHKERRQ(ierr); PetscStackCallBLAS("BLASdot",alpha = BLASdot_(&dlen,u,&i1,v,&i1)); /* compute sqrt(alpha) = sqrt(v_l^T.u_l) - comm portion */ PCTFS_grop_hc(&alpha, &alpha_w, 1, op, dim); alpha = (PetscScalar) PetscSqrtReal((PetscReal)alpha); /* check for small alpha */ /* LATER use this to detect and determine null space */ if (PetscAbsScalar(alpha)<1.0e-14) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"bad alpha! %g\n",alpha); /* compute v_l = v_l/sqrt(alpha) */ PCTFS_rvec_scale(v,1.0/alpha,n); /* add newly generated column, v_l, to X */ flag = 1; off=len=0; for (k=0; k<n; k++) { if (v[k]!=0.0) { len=k; if (flag) { off=k; flag=0; } } } len -= (off-1); if (len>0) { if ((xxt_nnz+len)>xxt_max_nnz) { ierr = PetscInfo(0,"increasing space for X by 2x!\n");CHKERRQ(ierr); xxt_max_nnz *= 2; x_ptr = (PetscScalar*) malloc(xxt_max_nnz*sizeof(PetscScalar)); PCTFS_rvec_copy(x_ptr,x,xxt_nnz); free(x); x = x_ptr; x_ptr+=xxt_nnz; } xxt_nnz += len; PCTFS_rvec_copy(x_ptr,v+off,len); /* keep track of number of zeros */ if (dim) { for (k=0; k<len; k++) { if (x_ptr[k]==0.0) xxt_zero_nnz++; } } else { for (k=0; k<len; k++) { if (x_ptr[k]==0.0) xxt_zero_nnz_0++; } } col_indices[2*i] = off; col_sz[i] = col_indices[2*i+1] = len; col_vals[i] = x_ptr; } else { col_indices[2*i] = 0; col_sz[i] = col_indices[2*i+1] = 0; col_vals[i] = x_ptr; } } /* close off stages for execution phase */ while (dim!=level) { stages[dim++] = i; ierr = PetscInfo2(0,"disconnected!!! dim(%D)!=level(%D)\n",dim,level);CHKERRQ(ierr); } stages[dim]=i; xxt_handle->info->n = xxt_handle->mvi->n; xxt_handle->info->m = m; xxt_handle->info->nnz = xxt_nnz; xxt_handle->info->max_nnz = xxt_max_nnz; xxt_handle->info->msg_buf_sz = stages[level]-stages[0]; xxt_handle->info->solve_uu = (PetscScalar*) malloc(m*sizeof(PetscScalar)); xxt_handle->info->solve_w = (PetscScalar*) malloc(m*sizeof(PetscScalar)); xxt_handle->info->x = x; xxt_handle->info->col_vals = col_vals; xxt_handle->info->col_sz = col_sz; xxt_handle->info->col_indices = col_indices; xxt_handle->info->stages = stages; xxt_handle->info->nsolves = 0; xxt_handle->info->tot_solve_time = 0.0; free(segs); free(u); free(v); free(uu); free(z); free(w); return(0); }
PetscErrorCode DSSort_NHEP_Total(DS ds,PetscScalar *wr,PetscScalar *wi) { #if defined(SLEPC_MISSING_LAPACK_TREXC) PetscFunctionBegin; SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREXC - Lapack routine is unavailable"); #else PetscErrorCode ierr; PetscScalar re; PetscInt i,j,pos,result; PetscBLASInt ifst,ilst,info,n,ld; PetscScalar *T = ds->mat[DS_MAT_A]; PetscScalar *Q = ds->mat[DS_MAT_Q]; #if !defined(PETSC_USE_COMPLEX) PetscScalar *work,im; #endif PetscFunctionBegin; ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr); ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr); #if !defined(PETSC_USE_COMPLEX) ierr = DSAllocateWork_Private(ds,ld,0,0);CHKERRQ(ierr); work = ds->work; #endif /* selection sort */ for (i=ds->l;i<n-1;i++) { re = wr[i]; #if !defined(PETSC_USE_COMPLEX) im = wi[i]; #endif pos = 0; j=i+1; /* j points to the next eigenvalue */ #if !defined(PETSC_USE_COMPLEX) if (im != 0) j=i+2; #endif /* find minimum eigenvalue */ for (;j<n;j++) { #if !defined(PETSC_USE_COMPLEX) ierr = SlepcSCCompare(ds->sc,re,im,wr[j],wi[j],&result);CHKERRQ(ierr); #else ierr = SlepcSCCompare(ds->sc,re,0.0,wr[j],0.0,&result);CHKERRQ(ierr); #endif if (result > 0) { re = wr[j]; #if !defined(PETSC_USE_COMPLEX) im = wi[j]; #endif pos = j; } #if !defined(PETSC_USE_COMPLEX) if (wi[j] != 0) j++; #endif } if (pos) { /* interchange blocks */ ierr = PetscBLASIntCast(pos+1,&ifst);CHKERRQ(ierr); ierr = PetscBLASIntCast(i+1,&ilst);CHKERRQ(ierr); #if !defined(PETSC_USE_COMPLEX) PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info)); #else PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info)); #endif if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTREXC %d",info); /* recover original eigenvalues from T matrix */ for (j=i;j<n;j++) { wr[j] = T[j+j*ld]; #if !defined(PETSC_USE_COMPLEX) if (j<n-1 && T[j+1+j*ld] != 0.0) { /* complex conjugate eigenvalue */ wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) * PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld])); wr[j+1] = wr[j]; wi[j+1] = -wi[j]; j++; } else { wi[j] = 0.0; } #endif } } #if !defined(PETSC_USE_COMPLEX) if (wi[i] != 0) i++; #endif } PetscFunctionReturn(0); #endif }
/*@ KSPComputeEigenvaluesExplicitly - Computes all of the eigenvalues of the preconditioned operator using LAPACK. Collective on KSP Input Parameter: + ksp - iterative context obtained from KSPCreate() - n - size of arrays r and c Output Parameters: + r - real part of computed eigenvalues - c - complex part of computed eigenvalues Notes: This approach is very slow but will generally provide accurate eigenvalue estimates. This routine explicitly forms a dense matrix representing the preconditioned operator, and thus will run only for relatively small problems, say n < 500. Many users may just want to use the monitoring routine KSPMonitorSingularValue() (which can be set with option -ksp_monitor_singular_value) to print the singular values at each iteration of the linear solve. The preconditoner operator, rhs vector, solution vectors should be set before this routine is called. i.e use KSPSetOperators(),KSPSolve() or KSPSetOperators() Level: advanced .keywords: KSP, compute, eigenvalues, explicitly .seealso: KSPComputeEigenvalues(), KSPMonitorSingularValue(), KSPComputeExtremeSingularValues(), KSPSetOperators(), KSPSolve() @*/ PetscErrorCode KSPComputeEigenvaluesExplicitly(KSP ksp,PetscInt nmax,PetscReal *r,PetscReal *c) { Mat BA; PetscErrorCode ierr; PetscMPIInt size,rank; MPI_Comm comm = ((PetscObject)ksp)->comm; PetscScalar *array; Mat A; PetscInt m,row,nz,i,n,dummy; const PetscInt *cols; const PetscScalar *vals; PetscFunctionBegin; ierr = KSPComputeExplicitOperator(ksp,&BA);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = MatGetSize(BA,&n,&n);CHKERRQ(ierr); if (size > 1) { /* assemble matrix on first processor */ ierr = MatCreate(((PetscObject)ksp)->comm,&A);CHKERRQ(ierr); if (!rank) { ierr = MatSetSizes(A,n,n,n,n);CHKERRQ(ierr); } else { ierr = MatSetSizes(A,0,0,n,n);CHKERRQ(ierr); } ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr); ierr = PetscLogObjectParent(BA,A);CHKERRQ(ierr); ierr = MatGetOwnershipRange(BA,&row,&dummy);CHKERRQ(ierr); ierr = MatGetLocalSize(BA,&m,&dummy);CHKERRQ(ierr); for (i=0; i<m; i++) { ierr = MatGetRow(BA,row,&nz,&cols,&vals);CHKERRQ(ierr); ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); ierr = MatRestoreRow(BA,row,&nz,&cols,&vals);CHKERRQ(ierr); row++; } ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatDenseGetArray(A,&array);CHKERRQ(ierr); } else { ierr = MatDenseGetArray(BA,&array);CHKERRQ(ierr); } #if defined(PETSC_HAVE_ESSL) /* ESSL has a different calling sequence for dgeev() and zgeev() than standard LAPACK */ if (!rank) { PetscScalar sdummy,*cwork; PetscReal *work,*realpart; PetscBLASInt clen,idummy,lwork,bn,zero = 0; PetscInt *perm; #if !defined(PETSC_USE_COMPLEX) clen = n; #else clen = 2*n; #endif ierr = PetscMalloc(clen*sizeof(PetscScalar),&cwork);CHKERRQ(ierr); idummy = -1; /* unused */ bn = PetscBLASIntCast(n); lwork = 5*n; ierr = PetscMalloc(lwork*sizeof(PetscReal),&work);CHKERRQ(ierr); ierr = PetscMalloc(n*sizeof(PetscReal),&realpart);CHKERRQ(ierr); ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); LAPACKgeev_(&zero,array,&bn,cwork,&sdummy,&idummy,&idummy,&bn,work,&lwork); ierr = PetscFPTrapPop();CHKERRQ(ierr); ierr = PetscFree(work);CHKERRQ(ierr); /* For now we stick with the convention of storing the real and imaginary components of evalues separately. But is this what we really want? */ ierr = PetscMalloc(n*sizeof(PetscInt),&perm);CHKERRQ(ierr); #if !defined(PETSC_USE_COMPLEX) for (i=0; i<n; i++) { realpart[i] = cwork[2*i]; perm[i] = i; } ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); for (i=0; i<n; i++) { r[i] = cwork[2*perm[i]]; c[i] = cwork[2*perm[i]+1]; } #else for (i=0; i<n; i++) { realpart[i] = PetscRealPart(cwork[i]); perm[i] = i; } ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); for (i=0; i<n; i++) { r[i] = PetscRealPart(cwork[perm[i]]); c[i] = PetscImaginaryPart(cwork[perm[i]]); } #endif ierr = PetscFree(perm);CHKERRQ(ierr); ierr = PetscFree(realpart);CHKERRQ(ierr); ierr = PetscFree(cwork);CHKERRQ(ierr); } #elif !defined(PETSC_USE_COMPLEX) if (!rank) { PetscScalar *work; PetscReal *realpart,*imagpart; PetscBLASInt idummy,lwork; PetscInt *perm; idummy = n; lwork = 5*n; ierr = PetscMalloc(2*n*sizeof(PetscReal),&realpart);CHKERRQ(ierr); imagpart = realpart + n; ierr = PetscMalloc(5*n*sizeof(PetscReal),&work);CHKERRQ(ierr); #if defined(PETSC_MISSING_LAPACK_GEEV) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values."); #else { PetscBLASInt lierr; PetscScalar sdummy; PetscBLASInt bn = PetscBLASIntCast(n); ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); LAPACKgeev_("N","N",&bn,array,&bn,realpart,imagpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr); if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr); ierr = PetscFPTrapPop();CHKERRQ(ierr); } #endif ierr = PetscFree(work);CHKERRQ(ierr); ierr = PetscMalloc(n*sizeof(PetscInt),&perm);CHKERRQ(ierr); for (i=0; i<n; i++) { perm[i] = i;} ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr); for (i=0; i<n; i++) { r[i] = realpart[perm[i]]; c[i] = imagpart[perm[i]]; } ierr = PetscFree(perm);CHKERRQ(ierr); ierr = PetscFree(realpart);CHKERRQ(ierr); } #else if (!rank) { PetscScalar *work,*eigs; PetscReal *rwork; PetscBLASInt idummy,lwork; PetscInt *perm; idummy = n; lwork = 5*n; ierr = PetscMalloc(5*n*sizeof(PetscScalar),&work);CHKERRQ(ierr); ierr = PetscMalloc(2*n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); ierr = PetscMalloc(n*sizeof(PetscScalar),&eigs);CHKERRQ(ierr); #if defined(PETSC_MISSING_LAPACK_GEEV) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values."); #else { PetscBLASInt lierr; PetscScalar sdummy; PetscBLASInt nb = PetscBLASIntCast(n); ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); LAPACKgeev_("N","N",&nb,array,&nb,eigs,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,rwork,&lierr); if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr); ierr = PetscFPTrapPop();CHKERRQ(ierr); } #endif ierr = PetscFree(work);CHKERRQ(ierr); ierr = PetscFree(rwork);CHKERRQ(ierr); ierr = PetscMalloc(n*sizeof(PetscInt),&perm);CHKERRQ(ierr); for (i=0; i<n; i++) { perm[i] = i;} for (i=0; i<n; i++) { r[i] = PetscRealPart(eigs[i]);} ierr = PetscSortRealWithPermutation(n,r,perm);CHKERRQ(ierr); for (i=0; i<n; i++) { r[i] = PetscRealPart(eigs[perm[i]]); c[i] = PetscImaginaryPart(eigs[perm[i]]); } ierr = PetscFree(perm);CHKERRQ(ierr); ierr = PetscFree(eigs);CHKERRQ(ierr); } #endif if (size > 1) { ierr = MatDenseRestoreArray(A,&array);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); } else { ierr = MatDenseRestoreArray(BA,&array);CHKERRQ(ierr); } ierr = MatDestroy(&BA);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode DSSolve_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi) { #if defined(SLEPC_MISSING_LAPACK_GEHRD) || defined(SLEPC_MISSING_LAPACK_ORGHR) || defined(PETSC_MISSING_LAPACK_HSEQR) PetscFunctionBegin; SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEHRD/ORGHR/HSEQR - Lapack routines are unavailable"); #else PetscErrorCode ierr; PetscScalar *work,*tau; PetscInt i,j; PetscBLASInt ilo,lwork,info,n,ld; PetscScalar *A = ds->mat[DS_MAT_A]; PetscScalar *Q = ds->mat[DS_MAT_Q]; PetscFunctionBegin; #if !defined(PETSC_USE_COMPLEX) PetscValidPointer(wi,3); #endif ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr); ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr); ierr = PetscBLASIntCast(ds->l+1,&ilo);CHKERRQ(ierr); ierr = DSAllocateWork_Private(ds,ld+ld*ld,0,0);CHKERRQ(ierr); tau = ds->work; work = ds->work+ld; lwork = ld*ld; /* initialize orthogonal matrix */ ierr = PetscMemzero(Q,ld*ld*sizeof(PetscScalar));CHKERRQ(ierr); for (i=0;i<n;i++) Q[i+i*ld] = 1.0; if (n==1) { /* quick return */ wr[0] = A[0]; wi[0] = 0.0; PetscFunctionReturn(0); } /* reduce to upper Hessenberg form */ if (ds->state<DS_STATE_INTERMEDIATE) { PetscStackCallBLAS("LAPACKgehrd",LAPACKgehrd_(&n,&ilo,&n,A,&ld,tau,work,&lwork,&info)); if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEHRD %d",info); for (j=0;j<n-1;j++) { for (i=j+2;i<n;i++) { Q[i+j*ld] = A[i+j*ld]; A[i+j*ld] = 0.0; } } PetscStackCallBLAS("LAPACKorghr",LAPACKorghr_(&n,&ilo,&n,Q,&ld,tau,work,&lwork,&info)); if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGHR %d",info); } /* compute the (real) Schur form */ #if !defined(PETSC_USE_COMPLEX) PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,wi,Q,&ld,work,&lwork,&info)); for (j=0;j<ds->l;j++) { if (j==n-1 || A[j+1+j*ld] == 0.0) { /* real eigenvalue */ wr[j] = A[j+j*ld]; wi[j] = 0.0; } else { /* complex eigenvalue */ wr[j] = A[j+j*ld]; wr[j+1] = A[j+j*ld]; wi[j] = PetscSqrtReal(PetscAbsReal(A[j+1+j*ld])) * PetscSqrtReal(PetscAbsReal(A[j+(j+1)*ld])); wi[j+1] = -wi[j]; j++; } } #else PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,Q,&ld,work,&lwork,&info)); if (wi) for (i=ds->l;i<n;i++) wi[i] = 0.0; #endif if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xHSEQR %d",info); PetscFunctionReturn(0); #endif }
static PetscErrorCode KSPSolve_BCGSL(KSP ksp) { KSP_BCGSL *bcgsl = (KSP_BCGSL*) ksp->data; PetscScalar alpha, beta, omega, sigma; PetscScalar rho0, rho1; PetscReal kappa0, kappaA, kappa1; PetscReal ghat; PetscReal zeta, zeta0, rnmax_computed, rnmax_true, nrm0; PetscBool bUpdateX; PetscInt maxit; PetscInt h, i, j, k, vi, ell; PetscBLASInt ldMZ,bierr; PetscScalar utb; PetscReal max_s, pinv_tol; PetscErrorCode ierr; PetscFunctionBegin; /* set up temporary vectors */ vi = 0; ell = bcgsl->ell; bcgsl->vB = ksp->work[vi]; vi++; bcgsl->vRt = ksp->work[vi]; vi++; bcgsl->vTm = ksp->work[vi]; vi++; bcgsl->vvR = ksp->work+vi; vi += ell+1; bcgsl->vvU = ksp->work+vi; vi += ell+1; bcgsl->vXr = ksp->work[vi]; vi++; ierr = PetscBLASIntCast(ell+1,&ldMZ);CHKERRQ(ierr); /* Prime the iterative solver */ ierr = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);CHKERRQ(ierr); ierr = VecNorm(VVR[0], NORM_2, &zeta0);CHKERRQ(ierr); rnmax_computed = zeta0; rnmax_true = zeta0; ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) { ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its = 0; ksp->rnorm = zeta0; ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr); PetscFunctionReturn(0); } ierr = VecSet(VVU[0],0.0);CHKERRQ(ierr); alpha = 0.; rho0 = omega = 1; if (bcgsl->delta>0.0) { ierr = VecCopy(VX, VXR);CHKERRQ(ierr); ierr = VecSet(VX,0.0);CHKERRQ(ierr); ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr); } else { ierr = VecCopy(ksp->vec_rhs, VB);CHKERRQ(ierr); } /* Life goes on */ ierr = VecCopy(VVR[0], VRT);CHKERRQ(ierr); zeta = zeta0; ierr = KSPGetTolerances(ksp, NULL, NULL, NULL, &maxit);CHKERRQ(ierr); for (k=0; k<maxit; k += bcgsl->ell) { ksp->its = k; ksp->rnorm = zeta; ierr = KSPLogResidualHistory(ksp, zeta);CHKERRQ(ierr); ierr = KSPMonitor(ksp, ksp->its, zeta);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); if (ksp->reason < 0) PetscFunctionReturn(0); else if (ksp->reason) break; /* BiCG part */ rho0 = -omega*rho0; nrm0 = zeta; for (j=0; j<bcgsl->ell; j++) { /* rho1 <- r_j' * r_tilde */ ierr = VecDot(VVR[j], VRT, &rho1);CHKERRQ(ierr); if (rho1 == 0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG; PetscFunctionReturn(0); } beta = alpha*(rho1/rho0); rho0 = rho1; for (i=0; i<=j; i++) { /* u_i <- r_i - beta*u_i */ ierr = VecAYPX(VVU[i], -beta, VVR[i]);CHKERRQ(ierr); } /* u_{j+1} <- inv(K)*A*u_j */ ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);CHKERRQ(ierr); ierr = VecDot(VVU[j+1], VRT, &sigma);CHKERRQ(ierr); if (sigma == 0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG; PetscFunctionReturn(0); } alpha = rho1/sigma; /* x <- x + alpha*u_0 */ ierr = VecAXPY(VX, alpha, VVU[0]);CHKERRQ(ierr); for (i=0; i<=j; i++) { /* r_i <- r_i - alpha*u_{i+1} */ ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]);CHKERRQ(ierr); } /* r_{j+1} <- inv(K)*A*r_j */ ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);CHKERRQ(ierr); ierr = VecNorm(VVR[0], NORM_2, &nrm0);CHKERRQ(ierr); if (bcgsl->delta>0.0) { if (rnmax_computed<nrm0) rnmax_computed = nrm0; if (rnmax_true<nrm0) rnmax_true = nrm0; } /* NEW: check for early exit */ ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) { ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its = k+j; ksp->rnorm = nrm0; ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr); if (ksp->reason < 0) PetscFunctionReturn(0); } } /* Polynomial part */ for (i = 0; i <= bcgsl->ell; ++i) { ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);CHKERRQ(ierr); } /* Symmetrize MZa */ for (i = 0; i <= bcgsl->ell; ++i) { for (j = i+1; j <= bcgsl->ell; ++j) { MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]); } } /* Copy MZa to MZb */ ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar));CHKERRQ(ierr); if (!bcgsl->bConvex || bcgsl->ell==1) { PetscBLASInt ione = 1,bell; ierr = PetscBLASIntCast(bcgsl->ell,&bell);CHKERRQ(ierr); AY0c[0] = -1; if (bcgsl->pinv) { #if defined(PETSC_MISSING_LAPACK_GESVD) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable."); #else # if defined(PETSC_USE_COMPLEX) PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,bcgsl->realwork,&bierr)); # else PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,&bierr)); # endif #endif if (bierr!=0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; PetscFunctionReturn(0); } /* Apply pseudo-inverse */ max_s = bcgsl->s[0]; for (i=1; i<bell; i++) { if (bcgsl->s[i] > max_s) { max_s = bcgsl->s[i]; } } /* tolerance is hardwired to bell*max(s)*PETSC_MACHINE_EPSILON */ pinv_tol = bell*max_s*PETSC_MACHINE_EPSILON; ierr = PetscMemzero(&AY0c[1],bell*sizeof(PetscScalar));CHKERRQ(ierr); for (i=0; i<bell; i++) { if (bcgsl->s[i] >= pinv_tol) { utb=0.; for (j=0; j<bell; j++) { utb += MZb[1+j]*bcgsl->u[i*bell+j]; } for (j=0; j<bell; j++) { AY0c[1+j] += utb/bcgsl->s[i]*bcgsl->v[j*bell+i]; } } } } else { #if defined(PETSC_MISSING_LAPACK_POTRF) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable."); #else PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr)); #endif if (bierr!=0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; PetscFunctionReturn(0); } ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar));CHKERRQ(ierr); PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr)); } } else { PetscBLASInt ione = 1; PetscScalar aone = 1.0, azero = 0.0; PetscBLASInt neqs; ierr = PetscBLASIntCast(bcgsl->ell-1,&neqs);CHKERRQ(ierr); #if defined(PETSC_MISSING_LAPACK_POTRF) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable."); #else PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr)); #endif if (bierr!=0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; PetscFunctionReturn(0); } ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr); PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr)); AY0c[0] = -1; AY0c[bcgsl->ell] = 0.; ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr); PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr)); AYlc[0] = 0.; AYlc[bcgsl->ell] = -1; PetscStackCall("BLASgemv",BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione)); kappa0 = PetscRealPart(BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione)); /* round-off can cause negative kappa's */ if (kappa0<0) kappa0 = -kappa0; kappa0 = PetscSqrtReal(kappa0); kappaA = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione)); PetscStackCall("BLASgemv",BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione)); kappa1 = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione)); if (kappa1<0) kappa1 = -kappa1; kappa1 = PetscSqrtReal(kappa1); if (kappa0!=0.0 && kappa1!=0.0) { if (kappaA<0.7*kappa0*kappa1) { ghat = (kappaA<0.0) ? -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1; } else { ghat = kappaA/(kappa1*kappa1); } for (i=0; i<=bcgsl->ell; i++) { AY0c[i] = AY0c[i] - ghat* AYlc[i]; } } } omega = AY0c[bcgsl->ell]; for (h=bcgsl->ell; h>0 && omega==0.0; h--) omega = AY0c[h]; if (omega==0.0) { ksp->reason = KSP_DIVERGED_BREAKDOWN; PetscFunctionReturn(0); } ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);CHKERRQ(ierr); for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0; ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);CHKERRQ(ierr); ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);CHKERRQ(ierr); for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0; ierr = VecNorm(VVR[0], NORM_2, &zeta);CHKERRQ(ierr); /* Accurate Update */ if (bcgsl->delta>0.0) { if (rnmax_computed<zeta) rnmax_computed = zeta; if (rnmax_true<zeta) rnmax_true = zeta; bUpdateX = (PetscBool) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed); if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) { /* r0 <- b-inv(K)*A*X */ ierr = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);CHKERRQ(ierr); ierr = VecAYPX(VVR[0], -1.0, VB);CHKERRQ(ierr); rnmax_true = zeta; if (bUpdateX) { ierr = VecAXPY(VXR,1.0,VX);CHKERRQ(ierr); ierr = VecSet(VX,0.0);CHKERRQ(ierr); ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr); rnmax_computed = zeta; } } } } if (bcgsl->delta>0.0) { ierr = VecAXPY(VX,1.0,VXR);CHKERRQ(ierr); } ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; PetscFunctionReturn(0); }
PetscErrorCode KSPAGMRESRoddec(KSP ksp, PetscInt nvec) { KSP_AGMRES *agmres = (KSP_AGMRES*) ksp->data; MPI_Comm comm; PetscScalar *Qloc = agmres->Qloc; PetscScalar *sgn = agmres->sgn; PetscScalar *tloc = agmres->tloc; PetscErrorCode ierr; PetscReal *wbufptr = agmres->wbufptr; PetscMPIInt rank = agmres->rank; PetscMPIInt First = agmres->First; PetscMPIInt Last = agmres->Last; PetscBLASInt pas,len,bnloc,bpos; PetscInt nloc,d, i, j, k; PetscInt pos; PetscReal c, s, rho, Ajj, val, tt, old; PetscScalar *col; MPI_Status status; PetscBLASInt N = MAXKSPSIZE + 1; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)ksp,&comm);CHKERRQ(ierr); ierr = PetscLogEventBegin(KSP_AGMRESRoddec,ksp,0,0,0);CHKERRQ(ierr); ierr = PetscMemzero(agmres->Rloc, N*N*sizeof(PetscScalar));CHKERRQ(ierr); /* check input arguments */ if (nvec < 1) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_OUTOFRANGE, "The number of input vectors shoud be positive"); ierr = VecGetLocalSize(VEC_V(0), &nloc);CHKERRQ(ierr); ierr = PetscBLASIntCast(nloc,&bnloc);CHKERRQ(ierr); if (nvec > nloc) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_WRONG, "In QR factorization, the number of local rows should be greater or equal to the number of columns"); pas = 1; /* Copy the vectors of the basis */ for (j = 0; j < nvec; j++) { ierr = VecGetArray(VEC_V(j), &col);CHKERRQ(ierr); PetscStackCallBLAS("BLAScopy",BLAScopy_(&bnloc, col, &pas, &Qloc[j*nloc], &pas)); ierr = VecRestoreArray(VEC_V(j), &col);CHKERRQ(ierr); } /* Each process performs a local QR on its own block */ for (j = 0; j < nvec; j++) { len = nloc - j; Ajj = Qloc[j*nloc+j]; rho = -PetscSign(Ajj) * BLASnrm2_(&len, &(Qloc[j*nloc+j]), &pas); if (rho == 0.0) tloc[j] = 0.0; else { tloc[j] = (Ajj - rho) / rho; len = len - 1; val = 1.0 / (Ajj - rho); PetscStackCallBLAS("BLASscal",BLASscal_(&len, &val, &(Qloc[j*nloc+j+1]), &pas)); Qloc[j*nloc+j] = 1.0; len = len + 1; for (k = j + 1; k < nvec; k++) { PetscStackCallBLAS("BLASdot",tt = tloc[j] * BLASdot_(&len, &(Qloc[j*nloc+j]), &pas, &(Qloc[k*nloc+j]), &pas)); PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&len, &tt, &(Qloc[j*nloc+j]), &pas, &(Qloc[k*nloc+j]), &pas)); } Qloc[j*nloc+j] = rho; } } /*annihilate undesirable Rloc, diagonal by diagonal*/ for (d = 0; d < nvec; d++) { len = nvec - d; if (rank == First) { PetscStackCallBLAS("BLAScopy",BLAScopy_(&len, &(Qloc[d*nloc+d]), &bnloc, &(wbufptr[d]), &pas)); ierr = MPI_Send(&(wbufptr[d]), len, MPIU_SCALAR, rank + 1, agmres->tag, comm);CHKERRQ(ierr); } else { ierr = MPI_Recv(&(wbufptr[d]), len, MPIU_SCALAR, rank - 1, agmres->tag, comm, &status);CHKERRQ(ierr); /*Elimination of Rloc(1,d)*/ c = wbufptr[d]; s = Qloc[d*nloc]; ierr = KSPAGMRESRoddecGivens(&c, &s, &rho, 1);CHKERRQ(ierr); /*Apply Givens Rotation*/ for (k = d; k < nvec; k++) { old = wbufptr[k]; wbufptr[k] = c * old - s * Qloc[k*nloc]; Qloc[k*nloc] = s * old + c * Qloc[k*nloc]; } Qloc[d*nloc] = rho; if (rank != Last) { ierr = MPI_Send(& (wbufptr[d]), len, MPIU_SCALAR, rank + 1, agmres->tag, comm);CHKERRQ(ierr); } /* zero-out the d-th diagonal of Rloc ...*/ for (j = d + 1; j < nvec; j++) { /* elimination of Rloc[i][j]*/ i = j - d; c = Qloc[j*nloc+i-1]; s = Qloc[j*nloc+i]; ierr = KSPAGMRESRoddecGivens(&c, &s, &rho, 1);CHKERRQ(ierr); for (k = j; k < nvec; k++) { old = Qloc[k*nloc+i-1]; Qloc[k*nloc+i-1] = c * old - s * Qloc[k*nloc+i]; Qloc[k*nloc+i] = s * old + c * Qloc[k*nloc+i]; } Qloc[j*nloc+i] = rho; } if (rank == Last) { PetscStackCallBLAS("BLAScopy",BLAScopy_(&len, &(wbufptr[d]), &pas, RLOC(d,d), &N)); for (k = d + 1; k < nvec; k++) *RLOC(k,d) = 0.0; } } } if (rank == Last) { for (d = 0; d < nvec; d++) { pos = nvec - d; ierr = PetscBLASIntCast(pos,&bpos);CHKERRQ(ierr); sgn[d] = PetscSign(*RLOC(d,d)); PetscStackCallBLAS("BLASscal",BLASscal_(&bpos, &(sgn[d]), RLOC(d,d), &N)); } } /*BroadCast Rloc to all other processes * NWD : should not be needed */ ierr = MPI_Bcast(agmres->Rloc,N*N,MPIU_SCALAR,Last,comm);CHKERRQ(ierr); ierr = PetscLogEventEnd(KSP_AGMRESRoddec,ksp,0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }