Exemple #1
0
PetscInt main(PetscInt argc,char **args)
{
  Mat            A,A_dense,B;
  Vec            *evecs;
  PetscBool      flg,TestZHEEV=PETSC_TRUE,TestZHEEVX=PETSC_FALSE,TestZHEGV=PETSC_FALSE,TestZHEGVX=PETSC_FALSE;
  PetscErrorCode ierr;
  PetscBool      isSymmetric;
  PetscScalar    sigma,*arrayA,*arrayB,*evecs_array=NULL,*work;
  PetscReal      *evals,*rwork;
  PetscMPIInt    size;
  PetscInt       m,i,j,nevs,il,iu,cklvl=2;
  PetscReal      vl,vu,abstol=1.e-8;
  PetscBLASInt   *iwork,*ifail,lwork,lierr,bn;
  PetscReal      tols[2];
  PetscInt       nzeros[2],nz;
  PetscReal      ratio;
  PetscScalar    v,none = -1.0,sigma2,pfive = 0.5,*xa;
  PetscRandom    rctx;
  PetscReal      h2,sigma1 = 100.0;
  PetscInt       dim,Ii,J,Istart,Iend,n = 6,its,use_random,one=1;

  PetscInitialize(&argc,&args,(char*)0,help);
#if !defined(PETSC_USE_COMPLEX)
  SETERRQ(PETSC_COMM_WORLD,1,"This example requires complex numbers");
#endif
  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
  if (size != 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This is a uniprocessor example only!");

  ierr = PetscOptionsHasName(NULL, "-test_zheevx", &flg);CHKERRQ(ierr);
  if (flg) {
    TestZHEEV  = PETSC_FALSE;
    TestZHEEVX = PETSC_TRUE;
  }
  ierr = PetscOptionsHasName(NULL, "-test_zhegv", &flg);CHKERRQ(ierr);
  if (flg) {
    TestZHEEV = PETSC_FALSE;
    TestZHEGV = PETSC_TRUE;
  }
  ierr = PetscOptionsHasName(NULL, "-test_zhegvx", &flg);CHKERRQ(ierr);
  if (flg) {
    TestZHEEV  = PETSC_FALSE;
    TestZHEGVX = PETSC_TRUE;
  }

  ierr = PetscOptionsGetReal(NULL,"-sigma1",&sigma1,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetInt(NULL,"-n",&n,NULL);CHKERRQ(ierr);
  dim  = n*n;

  ierr = MatCreate(PETSC_COMM_SELF,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,dim,dim);CHKERRQ(ierr);
  ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSetFromOptions(A);CHKERRQ(ierr);

  ierr = PetscOptionsHasName(NULL,"-norandom",&flg);CHKERRQ(ierr);
  if (flg) use_random = 0;
  else     use_random = 1;
  if (use_random) {
    ierr = PetscRandomCreate(PETSC_COMM_SELF,&rctx);CHKERRQ(ierr);
    ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);
    ierr = PetscRandomSetInterval(rctx,0.0,PETSC_i);CHKERRQ(ierr);
  } else {
    sigma2 = 10.0*PETSC_i;
  }
  h2 = 1.0/((n+1)*(n+1));
  for (Ii=0; Ii<dim; Ii++) {
    v = -1.0; i = Ii/n; j = Ii - i*n;
    if (i>0) {
      J = Ii-n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);
    }
    if (i<n-1) {
      J = Ii+n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);
    }
    if (j>0) {
      J = Ii-1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);
    }
    if (j<n-1) {
      J = Ii+1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);
    }
    if (use_random) {ierr = PetscRandomGetValue(rctx,&sigma2);CHKERRQ(ierr);}
    v    = 4.0 - sigma1*h2;
    ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,ADD_VALUES);CHKERRQ(ierr);
  }
  /* make A complex Hermitian */
  v    = sigma2*h2;
  Ii   = 0; J = 1;
  ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);
  v    = -sigma2*h2;
  ierr = MatSetValues(A,1,&J,1,&Ii,&v,ADD_VALUES);CHKERRQ(ierr);
  if (use_random) {ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);}
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  m    = n = dim;

  /* Check whether A is symmetric */
  ierr = PetscOptionsHasName(NULL, "-check_symmetry", &flg);CHKERRQ(ierr);
  if (flg) {
    Mat Trans;
    ierr = MatTranspose(A,MAT_INITIAL_MATRIX, &Trans);
    ierr = MatEqual(A, Trans, &isSymmetric);
    if (!isSymmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"A must be symmetric");
    ierr = MatDestroy(&Trans);CHKERRQ(ierr);
  }

  /* Convert aij matrix to MatSeqDense for LAPACK */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&A_dense);CHKERRQ(ierr);
  } else {
    ierr = MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&A_dense);CHKERRQ(ierr);
  }

  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,dim,dim);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSetFromOptions(B);CHKERRQ(ierr);
  v    = 1.0;
  for (Ii=0; Ii<dim; Ii++) {
    ierr = MatSetValues(B,1,&Ii,1,&Ii,&v,ADD_VALUES);CHKERRQ(ierr);
  }

  /* Solve standard eigenvalue problem: A*x = lambda*x */
  /*===================================================*/
  ierr = PetscBLASIntCast(2*n,&lwork);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&evals);CHKERRQ(ierr);
  ierr = PetscMalloc(lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
  ierr = MatDenseGetArray(A_dense,&arrayA);CHKERRQ(ierr);

  if (TestZHEEV) { /* test zheev() */
    printf(" LAPACKsyev: compute all %d eigensolutions...\n",m);
    ierr = PetscMalloc((3*n-2)*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
    LAPACKsyev_("V","U",&bn,arrayA,&bn,evals,work,&lwork,rwork,&lierr);
    ierr = PetscFree(rwork);CHKERRQ(ierr);

    evecs_array = arrayA;
    nevs        = m;
    il          =1; iu=m;
  }
  if (TestZHEEVX) {
    il   = 1;
    ierr = PetscBLASIntCast((0.2*m),&iu);CHKERRQ(ierr);
    printf(" LAPACKsyevx: compute %d to %d-th eigensolutions...\n",il,iu);
    ierr = PetscMalloc((m*n+1)*sizeof(PetscScalar),&evecs_array);CHKERRQ(ierr);
    ierr = PetscMalloc((7*n+1)*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
    ierr = PetscMalloc((5*n+1)*sizeof(PetscBLASInt),&iwork);CHKERRQ(ierr);
    ierr = PetscMalloc((n+1)*sizeof(PetscBLASInt),&ifail);CHKERRQ(ierr);

    /* in the case "I", vl and vu are not referenced */
    vl = 0.0; vu = 8.0;
    LAPACKsyevx_("V","I","U",&bn,arrayA,&bn,&vl,&vu,&il,&iu,&abstol,&nevs,evals,evecs_array,&n,work,&lwork,rwork,iwork,ifail,&lierr);
    ierr = PetscFree(iwork);CHKERRQ(ierr);
    ierr = PetscFree(ifail);CHKERRQ(ierr);
    ierr = PetscFree(rwork);CHKERRQ(ierr);
  }
  if (TestZHEGV) {
    printf(" LAPACKsygv: compute all %d eigensolutions...\n",m);
    ierr = PetscMalloc((3*n+1)*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
    ierr = MatDenseGetArray(B,&arrayB);CHKERRQ(ierr);
    LAPACKsygv_(&one,"V","U",&bn,arrayA,&bn,arrayB,&bn,evals,work,&lwork,rwork,&lierr);
    evecs_array = arrayA;
    nevs        = m;
    il          = 1; iu=m;
    ierr        = MatDenseRestoreArray(B,&arrayB);CHKERRQ(ierr);
    ierr        = PetscFree(rwork);CHKERRQ(ierr);
  }
  if (TestZHEGVX) {
    il   = 1;
    ierr = PetscBLASIntCast((0.2*m),&iu);CHKERRQ(ierr);
    printf(" LAPACKsygv: compute %d to %d-th eigensolutions...\n",il,iu);
    ierr  = PetscMalloc((m*n+1)*sizeof(PetscScalar),&evecs_array);CHKERRQ(ierr);
    ierr  = PetscMalloc((6*n+1)*sizeof(PetscBLASInt),&iwork);CHKERRQ(ierr);
    ifail = iwork + 5*n;
    ierr  = PetscMalloc((7*n+1)*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
    ierr  = MatDenseGetArray(B,&arrayB);CHKERRQ(ierr);
    vl    = 0.0; vu = 8.0;
    LAPACKsygvx_(&one,"V","I","U",&bn,arrayA,&bn,arrayB,&bn,&vl,&vu,&il,&iu,&abstol,&nevs,evals,evecs_array,&n,work,&lwork,rwork,iwork,ifail,&lierr);
    ierr = MatDenseRestoreArray(B,&arrayB);CHKERRQ(ierr);
    ierr = PetscFree(iwork);CHKERRQ(ierr);
    ierr = PetscFree(rwork);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(A_dense,&arrayA);CHKERRQ(ierr);
  if (nevs <= 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED, "nev=%d, no eigensolution has found", nevs);

  /* View evals */
  ierr = PetscOptionsHasName(NULL, "-eig_view", &flg);CHKERRQ(ierr);
  if (flg) {
    printf(" %d evals: \n",nevs);
    for (i=0; i<nevs; i++) printf("%d  %G\n",i+il,evals[i]);
  }

  /* Check residuals and orthogonality */
  ierr = PetscMalloc((nevs+1)*sizeof(Vec),&evecs);CHKERRQ(ierr);
  for (i=0; i<nevs; i++) {
    ierr = VecCreate(PETSC_COMM_SELF,&evecs[i]);CHKERRQ(ierr);
    ierr = VecSetSizes(evecs[i],PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = VecSetFromOptions(evecs[i]);CHKERRQ(ierr);
    ierr = VecPlaceArray(evecs[i],evecs_array+i*n);CHKERRQ(ierr);
  }

  tols[0] = 1.e-8;  tols[1] = 1.e-8;
  ierr    = CkEigenSolutions(cklvl,A,il-1,iu-1,evals,evecs,tols);CHKERRQ(ierr);
  for (i=0; i<nevs; i++) { ierr = VecDestroy(&evecs[i]);CHKERRQ(ierr);}
  ierr = PetscFree(evecs);CHKERRQ(ierr);

  /* Free work space. */
  if (TestZHEEVX || TestZHEGVX) {
    ierr = PetscFree(evecs_array);CHKERRQ(ierr);
  }
  ierr = PetscFree(evals);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  ierr = MatDestroy(&A_dense);CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return 0;
}
int getQ1(Mat *pA){
  int i,j;
  PetscInt m,n;
  PetscBLASInt M,N,K,lda,ldwork,info;
  PetscScalar *tau,*work; 
  PetscInt worksize;
  PetscErrorCode ierr;


  ierr = MatGetSize(*pA,&m,&n);CHKERRQ(ierr);
  PetscBLASIntCast(m,&M);
  PetscBLASIntCast(n,&N);
  
  worksize=m;
  PetscBLASIntCast(worksize,&ldwork);


  PetscMalloc1(m, &tau);//worksize,&work);
  PetscMalloc1(worksize,&work);

  K = N;     /*full rank*/   
  lda = M ; //N - row domain   M - col domain
 

  //ierr = PetscPrintf (PETSC_COMM_SELF,"L72\n");CHKERRQ(ierr);
  //ierr = MatView(*pA,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  //ierr = PetscPrintf (PETSC_COMM_SELF,"L74\n");CHKERRQ(ierr);
  PetscScalar *v;//[4]={0};
  PetscInt *Is; //[4]={0,1,2,3};
  PetscInt nC;// = 4;
  
  PetscReal arr[10*4];
  
  for(i=0; i<10; i++){
    ierr = MatGetRow(*pA,i,&nC,(const PetscInt **)&Is,(const PetscScalar **)&v); CHKERRQ(ierr);
    //ierr = PetscPrintf (PETSC_COMM_SELF,"get row %d %d\n", i, nC);CHKERRQ(ierr);
    for(j=0; j<nC; j++){
      arr[i+10*Is[j]]=v[j];
    }
    ierr = MatRestoreRow(*pA,i,&nC,(const PetscInt**)&Is,(const PetscScalar**)&v); CHKERRQ(ierr);
  }


  //ierr = PetscPrintf (PETSC_COMM_SELF,"new L72\n");CHKERRQ(ierr);
  //ierr = MatView(*pA,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  //ierr = PetscPrintf (PETSC_COMM_SELF,"new L74\n");CHKERRQ(ierr);
  //for(i=0;i<40; i++){
  //  ierr = PetscPrintf (PETSC_COMM_SELF,"arr[%d] = %f\n",i,arr[i]);CHKERRQ(ierr);
  //}

  /* Do QR */
  PetscFPTrapPush(PETSC_FP_TRAP_OFF);
  LAPACKgeqrf_(&M,&N,arr,&lda,tau,work,&ldwork,&info);
  PetscFPTrapPop();
  if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"xGEQRF error");

  /*Extract an explicit representation of Q */
  //PetscMemcpy(Q,A,mstride*n*sizeof(PetscScalar));
  LAPACKungqr_(&M,&N,&K,arr,&lda,tau,work,&ldwork,&info);
  if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"xORGQR/xUNGQR error");


  //ierr = PetscPrintf (PETSC_COMM_SELF,"\nWe store Q1 in arr:\n");CHKERRQ(ierr);
  //for(i=0;i<40; i++){
  //  ierr = PetscPrintf (PETSC_COMM_SELF,"arr[%d] = %f\n",i,arr[i]);CHKERRQ(ierr);
  //}
  
   
  for (i=0; i<10; i++) {
    for(j=0; j<4; j++) {
      ierr = MatSetValues(*pA,1,&i,1,&j,&arr[i+j*10],INSERT_VALUES);CHKERRQ(ierr);
   }
  }


  ierr = MatAssemblyBegin(*pA,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*pA,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  //ierr = PetscPrintf (PETSC_COMM_SELF,"\nThe Q1 we are going to return is\n");CHKERRQ(ierr);
  //ierr = MatView(*pA,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

  //ierr = PetscFree(arr);CHKERRQ(ierr);
  return 0; 
}
Exemple #3
0
PetscErrorCode DSFunction_EXP_NHEP_PADE(DS ds)
{
#if defined(PETSC_MISSING_LAPACK_GESV) || defined(SLEPC_MISSING_LAPACK_LANGE)
  PetscFunctionBegin;
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESV/LANGE - Lapack routines are unavailable");
#else
  PetscErrorCode ierr;
  PetscBLASInt   n,ld,ld2,*ipiv,info,inc=1;
  PetscInt       j,k,odd;
  const PetscInt p=MAX_PADE;
  PetscReal      c[MAX_PADE+1],s;
  PetscScalar    scale,mone=-1.0,one=1.0,two=2.0,zero=0.0;
  PetscScalar    *A,*A2,*Q,*P,*W,*aux;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr);
  ld2 = ld*ld;
  ierr = DSAllocateWork_Private(ds,0,ld,ld);CHKERRQ(ierr);
  ipiv = ds->iwork;
  if (!ds->mat[DS_MAT_W]) { ierr = DSAllocateMat_Private(ds,DS_MAT_W);CHKERRQ(ierr); }
  if (!ds->mat[DS_MAT_Z]) { ierr = DSAllocateMat_Private(ds,DS_MAT_Z);CHKERRQ(ierr); }
  A  = ds->mat[DS_MAT_A];
  A2 = ds->mat[DS_MAT_Z];
  Q  = ds->mat[DS_MAT_Q];
  P  = ds->mat[DS_MAT_F];
  W  = ds->mat[DS_MAT_W];

  /* Pade' coefficients */
  c[0] = 1.0;
  for (k=1;k<=p;k++) {
    c[k] = c[k-1]*(p+1-k)/(k*(2*p+1-k));
  }

  /* Scaling */
  s = LAPACKlange_("I",&n,&n,A,&ld,ds->rwork);
  if (s>0.5) {
    s = PetscMax(0,(int)(PetscLogReal(s)/PetscLogReal(2.0)) + 2);
    scale = PetscPowReal(2.0,(-1)*s);
    PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&scale,A,&inc));
  }

  /* Horner evaluation */
  PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,A,&ld,A,&ld,&zero,A2,&ld));
  ierr = PetscMemzero(Q,ld*ld*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(P,ld*ld*sizeof(PetscScalar));CHKERRQ(ierr);
  for (j=0;j<n;j++) {
    Q[j+j*ld] = c[p];
    P[j+j*ld] = c[p-1];
  }

  odd = 1;
  for (k=p-1;k>0;k--) {
    if (odd==1) {
      PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,A2,&ld,&zero,W,&ld));
      aux = Q;
      Q = W;
      W = aux;
      for (j=0;j<n;j++)
        Q[j+j*ld] = Q[j+j*ld] + c[k-1];
    } else {
      PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,A2,&ld,&zero,W,&ld));
      aux = P;
      P = W;
      W = aux;
      for (j=0;j<n;j++)
        P[j+j*ld] = P[j+j*ld] + c[k-1];
    }
    odd = 1-odd;
  }
  if (odd==1) {
    PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,A,&ld,&zero,W,&ld));
    aux = Q;
    Q = W;
    W = aux;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&ld2,&mone,P,&inc,Q,&inc));
    PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Q,&ld,ipiv,P,&ld,&info));
    PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&two,P,&inc));
    for (j=0;j<n;j++)
      P[j+j*ld] = P[j+j*ld] + 1.0;
    PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&mone,P,&inc));
  } else {
    PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,A,&ld,&zero,W,&ld));
    aux = P;
    P = W;
    W = aux;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&ld2,&mone,P,&inc,Q,&inc));
    PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Q,&ld,ipiv,P,&ld,&info));
    PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&two,P,&inc));
    for (j=0;j<n;j++)
      P[j+j*ld] = P[j+j*ld] + 1.0;
  }

  for (k=1;k<=s;k++) {
    PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,P,&ld,&zero,W,&ld));
    ierr = PetscMemcpy(P,W,ld2*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  if (P!=ds->mat[DS_MAT_F]) {
    ierr = PetscMemcpy(ds->mat[DS_MAT_F],P,ld2*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
#endif
}
Exemple #4
0
PetscErrorCode KSPComputeEigenvalues_GMRES(KSP ksp,PetscInt nmax,PetscReal *r,PetscReal *c,PetscInt *neig)
{
#if defined(PETSC_HAVE_ESSL)
    KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
    PetscErrorCode ierr;
    PetscInt       n = gmres->it + 1,N = gmres->max_k + 1;
    PetscInt       i,*perm;
    PetscScalar    *R = gmres->Rsvd;
    PetscScalar    *cwork = R + N*N,sdummy;
    PetscReal      *work,*realpart = gmres->Dsvd ;
    PetscBLASInt   zero = 0,bn,bN,idummy,lwork;

    PetscFunctionBegin;
    bn = PetscBLASIntCast(n);
    bN = PetscBLASIntCast(N);
    idummy = -1;                  /* unused */
    lwork = PetscBLASIntCast(5*N);
    if (nmax < n) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
    *neig = n;

    if (!n) {
        PetscFunctionReturn(0);
    }
    /* copy R matrix to work space */
    ierr = PetscMemcpy(R,gmres->hes_origin,N*N*sizeof(PetscScalar));
    CHKERRQ(ierr);

    /* compute eigenvalues */

    /* for ESSL version need really cwork of length N (complex), 2N
       (real); already at least 5N of space has been allocated */

    ierr = PetscMalloc(lwork*sizeof(PetscReal),&work);
    CHKERRQ(ierr);
    ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);
    CHKERRQ(ierr);
    LAPACKgeev_(&zero,R,&bN,cwork,&sdummy,&idummy,&idummy,&bn,work,&lwork);
    ierr = PetscFPTrapPop();
    CHKERRQ(ierr);
    ierr = PetscFree(work);
    CHKERRQ(ierr);

    /* For now we stick with the convention of storing the real and imaginary
       components of evalues separately.  But is this what we really want? */
    ierr = PetscMalloc(n*sizeof(PetscInt),&perm);
    CHKERRQ(ierr);

#if !defined(PETSC_USE_COMPLEX)
    for (i=0; i<n; i++) {
        realpart[i] = cwork[2*i];
        perm[i]     = i;
    }
    ierr = PetscSortRealWithPermutation(n,realpart,perm);
    CHKERRQ(ierr);
    for (i=0; i<n; i++) {
        r[i] = cwork[2*perm[i]];
        c[i] = cwork[2*perm[i]+1];
    }
#else
    for (i=0; i<n; i++) {
        realpart[i] = PetscRealPart(cwork[i]);
        perm[i]     = i;
    }
    ierr = PetscSortRealWithPermutation(n,realpart,perm);
    CHKERRQ(ierr);
    for (i=0; i<n; i++) {
        r[i] = PetscRealPart(cwork[perm[i]]);
        c[i] = PetscImaginaryPart(cwork[perm[i]]);
    }
#endif
    ierr = PetscFree(perm);
    CHKERRQ(ierr);
#elif defined(PETSC_MISSING_LAPACK_GEEV)
    PetscFunctionBegin;
    SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values.");
#elif !defined(PETSC_USE_COMPLEX)
    KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
    PetscErrorCode ierr;
    PetscInt       n = gmres->it + 1,N = gmres->max_k + 1,i,*perm;
    PetscBLASInt   bn, bN, lwork, idummy, lierr;
    PetscScalar    *R = gmres->Rsvd,*work = R + N*N;
    PetscScalar    *realpart = gmres->Dsvd,*imagpart = realpart + N,sdummy;

    PetscFunctionBegin;
    bn = PetscBLASIntCast(n);
    bN = PetscBLASIntCast(N);
    lwork = PetscBLASIntCast(5*N);
    idummy = PetscBLASIntCast(N);
    if (nmax < n) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
    *neig = n;

    if (!n) {
        PetscFunctionReturn(0);
    }

    /* copy R matrix to work space */
    ierr = PetscMemcpy(R,gmres->hes_origin,N*N*sizeof(PetscScalar));
    CHKERRQ(ierr);

    /* compute eigenvalues */
    ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);
    CHKERRQ(ierr);
    LAPACKgeev_("N","N",&bn,R,&bN,realpart,imagpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr);
    if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr);
    ierr = PetscFPTrapPop();
    CHKERRQ(ierr);
    ierr = PetscMalloc(n*sizeof(PetscInt),&perm);
    CHKERRQ(ierr);
    for (i=0; i<n; i++) {
        perm[i] = i;
    }
    ierr = PetscSortRealWithPermutation(n,realpart,perm);
    CHKERRQ(ierr);
    for (i=0; i<n; i++) {
        r[i] = realpart[perm[i]];
        c[i] = imagpart[perm[i]];
    }
    ierr = PetscFree(perm);
    CHKERRQ(ierr);
#else
    KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
    PetscErrorCode ierr;
    PetscInt       n = gmres->it + 1,N = gmres->max_k + 1,i,*perm;
    PetscScalar    *R = gmres->Rsvd,*work = R + N*N,*eigs = work + 5*N,sdummy;
    PetscBLASInt   bn,bN,lwork,idummy,lierr;

    PetscFunctionBegin;
    bn = PetscBLASIntCast(n);
    bN = PetscBLASIntCast(N);
    lwork = PetscBLASIntCast(5*N);
    idummy = PetscBLASIntCast(N);
    if (nmax < n) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
    *neig = n;

    if (!n) {
        PetscFunctionReturn(0);
    }
    /* copy R matrix to work space */
    ierr = PetscMemcpy(R,gmres->hes_origin,N*N*sizeof(PetscScalar));
    CHKERRQ(ierr);

    /* compute eigenvalues */
    ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);
    CHKERRQ(ierr);
    LAPACKgeev_("N","N",&bn,R,&bN,eigs,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,gmres->Dsvd,&lierr);
    if (lierr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine");
    ierr = PetscFPTrapPop();
    CHKERRQ(ierr);
    ierr = PetscMalloc(n*sizeof(PetscInt),&perm);
    CHKERRQ(ierr);
    for (i=0; i<n; i++) {
        perm[i] = i;
    }
    for (i=0; i<n; i++) {
        r[i]    = PetscRealPart(eigs[i]);
    }
    ierr = PetscSortRealWithPermutation(n,r,perm);
    CHKERRQ(ierr);
    for (i=0; i<n; i++) {
        r[i] = PetscRealPart(eigs[perm[i]]);
        c[i] = PetscImaginaryPart(eigs[perm[i]]);
    }
    ierr = PetscFree(perm);
    CHKERRQ(ierr);
#endif
    PetscFunctionReturn(0);
}
Exemple #5
0
PetscErrorCode KSPAGMRESRodvec(KSP ksp, PetscInt nvec, PetscScalar *In, Vec Out)
{
  KSP_AGMRES     *agmres  = (KSP_AGMRES*) ksp->data;
  MPI_Comm       comm;
  PetscScalar    *Qloc    = agmres->Qloc;
  PetscScalar    *sgn     = agmres->sgn;
  PetscScalar    *tloc    = agmres->tloc;
  PetscMPIInt    rank     = agmres->rank;
  PetscMPIInt    First    = agmres->First, Last = agmres->Last;
  PetscMPIInt    Iright   = agmres->Iright, Ileft = agmres->Ileft;
  PetscScalar    *y, *zloc;
  PetscErrorCode ierr;
  PetscInt       nloc,d, len, i, j;
  PetscBLASInt   bnvec,pas,blen;
  PetscInt       dpt;
  PetscReal      c, s, rho, zp, zq, yd, tt;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(nvec,&bnvec);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)ksp,&comm);CHKERRQ(ierr);
  pas  = 1;
  ierr = VecGetLocalSize(VEC_V(0), &nloc);CHKERRQ(ierr);
  ierr = PetscMalloc1(nvec, &y);CHKERRQ(ierr);
  ierr = PetscMemcpy(y, In, nvec*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = VecGetArray(Out, &zloc);CHKERRQ(ierr);

  if (rank == Last) {
    for (i = 0; i < nvec; i++) y[i] = sgn[i] * y[i];
  }
  for (i = 0; i < nloc; i++) zloc[i] = 0.0;
  if (agmres->size == 1) PetscStackCallBLAS("BLAScopy",BLAScopy_(&bnvec, y, &pas, &(zloc[0]), &pas));
  else {
    for (d = nvec - 1; d >= 0; d--) {
      if (rank == First) {
        ierr = MPI_Recv(&(zloc[d]), 1, MPIU_SCALAR, Iright, agmres->tag, comm, &status);CHKERRQ(ierr);
      } else {
        for (j = nvec - 1; j >= d + 1; j--) {
          i         = j - d;
          ierr      = KSPAGMRESRoddecGivens(&c, &s, &(Qloc[j * nloc + i]), 0);CHKERRQ(ierr);
          zp        = zloc[i-1];
          zq        = zloc[i];
          zloc[i-1] =     c * zp + s * zq;
          zloc[i]   =     -s * zp + c * zq;
        }
        ierr = KSPAGMRESRoddecGivens(&c, &s, &(Qloc[d * nloc]), 0);CHKERRQ(ierr);
        if (rank == Last) {
          zp      = y[d];
          zq      = zloc[0];
          y[d]    =      c * zp + s * zq;
          zloc[0] =   -s * zp + c * zq;
          ierr    = MPI_Send(&(y[d]), 1, MPIU_SCALAR, Ileft, agmres->tag, comm);CHKERRQ(ierr);
        } else {
          ierr    = MPI_Recv(&yd, 1, MPIU_SCALAR, Iright, agmres->tag, comm, &status);CHKERRQ(ierr);
          zp      = yd;
          zq      = zloc[0];
          yd      =      c * zp + s * zq;
          zloc[0] =   -s * zp + c * zq;
          ierr    = MPI_Send(&yd, 1, MPIU_SCALAR, Ileft, agmres->tag, comm);CHKERRQ(ierr);
        }
      }
    }
  }
  for (j = nvec - 1; j >= 0; j--) {
    dpt = j * nloc + j;
    if (tloc[j] != 0.0) {
      len       = nloc - j;
      ierr      = PetscBLASIntCast(len,&blen);CHKERRQ(ierr); 
      rho       = Qloc[dpt];
      Qloc[dpt] = 1.0;
      tt        = tloc[j] * (BLASdot_(&blen, &(Qloc[dpt]), &pas, &(zloc[j]), &pas));
      PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&blen, &tt, &(Qloc[dpt]), &pas, &(zloc[j]), &pas));
      Qloc[dpt] = rho;
    }
  }
  ierr = VecRestoreArray(Out, &zloc);CHKERRQ(ierr);
  ierr = PetscFree(y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Exemple #6
0
PetscErrorCode DSTranslateHarmonic_NHEP(DS ds,PetscScalar tau,PetscReal beta,PetscBool recover,PetscScalar *gin,PetscReal *gamma)
{
#if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRS)
  PetscFunctionBegin;
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRS - Lapack routines are unavailable");
#else
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscBLASInt   *ipiv,info,n,ld,one=1,ncol;
  PetscScalar    *A,*B,*Q,*g=gin,*ghat;
  PetscScalar    done=1.0,dmone=-1.0,dzero=0.0;
  PetscReal      gnorm;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr);
  A  = ds->mat[DS_MAT_A];

  if (!recover) {

    ierr = DSAllocateWork_Private(ds,0,0,ld);CHKERRQ(ierr);
    ipiv = ds->iwork;
    if (!g) {
      ierr = DSAllocateWork_Private(ds,ld,0,0);CHKERRQ(ierr);
      g = ds->work;
    }
    /* use workspace matrix W to factor A-tau*eye(n) */
    ierr = DSAllocateMat_Private(ds,DS_MAT_W);CHKERRQ(ierr);
    B = ds->mat[DS_MAT_W];
    ierr = PetscMemcpy(B,A,sizeof(PetscScalar)*ld*ld);CHKERRQ(ierr);

    /* Vector g initialy stores b = beta*e_n^T */
    ierr = PetscMemzero(g,n*sizeof(PetscScalar));CHKERRQ(ierr);
    g[n-1] = beta;

    /* g = (A-tau*eye(n))'\b */
    for (i=0;i<n;i++)
      B[i+i*ld] -= tau;
    PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,B,&ld,ipiv,&info));
    if (info<0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Bad argument to LU factorization");
    if (info>0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Bad LU factorization");
    ierr = PetscLogFlops(2.0*n*n*n/3.0);CHKERRQ(ierr);
    PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C",&n,&one,B,&ld,ipiv,g,&ld,&info));
    if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve");
    ierr = PetscLogFlops(2.0*n*n-n);CHKERRQ(ierr);

    /* A = A + g*b' */
    for (i=0;i<n;i++)
      A[i+(n-1)*ld] += g[i]*beta;

  } else { /* recover */

    PetscValidPointer(g,6);
    ierr = DSAllocateWork_Private(ds,ld,0,0);CHKERRQ(ierr);
    ghat = ds->work;
    Q    = ds->mat[DS_MAT_Q];

    /* g^ = -Q(:,idx)'*g */
    ierr = PetscBLASIntCast(ds->l+ds->k,&ncol);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&ncol,&dmone,Q,&ld,g,&one,&dzero,ghat,&one));

    /* A = A + g^*b' */
    for (i=0;i<ds->l+ds->k;i++)
      for (j=ds->l;j<ds->l+ds->k;j++)
        A[i+j*ld] += ghat[i]*Q[n-1+j*ld]*beta;

    /* g~ = (I-Q(:,idx)*Q(:,idx)')*g = g+Q(:,idx)*g^ */
    PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&ncol,&done,Q,&ld,ghat,&one,&done,g,&one));
  }

  /* Compute gamma factor */
  if (gamma) {
    gnorm = 0.0;
    for (i=0;i<n;i++)
      gnorm = gnorm + PetscRealPart(g[i]*PetscConj(g[i]));
    *gamma = PetscSqrtReal(1.0+gnorm);
  }
  PetscFunctionReturn(0);
#endif
}
Exemple #7
0
PetscErrorCode DSVectors_NHEP_Eigen_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
{
#if defined(SLEPC_MISSING_LAPACK_TREVC)
  PetscFunctionBegin;
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
#else
  PetscErrorCode ierr;
  PetscInt       i;
  PetscBLASInt   mm=1,mout,info,ld,n,inc = 1;
  PetscScalar    tmp,done=1.0,zero=0.0;
  PetscReal      norm;
  PetscBool      iscomplex = PETSC_FALSE;
  PetscBLASInt   *select;
  PetscScalar    *A = ds->mat[DS_MAT_A];
  PetscScalar    *Q = ds->mat[DS_MAT_Q];
  PetscScalar    *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
  PetscScalar    *Y;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr);
  ierr = DSAllocateWork_Private(ds,0,0,ld);CHKERRQ(ierr);
  select = ds->iwork;
  for (i=0;i<n;i++) select[i] = (PetscBLASInt)PETSC_FALSE;

  /* Compute k-th eigenvector Y of A */
  Y = X+(*k)*ld;
  select[*k] = (PetscBLASInt)PETSC_TRUE;
#if !defined(PETSC_USE_COMPLEX)
  if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
  mm = iscomplex? 2: 1;
  if (iscomplex) select[(*k)+1] = (PetscBLASInt)PETSC_TRUE;
  ierr = DSAllocateWork_Private(ds,3*ld,0,0);CHKERRQ(ierr);
  PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,&info));
#else
  ierr = DSAllocateWork_Private(ds,2*ld,ld,0);CHKERRQ(ierr);
  PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,ds->rwork,&info));
#endif
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTREVC %d",info);
  if (mout != mm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Inconsistent arguments");

  /* accumulate and normalize eigenvectors */
  if (ds->state>=DS_STATE_CONDENSED) {
    ierr = PetscMemcpy(ds->work,Y,mout*ld*sizeof(PetscScalar));CHKERRQ(ierr);
    PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work,&inc,&zero,Y,&inc));
#if !defined(PETSC_USE_COMPLEX)
    if (iscomplex) PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work+ld,&inc,&zero,Y+ld,&inc));
#endif
    norm = BLASnrm2_(&n,Y,&inc);
#if !defined(PETSC_USE_COMPLEX)
    if (iscomplex) {
      tmp = BLASnrm2_(&n,Y+ld,&inc);
      norm = SlepcAbsEigenvalue(norm,tmp);
    }
#endif
    tmp = 1.0 / norm;
    PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y,&inc));
#if !defined(PETSC_USE_COMPLEX)
    if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y+ld,&inc));
#endif
  }

  /* set output arguments */
  if (iscomplex) (*k)++;
  if (rnorm) {
    if (iscomplex) *rnorm = SlepcAbsEigenvalue(Y[n-1],Y[n-1+ld]);
    else *rnorm = PetscAbsScalar(Y[n-1]);
  }
  PetscFunctionReturn(0);
#endif
}
Exemple #8
0
static PetscErrorCode PCSetUp_SVD(PC pc)
{
#if defined(PETSC_MISSING_LAPACK_GESVD)
  SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable\nNot able to provide singular value estimates.");
#else
  PC_SVD         *jac = (PC_SVD*)pc->data;
  PetscErrorCode ierr;
  PetscScalar    *a,*u,*v,*d,*work;
  PetscBLASInt   nb,lwork;
  PetscInt       i,n;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatDestroy(&jac->A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)pc->pmat)->comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    Mat      redmat;
    PetscInt M;
    ierr = MatGetSize(pc->pmat,&M,NULL);CHKERRQ(ierr);
    ierr = MatGetRedundantMatrix(pc->pmat,size,PETSC_COMM_SELF,M,MAT_INITIAL_MATRIX,&redmat);CHKERRQ(ierr);
    ierr = MatConvert(redmat,MATSEQDENSE,MAT_INITIAL_MATRIX,&jac->A);CHKERRQ(ierr);
    ierr = MatDestroy(&redmat);CHKERRQ(ierr);
  } else {
    ierr = MatConvert(pc->pmat,MATSEQDENSE,MAT_INITIAL_MATRIX,&jac->A);CHKERRQ(ierr);
  }
  if (!jac->diag) {    /* assume square matrices */
    ierr = MatGetVecs(jac->A,&jac->diag,&jac->work);CHKERRQ(ierr);
  }
  if (!jac->U) {
    ierr = MatDuplicate(jac->A,MAT_DO_NOT_COPY_VALUES,&jac->U);CHKERRQ(ierr);
    ierr = MatDuplicate(jac->A,MAT_DO_NOT_COPY_VALUES,&jac->Vt);CHKERRQ(ierr);
  }
  ierr  = MatGetSize(pc->pmat,&n,NULL);CHKERRQ(ierr);
  ierr  = PetscBLASIntCast(n,&nb);CHKERRQ(ierr);
  lwork = 5*nb;
  ierr  = PetscMalloc(lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
  ierr  = MatDenseGetArray(jac->A,&a);CHKERRQ(ierr);
  ierr  = MatDenseGetArray(jac->U,&u);CHKERRQ(ierr);
  ierr  = MatDenseGetArray(jac->Vt,&v);CHKERRQ(ierr);
  ierr  = VecGetArray(jac->diag,&d);CHKERRQ(ierr);
#if !defined(PETSC_USE_COMPLEX)
  {
    PetscBLASInt lierr;
    ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
    PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&nb,&nb,a,&nb,d,u,&nb,v,&nb,work,&lwork,&lierr));
    if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"gesv() error %d",lierr);
    ierr = PetscFPTrapPop();CHKERRQ(ierr);
  }
#else
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for complex");
#endif
  ierr = MatDenseRestoreArray(jac->A,&a);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(jac->U,&u);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(jac->Vt,&v);CHKERRQ(ierr);
  for (i=n-1; i>=0; i--) if (PetscRealPart(d[i]) > jac->zerosing) break;
  jac->nzero = n-1-i;
  if (jac->monitor) {
    ierr = PetscViewerASCIIAddTab(jac->monitor,((PetscObject)pc)->tablevel);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(jac->monitor,"    SVD: condition number %14.12e, %D of %D singular values are (nearly) zero\n",(double)PetscRealPart(d[0]/d[n-1]),jac->nzero,n);CHKERRQ(ierr);
    if (n >= 10) {              /* print 5 smallest and 5 largest */
      ierr = PetscViewerASCIIPrintf(jac->monitor,"    SVD: smallest singular values: %14.12e %14.12e %14.12e %14.12e %14.12e\n",(double)PetscRealPart(d[n-1]),(double)PetscRealPart(d[n-2]),(double)PetscRealPart(d[n-3]),(double)PetscRealPart(d[n-4]),(double)PetscRealPart(d[n-5]));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(jac->monitor,"    SVD: largest singular values : %14.12e %14.12e %14.12e %14.12e %14.12e\n",(double)PetscRealPart(d[4]),(double)PetscRealPart(d[3]),(double)PetscRealPart(d[2]),(double)PetscRealPart(d[1]),(double)PetscRealPart(d[0]));CHKERRQ(ierr);
    } else {                    /* print all singular values */
      char     buf[256],*p;
      size_t   left = sizeof(buf),used;
      PetscInt thisline;
      for (p=buf,i=n-1,thisline=1; i>=0; i--,thisline++) {
        ierr  = PetscSNPrintfCount(p,left," %14.12e",&used,(double)PetscRealPart(d[i]));CHKERRQ(ierr);
        left -= used;
        p    += used;
        if (thisline > 4 || i==0) {
          ierr     = PetscViewerASCIIPrintf(jac->monitor,"    SVD: singular values:%s\n",buf);CHKERRQ(ierr);
          p        = buf;
          thisline = 0;
        }
      }
    }
    ierr = PetscViewerASCIISubtractTab(jac->monitor,((PetscObject)pc)->tablevel);CHKERRQ(ierr);
  }
  ierr = PetscInfo2(pc,"Largest and smallest singular values %14.12e %14.12e\n",(double)PetscRealPart(d[0]),(double)PetscRealPart(d[n-1]));CHKERRQ(ierr);
  for (i=0; i<n-jac->nzero; i++) d[i] = 1.0/d[i];
  for (; i<n; i++) d[i] = 0.0;
  if (jac->essrank > 0) for (i=0; i<n-jac->nzero-jac->essrank; i++) d[i] = 0.0; /* Skip all but essrank eigenvalues */
  ierr = PetscInfo1(pc,"Number of zero or nearly singular values %D\n",jac->nzero);CHKERRQ(ierr);
  ierr = VecRestoreArray(jac->diag,&d);CHKERRQ(ierr);
#if defined(foo)
  {
    PetscViewer viewer;
    ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,"joe",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
    ierr = MatView(jac->A,viewer);CHKERRQ(ierr);
    ierr = MatView(jac->U,viewer);CHKERRQ(ierr);
    ierr = MatView(jac->Vt,viewer);CHKERRQ(ierr);
    ierr = VecView(jac->diag,viewer);CHKERRQ(ierr);
    ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  }
#endif
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
#endif
}
Exemple #9
0
PetscErrorCode SNESNGMRESFormCombinedSolution_Private(SNES snes,PetscInt ivec,PetscInt l,Vec XM,Vec FM,PetscReal fMnorm,Vec X,Vec XA,Vec FA)
{
  SNES_NGMRES    *ngmres = (SNES_NGMRES*) snes->data;
  PetscInt       i,j;
  Vec            *Fdot      = ngmres->Fdot;
  Vec            *Xdot      = ngmres->Xdot;
  PetscScalar    *beta      = ngmres->beta;
  PetscScalar    *xi        = ngmres->xi;
  PetscScalar    alph_total = 0.;
  PetscErrorCode ierr;
  PetscReal      nu;
  Vec            Y = snes->work[2];
  PetscBool      changed_y,changed_w;

  PetscFunctionBegin;
  nu = fMnorm*fMnorm;

  /* construct the right hand side and xi factors */
  if (l > 0) {
    ierr = VecMDotBegin(FM,l,Fdot,xi);CHKERRQ(ierr);
    ierr = VecMDotBegin(Fdot[ivec],l,Fdot,beta);CHKERRQ(ierr);
    ierr = VecMDotEnd(FM,l,Fdot,xi);CHKERRQ(ierr);
    ierr = VecMDotEnd(Fdot[ivec],l,Fdot,beta);CHKERRQ(ierr);
    for (i = 0; i < l; i++) {
      Q(i,ivec) = beta[i];
      Q(ivec,i) = beta[i];
    }
  } else {
    Q(0,0) = ngmres->fnorms[ivec]*ngmres->fnorms[ivec];
  }

  for (i = 0; i < l; i++) beta[i] = nu - xi[i];

  /* construct h */
  for (j = 0; j < l; j++) {
    for (i = 0; i < l; i++) {
      H(i,j) = Q(i,j)-xi[i]-xi[j]+nu;
    }
  }
  if (l == 1) {
    /* simply set alpha[0] = beta[0] / H[0, 0] */
    if (H(0,0) != 0.) beta[0] = beta[0]/H(0,0);
    else beta[0] = 0.;
  } else {
#if defined(PETSC_MISSING_LAPACK_GELSS)
    SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_SUP,"NGMRES with LS requires the LAPACK GELSS routine.");
#else
    ierr          = PetscBLASIntCast(l,&ngmres->m);CHKERRQ(ierr);
    ierr          = PetscBLASIntCast(l,&ngmres->n);CHKERRQ(ierr);
    ngmres->info  = 0;
    ngmres->rcond = -1.;
    ierr          = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    PetscStackCallBLAS("LAPACKgelss",LAPACKgelss_(&ngmres->m,&ngmres->n,&ngmres->nrhs,ngmres->h,&ngmres->lda,ngmres->beta,&ngmres->ldb,ngmres->s,&ngmres->rcond,&ngmres->rank,ngmres->work,&ngmres->lwork,ngmres->rwork,&ngmres->info));
#else
    PetscStackCallBLAS("LAPACKgelss",LAPACKgelss_(&ngmres->m,&ngmres->n,&ngmres->nrhs,ngmres->h,&ngmres->lda,ngmres->beta,&ngmres->ldb,ngmres->s,&ngmres->rcond,&ngmres->rank,ngmres->work,&ngmres->lwork,&ngmres->info));
#endif
    ierr = PetscFPTrapPop();CHKERRQ(ierr);
    if (ngmres->info < 0) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"Bad argument to GELSS");
    if (ngmres->info > 0) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"SVD failed to converge");
#endif
  }
  for (i=0; i<l; i++) {
    if (PetscIsInfOrNanScalar(beta[i])) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"SVD generated inconsistent output");
  }
  alph_total = 0.;
  for (i = 0; i < l; i++) alph_total += beta[i];

  ierr = VecCopy(XM,XA);CHKERRQ(ierr);
  ierr = VecScale(XA,1.-alph_total);CHKERRQ(ierr);
  ierr = VecMAXPY(XA,l,beta,Xdot);CHKERRQ(ierr);
  /* check the validity of the step */
  ierr = VecCopy(XA,Y);CHKERRQ(ierr);
  ierr = VecAXPY(Y,-1.0,X);CHKERRQ(ierr);
  ierr = SNESLineSearchPostCheck(snes->linesearch,X,Y,XA,&changed_y,&changed_w);CHKERRQ(ierr);
  if (!ngmres->approxfunc) {
    if (snes->pc && snes->pcside == PC_LEFT) {
      ierr = SNESApplyNPC(snes,XA,NULL,FA);CHKERRQ(ierr);
    } else {
      ierr =SNESComputeFunction(snes,XA,FA);CHKERRQ(ierr);
    }
  } else {
    ierr = VecCopy(FM,FA);CHKERRQ(ierr);
    ierr = VecScale(FA,1.-alph_total);CHKERRQ(ierr);
    ierr = VecMAXPY(FA,l,beta,Fdot);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
Exemple #10
0
static PetscErrorCode KSPAGMRESBuildSoln(KSP ksp,PetscInt it)
{
  KSP_AGMRES     *agmres = (KSP_AGMRES*)ksp->data;
  PetscErrorCode ierr;
  PetscInt       max_k = agmres->max_k;       /* Size of the non-augmented Krylov basis */
  PetscInt       i, j;
  PetscInt       r = agmres->r;           /* current number of augmented eigenvectors */
  PetscBLASInt   KspSize;
  PetscBLASInt   lC;
  PetscBLASInt   N;
  PetscBLASInt   ldH = N + 1;
  PetscBLASInt   lwork;
  PetscBLASInt   info, nrhs = 1;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(KSPSIZE,&KspSize);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(4 * (KspSize+1),&lwork);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(KspSize+1,&lC);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(MAXKSPSIZE + 1,&N);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(N + 1,&ldH);CHKERRQ(ierr);
  /* Save a copy of the Hessenberg matrix */
  for (j = 0; j < N-1; j++) {
    for (i = 0; i < N; i++) {
      *HS(i,j) = *H(i,j);
    }
  }
  /* QR factorize the Hessenberg matrix */
#if defined(PETSC_MISSING_LAPACK_GEQRF)
  SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEQRF - Lapack routine is unavailable.");
#else
  PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&lC, &KspSize, agmres->hh_origin, &ldH, agmres->tau, agmres->work, &lwork, &info));
  if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGEQRF INFO=%d", info);
#endif
  /* Update the right hand side of the least square problem */
  ierr = PetscMemzero(agmres->nrs, N*sizeof(PetscScalar));CHKERRQ(ierr);

  agmres->nrs[0] = ksp->rnorm;
#if defined(PETSC_MISSING_LAPACK_ORMQR)
  SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEQRF - Lapack routine is unavailable.");
#else
  PetscStackCallBLAS("LAPACKormqr",LAPACKormqr_("L", "T", &lC, &nrhs, &KspSize, agmres->hh_origin, &ldH, agmres->tau, agmres->nrs, &N, agmres->work, &lwork, &info));
  if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XORMQR INFO=%d",info);
#endif
  ksp->rnorm = PetscAbsScalar(agmres->nrs[KspSize]);
  /* solve the least-square problem */
#if defined(PETSC_MISSING_LAPACK_TRTRS)
  SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"TRTRS - Lapack routine is unavailable.");
#else
  PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U", "N", "N", &KspSize, &nrhs, agmres->hh_origin, &ldH, agmres->nrs, &N, &info));
  if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XTRTRS INFO=%d",info);
#endif
  /* Accumulate the correction to the solution of the preconditioned problem in VEC_TMP */
  ierr = VecZeroEntries(VEC_TMP);CHKERRQ(ierr);
  ierr = VecMAXPY(VEC_TMP, max_k, agmres->nrs, &VEC_V(0));CHKERRQ(ierr);
  if (!agmres->DeflPrecond) { ierr = VecMAXPY(VEC_TMP, r, &agmres->nrs[max_k], agmres->U);CHKERRQ(ierr); }

  if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) {
    ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_TMP_MATOP);CHKERRQ(ierr);
    ierr = VecCopy(VEC_TMP_MATOP, VEC_TMP);CHKERRQ(ierr);
  }
  ierr = KSPUnwindPreconditioner(ksp, VEC_TMP, VEC_TMP_MATOP);CHKERRQ(ierr);
  /* add the solution to the previous one */
  ierr = VecAXPY(ksp->vec_sol, 1.0, VEC_TMP);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
int main(int argc, char **args)
{
  Mat            A, L;
  AppCtx         ctx;
  PetscViewer    viewer;
  PetscErrorCode ierr;

  ierr = PetscInitialize(&argc, &args, (char *) 0, help);CHKERRQ(ierr);
  ierr = ProcessOptions(&ctx);CHKERRQ(ierr);
  /* Load matrix */
  ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD, ctx.matFilename, FILE_MODE_READ, &viewer);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_WORLD, &A);CHKERRQ(ierr);
  ierr = MatLoad(A, viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  /* Make graph Laplacian from matrix */
  ierr = MatLaplacian(A, 1.0e-12, &L);CHKERRQ(ierr);
  /* Check Laplacian */
  PetscReal norm;
  Vec       x, y;

  ierr = MatGetVecs(L, &x, NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(x, &y);CHKERRQ(ierr);
  ierr = VecSet(x, 1.0);CHKERRQ(ierr);
  ierr = MatMult(L, x, y);CHKERRQ(ierr);
  ierr = VecNorm(y, NORM_INFINITY, &norm);CHKERRQ(ierr);
  if (norm > 1.0e-10) SETERRQ(PetscObjectComm((PetscObject) y), PETSC_ERR_PLIB, "Invalid graph Laplacian");
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  /* Compute Fiedler vector, and perhaps more vectors */
  Mat          LD;
  PetscScalar *a, *realpart, *imagpart, *eigvec, *work, sdummy;
  PetscBLASInt bn, bN, lwork, lierr, idummy;
  PetscInt     n, i;

  ierr = MatConvert(L, MATDENSE, MAT_INITIAL_MATRIX, &LD);CHKERRQ(ierr);
  ierr = MatGetLocalSize(LD, &n, NULL);CHKERRQ(ierr);
  ierr = MatDenseGetArray(LD, &a);CHKERRQ(ierr);

  ierr = PetscBLASIntCast(n, &bn);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(n, &bN);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(5*n,&lwork);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(1,&idummy);CHKERRQ(ierr);
  ierr = PetscMalloc4(n,PetscScalar,&realpart,n,PetscScalar,&imagpart,n*n,PetscScalar,&eigvec,lwork,PetscScalar,&work);CHKERRQ(ierr);
  ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
  PetscStackCall("LAPACKgeev", LAPACKgeev_("N","V",&bn,a,&bN,realpart,imagpart,&sdummy,&idummy,eigvec,&bN,work,&lwork,&lierr));
  if (lierr) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in LAPACK routine %d", (int) lierr);
  ierr = PetscFPTrapPop();CHKERRQ(ierr);
  PetscReal *r, *c;
  PetscInt  *perm;

  ierr = PetscMalloc3(n,PetscInt,&perm,n,PetscReal,&r,n,PetscReal,&c);CHKERRQ(ierr);
  for (i = 0; i < n; ++i) perm[i] = i;
  ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr);
  for (i = 0; i < n; ++i) {
    r[i] = realpart[perm[i]];
    c[i] = imagpart[perm[i]];
  }
  for (i = 0; i < n; ++i) {
    realpart[i] = r[i];
    imagpart[i] = c[i];
  }
  /* Output spectrum */
  if (ctx.showSpectrum) {
    ierr = PetscPrintf(PETSC_COMM_SELF, "Spectrum\n");CHKERRQ(ierr);
    for (i = 0; i < n; ++i) {ierr = PetscPrintf(PETSC_COMM_SELF, "%d: Real %g Imag %g\n", i, realpart[i], imagpart[i]);CHKERRQ(ierr);}
  }
  /* Check lowest eigenvalue and eigenvector */
  PetscInt evInd = perm[0];

  if ((realpart[0] > 1.0e-12) || (imagpart[0] > 1.0e-12)) SETERRQ(PetscObjectComm((PetscObject) L), PETSC_ERR_PLIB, "Graph Laplacian must have lowest eigenvalue 0");
  for (i = 0; i < n; ++i) {
    if (fabs(eigvec[evInd*n+i] - eigvec[evInd*n+0]) > 1.0e-10) SETERRQ3(PetscObjectComm((PetscObject) L), PETSC_ERR_PLIB, "Graph Laplacian must have constant lowest eigenvector ev_%d %g != ev_0 %g", i, eigvec[evInd*n+i], eigvec[evInd*n+0]);
  }
  /* Output Fiedler vector */
  evInd = perm[1];
  if (ctx.showFiedler) {
    ierr = PetscPrintf(PETSC_COMM_SELF, "Fiedler vector, Re{ev} %g\n", realpart[1]);CHKERRQ(ierr);
    for (i = 0; i < n; ++i) {ierr = PetscPrintf(PETSC_COMM_SELF, "%d: %g\n", i, eigvec[evInd*n+i]);CHKERRQ(ierr);}
  }
  /* Construct Fiedler partition */
  IS        fIS, fIS2;
  PetscInt *fperm, *fperm2, pos, neg, posSize = 0;

  ierr = PetscMalloc(n * sizeof(PetscInt), &fperm);CHKERRQ(ierr);
  for (i = 0; i < n; ++i) {
    if (eigvec[evInd*n+i] > 0.0) ++posSize;
  }

  ierr = PetscMalloc(n * sizeof(PetscInt), &fperm2);CHKERRQ(ierr);
  for (i = 0; i < n; ++i) fperm[i] = i;
  ierr = PetscSortRealWithPermutation(n, &eigvec[evInd*n], fperm);CHKERRQ(ierr);
  for (i = 0; i < n; ++i) fperm2[n-1-i] = fperm[i];

  for (i = 0, pos = 0, neg = posSize; i < n; ++i) {
    if (eigvec[evInd*n+i] > 0.0) fperm[pos++] = i;
    else                         fperm[neg++] = i;
  }

  ierr = ISCreateGeneral(PetscObjectComm((PetscObject) L), n, fperm, PETSC_OWN_POINTER, &fIS);CHKERRQ(ierr);
  ierr = ISSetPermutation(fIS);CHKERRQ(ierr);
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject) L), n, fperm2, PETSC_OWN_POINTER, &fIS2);CHKERRQ(ierr);
  ierr = ISSetPermutation(fIS2);CHKERRQ(ierr);

  ierr = PetscFree3(perm,r,c);CHKERRQ(ierr);
  ierr = PetscFree4(realpart,imagpart,eigvec,work);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(LD, &a);CHKERRQ(ierr);
  ierr = MatDestroy(&LD);CHKERRQ(ierr);
  ierr = MatDestroy(&L);CHKERRQ(ierr);
  /* Permute matrix */
  Mat AR, AR2;

  ierr = MatPermute(A, fIS, fIS, &AR);CHKERRQ(ierr);
  ierr = MatView(A,  PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  ierr = MatView(AR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  ierr = ISDestroy(&fIS);CHKERRQ(ierr);

  ierr = MatPermute(A, fIS2, fIS2, &AR2);CHKERRQ(ierr);
  ierr = MatView(AR2, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  ierr = ISDestroy(&fIS2);CHKERRQ(ierr);
  ierr = MatDestroy(&AR);CHKERRQ(ierr);
  AR   = AR2;
  /* Extract blocks and reorder */
  Mat               AP, AN, APR, ANR;
  IS                ispos, isneg, rpermpos, cpermpos, rpermneg, cpermneg;
  PetscInt          bw, bwr;

  ierr = ISCreateStride(PETSC_COMM_SELF, posSize, 0, 1, &ispos);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF, n - posSize, posSize, 1, &isneg);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(AR, ispos, ispos, MAT_INITIAL_MATRIX, &AP);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(AR, isneg, isneg, MAT_INITIAL_MATRIX, &AN);CHKERRQ(ierr);
  ierr = ISDestroy(&ispos);CHKERRQ(ierr);
  ierr = ISDestroy(&isneg);CHKERRQ(ierr);
  ierr = MatGetOrdering(AP, ctx.matOrdtype, &rpermpos, &cpermpos);CHKERRQ(ierr);
  ierr = MatGetOrdering(AN, ctx.matOrdtype, &rpermneg, &cpermneg);CHKERRQ(ierr);
  ierr = MatPermute(AP, rpermpos, cpermpos, &APR);CHKERRQ(ierr);
  ierr = MatComputeBandwidth(AP, 0.0, &bw);CHKERRQ(ierr);
  ierr = MatComputeBandwidth(APR, 0.0, &bwr);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD, "Reduced positive bandwidth from %d to %d\n", bw, bwr);CHKERRQ(ierr);
  ierr = MatPermute(AN, rpermneg, cpermneg, &ANR);CHKERRQ(ierr);
  ierr = MatComputeBandwidth(AN, 0.0, &bw);CHKERRQ(ierr);
  ierr = MatComputeBandwidth(ANR, 0.0, &bwr);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD, "Reduced negative bandwidth from %d to %d\n", bw, bwr);CHKERRQ(ierr);
  ierr = MatView(AP,  PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  ierr = MatView(APR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  ierr = MatView(AN,  PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  ierr = MatView(ANR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  /* Reorder original matrix */
  Mat             ARR;
  IS              rperm, cperm;
  PetscInt       *idx;
  const PetscInt *cidx;

  ierr = PetscMalloc(n * sizeof(PetscInt), &idx);CHKERRQ(ierr);
  ierr = ISGetIndices(rpermpos, &cidx);CHKERRQ(ierr);
  for (i = 0; i < posSize; ++i) idx[i] = cidx[i];
  ierr = ISRestoreIndices(rpermpos, &cidx);CHKERRQ(ierr);
  ierr = ISGetIndices(rpermneg, &cidx);CHKERRQ(ierr);
  for (i = posSize; i < n; ++i) idx[i] = cidx[i-posSize] + posSize;
  ierr = ISRestoreIndices(rpermneg, &cidx);CHKERRQ(ierr);
  ierr = ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, &rperm);CHKERRQ(ierr);
  ierr = ISSetPermutation(rperm);CHKERRQ(ierr);
  ierr = PetscMalloc(n * sizeof(PetscInt), &idx);CHKERRQ(ierr);
  ierr = ISGetIndices(cpermpos, &cidx);CHKERRQ(ierr);
  for (i = 0; i < posSize; ++i) idx[i] = cidx[i];
  ierr = ISRestoreIndices(cpermpos, &cidx);CHKERRQ(ierr);
  ierr = ISGetIndices(cpermneg, &cidx);CHKERRQ(ierr);
  for (i = posSize; i < n; ++i) idx[i] = cidx[i-posSize] + posSize;
  ierr = ISRestoreIndices(cpermneg, &cidx);CHKERRQ(ierr);
  ierr = ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, &cperm);CHKERRQ(ierr);
  ierr = ISSetPermutation(cperm);CHKERRQ(ierr);
  ierr = MatPermute(AR, rperm, cperm, &ARR);CHKERRQ(ierr);
  ierr = MatView(ARR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  ierr = ISDestroy(&rperm);CHKERRQ(ierr);
  ierr = ISDestroy(&cperm);CHKERRQ(ierr);
  ierr = ISDestroy(&rpermpos);CHKERRQ(ierr);
  ierr = ISDestroy(&cpermpos);CHKERRQ(ierr);
  ierr = ISDestroy(&rpermneg);CHKERRQ(ierr);
  ierr = ISDestroy(&cpermneg);CHKERRQ(ierr);
  ierr = MatDestroy(&AP);CHKERRQ(ierr);
  ierr = MatDestroy(&AN);CHKERRQ(ierr);
  ierr = MatDestroy(&APR);CHKERRQ(ierr);
  ierr = MatDestroy(&ANR);CHKERRQ(ierr);
  /* Compare bands */
  Mat B, BR;

  ierr = MatCreateSubMatrixBanded(A,   50, 0.95, &B);CHKERRQ(ierr);
  ierr = MatCreateSubMatrixBanded(ARR, 50, 0.95, &BR);CHKERRQ(ierr);
  ierr = MatView(B,  PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  ierr = MatView(BR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&BR);CHKERRQ(ierr);
  /* Cleanup */
  ierr = MatDestroy(&ARR);CHKERRQ(ierr);
  ierr = MatDestroy(&AR);CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return 0;
}
Exemple #12
0
static PetscErrorCode gs_gop_vec_pairwise_plus( gs_id *gs,  PetscScalar *in_vals, PetscInt step)
{
  PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
  PetscInt *iptr, *msg_list, *msg_size, **msg_nodes;
  PetscInt *pw, *list, *size, **nodes;
  MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
  MPI_Status status;
  PetscBLASInt i1 = 1,dstep;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* strip and load s */
  msg_list =list         = gs->pair_list;
  msg_size =size         = gs->msg_sizes;
  msg_nodes=nodes        = gs->node_list;
  iptr=pw                = gs->pw_elm_list;  
  dptr1=dptr3            = gs->pw_vals;
  msg_ids_in  = ids_in   = gs->msg_ids_in;
  msg_ids_out = ids_out  = gs->msg_ids_out;
  dptr2                  = gs->out;
  in1=in2                = gs->in;

  /* post the receives */
  /*  msg_nodes=nodes; */
  do 
    {
      /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
         second one *list and do list++ afterwards */
      ierr = MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->gs_comm, msg_ids_in);CHKERRQ(ierr);
      list++;msg_ids_in++;
      in1 += *size++ *step;
    }
  while (*++msg_nodes);
  msg_nodes=nodes;

  /* load gs values into in out gs buffers */  
  while (*iptr >= 0)
    {
      rvec_copy(dptr3,in_vals + *iptr*step,step);
      dptr3+=step;
      iptr++;
    }

  /* load out buffers and post the sends */
  while ((iptr = *msg_nodes++))
    {
      dptr3 = dptr2;
      while (*iptr >= 0)
        {
          rvec_copy(dptr2,dptr1 + *iptr*step,step);
          dptr2+=step;
          iptr++;
        }
      ierr = MPI_Isend(dptr3, *msg_size *step, MPIU_SCALAR, *msg_list, MSGTAG1+my_id, gs->gs_comm, msg_ids_out);CHKERRQ(ierr);
      msg_size++; msg_list++;msg_ids_out++;
    }

  /* tree */
  if (gs->max_left_over)
    {gs_gop_vec_tree_plus(gs,in_vals,step);}

  /* process the received data */
  msg_nodes=nodes;
  while ((iptr = *nodes++)){
    PetscScalar d1 = 1.0;
      /* Should I check the return value of MPI_Wait() or status? */
      /* Can this loop be replaced by a call to MPI_Waitall()? */
      ierr = MPI_Wait(ids_in, &status);CHKERRQ(ierr);
      ids_in++;
      while (*iptr >= 0) {
	dstep = PetscBLASIntCast(step);
        BLASaxpy_(&dstep,&d1,in2,&i1,dptr1 + *iptr*step,&i1);
	in2+=step;
	iptr++;
      }
  }

  /* replace vals */
  while (*pw >= 0)
    {
      rvec_copy(in_vals + *pw*step,dptr1,step);
      dptr1+=step;
      pw++;
    }

  /* clear isend message handles */
  /* This changed for clarity though it could be the same */
  while (*msg_nodes++)
    /* Should I check the return value of MPI_Wait() or status? */
    /* Can this loop be replaced by a call to MPI_Waitall()? */
    {ierr = MPI_Wait(ids_out, &status);CHKERRQ(ierr);ids_out++;}
    

  PetscFunctionReturn(0);
}
Exemple #13
0
static PetscErrorCode  KSPSolve_BCGSL(KSP ksp)
{
    KSP_BCGSL      *bcgsl = (KSP_BCGSL *) ksp->data;
    PetscScalar    alpha, beta, omega, sigma;
    PetscScalar    rho0, rho1;
    PetscReal      kappa0, kappaA, kappa1;
    PetscReal      ghat, epsilon, abstol;
    PetscReal      zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
    PetscTruth     bUpdateX;
    PetscTruth     bBombed = PETSC_FALSE;

    PetscInt       maxit;
    PetscInt       h, i, j, k, vi, ell;
    PetscBLASInt   ldMZ,bierr;

    PetscErrorCode ierr;

    PetscFunctionBegin;
    if (ksp->normtype == KSP_NORM_NATURAL) SETERRQ(PETSC_ERR_SUP,"Cannot use natural norm with KSPBCGSL");
    if (ksp->normtype == KSP_NORM_PRECONDITIONED && ksp->pc_side != PC_LEFT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type unpreconditioned for right preconditioning and KSPBCGSL");
    if (ksp->normtype == KSP_NORM_UNPRECONDITIONED && ksp->pc_side != PC_RIGHT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type preconditioned for left preconditioning and KSPBCGSL");

    /* set up temporary vectors */
    vi = 0;
    ell = bcgsl->ell;
    bcgsl->vB    = ksp->work[vi];
    vi++;
    bcgsl->vRt   = ksp->work[vi];
    vi++;
    bcgsl->vTm   = ksp->work[vi];
    vi++;
    bcgsl->vvR   = ksp->work+vi;
    vi += ell+1;
    bcgsl->vvU   = ksp->work+vi;
    vi += ell+1;
    bcgsl->vXr   = ksp->work[vi];
    vi++;
    ldMZ = PetscBLASIntCast(ell+1);

    /* Prime the iterative solver */
    ierr = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);
    CHKERRQ(ierr);
    ierr = VecNorm(VVR[0], NORM_2, &zeta0);
    CHKERRQ(ierr);
    rnmax_computed = zeta0;
    rnmax_true = zeta0;

    ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);
    CHKERRQ(ierr);
    if (ksp->reason) {
        ierr = PetscObjectTakeAccess(ksp);
        CHKERRQ(ierr);
        ksp->its   = 0;
        ksp->rnorm = zeta0;
        ierr = PetscObjectGrantAccess(ksp);
        CHKERRQ(ierr);
        PetscFunctionReturn(0);
    }

    ierr = VecSet(VVU[0],0.0);
    CHKERRQ(ierr);
    alpha = 0.;
    rho0 = omega = 1;

    if (bcgsl->delta>0.0) {
        ierr = VecCopy(VX, VXR);
        CHKERRQ(ierr);
        ierr = VecSet(VX,0.0);
        CHKERRQ(ierr);
        ierr = VecCopy(VVR[0], VB);
        CHKERRQ(ierr);
    } else {
        ierr = VecCopy(ksp->vec_rhs, VB);
        CHKERRQ(ierr);
    }

    /* Life goes on */
    ierr = VecCopy(VVR[0], VRT);
    CHKERRQ(ierr);
    zeta = zeta0;

    ierr = KSPGetTolerances(ksp, &epsilon, &abstol, PETSC_NULL, &maxit);
    CHKERRQ(ierr);

    for (k=0; k<maxit; k += bcgsl->ell) {
        ksp->its   = k;
        ksp->rnorm = zeta;

        KSPLogResidualHistory(ksp, zeta);
        KSPMonitor(ksp, ksp->its, zeta);

        ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
        CHKERRQ(ierr);
        if (ksp->reason) break;

        /* BiCG part */
        rho0 = -omega*rho0;
        nrm0 = zeta;
        for (j=0; j<bcgsl->ell; j++) {
            /* rho1 <- r_j' * r_tilde */
            ierr = VecDot(VVR[j], VRT, &rho1);
            CHKERRQ(ierr);
            if (rho1 == 0.0) {
                ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
                bBombed = PETSC_TRUE;
                break;
            }
            beta = alpha*(rho1/rho0);
            rho0 = rho1;
            for (i=0; i<=j; i++) {
                /* u_i <- r_i - beta*u_i */
                ierr = VecAYPX(VVU[i], -beta, VVR[i]);
                CHKERRQ(ierr);
            }
            /* u_{j+1} <- inv(K)*A*u_j */
            ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);
            CHKERRQ(ierr);

            ierr = VecDot(VVU[j+1], VRT, &sigma);
            CHKERRQ(ierr);
            if (sigma == 0.0) {
                ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
                bBombed = PETSC_TRUE;
                break;
            }
            alpha = rho1/sigma;

            /* x <- x + alpha*u_0 */
            ierr = VecAXPY(VX, alpha, VVU[0]);
            CHKERRQ(ierr);

            for (i=0; i<=j; i++) {
                /* r_i <- r_i - alpha*u_{i+1} */
                ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]);
                CHKERRQ(ierr);
            }

            /* r_{j+1} <- inv(K)*A*r_j */
            ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);
            CHKERRQ(ierr);

            ierr = VecNorm(VVR[0], NORM_2, &nrm0);
            CHKERRQ(ierr);
            if (bcgsl->delta>0.0) {
                if (rnmax_computed<nrm0) rnmax_computed = nrm0;
                if (rnmax_true<nrm0) rnmax_true = nrm0;
            }

            /* NEW: check for early exit */
            ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);
            CHKERRQ(ierr);
            if (ksp->reason) {
                ierr = PetscObjectTakeAccess(ksp);
                CHKERRQ(ierr);
                ksp->its   = k+j;
                ksp->rnorm = nrm0;
                ierr = PetscObjectGrantAccess(ksp);
                CHKERRQ(ierr);
                break;
            }
        }

        if (bBombed==PETSC_TRUE) break;

        /* Polynomial part */
        for(i = 0; i <= bcgsl->ell; ++i) {
            ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);
            CHKERRQ(ierr);
        }
        /* Symmetrize MZa */
        for(i = 0; i <= bcgsl->ell; ++i) {
            for(j = i+1; j <= bcgsl->ell; ++j) {
                MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]);
            }
        }
        /* Copy MZa to MZb */
        ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar));
        CHKERRQ(ierr);

        if (!bcgsl->bConvex || bcgsl->ell==1) {
            PetscBLASInt ione = 1,bell = PetscBLASIntCast(bcgsl->ell);

            AY0c[0] = -1;
            LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr);
            if (ierr!=0) {
                ksp->reason = KSP_DIVERGED_BREAKDOWN;
                bBombed = PETSC_TRUE;
                break;
            }
            ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar));
            CHKERRQ(ierr);
            LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
        } else {
            PetscBLASInt ione = 1;
            PetscScalar aone = 1.0, azero = 0.0;
            PetscBLASInt neqs = PetscBLASIntCast(bcgsl->ell-1);

            LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr);
            if (ierr!=0) {
                ksp->reason = KSP_DIVERGED_BREAKDOWN;
                bBombed = PETSC_TRUE;
                break;
            }
            ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar));
            CHKERRQ(ierr);
            LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
            AY0c[0] = -1;
            AY0c[bcgsl->ell] = 0.;

            ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar));
            CHKERRQ(ierr);
            LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr);

            AYlc[0] = 0.;
            AYlc[bcgsl->ell] = -1;

            BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione);

            kappa0 = BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione);

            /* round-off can cause negative kappa's */
            if (kappa0<0) kappa0 = -kappa0;
            kappa0 = sqrt(kappa0);

            kappaA = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

            BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione);

            kappa1 = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

            if (kappa1<0) kappa1 = -kappa1;
            kappa1 = sqrt(kappa1);

            if (kappa0!=0.0 && kappa1!=0.0) {
                if (kappaA<0.7*kappa0*kappa1) {
                    ghat = (kappaA<0.0) ?  -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
                } else {
                    ghat = kappaA/(kappa1*kappa1);
                }
                for (i=0; i<=bcgsl->ell; i++) {
                    AY0c[i] = AY0c[i] - ghat* AYlc[i];
                }
            }
        }

        omega = AY0c[bcgsl->ell];
        for (h=bcgsl->ell; h>0 && omega==0.0; h--) {
            omega = AY0c[h];
        }
        if (omega==0.0) {
            ksp->reason = KSP_DIVERGED_BREAKDOWN;
            break;
        }


        ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);
        CHKERRQ(ierr);
        for (i=1; i<=bcgsl->ell; i++) {
            AY0c[i] *= -1.0;
        }
        ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);
        CHKERRQ(ierr);
        ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);
        CHKERRQ(ierr);
        for (i=1; i<=bcgsl->ell; i++) {
            AY0c[i] *= -1.0;
        }
        ierr = VecNorm(VVR[0], NORM_2, &zeta);
        CHKERRQ(ierr);

        /* Accurate Update */
        if (bcgsl->delta>0.0) {
            if (rnmax_computed<zeta) rnmax_computed = zeta;
            if (rnmax_true<zeta) rnmax_true = zeta;

            bUpdateX = (PetscTruth) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
            if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
                /* r0 <- b-inv(K)*A*X */
                ierr = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);
                CHKERRQ(ierr);
                ierr = VecAYPX(VVR[0], -1.0, VB);
                CHKERRQ(ierr);
                rnmax_true = zeta;

                if (bUpdateX) {
                    ierr = VecAXPY(VXR,1.0,VX);
                    CHKERRQ(ierr);
                    ierr = VecSet(VX,0.0);
                    CHKERRQ(ierr);
                    ierr = VecCopy(VVR[0], VB);
                    CHKERRQ(ierr);
                    rnmax_computed = zeta;
                }
            }
        }
    }
    if (bcgsl->delta>0.0) {
        ierr = VecAXPY(VX,1.0,VXR);
        CHKERRQ(ierr);
    }

    ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
    CHKERRQ(ierr);
    if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
    PetscFunctionReturn(0);
}
Exemple #14
0
/**************************************xxt.c***********************************/
static PetscInt xxt_generate(xxt_ADT xxt_handle)
{
  PetscInt       i,j,k,idex;
  PetscInt       dim, col;
  PetscScalar    *u, *uu, *v, *z, *w, alpha, alpha_w;
  PetscInt       *segs;
  PetscInt       op[] = {GL_ADD,0};
  PetscInt       off, len;
  PetscScalar    *x_ptr;
  PetscInt       *iptr, flag;
  PetscInt       start =0, end, work;
  PetscInt       op2[] = {GL_MIN,0};
  PCTFS_gs_ADT   PCTFS_gs_handle;
  PetscInt       *nsep, *lnsep, *fo;
  PetscInt       a_n            =xxt_handle->mvi->n;
  PetscInt       a_m            =xxt_handle->mvi->m;
  PetscInt       *a_local2global=xxt_handle->mvi->local2global;
  PetscInt       level;
  PetscInt       xxt_nnz=0, xxt_max_nnz=0;
  PetscInt       n, m;
  PetscInt       *col_sz, *col_indices, *stages;
  PetscScalar    **col_vals, *x;
  PetscInt       n_global;
  PetscInt       xxt_zero_nnz  =0;
  PetscInt       xxt_zero_nnz_0=0;
  PetscBLASInt   i1            = 1,dlen;
  PetscScalar    dm1           = -1.0;
  PetscErrorCode ierr;

  n               = xxt_handle->mvi->n;
  nsep            = xxt_handle->info->nsep;
  lnsep           = xxt_handle->info->lnsep;
  fo              = xxt_handle->info->fo;
  end             = lnsep[0];
  level           = xxt_handle->level;
  PCTFS_gs_handle = xxt_handle->mvi->PCTFS_gs_handle;

  /* is there a null space? */
  /* LATER add in ability to detect null space by checking alpha */
  for (i=0, j=0; i<=level; i++) j+=nsep[i];

  m = j-xxt_handle->ns;
  if (m!=j) {
    ierr = PetscPrintf(PETSC_COMM_WORLD,"xxt_generate() :: null space exists %D %D %D\n",m,j,xxt_handle->ns);CHKERRQ(ierr);
  }

  /* get and initialize storage for x local         */
  /* note that x local is nxm and stored by columns */
  col_sz      = (PetscInt*) malloc(m*sizeof(PetscInt));
  col_indices = (PetscInt*) malloc((2*m+1)*sizeof(PetscInt));
  col_vals    = (PetscScalar**) malloc(m*sizeof(PetscScalar*));
  for (i=j=0; i<m; i++, j+=2) {
    col_indices[j]=col_indices[j+1]=col_sz[i]=-1;
    col_vals[i]   = NULL;
  }
  col_indices[j]=-1;

  /* size of separators for each sub-hc working from bottom of tree to top */
  /* this looks like nsep[]=segments */
  stages = (PetscInt*) malloc((level+1)*sizeof(PetscInt));
  segs   = (PetscInt*) malloc((level+1)*sizeof(PetscInt));
  PCTFS_ivec_zero(stages,level+1);
  PCTFS_ivec_copy(segs,nsep,level+1);
  for (i=0; i<level; i++) segs[i+1] += segs[i];
  stages[0] = segs[0];

  /* temporary vectors  */
  u  = (PetscScalar*) malloc(n*sizeof(PetscScalar));
  z  = (PetscScalar*) malloc(n*sizeof(PetscScalar));
  v  = (PetscScalar*) malloc(a_m*sizeof(PetscScalar));
  uu = (PetscScalar*) malloc(m*sizeof(PetscScalar));
  w  = (PetscScalar*) malloc(m*sizeof(PetscScalar));

  /* extra nnz due to replication of vertices across separators */
  for (i=1, j=0; i<=level; i++) j+=nsep[i];

  /* storage for sparse x values */
  n_global    = xxt_handle->info->n_global;
  xxt_max_nnz = (PetscInt)(2.5*PetscPowReal(1.0*n_global,1.6667) + j*n/2)/PCTFS_num_nodes;
  x           = (PetscScalar*) malloc(xxt_max_nnz*sizeof(PetscScalar));
  xxt_nnz     = 0;

  /* LATER - can embed next sep to fire in gs */
  /* time to make the donuts - generate X factor */
  for (dim=i=j=0; i<m; i++) {
    /* time to move to the next level? */
    while (i==segs[dim]) {
      if (dim==level) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"dim about to exceed level\n");
      stages[dim++]=i;
      end         +=lnsep[dim];
    }
    stages[dim]=i;

    /* which column are we firing? */
    /* i.e. set v_l */
    /* use new seps and do global min across hc to determine which one to fire */
    (start<end) ? (col=fo[start]) : (col=INT_MAX);
    PCTFS_giop_hc(&col,&work,1,op2,dim);

    /* shouldn't need this */
    if (col==INT_MAX) {
      ierr = PetscInfo(0,"hey ... col==INT_MAX??\n");CHKERRQ(ierr);
      continue;
    }

    /* do I own it? I should */
    PCTFS_rvec_zero(v,a_m);
    if (col==fo[start]) {
      start++;
      idex=PCTFS_ivec_linear_search(col, a_local2global, a_n);
      if (idex!=-1) {
        v[idex] = 1.0; j++;
      } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"NOT FOUND!\n");
    } else {
      idex=PCTFS_ivec_linear_search(col, a_local2global, a_m);
      if (idex!=-1) v[idex] = 1.0;
    }

    /* perform u = A.v_l */
    PCTFS_rvec_zero(u,n);
    do_matvec(xxt_handle->mvi,v,u);

    /* uu =  X^T.u_l (local portion) */
    /* technically only need to zero out first i entries */
    /* later turn this into an XXT_solve call ? */
    PCTFS_rvec_zero(uu,m);
    x_ptr=x;
    iptr = col_indices;
    for (k=0; k<i; k++) {
      off   = *iptr++;
      len   = *iptr++;
      ierr  = PetscBLASIntCast(len,&dlen);CHKERRQ(ierr);
      PetscStackCallBLAS("BLASdot",uu[k] = BLASdot_(&dlen,u+off,&i1,x_ptr,&i1));
      x_ptr+=len;
    }


    /* uu = X^T.u_l (comm portion) */
    PCTFS_ssgl_radd  (uu, w, dim, stages);

    /* z = X.uu */
    PCTFS_rvec_zero(z,n);
    x_ptr=x;
    iptr = col_indices;
    for (k=0; k<i; k++) {
      off  = *iptr++;
      len  = *iptr++;
      ierr = PetscBLASIntCast(len,&dlen);CHKERRQ(ierr);
      PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dlen,&uu[k],x_ptr,&i1,z+off,&i1));
      x_ptr+=len;
    }

    /* compute v_l = v_l - z */
    PCTFS_rvec_zero(v+a_n,a_m-a_n);
    ierr = PetscBLASIntCast(n,&dlen);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dlen,&dm1,z,&i1,v,&i1));

    /* compute u_l = A.v_l */
    if (a_n!=a_m) PCTFS_gs_gop_hc(PCTFS_gs_handle,v,"+\0",dim);
    PCTFS_rvec_zero(u,n);
    do_matvec(xxt_handle->mvi,v,u);

    /* compute sqrt(alpha) = sqrt(v_l^T.u_l) - local portion */
    ierr  = PetscBLASIntCast(n,&dlen);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASdot",alpha = BLASdot_(&dlen,u,&i1,v,&i1));
    /* compute sqrt(alpha) = sqrt(v_l^T.u_l) - comm portion */
    PCTFS_grop_hc(&alpha, &alpha_w, 1, op, dim);

    alpha = (PetscScalar) PetscSqrtReal((PetscReal)alpha);

    /* check for small alpha                             */
    /* LATER use this to detect and determine null space */
    if (PetscAbsScalar(alpha)<1.0e-14) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"bad alpha! %g\n",alpha);

    /* compute v_l = v_l/sqrt(alpha) */
    PCTFS_rvec_scale(v,1.0/alpha,n);

    /* add newly generated column, v_l, to X */
    flag = 1;
    off=len=0;
    for (k=0; k<n; k++) {
      if (v[k]!=0.0) {
        len=k;
        if (flag) { off=k; flag=0; }
      }
    }

    len -= (off-1);

    if (len>0) {
      if ((xxt_nnz+len)>xxt_max_nnz) {
        ierr         = PetscInfo(0,"increasing space for X by 2x!\n");CHKERRQ(ierr);
        xxt_max_nnz *= 2;
        x_ptr        = (PetscScalar*) malloc(xxt_max_nnz*sizeof(PetscScalar));
        PCTFS_rvec_copy(x_ptr,x,xxt_nnz);
        free(x);
        x     = x_ptr;
        x_ptr+=xxt_nnz;
      }
      xxt_nnz += len;
      PCTFS_rvec_copy(x_ptr,v+off,len);

      /* keep track of number of zeros */
      if (dim) {
        for (k=0; k<len; k++) {
          if (x_ptr[k]==0.0) xxt_zero_nnz++;
        }
      } else {
        for (k=0; k<len; k++) {
          if (x_ptr[k]==0.0) xxt_zero_nnz_0++;
        }
      }
      col_indices[2*i] = off;
      col_sz[i] = col_indices[2*i+1] = len;
      col_vals[i] = x_ptr;
    }
    else {
      col_indices[2*i] = 0;
      col_sz[i]        = col_indices[2*i+1] = 0;
      col_vals[i]      = x_ptr;
    }
  }

  /* close off stages for execution phase */
  while (dim!=level) {
    stages[dim++] = i;
    ierr          = PetscInfo2(0,"disconnected!!! dim(%D)!=level(%D)\n",dim,level);CHKERRQ(ierr);
  }
  stages[dim]=i;

  xxt_handle->info->n              = xxt_handle->mvi->n;
  xxt_handle->info->m              = m;
  xxt_handle->info->nnz            = xxt_nnz;
  xxt_handle->info->max_nnz        = xxt_max_nnz;
  xxt_handle->info->msg_buf_sz     = stages[level]-stages[0];
  xxt_handle->info->solve_uu       = (PetscScalar*) malloc(m*sizeof(PetscScalar));
  xxt_handle->info->solve_w        = (PetscScalar*) malloc(m*sizeof(PetscScalar));
  xxt_handle->info->x              = x;
  xxt_handle->info->col_vals       = col_vals;
  xxt_handle->info->col_sz         = col_sz;
  xxt_handle->info->col_indices    = col_indices;
  xxt_handle->info->stages         = stages;
  xxt_handle->info->nsolves        = 0;
  xxt_handle->info->tot_solve_time = 0.0;

  free(segs);
  free(u);
  free(v);
  free(uu);
  free(z);
  free(w);

  return(0);
}
Exemple #15
0
PetscErrorCode DSSort_NHEP_Total(DS ds,PetscScalar *wr,PetscScalar *wi)
{
#if defined(SLEPC_MISSING_LAPACK_TREXC)
  PetscFunctionBegin;
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREXC - Lapack routine is unavailable");
#else
  PetscErrorCode ierr;
  PetscScalar    re;
  PetscInt       i,j,pos,result;
  PetscBLASInt   ifst,ilst,info,n,ld;
  PetscScalar    *T = ds->mat[DS_MAT_A];
  PetscScalar    *Q = ds->mat[DS_MAT_Q];
#if !defined(PETSC_USE_COMPLEX)
  PetscScalar    *work,im;
#endif

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr);
#if !defined(PETSC_USE_COMPLEX)
  ierr = DSAllocateWork_Private(ds,ld,0,0);CHKERRQ(ierr);
  work = ds->work;
#endif
  /* selection sort */
  for (i=ds->l;i<n-1;i++) {
    re = wr[i];
#if !defined(PETSC_USE_COMPLEX)
    im = wi[i];
#endif
    pos = 0;
    j=i+1; /* j points to the next eigenvalue */
#if !defined(PETSC_USE_COMPLEX)
    if (im != 0) j=i+2;
#endif
    /* find minimum eigenvalue */
    for (;j<n;j++) {
#if !defined(PETSC_USE_COMPLEX)
      ierr = SlepcSCCompare(ds->sc,re,im,wr[j],wi[j],&result);CHKERRQ(ierr);
#else
      ierr = SlepcSCCompare(ds->sc,re,0.0,wr[j],0.0,&result);CHKERRQ(ierr);
#endif
      if (result > 0) {
        re = wr[j];
#if !defined(PETSC_USE_COMPLEX)
        im = wi[j];
#endif
        pos = j;
      }
#if !defined(PETSC_USE_COMPLEX)
      if (wi[j] != 0) j++;
#endif
    }
    if (pos) {
      /* interchange blocks */
      ierr = PetscBLASIntCast(pos+1,&ifst);CHKERRQ(ierr);
      ierr = PetscBLASIntCast(i+1,&ilst);CHKERRQ(ierr);
#if !defined(PETSC_USE_COMPLEX)
      PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info));
#else
      PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info));
#endif
      if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTREXC %d",info);
      /* recover original eigenvalues from T matrix */
      for (j=i;j<n;j++) {
        wr[j] = T[j+j*ld];
#if !defined(PETSC_USE_COMPLEX)
        if (j<n-1 && T[j+1+j*ld] != 0.0) {
          /* complex conjugate eigenvalue */
          wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) *
                  PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld]));
          wr[j+1] = wr[j];
          wi[j+1] = -wi[j];
          j++;
        } else {
          wi[j] = 0.0;
        }
#endif
      }
    }
#if !defined(PETSC_USE_COMPLEX)
    if (wi[i] != 0) i++;
#endif
  }
  PetscFunctionReturn(0);
#endif
}
Exemple #16
0
/*@
   KSPComputeEigenvaluesExplicitly - Computes all of the eigenvalues of the
   preconditioned operator using LAPACK.

   Collective on KSP

   Input Parameter:
+  ksp - iterative context obtained from KSPCreate()
-  n - size of arrays r and c

   Output Parameters:
+  r - real part of computed eigenvalues
-  c - complex part of computed eigenvalues

   Notes:
   This approach is very slow but will generally provide accurate eigenvalue
   estimates.  This routine explicitly forms a dense matrix representing
   the preconditioned operator, and thus will run only for relatively small
   problems, say n < 500.

   Many users may just want to use the monitoring routine
   KSPMonitorSingularValue() (which can be set with option -ksp_monitor_singular_value)
   to print the singular values at each iteration of the linear solve.

   The preconditoner operator, rhs vector, solution vectors should be
   set before this routine is called. i.e use KSPSetOperators(),KSPSolve() or
   KSPSetOperators()

   Level: advanced

.keywords: KSP, compute, eigenvalues, explicitly

.seealso: KSPComputeEigenvalues(), KSPMonitorSingularValue(), KSPComputeExtremeSingularValues(), KSPSetOperators(), KSPSolve()
@*/
PetscErrorCode  KSPComputeEigenvaluesExplicitly(KSP ksp,PetscInt nmax,PetscReal *r,PetscReal *c)
{
  Mat                BA;
  PetscErrorCode     ierr;
  PetscMPIInt        size,rank;
  MPI_Comm           comm = ((PetscObject)ksp)->comm;
  PetscScalar        *array;
  Mat                A;
  PetscInt           m,row,nz,i,n,dummy;
  const PetscInt     *cols;
  const PetscScalar  *vals;

  PetscFunctionBegin;
  ierr = KSPComputeExplicitOperator(ksp,&BA);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = MatGetSize(BA,&n,&n);CHKERRQ(ierr);
  if (size > 1) { /* assemble matrix on first processor */
    ierr = MatCreate(((PetscObject)ksp)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,n,n,n,n);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,n,n);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(BA,A);CHKERRQ(ierr);

    ierr = MatGetOwnershipRange(BA,&row,&dummy);CHKERRQ(ierr);
    ierr = MatGetLocalSize(BA,&m,&dummy);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow(BA,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(BA,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatDenseGetArray(A,&array);CHKERRQ(ierr);
  } else {
    ierr = MatDenseGetArray(BA,&array);CHKERRQ(ierr);
  }

#if defined(PETSC_HAVE_ESSL)
  /* ESSL has a different calling sequence for dgeev() and zgeev() than standard LAPACK */
  if (!rank) {
    PetscScalar  sdummy,*cwork;
    PetscReal    *work,*realpart;
    PetscBLASInt clen,idummy,lwork,bn,zero = 0;
    PetscInt *perm;

#if !defined(PETSC_USE_COMPLEX)
    clen = n;
#else
    clen = 2*n;
#endif
    ierr   = PetscMalloc(clen*sizeof(PetscScalar),&cwork);CHKERRQ(ierr);
    idummy = -1;                /* unused */
    bn = PetscBLASIntCast(n);
    lwork  = 5*n;
    ierr   = PetscMalloc(lwork*sizeof(PetscReal),&work);CHKERRQ(ierr);
    ierr   = PetscMalloc(n*sizeof(PetscReal),&realpart);CHKERRQ(ierr);
    ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
    LAPACKgeev_(&zero,array,&bn,cwork,&sdummy,&idummy,&idummy,&bn,work,&lwork);
    ierr = PetscFPTrapPop();CHKERRQ(ierr);
    ierr = PetscFree(work);CHKERRQ(ierr);

    /* For now we stick with the convention of storing the real and imaginary
       components of evalues separately.  But is this what we really want? */
    ierr = PetscMalloc(n*sizeof(PetscInt),&perm);CHKERRQ(ierr);

#if !defined(PETSC_USE_COMPLEX)
    for (i=0; i<n; i++) {
      realpart[i] = cwork[2*i];
      perm[i]     = i;
    }
    ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      r[i] = cwork[2*perm[i]];
      c[i] = cwork[2*perm[i]+1];
    }
#else
    for (i=0; i<n; i++) {
      realpart[i] = PetscRealPart(cwork[i]);
      perm[i]     = i;
    }
    ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      r[i] = PetscRealPart(cwork[perm[i]]);
      c[i] = PetscImaginaryPart(cwork[perm[i]]);
    }
#endif
    ierr = PetscFree(perm);CHKERRQ(ierr);
    ierr = PetscFree(realpart);CHKERRQ(ierr);
    ierr = PetscFree(cwork);CHKERRQ(ierr);
  }
#elif !defined(PETSC_USE_COMPLEX)
  if (!rank) {
    PetscScalar  *work;
    PetscReal    *realpart,*imagpart;
    PetscBLASInt idummy,lwork;
    PetscInt     *perm;

    idummy   = n;
    lwork    = 5*n;
    ierr     = PetscMalloc(2*n*sizeof(PetscReal),&realpart);CHKERRQ(ierr);
    imagpart = realpart + n;
    ierr     = PetscMalloc(5*n*sizeof(PetscReal),&work);CHKERRQ(ierr);
#if defined(PETSC_MISSING_LAPACK_GEEV)
    SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values.");
#else
    {
      PetscBLASInt lierr;
      PetscScalar sdummy;
      PetscBLASInt bn = PetscBLASIntCast(n);
      ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
      LAPACKgeev_("N","N",&bn,array,&bn,realpart,imagpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr);
      if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr);
      ierr = PetscFPTrapPop();CHKERRQ(ierr);
    }
#endif
    ierr = PetscFree(work);CHKERRQ(ierr);
    ierr = PetscMalloc(n*sizeof(PetscInt),&perm);CHKERRQ(ierr);
    for (i=0; i<n; i++) { perm[i] = i;}
    ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      r[i] = realpart[perm[i]];
      c[i] = imagpart[perm[i]];
    }
    ierr = PetscFree(perm);CHKERRQ(ierr);
    ierr = PetscFree(realpart);CHKERRQ(ierr);
  }
#else
  if (!rank) {
    PetscScalar  *work,*eigs;
    PetscReal    *rwork;
    PetscBLASInt idummy,lwork;
    PetscInt     *perm;

    idummy   = n;
    lwork    = 5*n;
    ierr = PetscMalloc(5*n*sizeof(PetscScalar),&work);CHKERRQ(ierr);
    ierr = PetscMalloc(2*n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
    ierr = PetscMalloc(n*sizeof(PetscScalar),&eigs);CHKERRQ(ierr);
#if defined(PETSC_MISSING_LAPACK_GEEV)
    SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values.");
#else
    {
      PetscBLASInt lierr;
      PetscScalar  sdummy;
      PetscBLASInt nb = PetscBLASIntCast(n);
      ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
      LAPACKgeev_("N","N",&nb,array,&nb,eigs,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,rwork,&lierr);
      if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr);
      ierr = PetscFPTrapPop();CHKERRQ(ierr);
    }
#endif
    ierr = PetscFree(work);CHKERRQ(ierr);
    ierr = PetscFree(rwork);CHKERRQ(ierr);
    ierr = PetscMalloc(n*sizeof(PetscInt),&perm);CHKERRQ(ierr);
    for (i=0; i<n; i++) { perm[i] = i;}
    for (i=0; i<n; i++) { r[i]    = PetscRealPart(eigs[i]);}
    ierr = PetscSortRealWithPermutation(n,r,perm);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      r[i] = PetscRealPart(eigs[perm[i]]);
      c[i] = PetscImaginaryPart(eigs[perm[i]]);
    }
    ierr = PetscFree(perm);CHKERRQ(ierr);
    ierr = PetscFree(eigs);CHKERRQ(ierr);
  }
#endif
  if (size > 1) {
    ierr = MatDenseRestoreArray(A,&array);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  } else {
    ierr = MatDenseRestoreArray(BA,&array);CHKERRQ(ierr);
  }
  ierr = MatDestroy(&BA);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Exemple #17
0
PetscErrorCode DSSolve_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi)
{
#if defined(SLEPC_MISSING_LAPACK_GEHRD) || defined(SLEPC_MISSING_LAPACK_ORGHR) || defined(PETSC_MISSING_LAPACK_HSEQR)
  PetscFunctionBegin;
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEHRD/ORGHR/HSEQR - Lapack routines are unavailable");
#else
  PetscErrorCode ierr;
  PetscScalar    *work,*tau;
  PetscInt       i,j;
  PetscBLASInt   ilo,lwork,info,n,ld;
  PetscScalar    *A = ds->mat[DS_MAT_A];
  PetscScalar    *Q = ds->mat[DS_MAT_Q];

  PetscFunctionBegin;
#if !defined(PETSC_USE_COMPLEX)
  PetscValidPointer(wi,3);
#endif
  ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->l+1,&ilo);CHKERRQ(ierr);
  ierr = DSAllocateWork_Private(ds,ld+ld*ld,0,0);CHKERRQ(ierr);
  tau  = ds->work;
  work = ds->work+ld;
  lwork = ld*ld;

  /* initialize orthogonal matrix */
  ierr = PetscMemzero(Q,ld*ld*sizeof(PetscScalar));CHKERRQ(ierr);
  for (i=0;i<n;i++)
    Q[i+i*ld] = 1.0;
  if (n==1) { /* quick return */
    wr[0] = A[0];
    wi[0] = 0.0;
    PetscFunctionReturn(0);
  }

  /* reduce to upper Hessenberg form */
  if (ds->state<DS_STATE_INTERMEDIATE) {
    PetscStackCallBLAS("LAPACKgehrd",LAPACKgehrd_(&n,&ilo,&n,A,&ld,tau,work,&lwork,&info));
    if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEHRD %d",info);
    for (j=0;j<n-1;j++) {
      for (i=j+2;i<n;i++) {
        Q[i+j*ld] = A[i+j*ld];
        A[i+j*ld] = 0.0;
      }
    }
    PetscStackCallBLAS("LAPACKorghr",LAPACKorghr_(&n,&ilo,&n,Q,&ld,tau,work,&lwork,&info));
    if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGHR %d",info);
  }

  /* compute the (real) Schur form */
#if !defined(PETSC_USE_COMPLEX)
  PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,wi,Q,&ld,work,&lwork,&info));
  for (j=0;j<ds->l;j++) {
    if (j==n-1 || A[j+1+j*ld] == 0.0) {
      /* real eigenvalue */
      wr[j] = A[j+j*ld];
      wi[j] = 0.0;
    } else {
      /* complex eigenvalue */
      wr[j] = A[j+j*ld];
      wr[j+1] = A[j+j*ld];
      wi[j] = PetscSqrtReal(PetscAbsReal(A[j+1+j*ld])) *
              PetscSqrtReal(PetscAbsReal(A[j+(j+1)*ld]));
      wi[j+1] = -wi[j];
      j++;
    }
  }
#else
  PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,Q,&ld,work,&lwork,&info));
  if (wi) for (i=ds->l;i<n;i++) wi[i] = 0.0;
#endif
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xHSEQR %d",info);
  PetscFunctionReturn(0);
#endif
}
Exemple #18
0
static PetscErrorCode  KSPSolve_BCGSL(KSP ksp)
{
  KSP_BCGSL      *bcgsl = (KSP_BCGSL*) ksp->data;
  PetscScalar    alpha, beta, omega, sigma;
  PetscScalar    rho0, rho1;
  PetscReal      kappa0, kappaA, kappa1;
  PetscReal      ghat;
  PetscReal      zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
  PetscBool      bUpdateX;
  PetscInt       maxit;
  PetscInt       h, i, j, k, vi, ell;
  PetscBLASInt   ldMZ,bierr;
  PetscScalar    utb;
  PetscReal      max_s, pinv_tol;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* set up temporary vectors */
  vi         = 0;
  ell        = bcgsl->ell;
  bcgsl->vB  = ksp->work[vi]; vi++;
  bcgsl->vRt = ksp->work[vi]; vi++;
  bcgsl->vTm = ksp->work[vi]; vi++;
  bcgsl->vvR = ksp->work+vi; vi += ell+1;
  bcgsl->vvU = ksp->work+vi; vi += ell+1;
  bcgsl->vXr = ksp->work[vi]; vi++;
  ierr       = PetscBLASIntCast(ell+1,&ldMZ);CHKERRQ(ierr);

  /* Prime the iterative solver */
  ierr           = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);CHKERRQ(ierr);
  ierr           = VecNorm(VVR[0], NORM_2, &zeta0);CHKERRQ(ierr);
  rnmax_computed = zeta0;
  rnmax_true     = zeta0;

  ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
  if (ksp->reason) {
    ierr       = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
    ksp->its   = 0;
    ksp->rnorm = zeta0;
    ierr       = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr  = VecSet(VVU[0],0.0);CHKERRQ(ierr);
  alpha = 0.;
  rho0  = omega = 1;

  if (bcgsl->delta>0.0) {
    ierr = VecCopy(VX, VXR);CHKERRQ(ierr);
    ierr = VecSet(VX,0.0);CHKERRQ(ierr);
    ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr);
  } else {
    ierr = VecCopy(ksp->vec_rhs, VB);CHKERRQ(ierr);
  }

  /* Life goes on */
  ierr = VecCopy(VVR[0], VRT);CHKERRQ(ierr);
  zeta = zeta0;

  ierr = KSPGetTolerances(ksp, NULL, NULL, NULL, &maxit);CHKERRQ(ierr);

  for (k=0; k<maxit; k += bcgsl->ell) {
    ksp->its   = k;
    ksp->rnorm = zeta;

    ierr = KSPLogResidualHistory(ksp, zeta);CHKERRQ(ierr);
    ierr = KSPMonitor(ksp, ksp->its, zeta);CHKERRQ(ierr);

    ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
    if (ksp->reason < 0) PetscFunctionReturn(0);
    else if (ksp->reason) break;

    /* BiCG part */
    rho0 = -omega*rho0;
    nrm0 = zeta;
    for (j=0; j<bcgsl->ell; j++) {
      /* rho1 <- r_j' * r_tilde */
      ierr = VecDot(VVR[j], VRT, &rho1);CHKERRQ(ierr);
      if (rho1 == 0.0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        PetscFunctionReturn(0);
      }
      beta = alpha*(rho1/rho0);
      rho0 = rho1;
      for (i=0; i<=j; i++) {
        /* u_i <- r_i - beta*u_i */
        ierr = VecAYPX(VVU[i], -beta, VVR[i]);CHKERRQ(ierr);
      }
      /* u_{j+1} <- inv(K)*A*u_j */
      ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);CHKERRQ(ierr);

      ierr = VecDot(VVU[j+1], VRT, &sigma);CHKERRQ(ierr);
      if (sigma == 0.0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        PetscFunctionReturn(0);
      }
      alpha = rho1/sigma;

      /* x <- x + alpha*u_0 */
      ierr = VecAXPY(VX, alpha, VVU[0]);CHKERRQ(ierr);

      for (i=0; i<=j; i++) {
        /* r_i <- r_i - alpha*u_{i+1} */
        ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]);CHKERRQ(ierr);
      }

      /* r_{j+1} <- inv(K)*A*r_j */
      ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);CHKERRQ(ierr);

      ierr = VecNorm(VVR[0], NORM_2, &nrm0);CHKERRQ(ierr);
      if (bcgsl->delta>0.0) {
        if (rnmax_computed<nrm0) rnmax_computed = nrm0;
        if (rnmax_true<nrm0) rnmax_true = nrm0;
      }

      /* NEW: check for early exit */
      ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
      if (ksp->reason) {
        ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr);

        ksp->its   = k+j;
        ksp->rnorm = nrm0;

        ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
        if (ksp->reason < 0) PetscFunctionReturn(0);
      }
    }

    /* Polynomial part */
    for (i = 0; i <= bcgsl->ell; ++i) {
      ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);CHKERRQ(ierr);
    }
    /* Symmetrize MZa */
    for (i = 0; i <= bcgsl->ell; ++i) {
      for (j = i+1; j <= bcgsl->ell; ++j) {
        MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]);
      }
    }
    /* Copy MZa to MZb */
    ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar));CHKERRQ(ierr);

    if (!bcgsl->bConvex || bcgsl->ell==1) {
      PetscBLASInt ione = 1,bell;
      ierr = PetscBLASIntCast(bcgsl->ell,&bell);CHKERRQ(ierr);

      AY0c[0] = -1;
      if (bcgsl->pinv) {
#if defined(PETSC_MISSING_LAPACK_GESVD)
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable.");
#else
#  if defined(PETSC_USE_COMPLEX)
        PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,bcgsl->realwork,&bierr));
#  else
        PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,&bierr));
#  endif
#endif
        if (bierr!=0) {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          PetscFunctionReturn(0);
        }
        /* Apply pseudo-inverse */
        max_s = bcgsl->s[0];
        for (i=1; i<bell; i++) {
          if (bcgsl->s[i] > max_s) {
            max_s = bcgsl->s[i];
          }
        }
        /* tolerance is hardwired to bell*max(s)*PETSC_MACHINE_EPSILON */
        pinv_tol = bell*max_s*PETSC_MACHINE_EPSILON;
        ierr = PetscMemzero(&AY0c[1],bell*sizeof(PetscScalar));CHKERRQ(ierr);
        for (i=0; i<bell; i++) {
          if (bcgsl->s[i] >= pinv_tol) {
            utb=0.;
            for (j=0; j<bell; j++) {
              utb += MZb[1+j]*bcgsl->u[i*bell+j];
            }

            for (j=0; j<bell; j++) {
              AY0c[1+j] += utb/bcgsl->s[i]*bcgsl->v[j*bell+i];
            }
          }
        }
      } else {
#if defined(PETSC_MISSING_LAPACK_POTRF)
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable.");
#else
        PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr));
#endif
        if (bierr!=0) {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          PetscFunctionReturn(0);
        }
        ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar));CHKERRQ(ierr);
        PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
      }
    } else {
      PetscBLASInt ione = 1;
      PetscScalar  aone = 1.0, azero = 0.0;
      PetscBLASInt neqs;
      ierr = PetscBLASIntCast(bcgsl->ell-1,&neqs);CHKERRQ(ierr);

#if defined(PETSC_MISSING_LAPACK_POTRF)
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable.");
#else
      PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr));
#endif
      if (bierr!=0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN;
        PetscFunctionReturn(0);
      }
      ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr);
      PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
      AY0c[0]          = -1;
      AY0c[bcgsl->ell] = 0.;

      ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr);
      PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr));

      AYlc[0]          = 0.;
      AYlc[bcgsl->ell] = -1;

      PetscStackCall("BLASgemv",BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione));

      kappa0 = PetscRealPart(BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione));

      /* round-off can cause negative kappa's */
      if (kappa0<0) kappa0 = -kappa0;
      kappa0 = PetscSqrtReal(kappa0);

      kappaA = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));

      PetscStackCall("BLASgemv",BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione));

      kappa1 = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));

      if (kappa1<0) kappa1 = -kappa1;
      kappa1 = PetscSqrtReal(kappa1);

      if (kappa0!=0.0 && kappa1!=0.0) {
        if (kappaA<0.7*kappa0*kappa1) {
          ghat = (kappaA<0.0) ?  -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
        } else {
          ghat = kappaA/(kappa1*kappa1);
        }
        for (i=0; i<=bcgsl->ell; i++) {
          AY0c[i] = AY0c[i] - ghat* AYlc[i];
        }
      }
    }

    omega = AY0c[bcgsl->ell];
    for (h=bcgsl->ell; h>0 && omega==0.0; h--) omega = AY0c[h];
    if (omega==0.0) {
      ksp->reason = KSP_DIVERGED_BREAKDOWN;
      PetscFunctionReturn(0);
    }


    ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);CHKERRQ(ierr);
    for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0;
    ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);CHKERRQ(ierr);
    ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);CHKERRQ(ierr);
    for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0;
    ierr = VecNorm(VVR[0], NORM_2, &zeta);CHKERRQ(ierr);

    /* Accurate Update */
    if (bcgsl->delta>0.0) {
      if (rnmax_computed<zeta) rnmax_computed = zeta;
      if (rnmax_true<zeta) rnmax_true = zeta;

      bUpdateX = (PetscBool) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
      if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
        /* r0 <- b-inv(K)*A*X */
        ierr       = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);CHKERRQ(ierr);
        ierr       = VecAYPX(VVR[0], -1.0, VB);CHKERRQ(ierr);
        rnmax_true = zeta;

        if (bUpdateX) {
          ierr           = VecAXPY(VXR,1.0,VX);CHKERRQ(ierr);
          ierr           = VecSet(VX,0.0);CHKERRQ(ierr);
          ierr           = VecCopy(VVR[0], VB);CHKERRQ(ierr);
          rnmax_computed = zeta;
        }
      }
    }
  }
  if (bcgsl->delta>0.0) {
    ierr = VecAXPY(VX,1.0,VXR);CHKERRQ(ierr);
  }

  ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
  if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
  PetscFunctionReturn(0);
}
Exemple #19
0
PetscErrorCode KSPAGMRESRoddec(KSP ksp, PetscInt nvec)
{
  KSP_AGMRES     *agmres = (KSP_AGMRES*) ksp->data;
  MPI_Comm       comm;
  PetscScalar    *Qloc   = agmres->Qloc;
  PetscScalar    *sgn    = agmres->sgn;
  PetscScalar    *tloc   = agmres->tloc;
  PetscErrorCode ierr;
  PetscReal      *wbufptr = agmres->wbufptr;
  PetscMPIInt    rank     = agmres->rank;
  PetscMPIInt    First    = agmres->First;
  PetscMPIInt    Last     = agmres->Last;
  PetscBLASInt   pas,len,bnloc,bpos;
  PetscInt       nloc,d, i, j, k;
  PetscInt       pos;
  PetscReal      c, s, rho, Ajj, val, tt, old;
  PetscScalar    *col;
  MPI_Status     status;
  PetscBLASInt   N = MAXKSPSIZE + 1;


  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)ksp,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(KSP_AGMRESRoddec,ksp,0,0,0);CHKERRQ(ierr);
  ierr = PetscMemzero(agmres->Rloc, N*N*sizeof(PetscScalar));CHKERRQ(ierr);
  /* check input arguments */
  if (nvec < 1) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_OUTOFRANGE, "The number of input vectors shoud be positive");
  ierr = VecGetLocalSize(VEC_V(0), &nloc);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(nloc,&bnloc);CHKERRQ(ierr);
  if (nvec > nloc) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_WRONG, "In QR factorization, the number of local rows should be greater or equal to the number of columns");
  pas = 1;
  /* Copy the vectors of the basis */
  for (j = 0; j < nvec; j++) {
    ierr = VecGetArray(VEC_V(j), &col);CHKERRQ(ierr);
    PetscStackCallBLAS("BLAScopy",BLAScopy_(&bnloc, col, &pas, &Qloc[j*nloc], &pas));
    ierr = VecRestoreArray(VEC_V(j), &col);CHKERRQ(ierr);
  }
  /* Each process performs a local QR on its own block */
  for (j = 0; j < nvec; j++) {
    len = nloc - j;
    Ajj = Qloc[j*nloc+j];
    rho = -PetscSign(Ajj) * BLASnrm2_(&len, &(Qloc[j*nloc+j]), &pas);
    if (rho == 0.0) tloc[j] = 0.0;
    else {
      tloc[j] = (Ajj - rho) / rho;
      len     = len - 1;
      val     = 1.0 / (Ajj - rho);
      PetscStackCallBLAS("BLASscal",BLASscal_(&len, &val, &(Qloc[j*nloc+j+1]), &pas));
      Qloc[j*nloc+j] = 1.0;
      len            = len + 1;
      for (k = j + 1; k < nvec; k++) {
        PetscStackCallBLAS("BLASdot",tt = tloc[j] * BLASdot_(&len, &(Qloc[j*nloc+j]), &pas, &(Qloc[k*nloc+j]), &pas));
        PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&len, &tt, &(Qloc[j*nloc+j]), &pas, &(Qloc[k*nloc+j]), &pas));
      }
      Qloc[j*nloc+j] = rho;
    }
  }
  /*annihilate undesirable Rloc, diagonal by diagonal*/
  for (d = 0; d < nvec; d++) {
    len = nvec - d;
    if (rank == First) {
      PetscStackCallBLAS("BLAScopy",BLAScopy_(&len, &(Qloc[d*nloc+d]), &bnloc, &(wbufptr[d]), &pas));
      ierr = MPI_Send(&(wbufptr[d]), len, MPIU_SCALAR, rank + 1, agmres->tag, comm);CHKERRQ(ierr);
    } else {
      ierr = MPI_Recv(&(wbufptr[d]), len, MPIU_SCALAR, rank - 1, agmres->tag, comm, &status);CHKERRQ(ierr);
      /*Elimination of Rloc(1,d)*/
      c    = wbufptr[d];
      s    = Qloc[d*nloc];
      ierr = KSPAGMRESRoddecGivens(&c, &s, &rho, 1);CHKERRQ(ierr);
      /*Apply Givens Rotation*/
      for (k = d; k < nvec; k++) {
        old          = wbufptr[k];
        wbufptr[k]   =  c * old - s * Qloc[k*nloc];
        Qloc[k*nloc] =  s * old + c * Qloc[k*nloc];
      }
      Qloc[d*nloc] = rho;
      if (rank != Last) {
        ierr = MPI_Send(& (wbufptr[d]), len, MPIU_SCALAR, rank + 1, agmres->tag, comm);CHKERRQ(ierr);
      }
      /* zero-out the d-th diagonal of Rloc ...*/
      for (j = d + 1; j < nvec; j++) {
        /* elimination of Rloc[i][j]*/
        i    = j - d;
        c    = Qloc[j*nloc+i-1];
        s    = Qloc[j*nloc+i];
        ierr = KSPAGMRESRoddecGivens(&c, &s, &rho, 1);CHKERRQ(ierr);
        for (k = j; k < nvec; k++) {
          old              = Qloc[k*nloc+i-1];
          Qloc[k*nloc+i-1] = c * old - s * Qloc[k*nloc+i];
          Qloc[k*nloc+i]   =   s * old + c * Qloc[k*nloc+i];
        }
        Qloc[j*nloc+i] = rho;
      }
      if (rank == Last) {
        PetscStackCallBLAS("BLAScopy",BLAScopy_(&len, &(wbufptr[d]), &pas, RLOC(d,d), &N));
        for (k = d + 1; k < nvec; k++) *RLOC(k,d) = 0.0;
      }
    }
  }

  if (rank == Last) {
    for (d = 0; d < nvec; d++) {
      pos    = nvec - d;
      ierr = PetscBLASIntCast(pos,&bpos);CHKERRQ(ierr);
      sgn[d] = PetscSign(*RLOC(d,d));
      PetscStackCallBLAS("BLASscal",BLASscal_(&bpos, &(sgn[d]), RLOC(d,d), &N));
    }
  }
  /*BroadCast Rloc to all other processes
   * NWD : should not be needed
   */
  ierr = MPI_Bcast(agmres->Rloc,N*N,MPIU_SCALAR,Last,comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(KSP_AGMRESRoddec,ksp,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}