Example #1
static PetscErrorCode KSPDestroy_PIPEFCG(KSP ksp)
{
  PetscErrorCode ierr;
  PetscInt       i;
  KSP_PIPEFCG    *pipefcg;

  PetscFunctionBegin;
  pipefcg = (KSP_PIPEFCG*)ksp->data;

  /* Destroy "standard" work vecs */
  ierr = VecDestroyVecs(ksp->nwork,&ksp->work);CHKERRQ(ierr);

  /* Destroy vectors of old directions and the arrays that manage pointers to them */
  if(pipefcg->nvecs){
    for(i=0;i<pipefcg->nchunks;++i){
      ierr = VecDestroyVecs(pipefcg->chunksizes[i],&pipefcg->pPvecs[i]);CHKERRQ(ierr);
      ierr = VecDestroyVecs(pipefcg->chunksizes[i],&pipefcg->pSvecs[i]);CHKERRQ(ierr);
      ierr = VecDestroyVecs(pipefcg->chunksizes[i],&pipefcg->pQvecs[i]);CHKERRQ(ierr);
      ierr = VecDestroyVecs(pipefcg->chunksizes[i],&pipefcg->pZETAvecs[i]);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree4(pipefcg->Pvecs,pipefcg->Svecs,pipefcg->pPvecs,pipefcg->pSvecs);CHKERRQ(ierr);
  ierr = PetscFree4(pipefcg->Qvecs,pipefcg->ZETAvecs,pipefcg->pQvecs,pipefcg->pZETAvecs);CHKERRQ(ierr);
  ierr = PetscFree4(pipefcg->Pold,pipefcg->Sold,pipefcg->Qold,pipefcg->ZETAold);CHKERRQ(ierr);
  ierr = PetscFree(pipefcg->chunksizes);CHKERRQ(ierr);
  ierr = PetscFree3(pipefcg->dots,pipefcg->etas,pipefcg->redux);CHKERRQ(ierr);
  ierr = KSPDestroyDefault(ksp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
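All of these destroy routines rely on the PetscMallocN/PetscFreeN pairing rule: PetscMalloc4() performs a single allocation that hands back four arrays, and PetscFree4() must later receive those same four pointers in the same order, exactly once. A minimal sketch of the pattern, using hypothetical array names and the newer size-only PetscMalloc4() signature:

#include <petscsys.h>

static PetscErrorCode Malloc4PairingSketch(PetscInt n)
{
  PetscErrorCode ierr;
  PetscReal      *a,*b;
  PetscInt       *ia,*ja;

  PetscFunctionBegin;
  /* one allocation, four arrays (possibly of different types and lengths) */
  ierr = PetscMalloc4(n,&a,n,&b,n+1,&ia,n,&ja);CHKERRQ(ierr);
  /* ... fill and use the arrays ... */
  /* one matching free, same pointers in the same order */
  ierr = PetscFree4(a,b,ia,ja);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}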
Example #2
/*@
   PetscDTReconstructPoly - create matrix representing polynomial reconstruction using cell intervals and evaluation at target intervals

   Not Collective

   Input Arguments:
+  degree - degree of reconstruction polynomial
.  nsource - number of source intervals
.  sourcex - sorted coordinates of source cell boundaries (length nsource+1)
.  ntarget - number of target intervals
-  targetx - sorted coordinates of target cell boundaries (length ntarget+1)

   Output Arguments:
.  R - reconstruction matrix, utarget = sum_s R[t*nsource+s] * usource[s]

   Level: advanced

.seealso: PetscDTLegendreEval()
@*/
PetscErrorCode PetscDTReconstructPoly(PetscInt degree,PetscInt nsource,const PetscReal *sourcex,PetscInt ntarget,const PetscReal *targetx,PetscReal *R)
{
  PetscErrorCode ierr;
  PetscInt i,j,k,*bdegrees,worksize;
  PetscReal xmin,xmax,center,hscale,*sourcey,*targety,*Bsource,*Bsinv,*Btarget;
  PetscScalar *tau,*work;

  PetscFunctionBegin;
  PetscValidRealPointer(sourcex,3);
  PetscValidRealPointer(targetx,5);
  PetscValidRealPointer(R,6);
  if (degree >= nsource) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Reconstruction degree %D must be less than number of source intervals %D",degree,nsource);
#if defined(PETSC_USE_DEBUG)
  for (i=0; i<nsource; i++) {
    if (sourcex[i] >= sourcex[i+1]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Source interval %D has negative orientation (%G,%G)",i,sourcex[i],sourcex[i+1]);
  }
  for (i=0; i<ntarget; i++) {
    if (targetx[i] >= targetx[i+1]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Target interval %D has negative orientation (%G,%G)",i,targetx[i],targetx[i+1]);
  }
#endif
  xmin = PetscMin(sourcex[0],targetx[0]);
  xmax = PetscMax(sourcex[nsource],targetx[ntarget]);
  center = (xmin + xmax)/2;
  hscale = (xmax - xmin)/2;
  worksize = nsource;
  ierr = PetscMalloc4(degree+1,PetscInt,&bdegrees,nsource+1,PetscReal,&sourcey,nsource*(degree+1),PetscReal,&Bsource,worksize,PetscScalar,&work);CHKERRQ(ierr);
  ierr = PetscMalloc4(nsource,PetscScalar,&tau,nsource*(degree+1),PetscReal,&Bsinv,ntarget+1,PetscReal,&targety,ntarget*(degree+1),PetscReal,&Btarget);CHKERRQ(ierr);
  for (i=0; i<=nsource; i++) sourcey[i] = (sourcex[i]-center)/hscale;
  for (i=0; i<=degree; i++) bdegrees[i] = i+1;
  ierr = PetscDTLegendreIntegrate(nsource,sourcey,degree+1,bdegrees,PETSC_TRUE,Bsource);CHKERRQ(ierr);
  ierr = PetscDTPseudoInverseQR(nsource,nsource,degree+1,Bsource,Bsinv,tau,nsource,work);CHKERRQ(ierr);
  for (i=0; i<=ntarget; i++) targety[i] = (targetx[i]-center)/hscale;
  ierr = PetscDTLegendreIntegrate(ntarget,targety,degree+1,bdegrees,PETSC_FALSE,Btarget);CHKERRQ(ierr);
  for (i=0; i<ntarget; i++) {
    PetscReal rowsum = 0;
    for (j=0; j<nsource; j++) {
      PetscReal sum = 0;
      for (k=0; k<degree+1; k++) {
        sum += Btarget[i*(degree+1)+k] * Bsinv[k*nsource+j];
      }
      R[i*nsource+j] = sum;
      rowsum += sum;
    }
    for (j=0; j<nsource; j++) R[i*nsource+j] /= rowsum; /* normalize each row */
  }
  ierr = PetscFree4(bdegrees,sourcey,Bsource,work);CHKERRQ(ierr);
  ierr = PetscFree4(tau,Bsinv,targety,Btarget);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
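A hedged usage sketch with made-up interval data: reconstruct averages on two target cells from three source cells with a degree-2 polynomial, applying R exactly as the utarget formula in the docstring states.

#include <petscdt.h>

static PetscErrorCode ReconstructSketch(void)
{
  PetscErrorCode ierr;
  PetscReal      sourcex[4] = {0.0,1.0,2.0,3.0};      /* nsource+1 = 4 boundaries */
  PetscReal      targetx[3] = {0.5,1.5,2.5};          /* ntarget+1 = 3 boundaries */
  PetscReal      usource[3] = {1.0,2.0,4.0},utarget[2],R[2*3];
  PetscInt       t,s;

  PetscFunctionBegin;
  ierr = PetscDTReconstructPoly(2,3,sourcex,2,targetx,R);CHKERRQ(ierr);
  for (t=0; t<2; t++) {                               /* utarget = sum_s R[t*nsource+s]*usource[s] */
    utarget[t] = 0.0;
    for (s=0; s<3; s++) utarget[t] += R[t*3+s]*usource[s];
  }
  PetscFunctionReturn(0);
}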
Example #3
EXTERN_C_BEGIN
/*
    MatOrdering_ND - Find the nested dissection ordering of a given matrix.
*/    
#undef __FUNCT__  
#define __FUNCT__ "MatOrdering_ND"
PetscErrorCode PETSCMAT_DLLEXPORT MatOrdering_ND(Mat mat,const MatOrderingType type,IS *row,IS *col)
{
  PetscErrorCode ierr;
  PetscInt       i, *mask,*xls,*ls,nrow,*ia,*ja,*perm;
  PetscTruth     done;

  PetscFunctionBegin;
  ierr = MatGetRowIJ(mat,1,PETSC_TRUE,PETSC_TRUE,&nrow,&ia,&ja,&done);CHKERRQ(ierr);
  if (!done) SETERRQ1(PETSC_ERR_SUP,"Cannot get rows for matrix type %s",((PetscObject)mat)->type_name);

  ierr = PetscMalloc4(nrow,PetscInt,&mask,nrow,PetscInt,&perm,nrow+1,PetscInt,&xls,nrow,PetscInt,&ls);CHKERRQ(ierr);
  SPARSEPACKgennd(&nrow,ia,ja,mask,perm,xls,ls);
  ierr = MatRestoreRowIJ(mat,1,PETSC_TRUE,PETSC_TRUE,&nrow,&ia,&ja,&done);CHKERRQ(ierr);

  /* shift because Sparsepack indices start at one */
  for (i=0; i<nrow; i++) perm[i]--;

  ierr = ISCreateGeneral(PETSC_COMM_SELF,nrow,perm,row);CHKERRQ(ierr);
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nrow,perm,col);CHKERRQ(ierr);
  ierr = PetscFree4(mask,perm,xls,ls);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Example #4
PetscErrorCode KSPDestroy_FCG(KSP ksp)
{
  PetscErrorCode ierr;
  PetscInt       i;
  KSP_FCG        *fcg = (KSP_FCG*)ksp->data;

  PetscFunctionBegin;

  /* Destroy "standard" work vecs */
  ierr = VecDestroyVecs(ksp->nwork,&ksp->work);CHKERRQ(ierr);

  /* Destroy P and C vectors and the arrays that manage pointers to them */
  if (fcg->nvecs){
    for (i=0;i<fcg->nchunks;++i){
      ierr = VecDestroyVecs(fcg->chunksizes[i],&fcg->pPvecs[i]);CHKERRQ(ierr);
      ierr = VecDestroyVecs(fcg->chunksizes[i],&fcg->pCvecs[i]);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree5(fcg->Pvecs,fcg->Cvecs,fcg->pPvecs,fcg->pCvecs,fcg->chunksizes);CHKERRQ(ierr);
  /* free space used for singular value calculations */
  if (ksp->calc_sings) {
    ierr = PetscFree4(fcg->e,fcg->d,fcg->ee,fcg->dd);CHKERRQ(ierr);
  }
  ierr = KSPDestroyDefault(ksp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Example #5
/*@C
  PetscDTGaussJacobiQuadrature - create Gauss-Jacobi quadrature for a simplex

  Not Collective

  Input Arguments:
+ dim   - The simplex dimension
. order - The number of points in one dimension
. a     - left end of interval (often -1)
- b     - right end of interval (often +1)

  Output Argument:
. q - A PetscQuadrature object

  Level: intermediate

  References:
  Karniadakis and Sherwin.
  FIAT

.seealso: PetscDTGaussTensorQuadrature(), PetscDTGaussQuadrature()
@*/
PetscErrorCode PetscDTGaussJacobiQuadrature(PetscInt dim, PetscInt order, PetscReal a, PetscReal b, PetscQuadrature *q)
{
  PetscInt       npoints = dim > 1 ? dim > 2 ? order*PetscSqr(order) : PetscSqr(order) : order;
  PetscReal     *px, *wx, *py, *wy, *pz, *wz, *x, *w;
  PetscInt       i, j, k;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if ((a != -1.0) || (b != 1.0)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must use default internal right now");
  ierr = PetscMalloc1(npoints*dim, &x);CHKERRQ(ierr);
  ierr = PetscMalloc1(npoints, &w);CHKERRQ(ierr);
  switch (dim) {
  case 0:
    ierr = PetscFree(x);CHKERRQ(ierr);
    ierr = PetscFree(w);CHKERRQ(ierr);
    ierr = PetscMalloc1(1, &x);CHKERRQ(ierr);
    ierr = PetscMalloc1(1, &w);CHKERRQ(ierr);
    x[0] = 0.0;
    w[0] = 1.0;
    break;
  case 1:
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 0.0, 0.0, x, w);CHKERRQ(ierr);
    break;
  case 2:
    ierr = PetscMalloc4(order,&px,order,&wx,order,&py,order,&wy);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 0.0, 0.0, px, wx);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 1.0, 0.0, py, wy);CHKERRQ(ierr);
    for (i = 0; i < order; ++i) {
      for (j = 0; j < order; ++j) {
        ierr = PetscDTMapSquareToTriangle_Internal(px[i], py[j], &x[(i*order+j)*2+0], &x[(i*order+j)*2+1]);CHKERRQ(ierr);
        w[i*order+j] = 0.5 * wx[i] * wy[j];
      }
    }
    ierr = PetscFree4(px,wx,py,wy);CHKERRQ(ierr);
    break;
  case 3:
    ierr = PetscMalloc6(order,&px,order,&wx,order,&py,order,&wy,order,&pz,order,&wz);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 0.0, 0.0, px, wx);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 1.0, 0.0, py, wy);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(order, 2.0, 0.0, pz, wz);CHKERRQ(ierr);
    for (i = 0; i < order; ++i) {
      for (j = 0; j < order; ++j) {
        for (k = 0; k < order; ++k) {
          ierr = PetscDTMapCubeToTetrahedron_Internal(px[i], py[j], pz[k], &x[((i*order+j)*order+k)*3+0], &x[((i*order+j)*order+k)*3+1], &x[((i*order+j)*order+k)*3+2]);CHKERRQ(ierr);
          w[(i*order+j)*order+k] = 0.125 * wx[i] * wy[j] * wz[k];
        }
      }
    }
    ierr = PetscFree6(px,wx,py,wy,pz,wz);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot construct quadrature rule for dimension %d", dim);
  }
  ierr = PetscQuadratureCreate(PETSC_COMM_SELF, q);CHKERRQ(ierr);
  ierr = PetscQuadratureSetData(*q, dim, npoints, x, w);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
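A hedged usage sketch for this PetscQuadrature-returning variant: extract the points and weights with PetscQuadratureGetData(), whose argument list here mirrors the PetscQuadratureSetData() call above (both predate the number-of-components argument added in later PETSc versions).

#include <petscdt.h>

static PetscErrorCode TriangleQuadratureSketch(void)
{
  PetscErrorCode  ierr;
  PetscQuadrature q;
  PetscInt        dim,npoints;
  const PetscReal *points,*weights;

  PetscFunctionBegin;
  ierr = PetscDTGaussJacobiQuadrature(2,3,-1.0,1.0,&q);CHKERRQ(ierr); /* 3x3 rule on the reference triangle */
  ierr = PetscQuadratureGetData(q,&dim,&npoints,&points,&weights);CHKERRQ(ierr);
  /* integrate f over the triangle as sum_i weights[i]*f(&points[i*dim]) */
  ierr = PetscQuadratureDestroy(&q);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}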
Example #6
PetscErrorCode NEPSolve_Interpol(NEP nep)
{
  PetscErrorCode ierr;
  NEP_INTERPOL   *ctx = (NEP_INTERPOL*)nep->data;
  Mat            *A;   /*T=nep->function,Tp=nep->jacobian;*/
  PetscScalar    *x,*fx,t;
  PetscReal      *cs,a,b,s;
  PetscInt       i,j,k,deg=ctx->deg;

  PetscFunctionBegin;
  ierr = PetscMalloc4(deg+1,&A,(deg+1)*(deg+1),&cs,deg+1,&x,(deg+1)*nep->nt,&fx);CHKERRQ(ierr);
  ierr = RGIntervalGetEndpoints(nep->rg,&a,&b,NULL,NULL);CHKERRQ(ierr);
  ierr = ChebyshevNodes(deg,a,b,x,cs);CHKERRQ(ierr);
  for (j=0;j<nep->nt;j++) {
    for (i=0;i<=deg;i++) {
      ierr = FNEvaluateFunction(nep->f[j],x[i],&fx[i+j*(deg+1)]);CHKERRQ(ierr);
    }
  }

  /* Polynomial coefficients */
  for (k=0;k<=deg;k++) {
    ierr = MatDuplicate(nep->A[0],MAT_COPY_VALUES,&A[k]);CHKERRQ(ierr);
    t = 0.0;
    for (i=0;i<deg+1;i++) t += fx[i]*cs[i*(deg+1)+k];
    t *= 2.0/(deg+1); 
    if (k==0) t /= 2.0;
    ierr = MatScale(A[k],t);CHKERRQ(ierr);
    for (j=1;j<nep->nt;j++) {
      t = 0.0;
      for (i=0;i<deg+1;i++) t += fx[i+j*(deg+1)]*cs[i*(deg+1)+k];
      t *= 2.0/(deg+1); 
      if (k==0) t /= 2.0;
      ierr = MatAXPY(A[k],t,nep->A[j],SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);
    }
  }

  ierr = PEPSetOperators(ctx->pep,deg+1,A);CHKERRQ(ierr);
  for (k=0;k<=deg;k++) {
    ierr = MatDestroy(&A[k]);CHKERRQ(ierr);
  }
  ierr = PetscFree4(A,cs,x,fx);CHKERRQ(ierr);

  /* Solve polynomial eigenproblem */
  ierr = PEPSolve(ctx->pep);CHKERRQ(ierr);
  ierr = PEPGetConverged(ctx->pep,&nep->nconv);CHKERRQ(ierr);
  ierr = PEPGetIterationNumber(ctx->pep,&nep->its);CHKERRQ(ierr);
  ierr = PEPGetConvergedReason(ctx->pep,(PEPConvergedReason*)&nep->reason);CHKERRQ(ierr);
  s = 2.0/(b-a);
  for (i=0;i<nep->nconv;i++) {
    ierr = PEPGetEigenpair(ctx->pep,i,&nep->eigr[i],&nep->eigi[i],NULL,NULL);CHKERRQ(ierr);
    nep->eigr[i] /= s;
    nep->eigr[i] += (a+b)/2.0;
    nep->eigi[i] /= s;
  }
  nep->state = NEP_STATE_EIGENVECTORS;
  PetscFunctionReturn(0);
}
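The coefficient loop above is a discrete Chebyshev transform: each coefficient accumulates 2/(deg+1) * sum_i f(x_i)*cos(k*theta_i), halved for k = 0. A scalar sketch of that step, assuming the same cs[] layout produced by the SLEPc-internal ChebyshevNodes() (cs[i*(deg+1)+k] = cos(k*theta_i)):

#include <petscsys.h>

/* k-th Chebyshev coefficient of f on [a,b] from its values fx[0..deg] at the Chebyshev nodes */
static PetscScalar ChebCoeffSketch(PetscInt k,PetscInt deg,const PetscScalar *fx,const PetscReal *cs)
{
  PetscScalar t = 0.0;
  PetscInt    i;

  for (i=0; i<deg+1; i++) t += fx[i]*cs[i*(deg+1)+k];
  t *= 2.0/(deg+1);
  if (k==0) t /= 2.0;   /* the constant mode is weighted by 1/(deg+1), not 2/(deg+1) */
  return t;
}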
Example #7
PetscErrorCode AODestroy_Mapping(AO ao)
{
  AO_Mapping     *aomap = (AO_Mapping*) ao->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree4(aomap->app,aomap->appPerm,aomap->petsc,aomap->petscPerm);CHKERRQ(ierr);
  ierr = PetscFree(aomap);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Example #8
PetscErrorCode MPIAIJ_MPIDenseDestroy(void *ctx)
{
  MPIAIJ_MPIDense *contents = (MPIAIJ_MPIDense*) ctx;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  if (contents->workB) {ierr = MatDestroy(contents->workB);CHKERRQ(ierr);}
  ierr = PetscFree4(contents->rvalues,contents->svalues,contents->rwaits,contents->swaits);CHKERRQ(ierr);
  ierr = PetscFree(contents);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Example #9
PetscErrorCode PetscFEGeomDestroy(PetscFEGeom **geom)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*geom) PetscFunctionReturn(0);
  ierr = PetscFree3((*geom)->v,(*geom)->J,(*geom)->detJ);CHKERRQ(ierr);
  ierr = PetscFree((*geom)->invJ);CHKERRQ(ierr);
  ierr = PetscFree4((*geom)->face,(*geom)->n,(*geom)->suppInvJ[0],(*geom)->suppInvJ[1]);CHKERRQ(ierr);
  ierr = PetscFree(*geom);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Example #10
PetscErrorCode MatDestroy_Essl(Mat A)
{
  PetscErrorCode ierr;
  Mat_Essl       *essl=(Mat_Essl*)A->data;

  PetscFunctionBegin;
  if (essl->CleanUpESSL) {
    ierr = PetscFree4(essl->a,essl->aux,essl->ia,essl->ja);CHKERRQ(ierr);
  }
  ierr = PetscFree(A->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Example #11
/*@C
  PetscDTGaussJacobiQuadrature - create Gauss-Jacobi quadrature for a simplex

  Not Collective

  Input Arguments:
+ dim - The simplex dimension
. npoints - number of points
. a - left end of interval (often -1)
- b - right end of interval (often +1)

  Output Arguments:
+ points - quadrature points
- weights - quadrature weights

  Level: intermediate

  References:
  Karniadakis and Sherwin.
  FIAT

.seealso: PetscDTGaussQuadrature()
@*/
PetscErrorCode PetscDTGaussJacobiQuadrature(PetscInt dim, PetscInt npoints, PetscReal a, PetscReal b, PetscReal *points[], PetscReal *weights[])
{
  PetscReal     *px, *wx, *py, *wy, *pz, *wz, *x, *w;
  PetscInt       i, j, k;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if ((a != -1.0) || (b != 1.0)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Must use default internal right now");
  switch (dim) {
  case 1:
    ierr = PetscMalloc(npoints * sizeof(PetscReal), &x);CHKERRQ(ierr);
    ierr = PetscMalloc(npoints * sizeof(PetscReal), &w);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 0.0, 0.0, x, w);CHKERRQ(ierr);
    break;
  case 2:
    ierr = PetscMalloc(npoints*npoints*2 * sizeof(PetscReal), &x);CHKERRQ(ierr);
    ierr = PetscMalloc(npoints*npoints   * sizeof(PetscReal), &w);CHKERRQ(ierr);
    ierr = PetscMalloc4(npoints,PetscReal,&px,npoints,PetscReal,&wx,npoints,PetscReal,&py,npoints,PetscReal,&wy);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 0.0, 0.0, px, wx);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 1.0, 0.0, py, wy);CHKERRQ(ierr);
    for (i = 0; i < npoints; ++i) {
      for (j = 0; j < npoints; ++j) {
        ierr = PetscDTMapSquareToTriangle_Internal(px[i], py[j], &x[(i*npoints+j)*2+0], &x[(i*npoints+j)*2+1]);CHKERRQ(ierr);
        w[i*npoints+j] = 0.5 * wx[i] * wy[j];
      }
    }
    ierr = PetscFree4(px,wx,py,wy);CHKERRQ(ierr);
    break;
  case 3:
    ierr = PetscMalloc(npoints*npoints*npoints*3 * sizeof(PetscReal), &x);CHKERRQ(ierr);
    ierr = PetscMalloc(npoints*npoints*npoints   * sizeof(PetscReal), &w);CHKERRQ(ierr);
    ierr = PetscMalloc6(npoints,PetscReal,&px,npoints,PetscReal,&wx,npoints,PetscReal,&py,npoints,PetscReal,&wy,npoints,PetscReal,&pz,npoints,PetscReal,&wz);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 0.0, 0.0, px, wx);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 1.0, 0.0, py, wy);CHKERRQ(ierr);
    ierr = PetscDTGaussJacobiQuadrature1D_Internal(npoints, 2.0, 0.0, pz, wz);CHKERRQ(ierr);
    for (i = 0; i < npoints; ++i) {
      for (j = 0; j < npoints; ++j) {
        for (k = 0; k < npoints; ++k) {
          ierr = PetscDTMapCubeToTetrahedron_Internal(px[i], py[j], pz[k], &x[((i*npoints+j)*npoints+k)*3+0], &x[((i*npoints+j)*npoints+k)*3+1], &x[((i*npoints+j)*npoints+k)*3+2]);CHKERRQ(ierr);
          w[(i*npoints+j)*npoints+k] = 0.125 * wx[i] * wy[j] * wz[k];
        }
      }
    }
    ierr = PetscFree6(px,wx,py,wy,pz,wz);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot construct quadrature rule for dimension %d", dim);
  }
  if (points)  *points  = x;
  if (weights) *weights = w;
  PetscFunctionReturn(0);
}
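Unlike Example #5, this older variant returns bare arrays rather than a PetscQuadrature, so the caller owns and frees them; a hedged usage sketch:

#include <petscdt.h>

static PetscErrorCode GaussJacobiArraysSketch(void)
{
  PetscErrorCode ierr;
  PetscReal      *points,*weights;

  PetscFunctionBegin;
  ierr = PetscDTGaussJacobiQuadrature(2,3,-1.0,1.0,&points,&weights);CHKERRQ(ierr);
  /* 3*3 = 9 points on the reference triangle, 2 coordinates each */
  ierr = PetscFree(points);CHKERRQ(ierr);
  ierr = PetscFree(weights);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}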
Example #12
/*@C
  PetscGatherMessageLengths2 - Computes info about messages that an MPI node will receive,
  including (from-id,length) pairs for each message. Same functionality as PetscGatherMessageLengths()
  except that it takes TWO input length arrays and produces TWO output length arrays.

  Collective on MPI_Comm

  Input Parameters:
+ comm      - Communicator
. nsends    - number of messages that are to be sent.
. nrecvs    - number of messages being received
- ilengths1, ilengths2 - arrays of integers of length equal to the communicator size;
              a nonzero ilengths1[i] represents a message to rank i of length ilengths1[i]

  Output Parameters:
+ onodes    - list of node-ids from which messages are expected
- olengths1, olengths2 - corresponding message lengths

  Level: developer

  Concepts: mpi utility

  Notes:
  With this info, the correct MPI_Irecv() can be posted with the correct
  from-id, with a buffer with the right amount of memory required.

  The calling function deallocates the memory in onodes, olengths1, and olengths2

  To determine nrecvs, one can use PetscGatherNumberOfMessages()

.seealso: PetscGatherMessageLengths() and PetscGatherNumberOfMessages()
@*/
PetscErrorCode  PetscGatherMessageLengths2(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[],PetscMPIInt **onodes,PetscMPIInt **olengths1,PetscMPIInt **olengths2)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,tag,i,j,*buf_s = NULL,*buf_r = NULL,*buf_j = NULL;
  MPI_Request    *s_waits  = NULL,*r_waits = NULL;
  MPI_Status     *w_status = NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);

  /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
  ierr = PetscMalloc4(nrecvs+nsends,&r_waits,2*nrecvs,&buf_r,2*nsends,&buf_s,nrecvs+nsends,&w_status);CHKERRQ(ierr);
  s_waits = r_waits + nrecvs;

  /* Post the Irecv to get the message length-info */
  ierr = PetscMalloc1(nrecvs+1,olengths1);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,olengths2);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    buf_j = buf_r + (2*i);
    ierr  = MPI_Irecv(buf_j,2,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRQ(ierr);
  }

  /* Post the Isends with the message length-info */
  for (i=0,j=0; i<size; ++i) {
    if (ilengths1[i]) {
      buf_j    = buf_s + (2*j);
      buf_j[0] = *(ilengths1+i);
      buf_j[1] = *(ilengths2+i);
      ierr = MPI_Isend(buf_j,2,MPI_INT,i,tag,comm,s_waits+j);CHKERRQ(ierr);
      j++;
    }
  }
  if (j != nsends) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"j %d not equal to expected number of sends %d\n",j,nsends);

  /* Post waits on sends and receives */
  if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRQ(ierr);}


  /* Pack up the received data */
  ierr = PetscMalloc1(nrecvs+1,onodes);CHKERRQ(ierr);
  for (i=0; i<nrecvs; ++i) {
    (*onodes)[i]    = w_status[i].MPI_SOURCE;
    buf_j           = buf_r + (2*i);
    (*olengths1)[i] = buf_j[0];
    (*olengths2)[i] = buf_j[1];
  }

  ierr = PetscFree4(r_waits,buf_r,buf_s,w_status);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
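A sketch of the calling pattern the notes describe, assuming ilen1/ilen2 are communicator-sized arrays already built by the caller: nrecvs comes from PetscGatherNumberOfMessages(), and all three output arrays are freed by the caller.

#include <petscsys.h>

static PetscErrorCode GatherLengthsSketch(MPI_Comm comm,PetscMPIInt nsends,const PetscMPIInt *ilen1,const PetscMPIInt *ilen2)
{
  PetscErrorCode ierr;
  PetscMPIInt    nrecvs,*onodes,*olen1,*olen2;

  PetscFunctionBegin;
  ierr = PetscGatherNumberOfMessages(comm,NULL,ilen1,&nrecvs);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,nsends,nrecvs,ilen1,ilen2,&onodes,&olen1,&olen2);CHKERRQ(ierr);
  /* post MPI_Irecv()s of sizes olen1[i] and olen2[i] from ranks onodes[i] here */
  ierr = PetscFree(onodes);CHKERRQ(ierr);
  ierr = PetscFree(olen1);CHKERRQ(ierr);
  ierr = PetscFree(olen2);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}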
Example #13
/*@C
   PetscDrawViewPortsDestroy - frees a PetscDrawViewPorts object

   Collective on PetscDraw inside PetscDrawViewPorts

   Input Parameter:
.  ports - the PetscDrawViewPorts object

   Level: advanced

.seealso: PetscDrawSplitViewPort(), PetscDrawSetViewPort(), PetscDrawViewPortsSet(), PetscDrawViewPortsCreate()

@*/
PetscErrorCode  PetscDrawViewPortsDestroy(PetscDrawViewPorts *ports)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!ports) PetscFunctionReturn(0);
  PetscValidPointer(ports,1);
  /* reset the draw viewport back to its previous value */
  ierr = PetscDrawSetViewPort(ports->draw,ports->port_xl,ports->port_yl,ports->port_xr,ports->port_yr);CHKERRQ(ierr);
  ierr = PetscDrawDestroy(&ports->draw);CHKERRQ(ierr);
  ierr = PetscFree4(ports->xl,ports->xr,ports->yl,ports->yr);CHKERRQ(ierr);
  ierr = PetscFree(ports);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
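A hedged create/use/destroy sketch for the viewports API referenced in the .seealso list:

#include <petscdraw.h>

static PetscErrorCode ViewPortsSketch(PetscDraw draw)
{
  PetscErrorCode     ierr;
  PetscDrawViewPorts *ports;

  PetscFunctionBegin;
  ierr = PetscDrawViewPortsCreate(draw,4,&ports);CHKERRQ(ierr);
  ierr = PetscDrawViewPortsSet(ports,2);CHKERRQ(ierr);  /* subsequent drawing lands in the third port */
  /* ... drawing calls ... */
  ierr = PetscDrawViewPortsDestroy(ports);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}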
Example #14
PetscErrorCode KSPDestroy_CG(KSP ksp)
{
  KSP_CG         *cg = (KSP_CG*)ksp->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* free space used for singular value calculations */
  if (ksp->calc_sings) {
    ierr = PetscFree4(cg->e,cg->d,cg->ee,cg->dd);CHKERRQ(ierr);
  }
  ierr = KSPDestroyDefault(ksp);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPCGSetType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPCGUseSingleReduction_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Example #15
static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
{
  PetscErrorCode ierr;
  PetscMatStashSpace space;
  PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
  PetscScalar **valptr;

  PetscFunctionBegin;
  ierr = PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);CHKERRQ(ierr);
  for (space=stash->space_head,cnt=0; space; space=space->next) {
    for (i=0; i<space->local_used; i++) {
      row[cnt] = space->idx[i];
      col[cnt] = space->idy[i];
      valptr[cnt] = &space->val[i*bs2];
      perm[cnt] = cnt;          /* Will tell us where to find valptr after sorting row[] and col[] */
      cnt++;
    }
  }
  if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
  ierr = PetscSortIntWithArrayPair(n,row,col,perm);CHKERRQ(ierr);
  /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
  for (rowstart=0,cnt=0,i=1; i<=n; i++) {
    if (i == n || row[i] != row[rowstart]) {         /* Sort the last row. */
      PetscInt colstart;
      ierr = PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);CHKERRQ(ierr);
      for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
        PetscInt j,l;
        MatStashBlock *block;
        ierr = PetscSegBufferGet(stash->segsendblocks,1,&block);CHKERRQ(ierr);
        block->row = row[rowstart];
        block->col = col[colstart];
        ierr = PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
        for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
          if (insertmode == ADD_VALUES) {
            for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
          } else {
            ierr = PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));CHKERRQ(ierr);
          }
        }
        colstart = j;
      }
      rowstart = i;
    }
  }
  ierr = PetscFree4(row,col,valptr,perm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
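A tiny sketch of the key-with-payload sort used above (made-up data): PetscSortIntWithArrayPair() sorts row[] and carries col[] and perm[] along with it, which is exactly how valptr[] is recovered through perm[] after sorting.

#include <petscsys.h>

static PetscErrorCode SortPairSketch(void)
{
  PetscErrorCode ierr;
  PetscInt       row[4]  = {2,0,3,1};
  PetscInt       col[4]  = {5,7,3,9};
  PetscInt       perm[4] = {0,1,2,3};

  PetscFunctionBegin;
  ierr = PetscSortIntWithArrayPair(4,row,col,perm);CHKERRQ(ierr);
  /* now row = {0,1,2,3}, col = {7,9,5,3}, perm = {1,3,0,2} */
  PetscFunctionReturn(0);
}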
Example #16
/*@
   KSPBCGSLSetXRes - Sets the parameter governing when
   exact residuals will be used instead of computed residuals.

   Logically Collective on KSP

   Input Parameters:
+  ksp - iterative context obtained from KSPCreate
-  delta - computed residuals are used alone when delta is not positive

   Options Database Keys:

.  -ksp_bcgsl_xres delta

   Level: intermediate

.keywords: KSP, BiCGStab(L), set, exact residuals

.seealso: KSPBCGSLSetEll(), KSPBCGSLSetPol()
@*/
PetscErrorCode  KSPBCGSLSetXRes(KSP ksp, PetscReal delta)
{
  KSP_BCGSL      *bcgsl = (KSP_BCGSL*)ksp->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidLogicalCollectiveReal(ksp,delta,2);
  if (ksp->setupstage) {
    if ((delta<=0 && bcgsl->delta>0) || (delta>0 && bcgsl->delta<=0)) {
      ierr            = VecDestroyVecs(ksp->nwork,&ksp->work);CHKERRQ(ierr);
      ierr            = PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);CHKERRQ(ierr);
      ierr            = PetscFree4(bcgsl->work,bcgsl->s,bcgsl->u,bcgsl->v);CHKERRQ(ierr);
      ksp->setupstage = KSP_SETUP_NEW;
    }
  }
  bcgsl->delta = delta;
  PetscFunctionReturn(0);
}
Example #17
/*@
   NEPAllocateSolution - Allocate memory storage for common variables such
   as eigenvalues and eigenvectors.

   Collective on NEP

   Input Parameters:
+  nep   - eigensolver context
-  extra - number of additional positions, used for methods that require a
           working basis slightly larger than ncv

   Developers Note:
   This is PETSC_EXTERN because it may be required by user plugin NEP
   implementations.

   Level: developer
@*/
PetscErrorCode NEPAllocateSolution(NEP nep,PetscInt extra)
{
  PetscErrorCode ierr;
  PetscInt       oldsize,newc,requested;
  PetscLogDouble cnt;
  Mat            T;
  Vec            t;

  PetscFunctionBegin;
  requested = nep->ncv + extra;

  /* oldsize is zero if this is the first time setup is called */
  ierr = BVGetSizes(nep->V,NULL,NULL,&oldsize);CHKERRQ(ierr);
  newc = PetscMax(0,requested-oldsize);

  /* allocate space for eigenvalues and friends */
  if (requested != oldsize) {
    if (oldsize) {
      ierr = PetscFree4(nep->eigr,nep->eigi,nep->errest,nep->perm);CHKERRQ(ierr);
    }
    ierr = PetscMalloc4(requested,&nep->eigr,requested,&nep->eigi,requested,&nep->errest,requested,&nep->perm);CHKERRQ(ierr);
    cnt = newc*sizeof(PetscScalar) + newc*sizeof(PetscReal) + newc*sizeof(PetscInt);
    ierr = PetscLogObjectMemory((PetscObject)nep,cnt);CHKERRQ(ierr);
  }

  /* allocate V */
  if (!nep->V) { ierr = NEPGetBV(nep,&nep->V);CHKERRQ(ierr); }
  if (!oldsize) {
    if (!((PetscObject)(nep->V))->type_name) {
      ierr = BVSetType(nep->V,BVSVEC);CHKERRQ(ierr);
    }
    if (nep->split) T = nep->A[0];
    else {
      ierr = NEPGetFunction(nep,&T,NULL,NULL,NULL);CHKERRQ(ierr);
    }
    ierr = MatGetVecs(T,&t,NULL);CHKERRQ(ierr);
    ierr = BVSetSizesFromVec(nep->V,t,requested);CHKERRQ(ierr);
    ierr = VecDestroy(&t);CHKERRQ(ierr);
  } else {
    ierr = BVResize(nep->V,requested,PETSC_FALSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
Example #18
/*@C
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  sf->mine   = NULL;
  ierr       = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  sf->remote = NULL;
  ierr       = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  ierr       = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
  ierr       = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  ierr         = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  sf->graphset = PETSC_FALSE;
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
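A hedged sketch of the reuse pattern the docstring describes: reset the star forest, then give it a new graph of a different size (nroots/nleaves/iremote are caller-supplied).

#include <petscsf.h>

static PetscErrorCode ReuseSFSketch(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscSFNode *iremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSFReset(sf);CHKERRQ(ierr);   /* drop the old graph, ranks, and groups */
  ierr = PetscSFSetGraph(sf,nroots,nleaves,NULL,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}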
Example #19
static PetscErrorCode SNESReset_QN(SNES snes)
{
  PetscErrorCode ierr;
  SNES_QN        *qn;

  PetscFunctionBegin;
  if (snes->data) {
    qn = (SNES_QN*)snes->data;
    if (qn->U) {
      ierr = VecDestroyVecs(qn->m, &qn->U);CHKERRQ(ierr);
    }
    if (qn->V) {
      ierr = VecDestroyVecs(qn->m, &qn->V);CHKERRQ(ierr);
    }
    if (qn->singlereduction) {
      ierr = PetscFree3(qn->dXdFmat, qn->dFtdX, qn->YtdX);CHKERRQ(ierr);
    }
    ierr = PetscFree4(qn->alpha,qn->beta,qn->dXtdF,qn->lambda);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
Example #20
/*@
   KSPBCGSLSetEll - Sets the number of search directions in BiCGStab(L).

   Logically Collective on KSP

   Input Parameters:
+  ksp - iterative context obtained from KSPCreate
-  ell - number of search directions

   Options Database Keys:

.  -ksp_bcgsl_ell ell

   Level: intermediate

   Notes:
   For large ell it is common for the polynomial update problem to become singular (due to happy breakdown for smallish
   test problems, but also for larger problems). Consequently, by default, the system is solved by pseudoinverse, which
   allows the iteration to complete successfully. See KSPBCGSLSetUsePseudoinverse() to switch to a conventional solve.

.keywords: KSP, BiCGStab(L), set, exact residuals,

.seealso: KSPBCGSLSetUsePseudoinverse()
@*/
PetscErrorCode  KSPBCGSLSetEll(KSP ksp, PetscInt ell)
{
  KSP_BCGSL      *bcgsl = (KSP_BCGSL*)ksp->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (ell < 1) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_OUTOFRANGE, "KSPBCGSLSetEll: second argument must be positive");
  PetscValidLogicalCollectiveInt(ksp,ell,2);

  if (!ksp->setupstage) bcgsl->ell = ell;
  else if (bcgsl->ell != ell) {
    /* free the data structures, then create them again */
    ierr = VecDestroyVecs(ksp->nwork,&ksp->work);CHKERRQ(ierr);
    ierr = PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);CHKERRQ(ierr);
    ierr = PetscFree4(bcgsl->work,bcgsl->s,bcgsl->u,bcgsl->v);CHKERRQ(ierr);

    bcgsl->ell      = ell;
    ksp->setupstage = KSP_SETUP_NEW;
  }
  PetscFunctionReturn(0);
}
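A hedged programmatic-use sketch; the call is the API form of the -ksp_bcgsl_ell option and may be made before or after KSPSetUp() (the code above rebuilds the work structures in the latter case).

#include <petscksp.h>

static PetscErrorCode BCGSLSketch(MPI_Comm comm)
{
  PetscErrorCode ierr;
  KSP            ksp;

  PetscFunctionBegin;
  ierr = KSPCreate(comm,&ksp);CHKERRQ(ierr);
  ierr = KSPSetType(ksp,KSPBCGSL);CHKERRQ(ierr);
  ierr = KSPBCGSLSetEll(ksp,4);CHKERRQ(ierr);  /* equivalent to -ksp_bcgsl_ell 4 */
  ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}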
Example #21
/*@
   KSPBCGSLSetPol - Sets the type of polynomial part that will
   be used in the BiCGStab(L) solver.

   Logically Collective on KSP

   Input Parameters:
+  ksp - iterative context obtained from KSPCreate
-  uMROR - set to PETSC_TRUE when the polynomial is a convex combination of an MR and an OR step.

   Options Database Keys:

+  -ksp_bcgsl_cxpoly - use enhanced polynomial
-  -ksp_bcgsl_mrpoly - use standard polynomial

   Level: intermediate

.keywords: KSP, BiCGStab(L), set, polynomial

.seealso: KSPBCGSLSetEll(), KSPBCGSLSetXRes()
@*/
PetscErrorCode  KSPBCGSLSetPol(KSP ksp, PetscBool uMROR)
{
  KSP_BCGSL      *bcgsl = (KSP_BCGSL*)ksp->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidLogicalCollectiveBool(ksp,uMROR,2);

  if (!ksp->setupstage) bcgsl->bConvex = uMROR;
  else if (bcgsl->bConvex != uMROR) {
    /* free the data structures,
       then create them again
     */
    ierr = VecDestroyVecs(ksp->nwork,&ksp->work);CHKERRQ(ierr);
    ierr = PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);CHKERRQ(ierr);
    ierr = PetscFree4(bcgsl->work,bcgsl->s,bcgsl->u,bcgsl->v);CHKERRQ(ierr);

    bcgsl->bConvex  = uMROR;
    ksp->setupstage = KSP_SETUP_NEW;
  }
  PetscFunctionReturn(0);
}
Example #22
/*
    MatGetOrdering_ND - Find the nested dissection ordering of a given matrix.
*/
PETSC_INTERN PetscErrorCode MatGetOrdering_ND(Mat mat,MatOrderingType type,IS *row,IS *col)
{
  PetscErrorCode ierr;
  PetscInt       i, *mask,*xls,*ls,nrow,*perm;
  const PetscInt *ia,*ja;
  PetscBool      done;

  PetscFunctionBegin;
  ierr = MatGetRowIJ(mat,1,PETSC_TRUE,PETSC_TRUE,&nrow,&ia,&ja,&done);CHKERRQ(ierr);
  if (!done) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot get rows for matrix type %s",((PetscObject)mat)->type_name);

  ierr = PetscMalloc4(nrow,&mask,nrow,&perm,nrow+1,&xls,nrow,&ls);CHKERRQ(ierr);
  SPARSEPACKgennd(&nrow,ia,ja,mask,perm,xls,ls);
  ierr = MatRestoreRowIJ(mat,1,PETSC_TRUE,PETSC_TRUE,NULL,&ia,&ja,&done);CHKERRQ(ierr);

  /* shift because Sparsepack indices start at one */
  for (i=0; i<nrow; i++) perm[i]--;

  ierr = ISCreateGeneral(PETSC_COMM_SELF,nrow,perm,PETSC_COPY_VALUES,row);CHKERRQ(ierr);
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nrow,perm,PETSC_COPY_VALUES,col);CHKERRQ(ierr);
  ierr = PetscFree4(mask,perm,xls,ls);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
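Users reach this routine through MatGetOrdering() with the MATORDERINGND type; a hedged sketch:

#include <petscmat.h>

static PetscErrorCode NDOrderingSketch(Mat A)
{
  PetscErrorCode ierr;
  IS             rperm,cperm;

  PetscFunctionBegin;
  ierr = MatGetOrdering(A,MATORDERINGND,&rperm,&cperm);CHKERRQ(ierr);
  /* use rperm/cperm, e.g. to reduce fill in MatLUFactor() */
  ierr = ISDestroy(&rperm);CHKERRQ(ierr);
  ierr = ISDestroy(&cperm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}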
Example #23
/*@
   EPSReset - Resets the EPS context to the initial state and removes any
   allocated objects.

   Collective on EPS

   Input Parameter:
.  eps - eigensolver context obtained from EPSCreate()

   Level: advanced

.seealso: EPSDestroy()
@*/
PetscErrorCode EPSReset(EPS eps)
{
  PetscErrorCode ierr;
  PetscInt       ncols;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(eps,EPS_CLASSID,1);
  if (eps->ops->reset) {
    ierr = (eps->ops->reset)(eps);CHKERRQ(ierr);
  }
  if (eps->st) { ierr = STReset(eps->st);CHKERRQ(ierr); }
  if (eps->ds) { ierr = DSReset(eps->ds);CHKERRQ(ierr); }
  ierr = VecDestroy(&eps->D);CHKERRQ(ierr);
  ierr = BVGetSizes(eps->V,NULL,NULL,&ncols);CHKERRQ(ierr);
  if (ncols) {
    ierr = PetscFree4(eps->eigr,eps->eigi,eps->errest,eps->perm);CHKERRQ(ierr);
    ierr = PetscFree2(eps->rr,eps->ri);CHKERRQ(ierr);
  }
  ierr = BVDestroy(&eps->V);CHKERRQ(ierr);
  ierr = VecDestroyVecs(eps->nwork,&eps->work);CHKERRQ(ierr);
  eps->nwork = 0;
  eps->state = EPS_STATE_INITIAL;
  PetscFunctionReturn(0);
}
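A hedged reuse sketch: after EPSReset() the same EPS can be pointed at a new problem without destroying the solver object (Anew is a caller-supplied matrix).

#include <slepceps.h>

static PetscErrorCode ReuseEPSSketch(EPS eps,Mat Anew)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = EPSReset(eps);CHKERRQ(ierr);                  /* free solver internals */
  ierr = EPSSetOperators(eps,Anew,NULL);CHKERRQ(ierr); /* standard problem: B = NULL */
  ierr = EPSSolve(eps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}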
Example #24
/*
  FormFunctionLocal - Form the local residual F from the local input X

  Input Parameters:
+ dm - The mesh
. X  - Local input vector
- user - The user context

  Output Parameter:
. F  - Local output vector

  Note:
  We form the residual one batch of elements at a time. This allows us to offload work onto an accelerator,
  like a GPU, or vectorize on a multicore machine.

.seealso: FormJacobianLocal()
*/
PetscErrorCode FormFunctionLocal(DM dm, Vec X, Vec F, AppCtx *user)
{
  const PetscInt   debug     = user->debug;
  const PetscInt   dim       = user->dim;
  const PetscInt   numFields = user->numFields; /* assumed field count stored in the user context */
  PetscReal       *coords, *v0, *J, *invJ, *detJ;
  PetscScalar     *elemVec, *u;
  PetscInt         numCells;                    /* set below, once cStart/cEnd are known */
  PetscInt         cellDof  = 0;
  PetscInt         maxQuad  = 0;
  PetscInt         jacSize  = 1;
  PetscInt         cStart, cEnd, c, d, field;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(user->residualEvent,0,0,0,0);CHKERRQ(ierr);
  ierr = VecSet(F, 0.0);CHKERRQ(ierr);
  ierr = DMComplexGetHeightStratum(dm, 0, &cStart, &cEnd);CHKERRQ(ierr); /* height 0 = cells */
  numCells = cEnd - cStart;
  for(field = 0; field < numFields; ++field) {
    PetscInt dof = 1;
    for(d = 0; d < dim; ++d) {dof *= user->q[field].numBasisFuncs*user->q[field].numComponents;}
    cellDof += dof;
    maxQuad  = PetscMax(maxQuad, user->q[field].numQuadPoints);
  }
  for(d = 0; d < dim; ++d) {jacSize *= maxQuad;}
  ierr = PetscMalloc3(dim,PetscReal,&coords,dim,PetscReal,&v0,jacSize,PetscReal,&J);CHKERRQ(ierr);
  ierr = PetscMalloc4(numCells*cellDof,PetscScalar,&u,numCells*jacSize,PetscReal,&invJ,numCells,PetscReal,&detJ,numCells*cellDof,PetscScalar,&elemVec);CHKERRQ(ierr);
  for(c = cStart; c < cEnd; ++c) {
    const PetscScalar *x;
    PetscInt           i;

    ierr = DMComplexComputeCellGeometry(dm, c, v0, J, &invJ[c*jacSize], &detJ[c]);CHKERRQ(ierr);
    if (detJ[c] <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ[c], c);
    ierr = DMComplexVecGetClosure(dm, PETSC_NULL, X, c, &x);CHKERRQ(ierr);

    for(i = 0; i < cellDof; ++i) {
      u[c*cellDof+i] = x[i];
    }
  }
  for(field = 0; field < numFields; ++field) {
    const PetscInt numQuadPoints = user->q[field].numQuadPoints;
    const PetscInt numBasisFuncs = user->q[field].numBasisFuncs;
    void (*f0)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f0[]) = user->f0Funcs[field];
    void (*f1)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f1[]) = user->f1Funcs[field];
    /* Conforming batches */
    PetscInt blockSize  = numBasisFuncs*numQuadPoints;
    PetscInt numBlocks  = 1;
    PetscInt batchSize  = numBlocks * blockSize;
    PetscInt numBatches = user->numBatches;
    PetscInt numChunks  = numCells / (numBatches*batchSize);
    ierr = IntegrateResidualBatchCPU(numChunks*numBatches*batchSize, numFields, field, u, invJ, detJ, user->q, f0, f1, elemVec, user);CHKERRQ(ierr);
    /* Remainder */
    PetscInt numRemainder = numCells % (numBatches * batchSize);
    PetscInt offset       = numCells - numRemainder;
    ierr = IntegrateResidualBatchCPU(numRemainder, numFields, field, &u[offset*cellDof], &invJ[offset*dim*dim], &detJ[offset],
                                     user->q, f0, f1, &elemVec[offset*cellDof], user);CHKERRQ(ierr);
  }
  for(c = cStart; c < cEnd; ++c) {
    if (debug) {ierr = DMPrintCellVector(c, "Residual", cellDof, &elemVec[c*cellDof]);CHKERRQ(ierr);}
    ierr = DMComplexVecSetClosure(dm, PETSC_NULL, F, c, &elemVec[c*cellDof], ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscFree4(u,invJ,detJ,elemVec);CHKERRQ(ierr);
  ierr = PetscFree3(coords,v0,J);CHKERRQ(ierr);
  if (user->showResidual) {
    PetscInt p;

    ierr = PetscPrintf(PETSC_COMM_WORLD, "Residual:\n");CHKERRQ(ierr);
    for(p = 0; p < user->numProcs; ++p) {
      if (p == user->rank) {
        Vec f;

        ierr = VecDuplicate(F, &f);CHKERRQ(ierr);
        ierr = VecCopy(F, f);CHKERRQ(ierr);
        ierr = VecChop(f, 1.0e-10);CHKERRQ(ierr);
        ierr = VecView(f, PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
        ierr = VecDestroy(&f);CHKERRQ(ierr);
      }
      ierr = PetscBarrier((PetscObject) dm);CHKERRQ(ierr);
    }
  }
  ierr = PetscLogEventEnd(user->residualEvent,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Example #25
static PetscErrorCode GreedyColoringLocalDistanceTwo_Private(MatColoring mc,PetscReal *wts,PetscInt *lperm,ISColoringValue *colors)
{
  MC_Greedy       *gr = (MC_Greedy *) mc->data;
  PetscInt        i,j,k,l,s,e,n,nd,nd_global,n_global,idx,ncols,maxcolors,mcol,mcol_global,nd1cols,*mask,masksize,*d1cols,*bad,*badnext,nbad,badsize,ccol,no,cbad;
  Mat             m = mc->mat, mt;
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)m->data;
  Mat             md=NULL,mo=NULL;
  const PetscInt  *md_i,*mo_i,*md_j,*mo_j;
  const PetscInt  *rmd_i,*rmo_i,*rmd_j,*rmo_j;
  PetscBool       isMPIAIJ,isSEQAIJ;
  PetscInt        pcol,*dcolors,*ocolors;
  ISColoringValue *badidx;
  const PetscInt  *cidx;
  PetscReal       *owts,*colorweights;
  PetscInt        *oconf,*conf;
  PetscSF         sf;
  PetscLayout     layout;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = MatGetSize(m,&n_global,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(m,&s,&e);CHKERRQ(ierr);
  n=e-s;
  nd_global = 0;
  /* get the matrix communication structures */
  ierr = PetscObjectTypeCompare((PetscObject)m, MATMPIAIJ, &isMPIAIJ); CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)m, MATSEQAIJ, &isSEQAIJ); CHKERRQ(ierr);
  if (isMPIAIJ) {
    Mat_SeqAIJ *dseq;
    Mat_SeqAIJ *oseq;
    md=aij->A;
    dseq = (Mat_SeqAIJ*)md->data;
    mo=aij->B;
    oseq = (Mat_SeqAIJ*)mo->data;
    md_i = dseq->i;
    md_j = dseq->j;
    mo_i = oseq->i;
    mo_j = oseq->j;
    rmd_i = dseq->i;
    rmd_j = dseq->j;
    rmo_i = oseq->i;
    rmo_j = oseq->j;
  } else if (isSEQAIJ) {
    Mat_SeqAIJ *dseq;
    /* no off-processor nodes */
    md=m;
    dseq = (Mat_SeqAIJ*)md->data;
    md_i = dseq->i;
    md_j = dseq->j;
    mo_i = NULL;
    mo_j = NULL;
    rmd_i = dseq->i;
    rmd_j = dseq->j;
    rmo_i = NULL;
    rmo_j = NULL;
  } else SETERRQ(PetscObjectComm((PetscObject)mc),PETSC_ERR_ARG_WRONG,"Matrix must be AIJ for greedy coloring");
  if (!gr->symmetric) {
    ierr = MatTranspose(m, MAT_INITIAL_MATRIX, &mt);CHKERRQ(ierr);
    if (isSEQAIJ) {
      Mat_SeqAIJ *dseq = (Mat_SeqAIJ*) mt->data;
      rmd_i = dseq->i;
      rmd_j = dseq->j;
      rmo_i = NULL;
      rmo_j = NULL;
    } else SETERRQ(PetscObjectComm((PetscObject) mc), PETSC_ERR_SUP, "Nonsymmetric greedy coloring only works in serial");
  }
  /* create the vectors and communication structures if necessary */
  no=0;
  if (mo) {
    ierr = VecGetLocalSize(aij->lvec,&no);CHKERRQ(ierr);
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)m),&sf);CHKERRQ(ierr);
    ierr = MatGetLayouts(m,&layout,NULL);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,layout,no,NULL,PETSC_COPY_VALUES,aij->garray);CHKERRQ(ierr);
  }
  ierr = MatColoringGetMaxColors(mc,&maxcolors);CHKERRQ(ierr);
  masksize=n;
  nbad=0;
  badsize=n;
  ierr = PetscMalloc1(masksize,&mask);CHKERRQ(ierr);
  ierr = PetscMalloc4(n,&d1cols,n,&dcolors,n,&conf,n,&bad);CHKERRQ(ierr);
  ierr = PetscMalloc2(badsize,&badidx,badsize,&badnext);CHKERRQ(ierr);
  for(i=0;i<masksize;i++) {
    mask[i]=-1;
  }
  for (i=0;i<n;i++) {
    dcolors[i]=maxcolors;
    bad[i]=-1;
  }
  for (i=0;i<badsize;i++) {
    badnext[i]=-1;
  }
  if (mo) {
    ierr = PetscMalloc3(no,&owts,no,&oconf,no,&ocolors);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(sf,MPIU_REAL,wts,owts);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(sf,MPIU_REAL,wts,owts);CHKERRQ(ierr);
    for (i=0;i<no;i++) {
      ocolors[i]=maxcolors;
    }
  } else {                      /* Appease overzealous -Wmaybe-initialized */
    owts = NULL;
    oconf = NULL;
    ocolors = NULL;
  }
  mcol=0;
  while (nd_global < n_global) {
    nd=n;
    /* assign lowest possible color to each local vertex */
    mcol_global=0;
    ierr = PetscLogEventBegin(MATCOLORING_Local,mc,0,0,0);CHKERRQ(ierr);
    for (i=0;i<n;i++) {
      idx=lperm[i];
      if (dcolors[idx] == maxcolors) {
        /* entries in bad */
        cbad=bad[idx];
        while (cbad>=0) {
          ccol=badidx[cbad];
          if (ccol>=masksize) {
            PetscInt *newmask;
            ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr);
            for(k=0;k<2*masksize;k++) {
              newmask[k]=-1;
            }
            for(k=0;k<masksize;k++) {
              newmask[k]=mask[k];
            }
            ierr = PetscFree(mask);CHKERRQ(ierr);
            mask=newmask;
            masksize*=2;
          }
          mask[ccol]=idx;
          cbad=badnext[cbad];
        }
        /* diagonal distance-one rows */
        nd1cols=0;
        ncols = rmd_i[idx+1]-rmd_i[idx];
        cidx = &(rmd_j[rmd_i[idx]]);
        for (j=0;j<ncols;j++) {
          d1cols[nd1cols] = cidx[j];
          nd1cols++;
          ccol=dcolors[cidx[j]];
          if (ccol != maxcolors) {
            if (ccol>=masksize) {
              PetscInt *newmask;
              ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr);
              for(k=0;k<2*masksize;k++) {
                newmask[k]=-1;
              }
              for(k=0;k<masksize;k++) {
                newmask[k]=mask[k];
              }
              ierr = PetscFree(mask);CHKERRQ(ierr);
              mask=newmask;
              masksize*=2;
            }
            mask[ccol]=idx;
          }
        }
        /* off-diagonal distance-one rows */
        if (mo) {
          ncols = rmo_i[idx+1]-rmo_i[idx];
          cidx = &(rmo_j[rmo_i[idx]]);
          for (j=0;j<ncols;j++) {
            ccol=ocolors[cidx[j]];
            if (ccol != maxcolors) {
              if (ccol>=masksize) {
                PetscInt *newmask;
                ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr);
                for(k=0;k<2*masksize;k++) {
                  newmask[k]=-1;
                }
                for(k=0;k<masksize;k++) {
                  newmask[k]=mask[k];
                }
                ierr = PetscFree(mask);CHKERRQ(ierr);
                mask=newmask;
                masksize*=2;
              }
              mask[ccol]=idx;
            }
          }
        }
        /* diagonal distance-two rows */
        for (j=0;j<nd1cols;j++) {
          ncols = md_i[d1cols[j]+1]-md_i[d1cols[j]];
          cidx = &(md_j[md_i[d1cols[j]]]);
          for (l=0;l<ncols;l++) {
            ccol=dcolors[cidx[l]];
            if (ccol != maxcolors) {
              if (ccol>=masksize) {
                PetscInt *newmask;
                ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr);
                for(k=0;k<2*masksize;k++) {
                  newmask[k]=-1;
                }
                for(k=0;k<masksize;k++) {
                  newmask[k]=mask[k];
                }
                ierr = PetscFree(mask);CHKERRQ(ierr);
                mask=newmask;
                masksize*=2;
              }
              mask[ccol]=idx;
            }
          }
        }
        /* off-diagonal distance-two rows */
        if (mo) {
          for (j=0;j<nd1cols;j++) {
            ncols = mo_i[d1cols[j]+1]-mo_i[d1cols[j]];
            cidx = &(mo_j[mo_i[d1cols[j]]]);
            for (l=0;l<ncols;l++) {
              ccol=ocolors[cidx[l]];
              if (ccol != maxcolors) {
                if (ccol>=masksize) {
                  PetscInt *newmask;
                  ierr = PetscMalloc1(masksize*2,&newmask);CHKERRQ(ierr);
                  for(k=0;k<2*masksize;k++) {
                    newmask[k]=-1;
                  }
                  for(k=0;k<masksize;k++) {
                    newmask[k]=mask[k];
                  }
                  ierr = PetscFree(mask);CHKERRQ(ierr);
                  mask=newmask;
                  masksize*=2;
                }
                mask[ccol]=idx;
              }
            }
          }
        }
        /* assign this one the lowest color possible by seeing if there's a gap in the sequence of sorted neighbor colors */
        for (j=0;j<masksize;j++) {
          if (mask[j]!=idx) {
            break;
          }
        }
        pcol=j;
        if (pcol>maxcolors) pcol=maxcolors;
        dcolors[idx]=pcol;
        if (pcol>mcol) mcol=pcol;
      }
    }
    ierr = PetscLogEventEnd(MATCOLORING_Local,mc,0,0,0);CHKERRQ(ierr);
    if (mo) {
      /* transfer neighbor colors */
      ierr = PetscSFBcastBegin(sf,MPIU_INT,dcolors,ocolors);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf,MPIU_INT,dcolors,ocolors);CHKERRQ(ierr);
      /* find the maximum color assigned locally and allocate a mask */
      ierr = MPIU_Allreduce(&mcol,&mcol_global,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)mc));CHKERRQ(ierr);
      ierr = PetscMalloc1(mcol_global+1,&colorweights);CHKERRQ(ierr);
      /* check for conflicts */
      for (i=0;i<n;i++) {
        conf[i]=PETSC_FALSE;
      }
      for (i=0;i<no;i++) {
        oconf[i]=PETSC_FALSE;
      }
      for (i=0;i<n;i++) {
        ncols = mo_i[i+1]-mo_i[i];
        cidx = &(mo_j[mo_i[i]]);
        if (ncols > 0) {
          /* fill in the mask */
          for (j=0;j<mcol_global+1;j++) {
            colorweights[j]=0;
          }
          colorweights[dcolors[i]]=wts[i];
          /* fill in the off-diagonal part of the mask */
          for (j=0;j<ncols;j++) {
            ccol=ocolors[cidx[j]];
            if (ccol < maxcolors) {
              if (colorweights[ccol] < owts[cidx[j]]) {
                colorweights[ccol] = owts[cidx[j]];
              }
            }
          }
          /* fill in the on-diagonal part of the mask */
          ncols = md_i[i+1]-md_i[i];
          cidx = &(md_j[md_i[i]]);
          for (j=0;j<ncols;j++) {
            ccol=dcolors[cidx[j]];
            if (ccol < maxcolors) {
              if (colorweights[ccol] < wts[cidx[j]]) {
                colorweights[ccol] = wts[cidx[j]];
              }
            }
          }
          /* go back through and set up on and off-diagonal conflict vectors */
          ncols = md_i[i+1]-md_i[i];
          cidx = &(md_j[md_i[i]]);
          for (j=0;j<ncols;j++) {
            ccol=dcolors[cidx[j]];
            if (ccol < maxcolors) {
              if (colorweights[ccol] > wts[cidx[j]]) {
                conf[cidx[j]]=PETSC_TRUE;
              }
            }
          }
          ncols = mo_i[i+1]-mo_i[i];
          cidx = &(mo_j[mo_i[i]]);
          for (j=0;j<ncols;j++) {
            ccol=ocolors[cidx[j]];
            if (ccol < maxcolors) {
              if (colorweights[ccol] > owts[cidx[j]]) {
                oconf[cidx[j]]=PETSC_TRUE;
              }
            }
          }
        }
      }
      nd_global=0;
      ierr = PetscFree(colorweights);CHKERRQ(ierr);
      ierr = PetscLogEventBegin(MATCOLORING_Comm,mc,0,0,0);CHKERRQ(ierr);
      ierr = PetscSFReduceBegin(sf,MPIU_INT,oconf,conf,MPIU_SUM);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf,MPIU_INT,oconf,conf,MPIU_SUM);CHKERRQ(ierr);
      ierr = PetscLogEventEnd(MATCOLORING_Comm,mc,0,0,0);CHKERRQ(ierr);
      /* go through and unset local colors that have conflicts */
      for (i=0;i<n;i++) {
        if (conf[i]>0) {
          /* push this color onto the bad stack */
          badidx[nbad]=dcolors[i];
          badnext[nbad]=bad[i];
          bad[i]=nbad;
          nbad++;
          if (nbad>=badsize) {
            PetscInt *newbadnext;
            ISColoringValue *newbadidx;
            ierr = PetscMalloc2(badsize*2,&newbadidx,badsize*2,&newbadnext);CHKERRQ(ierr);
            for(k=0;k<2*badsize;k++) {
              newbadnext[k]=-1;
            }
            for(k=0;k<badsize;k++) {
              newbadidx[k]=badidx[k];
              newbadnext[k]=badnext[k];
            }
            ierr = PetscFree2(badidx,badnext);CHKERRQ(ierr);
            badidx=newbadidx;
            badnext=newbadnext;
            badsize*=2;
          }
          dcolors[i] = maxcolors;
          nd--;
        }
      }
    }
    ierr = MPIU_Allreduce(&nd,&nd_global,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mc));CHKERRQ(ierr);
  }
  if (mo) {
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
    ierr = PetscFree3(owts,oconf,ocolors);CHKERRQ(ierr);
  }
  for (i=0;i<n;i++) {
    colors[i]=dcolors[i];
  }
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = PetscFree4(d1cols,dcolors,conf,bad);CHKERRQ(ierr);
  ierr = PetscFree2(badidx,badnext);CHKERRQ(ierr);
  if (!gr->symmetric) {ierr = MatDestroy(&mt);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
Example #26
static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat C,PetscInt is_max,IS is[])
{
  Mat_MPISBAIJ  *c = (Mat_MPISBAIJ*)C->data;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,tag1,tag2,*len_s,nrqr,nrqs,*id_r1,*len_r1,flag,len;
  const PetscInt *idx_i;
  PetscInt       idx,isz,col,*n,*data1,**data1_start,*data2,*data2_i,*data,*data_i,
                 Mbs,i,j,k,*odata1,*odata2,
                 proc_id,**odata2_ptr,*ctable=0,*btable,len_max,len_est;
  PetscInt       proc_end=0,*iwork,len_unused,nodata2;
  PetscInt       ois_max; /* max number of is[] over all processors */
  char           *t_p;
  MPI_Comm       comm;
  MPI_Request    *s_waits1,*s_waits2,r_req;
  MPI_Status     *s_status,r_status;
  PetscBT        *table;  /* mark indices of this processor's is[] */
  PetscBT        table_i;
  PetscBT        otable; /* mark indices of other processors' is[] */ 
  PetscInt       bs=C->rmap->bs,Bn = c->B->cmap->n,Bnbs = Bn/bs,*Bowners;  
  IS             garray_local,garray_gl;

  PetscFunctionBegin;
  comm = ((PetscObject)C)->comm;
  size = c->size;
  rank = c->rank;
  Mbs  = c->Mbs;

  ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr);

  /* create tables used in
     step 1: table[i] - mark c->garray of proc [i]
     step 3: table[i] - mark indices of is[i] when whose=MINE
             table[0] - mark indices of is[] when whose=OTHER */
  len = PetscMax(is_max, size);
  ierr = PetscMalloc2(len,PetscBT,&table,(Mbs/PETSC_BITS_PER_BYTE+1)*len,char,&t_p);CHKERRQ(ierr);
  for (i=0; i<len; i++) {
    table[i]  = t_p  + (Mbs/PETSC_BITS_PER_BYTE+1)*i; 
  }

  ierr = MPI_Allreduce(&is_max,&ois_max,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
  
  /* 1. Send this processor's is[] to other processors */
  /*---------------------------------------------------*/
  /* allocate spaces */
  ierr = PetscMalloc(is_max*sizeof(PetscInt),&n);CHKERRQ(ierr);
  len = 0;
  for (i=0; i<is_max; i++) {
    ierr = ISGetLocalSize(is[i],&n[i]);CHKERRQ(ierr);
    len += n[i]; 
  }
  if (!len) { 
    is_max = 0;
  } else {
    len += 1 + is_max; /* max length of data1 for one processor */
  }

 
  ierr = PetscMalloc((size*len+1)*sizeof(PetscInt),&data1);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscInt*),&data1_start);CHKERRQ(ierr);
  for (i=0; i<size; i++) data1_start[i] = data1 + i*len;

  ierr = PetscMalloc4(size,PetscInt,&len_s,size,PetscInt,&btable,size,PetscInt,&iwork,size+1,PetscInt,&Bowners);CHKERRQ(ierr);

  /* gather c->garray from all processors */
  ierr = ISCreateGeneral(comm,Bnbs,c->garray,&garray_local);CHKERRQ(ierr);
  ierr = ISAllGather(garray_local, &garray_gl);CHKERRQ(ierr);
  ierr = ISDestroy(garray_local);CHKERRQ(ierr);
  ierr = MPI_Allgather(&Bnbs,1,MPIU_INT,Bowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  Bowners[0] = 0;
  for (i=0; i<size; i++) Bowners[i+1] += Bowners[i];
  
  if (is_max){ 
    /* hash table ctable which maps c->row to proc_id */
    ierr = PetscMalloc(Mbs*sizeof(PetscInt),&ctable);CHKERRQ(ierr);
    for (proc_id=0,j=0; proc_id<size; proc_id++) {
      for (; j<C->rmap->range[proc_id+1]/bs; j++) {
        ctable[j] = proc_id;
      }
    }

    /* hash tables marking c->garray */
    ierr = ISGetIndices(garray_gl,&idx_i);CHKERRQ(ierr);
    for (i=0; i<size; i++){
      table_i = table[i]; 
      ierr    = PetscBTMemzero(Mbs,table_i);CHKERRQ(ierr);
      for (j = Bowners[i]; j<Bowners[i+1]; j++){ /* go through B cols of proc[i]*/
        ierr = PetscBTSet(table_i,idx_i[j]);CHKERRQ(ierr);
      }
    }
    ierr = ISRestoreIndices(garray_gl,&idx_i);CHKERRQ(ierr);
  }  /* if (is_max) */
  ierr = ISDestroy(garray_gl);CHKERRQ(ierr); 

  /* evaluate communication - mesg to who, length, and buffer space */
  for (i=0; i<size; i++) len_s[i] = 0;
  
  /* header of data1 */
  for (proc_id=0; proc_id<size; proc_id++){
    iwork[proc_id] = 0;
    *data1_start[proc_id] = is_max; 
    data1_start[proc_id]++;
    for (j=0; j<is_max; j++) { 
      if (proc_id == rank){
        *data1_start[proc_id] = n[j]; 
      } else {
        *data1_start[proc_id] = 0;  
      }
      data1_start[proc_id]++;
    }
  }
  
  for (i=0; i<is_max; i++) { 
    ierr = ISGetIndices(is[i],&idx_i);CHKERRQ(ierr); 
    for (j=0; j<n[i]; j++){
      idx = idx_i[j];
      *data1_start[rank] = idx; data1_start[rank]++; /* for local processing */
      proc_end = ctable[idx];
      for (proc_id=0;  proc_id<=proc_end; proc_id++){ /* for others to process */
        if (proc_id == rank ) continue; /* done before this loop */
        if (proc_id < proc_end && !PetscBTLookup(table[proc_id],idx)) 
          continue;   /* no need for sending idx to [proc_id] */
        *data1_start[proc_id] = idx; data1_start[proc_id]++;
        len_s[proc_id]++;
      }
    } 
    /* update header data */
    for (proc_id=0; proc_id<size; proc_id++){ 
      if (proc_id== rank) continue;
      *(data1 + proc_id*len + 1 + i) = len_s[proc_id] - iwork[proc_id];
      iwork[proc_id] = len_s[proc_id] ;
    } 
    ierr = ISRestoreIndices(is[i],&idx_i);CHKERRQ(ierr);
  } 

  nrqs = 0; nrqr = 0;
  for (i=0; i<size; i++){
    data1_start[i] = data1 + i*len;
    if (len_s[i]){
      nrqs++;
      len_s[i] += 1 + is_max; /* add no. of header msg */
    }
  }

  for (i=0; i<is_max; i++) { 
    ierr = ISDestroy(is[i]);CHKERRQ(ierr); 
  }
  ierr = PetscFree(n);CHKERRQ(ierr);
  ierr = PetscFree(ctable);CHKERRQ(ierr);

  /* Determine the number of messages to expect, their lengths, and their from-ids */
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&nrqr);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths(comm,nrqs,nrqr,len_s,&id_r1,&len_r1);CHKERRQ(ierr); 
  
  /*  Now  post the sends */
  ierr = PetscMalloc2(size,MPI_Request,&s_waits1,size,MPI_Request,&s_waits2);CHKERRQ(ierr);
  k = 0;
  for (proc_id=0; proc_id<size; proc_id++){  /* send data1 to processor [proc_id] */
    if (len_s[proc_id]){
      ierr = MPI_Isend(data1_start[proc_id],len_s[proc_id],MPIU_INT,proc_id,tag1,comm,s_waits1+k);CHKERRQ(ierr);
      k++;
    }
  }

  /* 2. Receive other's is[] and process. Then send back */
  /*-----------------------------------------------------*/
  len = 0;
  for (i=0; i<nrqr; i++){
    if (len_r1[i] > len)len = len_r1[i];
  }
  ierr = PetscFree(len_r1);CHKERRQ(ierr);
  ierr = PetscFree(id_r1);CHKERRQ(ierr);

  for (proc_id=0; proc_id<size; proc_id++)
    len_s[proc_id] = iwork[proc_id] = 0;
  
  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&odata1);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscInt**),&odata2_ptr);CHKERRQ(ierr); 
  ierr = PetscBTCreate(Mbs,otable);CHKERRQ(ierr);

  len_max = ois_max*(Mbs+1);  /* max space storing all is[] for each receive */
  len_est = 2*len_max; /* estimated space of storing is[] for all receiving messages */
  ierr = PetscMalloc((len_est+1)*sizeof(PetscInt),&odata2);CHKERRQ(ierr);
  nodata2 = 0;       /* nodata2+1 = number of allocated blocks stored in odata2_ptr[] */
  odata2_ptr[nodata2] = odata2;
  len_unused = len_est; /* unused space in the array odata2_ptr[nodata2]-- needs to be >= len_max  */
  
  k = 0;
  while (k < nrqr){
    /* Receive messages */
    ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag1,comm,&flag,&r_status);CHKERRQ(ierr);
    if (flag){ 
      ierr = MPI_Get_count(&r_status,MPIU_INT,&len);CHKERRQ(ierr); 
      proc_id = r_status.MPI_SOURCE;
      ierr = MPI_Irecv(odata1,len,MPIU_INT,proc_id,r_status.MPI_TAG,comm,&r_req);CHKERRQ(ierr);
      ierr = MPI_Wait(&r_req,&r_status);CHKERRQ(ierr);

      /*  Process messages */
      /*  make sure there is enough unused space in odata2 array */
      if (len_unused < len_max){ /* allocate more space for odata2 */
        ierr = PetscMalloc((len_est+1)*sizeof(PetscInt),&odata2);CHKERRQ(ierr);
        odata2_ptr[++nodata2] = odata2;
        len_unused = len_est;
      }

      ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,odata1,OTHER,odata2,&otable);CHKERRQ(ierr);
      len = 1 + odata2[0];
      for (i=0; i<odata2[0]; i++){
        len += odata2[1 + i];
      }

      /* Send messages back */
      ierr = MPI_Isend(odata2,len,MPIU_INT,proc_id,tag2,comm,s_waits2+k);CHKERRQ(ierr);
      k++;
      odata2     += len;
      len_unused -= len;
      len_s[proc_id] = len; /* length of the message sent back to [proc_id] by this proc */
    } 
  } 
  ierr = PetscFree(odata1);CHKERRQ(ierr); 
  ierr = PetscBTDestroy(otable);CHKERRQ(ierr); 

  /* 3. Do local work on this processor's is[] */
  /*-------------------------------------------*/
  /* make sure there is enough unused space in odata2(=data) array */
  len_max = is_max*(Mbs+1); /* max space storing all is[] for this processor */
  if (len_unused < len_max){ /* allocate more space for odata2 */
    ierr = PetscMalloc((len_est+1)*sizeof(PetscInt),&odata2);CHKERRQ(ierr);
    odata2_ptr[++nodata2] = odata2;
    len_unused = len_est;
  }

  data = odata2;
  ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,data1_start[rank],MINE,data,table);CHKERRQ(ierr);
  ierr = PetscFree(data1_start);CHKERRQ(ierr);

  /* 4. Receive work done on other processors, then merge */
  /*------------------------------------------------------*/
  /* get max message length that this processor expects to receive */
  ierr = MPI_Allreduce(len_s,iwork,size,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
  ierr = PetscMalloc((iwork[rank]+1)*sizeof(PetscInt),&data2);CHKERRQ(ierr);
  ierr = PetscFree4(len_s,btable,iwork,Bowners);CHKERRQ(ierr);

  k = 0;
  while (k < nrqs){
    /* Receive messages */
    ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag2,comm,&flag,&r_status);CHKERRQ(ierr);
    if (flag){
      ierr = MPI_Get_count(&r_status,MPIU_INT,&len);CHKERRQ(ierr);
      proc_id = r_status.MPI_SOURCE;
      ierr = MPI_Irecv(data2,len,MPIU_INT,proc_id,r_status.MPI_TAG,comm,&r_req);CHKERRQ(ierr);
      ierr = MPI_Wait(&r_req,&r_status);CHKERRQ(ierr);
      if (len > 1+is_max){ /* Add data2 into data */
        data2_i = data2 + 1 + is_max;
        for (i=0; i<is_max; i++){
          table_i = table[i];
          data_i  = data + 1 + is_max + Mbs*i;
          isz     = data[1+i]; 
          for (j=0; j<data2[1+i]; j++){
            col = data2_i[j];
            if (!PetscBTLookupSet(table_i,col)) {data_i[isz++] = col;}
          }
          data[1+i] = isz;
          if (i < is_max - 1) data2_i += data2[1+i]; 
        } 
      } 
      k++;
    } 
  } 
  ierr = PetscFree(data2);CHKERRQ(ierr);
  ierr = PetscFree2(table,t_p);CHKERRQ(ierr);

  /* phase 1 sends are complete */
  ierr = PetscMalloc(size*sizeof(MPI_Status),&s_status);CHKERRQ(ierr);
  if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status);CHKERRQ(ierr);}
  ierr = PetscFree(data1);CHKERRQ(ierr); 
       
  /* phase 2 sends are complete */
  if (nrqr){ierr = MPI_Waitall(nrqr,s_waits2,s_status);CHKERRQ(ierr);}
  ierr = PetscFree2(s_waits1,s_waits2);CHKERRQ(ierr);
  ierr = PetscFree(s_status);CHKERRQ(ierr); 

  /* 5. Create new is[] */
  /*--------------------*/ 
  for (i=0; i<is_max; i++) {
    data_i = data + 1 + is_max + Mbs*i;
    ierr = ISCreateGeneral(PETSC_COMM_SELF,data[1+i],data_i,is+i);CHKERRQ(ierr);
  }
  for (k=0; k<=nodata2; k++){
    ierr = PetscFree(odata2_ptr[k]);CHKERRQ(ierr); 
  }
  ierr = PetscFree(odata2_ptr);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
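
A minimal usage sketch (not part of the scraped example): the routine above is the SBAIJ-specific worker, and user code reaches it through the public MatIncreaseOverlap() interface. The matrix A and the seed rows below are illustrative, and the sketch follows the same older PETSc calling convention as the example (4-argument ISCreateGeneral, ISDestroy without address-of).

PetscErrorCode ierr;
IS             is[1];
PetscInt       rows[2] = {0,1};

/* one index set holding the seed rows (values are illustrative) */
ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is[0]);CHKERRQ(ierr);
/* grow the index set by one level of overlap; is[0] is replaced in place */
ierr = MatIncreaseOverlap(A,1,is,1);CHKERRQ(ierr);
ierr = ISDestroy(is[0]);CHKERRQ(ierr);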
Example #27
/*
  PEPBuildDiagonalScaling - compute two diagonal matrices to be applied for balancing 
  in polynomial eigenproblems.
*/
PetscErrorCode PEPBuildDiagonalScaling(PEP pep)
{
  PetscErrorCode ierr;
  PetscInt       it,i,j,k,nmat,nr,e,nz,lst,lend,nc=0,*cols,emax,emin,emaxl,eminl;
  const PetscInt *cidx,*ridx;
  Mat            M,*T,A;
  PetscMPIInt    n;
  PetscBool      cont=PETSC_TRUE,flg=PETSC_FALSE;
  PetscScalar    *array,*Dr,*Dl,t;
  PetscReal      l2,d,*rsum,*aux,*csum,w=1.0;
  MatStructure   str;
  MatInfo        info;

  PetscFunctionBegin;
  l2 = 2*PetscLogReal(2.0);
  nmat = pep->nmat;
  ierr = PetscMPIIntCast(pep->n,&n);CHKERRQ(ierr);
  ierr = STGetMatStructure(pep->st,&str);CHKERRQ(ierr);
  ierr = PetscMalloc1(nmat,&T);CHKERRQ(ierr);
  for (k=0;k<nmat;k++) {
    ierr = STGetTOperators(pep->st,k,&T[k]);CHKERRQ(ierr);
  }
  /* Form local auxiliary matrix M */
  ierr = PetscObjectTypeCompareAny((PetscObject)T[0],&cont,MATMPIAIJ,MATSEQAIJ,"");CHKERRQ(ierr);
  if (!cont) SETERRQ(PetscObjectComm((PetscObject)T[0]),PETSC_ERR_SUP,"Only for MPIAIJ or SEQAIJ matrix types");
  ierr = PetscObjectTypeCompare((PetscObject)T[0],MATMPIAIJ,&cont);CHKERRQ(ierr);
  if (cont) {
    ierr = MatMPIAIJGetLocalMat(T[0],MAT_INITIAL_MATRIX,&M);CHKERRQ(ierr);
    flg = PETSC_TRUE; 
  } else {
    ierr = MatDuplicate(T[0],MAT_COPY_VALUES,&M);CHKERRQ(ierr);
  }
  ierr = MatGetInfo(M,MAT_LOCAL,&info);CHKERRQ(ierr);
  nz = info.nz_used;
  ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr);
  for (i=0;i<nz;i++) {
    t = PetscAbsScalar(array[i]);
    array[i] = t*t;
  }
  ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr);
  for (k=1;k<nmat;k++) {
    if (flg) {
      ierr = MatMPIAIJGetLocalMat(T[k],MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
    } else {
      if (str==SAME_NONZERO_PATTERN) {
        ierr = MatCopy(T[k],A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
      } else {
        ierr = MatDuplicate(T[k],MAT_COPY_VALUES,&A);CHKERRQ(ierr);
      }
    }
    ierr = MatGetInfo(A,MAT_LOCAL,&info);CHKERRQ(ierr);
    nz = info.nz_used;
    ierr = MatSeqAIJGetArray(A,&array);CHKERRQ(ierr);
    for (i=0;i<nz;i++) {
      t = PetscAbsScalar(array[i]);
      array[i] = t*t;
    }
    ierr = MatSeqAIJRestoreArray(A,&array);CHKERRQ(ierr);
    w *= pep->slambda*pep->slambda*pep->sfactor;
    ierr = MatAXPY(M,w,A,str);CHKERRQ(ierr);
    if (flg || str!=SAME_NONZERO_PATTERN || k==nmat-2) {
      ierr = MatDestroy(&A);CHKERRQ(ierr);
    } 
  }
  ierr = MatGetRowIJ(M,0,PETSC_FALSE,PETSC_FALSE,&nr,&ridx,&cidx,&cont);CHKERRQ(ierr);
  if (!cont) SETERRQ(PetscObjectComm((PetscObject)T[0]), PETSC_ERR_SUP,"It is not possible to compute scaling diagonals for these PEP matrices");
  ierr = MatGetInfo(M,MAT_LOCAL,&info);CHKERRQ(ierr);
  nz = info.nz_used;
  ierr = VecGetOwnershipRange(pep->Dl,&lst,&lend);CHKERRQ(ierr);
  ierr = PetscMalloc4(nr,&rsum,pep->n,&csum,pep->n,&aux,PetscMin(pep->n-lend+lst,nz),&cols);CHKERRQ(ierr);
  ierr = VecSet(pep->Dr,1.0);CHKERRQ(ierr);
  ierr = VecSet(pep->Dl,1.0);CHKERRQ(ierr);
  ierr = VecGetArray(pep->Dl,&Dl);CHKERRQ(ierr);
  ierr = VecGetArray(pep->Dr,&Dr);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr);
  ierr = PetscMemzero(aux,pep->n*sizeof(PetscReal));CHKERRQ(ierr);
  for (j=0;j<nz;j++) {
    /* Search non-zero columns outside [lst,lend) */
    if (aux[cidx[j]]==0 && (cidx[j]<lst || lend<=cidx[j])) cols[nc++] = cidx[j];
    /* Local column sums */
    aux[cidx[j]] += PetscAbsScalar(array[j]);
  }
  for (it=0;it<pep->sits && cont;it++) {
    emaxl = 0; eminl = 0;
    /* Column sum  */    
    if (it>0) { /* it=0 has already been done */
      ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr);
      ierr = PetscMemzero(aux,pep->n*sizeof(PetscReal));CHKERRQ(ierr);
      for (j=0;j<nz;j++) aux[cidx[j]] += PetscAbsScalar(array[j]);
      ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); 
    }
    ierr = MPI_Allreduce(aux,csum,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)pep->Dr));CHKERRQ(ierr);
    /* Update Dr */
    for (j=lst;j<lend;j++) {
      d = PetscLogReal(csum[j])/l2;
      e = -(PetscInt)((d < 0)?(d-0.5):(d+0.5));
      d = PetscPowReal(2.0,e);
      Dr[j-lst] *= d;
      aux[j] = d*d;
      emaxl = PetscMax(emaxl,e);
      eminl = PetscMin(eminl,e);
    }
    for (j=0;j<nc;j++) {
      d = PetscLogReal(csum[cols[j]])/l2;
      e = -(PetscInt)((d < 0)?(d-0.5):(d+0.5));
      d = PetscPowReal(2.0,e);
      aux[cols[j]] = d*d;
      emaxl = PetscMax(emaxl,e);
      eminl = PetscMin(eminl,e);
    }
    /* Scale M */
    ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr);
    for (j=0;j<nz;j++) {
      array[j] *= aux[cidx[j]];
    }
    ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr);
    /* Row sum */    
    ierr = PetscMemzero(rsum,nr*sizeof(PetscReal));CHKERRQ(ierr);
    ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr);
    for (i=0;i<nr;i++) {
      for (j=ridx[i];j<ridx[i+1];j++) rsum[i] += PetscAbsScalar(array[j]);
      /* Update Dl */
      d = PetscLogReal(rsum[i])/l2;
      e = -(PetscInt)((d < 0)?(d-0.5):(d+0.5));
      d = PetscPowReal(2.0,e);
      Dl[i] *= d;
      /* Scale M */
      for (j=ridx[i];j<ridx[i+1];j++) array[j] *= d*d;
      emaxl = PetscMax(emaxl,e);
      eminl = PetscMin(eminl,e);      
    }
    ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr);  
    /* Compute global max and min */
    ierr = MPI_Allreduce(&emaxl,&emax,1,MPIU_INT,MPIU_MAX,PetscObjectComm((PetscObject)pep->Dl));CHKERRQ(ierr);
    ierr = MPI_Allreduce(&eminl,&emin,1,MPIU_INT,MPIU_MIN,PetscObjectComm((PetscObject)pep->Dl));CHKERRQ(ierr);
    if (emax<=emin+2) cont = PETSC_FALSE;
  }
  ierr = VecRestoreArray(pep->Dr,&Dr);CHKERRQ(ierr);
  ierr = VecRestoreArray(pep->Dl,&Dl);CHKERRQ(ierr);
  /* Free memory */
  ierr = MatDestroy(&M);CHKERRQ(ierr);
  ierr = PetscFree4(rsum,csum,aux,cols);CHKERRQ(ierr);
  ierr = PetscFree(T);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
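
PEPBuildDiagonalScaling() is an internal helper that fills pep->Dl and pep->Dr; applications normally request diagonal balancing through the public interface instead. A minimal sketch, assuming pep is a PEP object whose matrices are already set (the iteration count 5 and the scaling factors 1.0 below are illustrative values, not ones taken from the example):

/* ask for two-sided diagonal balancing, then solve */
ierr = PEPSetScale(pep,PEP_SCALE_DIAGONAL,1.0,NULL,NULL,5,1.0);CHKERRQ(ierr);
ierr = PEPSolve(pep);CHKERRQ(ierr);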
Example #28
PetscErrorCode EPSSolve_Lanczos(EPS eps)
{
  EPS_LANCZOS    *lanczos = (EPS_LANCZOS*)eps->data;
  PetscErrorCode ierr;
  PetscInt       nconv,i,j,k,l,x,n,*perm,restart,ncv=eps->ncv,r,ld;
  Vec            vi,vj,w;
  Mat            U;
  PetscScalar    *Y,*ritz,stmp;
  PetscReal      *d,*e,*bnd,anorm,beta,norm,rtmp,resnorm;
  PetscBool      breakdown;
  char           *conv,ctmp;

  PetscFunctionBegin;
  ierr = DSGetLeadingDimension(eps->ds,&ld);CHKERRQ(ierr);
  ierr = PetscMalloc4(ncv,&ritz,ncv,&bnd,ncv,&perm,ncv,&conv);CHKERRQ(ierr);

  /* The first Lanczos vector is the normalized initial vector */
  ierr = EPSGetStartVector(eps,0,NULL);CHKERRQ(ierr);

  anorm = -1.0;
  nconv = 0;

  /* Restart loop */
  while (eps->reason == EPS_CONVERGED_ITERATING) {
    eps->its++;

    /* Compute an ncv-step Lanczos factorization */
    n = PetscMin(nconv+eps->mpd,ncv);
    ierr = DSGetArrayReal(eps->ds,DS_MAT_T,&d);CHKERRQ(ierr);
    e = d + ld;
    ierr = EPSBasicLanczos(eps,d,e,nconv,&n,&breakdown,anorm);CHKERRQ(ierr);
    beta = e[n-1];
    ierr = DSRestoreArrayReal(eps->ds,DS_MAT_T,&d);CHKERRQ(ierr);
    ierr = DSSetDimensions(eps->ds,n,0,nconv,0);CHKERRQ(ierr);
    ierr = DSSetState(eps->ds,DS_STATE_INTERMEDIATE);CHKERRQ(ierr);
    ierr = BVSetActiveColumns(eps->V,nconv,n);CHKERRQ(ierr);

    /* Solve projected problem */
    ierr = DSSolve(eps->ds,ritz,NULL);CHKERRQ(ierr);
    ierr = DSSort(eps->ds,ritz,NULL,NULL,NULL,NULL);CHKERRQ(ierr);

    /* Estimate ||A|| */
    for (i=nconv;i<n;i++)
      anorm = PetscMax(anorm,PetscAbsReal(PetscRealPart(ritz[i])));

    /* Compute residual norm estimates as beta*abs(Y(m,:)) + eps*||A|| */
    ierr = DSGetArray(eps->ds,DS_MAT_Q,&Y);CHKERRQ(ierr);
    for (i=nconv;i<n;i++) {
      resnorm = beta*PetscAbsScalar(Y[n-1+i*ld]) + PETSC_MACHINE_EPSILON*anorm;
      ierr = (*eps->converged)(eps,ritz[i],eps->eigi[i],resnorm,&bnd[i],eps->convergedctx);CHKERRQ(ierr);
      if (bnd[i]<eps->tol) conv[i] = 'C';
      else conv[i] = 'N';
    }
    ierr = DSRestoreArray(eps->ds,DS_MAT_Q,&Y);CHKERRQ(ierr);

    /* Purge repeated Ritz values */
    if (lanczos->reorthog == EPS_LANCZOS_REORTHOG_LOCAL) {
      for (i=nconv+1;i<n;i++) {
        if (conv[i] == 'C' && PetscAbsScalar((ritz[i]-ritz[i-1])/ritz[i]) < eps->tol) conv[i] = 'R';
      }
    }

    /* Compute restart vector */
    if (breakdown) {
      ierr = PetscInfo2(eps,"Breakdown in Lanczos method (it=%D norm=%g)\n",eps->its,(double)beta);CHKERRQ(ierr);
    } else {
      restart = nconv;
      while (restart<n && conv[restart] != 'N') restart++;
      if (restart >= n) {
        breakdown = PETSC_TRUE;
      } else {
        for (i=restart+1;i<n;i++) {
          if (conv[i] == 'N') {
            ierr = SlepcSCCompare(eps->sc,ritz[restart],0.0,ritz[i],0.0,&r);CHKERRQ(ierr);
            if (r>0) restart = i;
          }
        }
        ierr = DSGetArray(eps->ds,DS_MAT_Q,&Y);CHKERRQ(ierr);
        ierr = BVMultColumn(eps->V,1.0,0.0,n,Y+restart*ld+nconv);CHKERRQ(ierr);
        ierr = DSRestoreArray(eps->ds,DS_MAT_Q,&Y);CHKERRQ(ierr);
      }
    }

    /* Count and put converged eigenvalues first */
    for (i=nconv;i<n;i++) perm[i] = i;
    for (k=nconv;k<n;k++) {
      if (conv[perm[k]] != 'C') {
        j = k + 1;
        while (j<n && conv[perm[j]] != 'C') j++;
        if (j>=n) break;
        l = perm[k]; perm[k] = perm[j]; perm[j] = l;
      }
    }

    /* Sort eigenvectors according to permutation */
    ierr = DSGetArray(eps->ds,DS_MAT_Q,&Y);CHKERRQ(ierr);
    for (i=nconv;i<k;i++) {
      x = perm[i];
      if (x != i) {
        j = i + 1;
        while (perm[j] != i) j++;
        /* swap eigenvalues i and j */
        stmp = ritz[x]; ritz[x] = ritz[i]; ritz[i] = stmp;
        rtmp = bnd[x]; bnd[x] = bnd[i]; bnd[i] = rtmp;
        ctmp = conv[x]; conv[x] = conv[i]; conv[i] = ctmp;
        perm[j] = x; perm[i] = i;
        /* swap eigenvectors i and j */
        for (l=0;l<n;l++) {
          stmp = Y[l+x*ld]; Y[l+x*ld] = Y[l+i*ld]; Y[l+i*ld] = stmp;
        }
      }
    }
    ierr = DSRestoreArray(eps->ds,DS_MAT_Q,&Y);CHKERRQ(ierr);

    /* compute converged eigenvectors */
    ierr = DSGetMat(eps->ds,DS_MAT_Q,&U);CHKERRQ(ierr);
    ierr = BVMultInPlace(eps->V,U,nconv,k);CHKERRQ(ierr);
    ierr = MatDestroy(&U);CHKERRQ(ierr);

    /* Purge spurious Ritz values */
    if (lanczos->reorthog == EPS_LANCZOS_REORTHOG_LOCAL) {
      for (i=nconv;i<k;i++) {
        ierr = BVGetColumn(eps->V,i,&vi);CHKERRQ(ierr);
        ierr = VecNorm(vi,NORM_2,&norm);CHKERRQ(ierr);
        ierr = VecScale(vi,1.0/norm);CHKERRQ(ierr);
        w = eps->work[0];
        ierr = STApply(eps->st,vi,w);CHKERRQ(ierr);
        ierr = VecAXPY(w,-ritz[i],vi);CHKERRQ(ierr);
        ierr = BVRestoreColumn(eps->V,i,&vi);CHKERRQ(ierr);
        ierr = VecNorm(w,NORM_2,&norm);CHKERRQ(ierr);
        ierr = (*eps->converged)(eps,ritz[i],eps->eigi[i],norm,&bnd[i],eps->convergedctx);CHKERRQ(ierr);
        if (bnd[i]>=eps->tol) conv[i] = 'S';
      }
      for (i=nconv;i<k;i++) {
        if (conv[i] != 'C') {
          j = i + 1;
          while (j<k && conv[j] != 'C') j++;
          if (j>=k) break;
          /* swap eigenvalues i and j */
          stmp = ritz[j]; ritz[j] = ritz[i]; ritz[i] = stmp;
          rtmp = bnd[j]; bnd[j] = bnd[i]; bnd[i] = rtmp;
          ctmp = conv[j]; conv[j] = conv[i]; conv[i] = ctmp;
          /* swap eigenvectors i and j */
          ierr = BVGetColumn(eps->V,i,&vi);CHKERRQ(ierr);
          ierr = BVGetColumn(eps->V,j,&vj);CHKERRQ(ierr);
          ierr = VecSwap(vi,vj);CHKERRQ(ierr);
          ierr = BVRestoreColumn(eps->V,i,&vi);CHKERRQ(ierr);
          ierr = BVRestoreColumn(eps->V,j,&vj);CHKERRQ(ierr);
        }
      }
      k = i;
    }

    /* Store Ritz values and estimated errors */
    for (i=nconv;i<n;i++) {
      eps->eigr[i] = ritz[i];
      eps->errest[i] = bnd[i];
    }
    ierr = EPSMonitor(eps,eps->its,nconv,eps->eigr,eps->eigi,eps->errest,n);CHKERRQ(ierr);
    nconv = k;
    if (eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
    if (nconv >= eps->nev) eps->reason = EPS_CONVERGED_TOL;

    if (eps->reason == EPS_CONVERGED_ITERATING) { /* copy restart vector */
      ierr = BVCopyColumn(eps->V,n,nconv);CHKERRQ(ierr);
      if (lanczos->reorthog == EPS_LANCZOS_REORTHOG_LOCAL && !breakdown) {
        /* Reorthonormalize restart vector */
        ierr = BVOrthogonalizeColumn(eps->V,nconv,NULL,&norm,&breakdown);CHKERRQ(ierr);
        ierr = BVScaleColumn(eps->V,nconv,1.0/norm);CHKERRQ(ierr);
      }
      if (breakdown) {
        /* Use random vector for restarting */
        ierr = PetscInfo(eps,"Using random vector for restart\n");CHKERRQ(ierr);
        ierr = EPSGetStartVector(eps,nconv,&breakdown);CHKERRQ(ierr);
      }
      if (breakdown) { /* give up */
        eps->reason = EPS_DIVERGED_BREAKDOWN;
        ierr = PetscInfo(eps,"Unable to generate more start vectors\n");CHKERRQ(ierr);
      }
    }
  }
  eps->nconv = nconv;

  ierr = PetscFree4(ritz,bnd,perm,conv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
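
EPSSolve_Lanczos() is dispatched by the generic EPSSolve(); a caller only selects the Lanczos type. A minimal driver sketch, assuming a Hermitian operator A has already been assembled:

EPS eps;

ierr = EPSCreate(PETSC_COMM_WORLD,&eps);CHKERRQ(ierr);
ierr = EPSSetOperators(eps,A,NULL);CHKERRQ(ierr);   /* standard problem A*x = lambda*x */
ierr = EPSSetProblemType(eps,EPS_HEP);CHKERRQ(ierr);
ierr = EPSSetType(eps,EPSLANCZOS);CHKERRQ(ierr);    /* routes EPSSolve() to EPSSolve_Lanczos() */
ierr = EPSSolve(eps);CHKERRQ(ierr);
ierr = EPSDestroy(&eps);CHKERRQ(ierr);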
Example #29
int main(int argc, char *args[])
{
  PFLOTRANMesh    data;
  Mat             Adj;       /* The adjacency matrix of the mesh */
  PetscInt        bs = 3;
  PetscScalar     values[9],*cc;
  PetscMPIInt     size;
  PetscInt        i;
  PetscErrorCode  ierr;
  PetscViewer     binaryviewer;
  Vec             cellCenters;
  PetscViewer    hdf5viewer;
  hid_t          file_id, dataset_id, dataspace_id;
  herr_t         status;
  
  PetscFunctionBegin;
  ierr = PetscInitialize(&argc, &args, (char *) 0, help);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
  if (size > 1) SETERRQ(PETSC_ERR_SUP,"This preprocessor runs only on one process");

  /* Open Glenn's file */
  ierr = PetscViewerCreate(PETSC_COMM_SELF, &hdf5viewer);CHKERRQ(ierr);
  ierr = PetscViewerSetType(hdf5viewer, PETSC_VIEWER_HDF5);CHKERRQ(ierr);
  ierr = PetscViewerFileSetMode(hdf5viewer, FILE_MODE_READ);CHKERRQ(ierr);
  ierr = PetscViewerFileSetName(hdf5viewer, "mesh.h5");CHKERRQ(ierr);
  ierr = PetscViewerHDF5GetFileId(hdf5viewer, &file_id);CHKERRQ(ierr);

  /* get number of cells and then number of edges */
  dataset_id = H5Dopen(file_id, "/Cells/Natural IDs");
  dataspace_id = H5Dget_space(dataset_id);
  status = H5Sget_simple_extent_dims(dataspace_id, &data.numCells, NULL);if (status < 0) SETERRQ(PETSC_ERR_LIB,"Bad dimension");
  status = H5Sclose(dataspace_id);CHKERRQ(status);
  status = H5Dclose(dataset_id);CHKERRQ(status);
  dataset_id = H5Dopen(file_id, "/Connections/Areas");
  dataspace_id = H5Dget_space(dataset_id);
  status = H5Sget_simple_extent_dims(dataspace_id, &data.numFaces, NULL);if (status < 0) SETERRQ(PETSC_ERR_LIB,"Bad dimension");
  status = H5Sclose(dataspace_id);CHKERRQ(status);
  status = H5Dclose(dataset_id);CHKERRQ(status);
  ierr = PetscPrintf(PETSC_COMM_SELF, "Number of cells %D Number of faces %D \n",(PetscInt)data.numCells,(PetscInt)data.numFaces);CHKERRQ(ierr);

  /* read face data */
  ierr = PetscMalloc5(data.numFaces,double,&data.faceAreas,data.numFaces,int,&data.downCells,data.numFaces,double,&data.downX,data.numFaces,double,&data.downY,data.numFaces,double,&data.downZ);CHKERRQ(ierr);
  dataset_id = H5Dopen(file_id, "/Connections/Areas");
  status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.faceAreas);CHKERRQ(status);
  status = H5Dclose(dataset_id);CHKERRQ(status);
  dataset_id = H5Dopen(file_id, "/Connections/Downwind Cell IDs");
  status = H5Dread(dataset_id, H5T_STD_I32LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.downCells);CHKERRQ(status);
  status = H5Dclose(dataset_id);CHKERRQ(status);
  dataset_id = H5Dopen(file_id, "/Connections/Downwind Distance X");
  status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.downX);CHKERRQ(status);
  status = H5Dclose(dataset_id);CHKERRQ(status);
  dataset_id = H5Dopen(file_id, "/Connections/Downwind Distance Y");
  status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.downY);CHKERRQ(status);
  status = H5Dclose(dataset_id);CHKERRQ(status);
  dataset_id = H5Dopen(file_id, "/Connections/Downwind Distance Z");
  status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.downZ);CHKERRQ(status);
  status = H5Dclose(dataset_id);CHKERRQ(status);
  ierr = PetscMalloc4(data.numFaces,int,&data.upCells,data.numFaces,double,&data.upX,data.numFaces,double,&data.upY,data.numFaces,double,&data.upZ);CHKERRQ(ierr);
  dataset_id = H5Dopen(file_id, "/Connections/Upwind Cell IDs");
  status = H5Dread(dataset_id, H5T_STD_I32LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.upCells);CHKERRQ(status);
  status = H5Dclose(dataset_id);CHKERRQ(status);
  dataset_id = H5Dopen(file_id, "/Connections/Upwind Distance X");
  status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.upX);CHKERRQ(status);
  status = H5Dclose(dataset_id);CHKERRQ(status);
  dataset_id = H5Dopen(file_id, "/Connections/Upwind Distance Y");
  status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.upY);CHKERRQ(status);
  status = H5Dclose(dataset_id);CHKERRQ(status);
  dataset_id = H5Dopen(file_id, "/Connections/Upwind Distance Z");
  status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.upZ);CHKERRQ(status);
  status = H5Dclose(dataset_id);CHKERRQ(status);


  /* Put face data into matrix */
  ierr = MatCreate(PETSC_COMM_WORLD, &Adj);CHKERRQ(ierr);
  ierr = MatSetSizes(Adj, data.numCells*bs, data.numCells*bs, PETSC_DECIDE, PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetFromOptions(Adj);CHKERRQ(ierr);
  ierr = MatSetType(Adj,MATSEQBAIJ);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(Adj, bs, 6,PETSC_NULL);CHKERRQ(ierr);
  //ierr = MatSetType(Adj,MATSEQAIJ);CHKERRQ(ierr);
  //ierr = MatSeqAIJSetPreallocation(Adj, 6,PETSC_NULL);CHKERRQ(ierr);
  for(i = 0; i < data.numFaces; ++i) {
    values[0] = data.faceAreas[i];
    values[1] = data.downCells[i];
    values[2] = data.downX[i];
    values[3] = data.downY[i];
    values[4] = data.downZ[i];
    values[5] = data.upCells[i];
    values[6] = data.upX[i];
    values[7] = data.upY[i];
    values[8] = data.upZ[i];
    ierr = MatSetValuesBlocked(Adj, 1, &data.downCells[i], 1, &data.upCells[i], values, INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatSetValuesBlocked(Adj, 1, &data.upCells[i], 1, &data.downCells[i], values, INSERT_VALUES);CHKERRQ(ierr);
    //ierr = MatSetValues(Adj, 1, &data.downCells[i], 1, &data.upCells[i], values, INSERT_VALUES);CHKERRQ(ierr);
    //ierr = MatSetValues(Adj, 1, &data.upCells[i], 1, &data.downCells[i], values, INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(Adj, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Adj, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree5(data.faceAreas, data.downCells, data.downX, data.downY, data.downZ);CHKERRQ(ierr);
  ierr = PetscFree4(data.upCells, data.upX, data.upY, data.upZ);CHKERRQ(ierr);

  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,"mesh.petsc", FILE_MODE_WRITE,&binaryviewer);CHKERRQ(ierr);
  ierr = MatView(Adj, binaryviewer);CHKERRQ(ierr);
  ierr = MatDestroy(Adj);CHKERRQ(ierr);

  /* read cell information */
  ierr = PetscMalloc5(data.numCells,int,&data.cellIds,data.numCells,double,&data.cellVols,data.numCells,double,&data.cellX,data.numCells,double,&data.cellY,data.numCells,double,&data.cellZ);CHKERRQ(ierr);
  dataset_id = H5Dopen(file_id, "/Cells/Natural IDs");
  status = H5Dread(dataset_id, H5T_STD_I32LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.cellIds);CHKERRQ(status);
  status = H5Dclose(dataset_id);CHKERRQ(status);
  dataset_id = H5Dopen(file_id, "/Cells/Volumes");
  status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.cellVols);CHKERRQ(status);
  status = H5Dclose(dataset_id);CHKERRQ(status);
  dataset_id = H5Dopen(file_id, "/Cells/X-Coordinates");
  status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.cellX);CHKERRQ(status);
  status = H5Dclose(dataset_id);CHKERRQ(status);
  dataset_id = H5Dopen(file_id, "/Cells/Y-Coordinates");
  status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.cellY);CHKERRQ(status);
  status = H5Dclose(dataset_id);CHKERRQ(status);
  dataset_id = H5Dopen(file_id, "/Cells/Z-Coordinates");
  status = H5Dread(dataset_id, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data.cellZ);CHKERRQ(status);
  status = H5Dclose(dataset_id);CHKERRQ(status);
  ierr = PetscViewerDestroy(hdf5viewer);CHKERRQ(ierr);

  /* put cell information into vectors */
  ierr = VecCreateSeq(PETSC_COMM_SELF,3*data.numCells,&cellCenters);CHKERRQ(ierr);
  ierr = VecSetBlockSize(cellCenters,3);CHKERRQ(ierr);
  ierr = VecGetArray(cellCenters,&cc);CHKERRQ(ierr);
  for (i=0; i<data.numCells; i++) {
    cc[3*i]   = data.cellX[i];
    cc[3*i+1] = data.cellY[i];
    cc[3*i+2] = data.cellZ[i];
  }
  ierr = VecRestoreArray(cellCenters,&cc);CHKERRQ(ierr);
  ierr = VecView(cellCenters,binaryviewer);CHKERRQ(ierr);

  ierr = VecGetArray(cellCenters,&cc);CHKERRQ(ierr);
  for (i=0; i<data.numCells; i++) {
    cc[3*i]   = data.cellIds[i];
    cc[3*i+1] = data.cellVols[i];
    cc[3*i+2] = 0.0;
  }
  ierr = VecRestoreArray(cellCenters,&cc);CHKERRQ(ierr);
  ierr = VecView(cellCenters,binaryviewer);CHKERRQ(ierr);
  ierr = PetscFree5(data.cellIds, data.cellVols, data.cellX, data.cellY, data.cellZ);CHKERRQ(ierr);
  ierr = VecDestroy(cellCenters);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(binaryviewer);CHKERRQ(ierr);

  ierr = PetscFinalize();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
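
The preprocessor writes the adjacency matrix followed by two vectors into mesh.petsc. A hedged sketch of how a downstream run might read them back, assuming a PETSc version with the two-argument MatLoad()/VecLoad() (newer than the API style used in the example itself):

Mat         Adj;
Vec         cellCenters;
PetscViewer viewer;

ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"mesh.petsc",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
ierr = MatCreate(PETSC_COMM_WORLD,&Adj);CHKERRQ(ierr);
ierr = MatLoad(Adj,viewer);CHKERRQ(ierr);          /* adjacency matrix */
ierr = VecCreate(PETSC_COMM_WORLD,&cellCenters);CHKERRQ(ierr);
ierr = VecLoad(cellCenters,viewer);CHKERRQ(ierr);  /* first stored vector: cell centers */
ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);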
Example #30
PetscErrorCode MatGetSubMatrices_MPIDense_Local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats)
{ 
  Mat_MPIDense   *c = (Mat_MPIDense*)C->data;
  Mat            A = c->A;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->data,*mat;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag0,tag1,idex,end,i;
  PetscInt       N = C->cmap->N,rstart = C->rmap->rstart,count;
  const PetscInt **irow,**icol,*irow_i;
  PetscInt       *nrow,*ncol,*w1,*w3,*w4,*rtable,start;
  PetscInt       **sbuf1,m,j,k,l,ct1,**rbuf1,row,proc;
  PetscInt       nrqs,msz,**ptr,*ctr,*pa,*tmp,bsz,nrqr;
  PetscInt       is_no,jmax,**rmap,*rmap_i;
  PetscInt       ctr_j,*sbuf1_j,*rbuf1_i;
  MPI_Request    *s_waits1,*r_waits1,*s_waits2,*r_waits2;
  MPI_Status     *r_status1,*r_status2,*s_status1,*s_status2;
  MPI_Comm       comm;
  PetscScalar    **rbuf2,**sbuf2;
  PetscBool      sorted;

  PetscFunctionBegin;
  comm   = ((PetscObject)C)->comm;
  tag0   = ((PetscObject)C)->tag;
  size   = c->size;
  rank   = c->rank;
  m      = C->rmap->N;
  
  /* Get some new tags to keep the communication clean */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr);

  /* Check that the row and column indices are sorted */
  for (i=0; i<ismax; i++) {
    ierr = ISSorted(isrow[i],&sorted);CHKERRQ(ierr);
    if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
    ierr = ISSorted(iscol[i],&sorted);CHKERRQ(ierr);
    if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted");
  }

  ierr = PetscMalloc5(ismax,const PetscInt*,&irow,ismax,const PetscInt*,&icol,ismax,PetscInt,&nrow,ismax,PetscInt,&ncol,m,PetscInt,&rtable);CHKERRQ(ierr);
  for (i=0; i<ismax; i++) { 
    ierr = ISGetIndices(isrow[i],&irow[i]);CHKERRQ(ierr);
    ierr = ISGetIndices(iscol[i],&icol[i]);CHKERRQ(ierr);
    ierr = ISGetLocalSize(isrow[i],&nrow[i]);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol[i],&ncol[i]);CHKERRQ(ierr);
  }

  /* Create hash table for the mapping :row -> proc*/
  for (i=0,j=0; i<size; i++) {
    jmax = C->rmap->range[i+1];
    for (; j<jmax; j++) {
      rtable[j] = i;
    }
  }

  /* evaluate communication - mesg to who, length of mesg, and buffer space
     required. Based on this, buffers are allocated, and data copied into them */
  ierr   = PetscMalloc3(2*size,PetscInt,&w1,size,PetscInt,&w3,size,PetscInt,&w4);CHKERRQ(ierr);
  ierr = PetscMemzero(w1,size*2*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector*/
  ierr = PetscMemzero(w3,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector*/
  for (i=0; i<ismax; i++) { 
    ierr   = PetscMemzero(w4,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector*/
    jmax   = nrow[i];
    irow_i = irow[i];
    for (j=0; j<jmax; j++) {
      row  = irow_i[j];
      proc = rtable[row];
      w4[proc]++;
    }
    for (j=0; j<size; j++) { 
      if (w4[j]) { w1[2*j] += w4[j];  w3[j]++;} 
    }
  }
  
  nrqs       = 0;              /* no of outgoing messages */
  msz        = 0;              /* total mesg length (for all procs) */
  w1[2*rank] = 0;              /* no mesg sent to self */
  w3[rank]   = 0;
  for (i=0; i<size; i++) {
    if (w1[2*i])  { w1[2*i+1] = 1; nrqs++;} /* there exists a message to proc i */
  }
  ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);CHKERRQ(ierr); /*(proc -array)*/
  for (i=0,j=0; i<size; i++) {
    if (w1[2*i]) { pa[j] = i; j++; }
  } 

  /* Each message would have a header = 1 + 2*(no of IS) + data */
  for (i=0; i<nrqs; i++) {
    j       = pa[i];
    w1[2*j] += w1[2*j+1] + 2* w3[j];   
    msz     += w1[2*j];  
  }
  /* Do a global reduction to determine how many messages to expect*/
  ierr = PetscMaxSum(comm,w1,&bsz,&nrqr);CHKERRQ(ierr);

  /* Allocate memory for recv buffers. Make sure rbuf1[0] exists by adding 1 to the buffer length */
  ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt*),&rbuf1);CHKERRQ(ierr);
  ierr = PetscMalloc(nrqr*bsz*sizeof(PetscInt),&rbuf1[0]);CHKERRQ(ierr);
  for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz;
  
  /* Post the receives */
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&r_waits1);CHKERRQ(ierr);
  for (i=0; i<nrqr; ++i) {
    ierr = MPI_Irecv(rbuf1[i],bsz,MPIU_INT,MPI_ANY_SOURCE,tag0,comm,r_waits1+i);CHKERRQ(ierr);
  }

  /* Allocate Memory for outgoing messages */
  ierr = PetscMalloc4(size,PetscInt*,&sbuf1,size,PetscInt*,&ptr,2*msz,PetscInt,&tmp,size,PetscInt,&ctr);CHKERRQ(ierr);
  ierr  = PetscMemzero(sbuf1,size*sizeof(PetscInt*));CHKERRQ(ierr);
  ierr  = PetscMemzero(ptr,size*sizeof(PetscInt*));CHKERRQ(ierr);
  {
    PetscInt *iptr = tmp,ict = 0;
    for (i=0; i<nrqs; i++) {
      j         = pa[i];
      iptr     += ict;
      sbuf1[j]  = iptr;
      ict       = w1[2*j];
    }
  }

  /* Form the outgoing messages */
  /* Initialize the header space */
  for (i=0; i<nrqs; i++) {
    j           = pa[i];
    sbuf1[j][0] = 0;
    ierr        = PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));CHKERRQ(ierr);
    ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
  }
  
  /* Parse the isrow and copy data into outbuf */
  for (i=0; i<ismax; i++) {
    ierr = PetscMemzero(ctr,size*sizeof(PetscInt));CHKERRQ(ierr);
    irow_i = irow[i];
    jmax   = nrow[i];
    for (j=0; j<jmax; j++) {  /* parse the indices of each IS */
      row  = irow_i[j];
      proc = rtable[row];
      if (proc != rank) { /* copy to the outgoing buf*/
        ctr[proc]++;
        *ptr[proc] = row;
        ptr[proc]++;
      }
    }
    /* Update the headers for the current IS */
    for (j=0; j<size; j++) { /* Can Optimise this loop too */
      if ((ctr_j = ctr[j])) {
        sbuf1_j        = sbuf1[j];
        k              = ++sbuf1_j[0];
        sbuf1_j[2*k]   = ctr_j;
        sbuf1_j[2*k-1] = i;
      }
    }
  }

  /*  Now  post the sends */
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr);
  for (i=0; i<nrqs; ++i) {
    j = pa[i];
    ierr = MPI_Isend(sbuf1[j],w1[2*j],MPIU_INT,j,tag0,comm,s_waits1+i);CHKERRQ(ierr);
  }

  /* Post receives to capture the row_data from other procs */
  ierr  = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);CHKERRQ(ierr);
  ierr  = PetscMalloc((nrqs+1)*sizeof(PetscScalar*),&rbuf2);CHKERRQ(ierr);
  for (i=0; i<nrqs; i++) {
    j        = pa[i];
    count    = (w1[2*j] - (2*sbuf1[j][0] + 1))*N;
    ierr     = PetscMalloc((count+1)*sizeof(PetscScalar),&rbuf2[i]);CHKERRQ(ierr);
    ierr     = MPI_Irecv(rbuf2[i],count,MPIU_SCALAR,j,tag1,comm,r_waits2+i);CHKERRQ(ierr);
  }

  /* Receive messages (row_nos) and then pack and send off the row values
     to the correct processors */

  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr+1)*sizeof(PetscScalar*),&sbuf2);CHKERRQ(ierr);
 
  {
    PetscScalar *sbuf2_i,*v_start;
    PetscInt         s_proc;
    for (i=0; i<nrqr; ++i) {
      ierr = MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);CHKERRQ(ierr);
      s_proc          = r_status1[i].MPI_SOURCE; /* send processor */
      rbuf1_i         = rbuf1[idex]; /* Actual message from s_proc */
      /* no of rows = end - start; start indexes just past the header
         (2*rbuf1_i[0]+1), while end is the total length of the received buffer */
      start           = 2*rbuf1_i[0] + 1;
      ierr            = MPI_Get_count(r_status1+i,MPIU_INT,&end);CHKERRQ(ierr);
      /* allocate memory sufficient to hold all the row values */
      ierr = PetscMalloc((end-start)*N*sizeof(PetscScalar),&sbuf2[idex]);CHKERRQ(ierr);
      sbuf2_i      = sbuf2[idex];
      /* Now pack the data */
      for (j=start; j<end; j++) {
        row = rbuf1_i[j] - rstart;
        v_start = a->v + row;
        for (k=0; k<N; k++) {
          sbuf2_i[0] = v_start[0];
          sbuf2_i++; v_start += C->rmap->n;
        }
      }
      /* Now send off the data */
      ierr = MPI_Isend(sbuf2[idex],(end-start)*N,MPIU_SCALAR,s_proc,tag1,comm,s_waits2+i);CHKERRQ(ierr);
    }
  }
  /* End Send-Recv of IS + row_numbers */
  ierr = PetscFree(r_status1);CHKERRQ(ierr);
  ierr = PetscFree(r_waits1);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);CHKERRQ(ierr);
  if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status1);CHKERRQ(ierr);}
  ierr = PetscFree(s_status1);CHKERRQ(ierr);
  ierr = PetscFree(s_waits1);CHKERRQ(ierr);

  /* Create the submatrices */
  if (scall == MAT_REUSE_MATRIX) {
    for (i=0; i<ismax; i++) {
      mat = (Mat_SeqDense *)(submats[i]->data);
      if ((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i])) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix, wrong size");
      ierr = PetscMemzero(mat->v,submats[i]->rmap->n*submats[i]->cmap->n*sizeof(PetscScalar));CHKERRQ(ierr);
      submats[i]->factortype = C->factortype;
    }
  } else {
    for (i=0; i<ismax; i++) {
      ierr = MatCreate(PETSC_COMM_SELF,submats+i);CHKERRQ(ierr);
      ierr = MatSetSizes(submats[i],nrow[i],ncol[i],nrow[i],ncol[i]);CHKERRQ(ierr);
      ierr = MatSetType(submats[i],((PetscObject)A)->type_name);CHKERRQ(ierr);
      ierr = MatSeqDenseSetPreallocation(submats[i],PETSC_NULL);CHKERRQ(ierr);
    }
  }
  
  /* Assemble the matrices */
  {
    PetscInt         col;
    PetscScalar *imat_v,*mat_v,*imat_vi,*mat_vi;
  
    for (i=0; i<ismax; i++) {
      mat       = (Mat_SeqDense*)submats[i]->data;
      mat_v     = a->v;
      imat_v    = mat->v;
      irow_i    = irow[i];
      m         = nrow[i];
      for (j=0; j<m; j++) {
        row      = irow_i[j];
        proc     = rtable[row];
        if (proc == rank) {
          row      = row - rstart;
          mat_vi   = mat_v + row;
          imat_vi  = imat_v + j;
          for (k=0; k<ncol[i]; k++) {
            col = icol[i][k];
            imat_vi[k*m] = mat_vi[col*C->rmap->n];
          }
        } 
      }
    }
  }

  /* Create row map: maps a row of C to the corresponding row of each submat */
  /* this is a very expensive operation wrt memory usage */
  ierr    = PetscMalloc(ismax*sizeof(PetscInt*),&rmap);CHKERRQ(ierr);
  ierr    = PetscMalloc(ismax*C->rmap->N*sizeof(PetscInt),&rmap[0]);CHKERRQ(ierr);
  ierr    = PetscMemzero(rmap[0],ismax*C->rmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=1; i<ismax; i++) { rmap[i] = rmap[i-1] + C->rmap->N;}
  for (i=0; i<ismax; i++) {
    rmap_i = rmap[i];
    irow_i = irow[i];
    jmax   = nrow[i];
    for (j=0; j<jmax; j++) { 
      rmap_i[irow_i[j]] = j; 
    }
  }
 
  /* Now Receive the row_values and assemble the rest of the matrix */
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);CHKERRQ(ierr);
  {
    PetscInt    is_max,tmp1,col,*sbuf1_i,is_sz;
    PetscScalar *rbuf2_i,*imat_v,*imat_vi;
  
    for (tmp1=0; tmp1<nrqs; tmp1++) { /* For each message */
      ierr = MPI_Waitany(nrqs,r_waits2,&i,r_status2+tmp1);CHKERRQ(ierr);
      /* Now dig out the corresponding sbuf1, which contains the IS data_structure */
      sbuf1_i = sbuf1[pa[i]];
      is_max  = sbuf1_i[0];
      ct1     = 2*is_max+1;
      rbuf2_i = rbuf2[i];
      for (j=1; j<=is_max; j++) { /* For each IS belonging to the message */
        is_no     = sbuf1_i[2*j-1];
        is_sz     = sbuf1_i[2*j];
        mat       = (Mat_SeqDense*)submats[is_no]->data;
        imat_v    = mat->v;
        rmap_i    = rmap[is_no];
        m         = nrow[is_no];
        for (k=0; k<is_sz; k++,rbuf2_i+=N) {  /* For each row */
          row      = sbuf1_i[ct1]; ct1++;
          row      = rmap_i[row];
          imat_vi  = imat_v + row;
          for (l=0; l<ncol[is_no]; l++) { /* For each col */
            col = icol[is_no][l];
            imat_vi[l*m] = rbuf2_i[col];
          }
        }
      }
    }
  }
  /* End Send-Recv of row_values */
  ierr = PetscFree(r_status2);CHKERRQ(ierr);
  ierr = PetscFree(r_waits2);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);CHKERRQ(ierr);
  if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,s_status2);CHKERRQ(ierr);}
  ierr = PetscFree(s_status2);CHKERRQ(ierr);
  ierr = PetscFree(s_waits2);CHKERRQ(ierr);

  /* Restore the indices */
  for (i=0; i<ismax; i++) {
    ierr = ISRestoreIndices(isrow[i],irow+i);CHKERRQ(ierr);
    ierr = ISRestoreIndices(iscol[i],icol+i);CHKERRQ(ierr);
  }

  /* Destroy allocated memory */
  ierr = PetscFree5(irow,icol,nrow,ncol,rtable);CHKERRQ(ierr);
  ierr = PetscFree3(w1,w3,w4);CHKERRQ(ierr);
  ierr = PetscFree(pa);CHKERRQ(ierr);

  for (i=0; i<nrqs; ++i) {
    ierr = PetscFree(rbuf2[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree(rbuf2);CHKERRQ(ierr);
  ierr = PetscFree4(sbuf1,ptr,tmp,ctr);CHKERRQ(ierr);
  ierr = PetscFree(rbuf1[0]);CHKERRQ(ierr);
  ierr = PetscFree(rbuf1);CHKERRQ(ierr);

  for (i=0; i<nrqr; ++i) {
    ierr = PetscFree(sbuf2[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(sbuf2);CHKERRQ(ierr);
  ierr = PetscFree(rmap[0]);CHKERRQ(ierr);
  ierr = PetscFree(rmap);CHKERRQ(ierr);

  for (i=0; i<ismax; i++) {
    ierr = MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
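
Like the other _Local routines above, this one is normally reached through the public interface. A minimal sketch, assuming C is an assembled parallel dense matrix, using sorted stride index sets as the routine requires (in later PETSc releases the public call is named MatCreateSubMatrices() rather than MatGetSubMatrices()):

IS  isrow,iscol;
Mat *submats;

/* first 5 rows and columns; stride sets are sorted, as required */
ierr = ISCreateStride(PETSC_COMM_SELF,5,0,1,&isrow);CHKERRQ(ierr);
ierr = ISCreateStride(PETSC_COMM_SELF,5,0,1,&iscol);CHKERRQ(ierr);
ierr = MatGetSubMatrices(C,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&submats);CHKERRQ(ierr);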