コード例 #1
0
ファイル: geo.c プロジェクト: 00liujj/petsc
static PetscErrorCode triangulateAndFormProl(IS  selected_2, /* list of selected local ID, includes selected ghosts */
                                             const PetscInt data_stride,
                                             const PetscReal coords[], /* column vector of local coordinates w/ ghosts */
                                             const PetscInt nselected_1, /* list of selected local ID, includes selected ghosts */
                                             const PetscInt clid_lid_1[],
                                             const PetscCoarsenData *agg_lists_1, /* selected_1 vertices of aggregate unselected vertices */
                                             const PetscInt crsGID[],
                                             const PetscInt bs,
                                             Mat a_Prol, /* prolongation operator (output) */
                                             PetscReal *a_worst_best) /* measure of worst missed fine vertex, 0 is no misses */
{
#if defined(PETSC_HAVE_TRIANGLE)
  PetscErrorCode       ierr;
  PetscInt             jj,tid,tt,idx,nselected_2;
  struct triangulateio in,mid;
  const PetscInt       *selected_idx_2;
  PetscMPIInt          rank,size;
  PetscInt             Istart,Iend,nFineLoc,myFine0;
  int                  kk,nPlotPts,sid;
  MPI_Comm             comm;
  PetscReal            tm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)a_Prol,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = ISGetSize(selected_2, &nselected_2);CHKERRQ(ierr);
  if (nselected_2 == 1 || nselected_2 == 2) { /* 0 happens on idle processors */
    *a_worst_best = 100.0; /* this will cause a stop, but not globalized (should not happen) */
  } else *a_worst_best = 0.0;
  ierr = MPI_Allreduce(a_worst_best, &tm, 1, MPIU_REAL, MPIU_MAX, comm);CHKERRQ(ierr);
  if (tm > 0.0) {
    *a_worst_best = 100.0;
    PetscFunctionReturn(0);
  }
  ierr     = MatGetOwnershipRange(a_Prol, &Istart, &Iend);CHKERRQ(ierr);
  nFineLoc = (Iend-Istart)/bs; myFine0 = Istart/bs;
  nPlotPts = nFineLoc; /* locals */
  /* traingle */
  /* Define input points - in*/
  in.numberofpoints          = nselected_2;
  in.numberofpointattributes = 0;
  /* get nselected points */
  ierr = PetscMalloc1(2*(nselected_2), &in.pointlist);CHKERRQ(ierr);
  ierr = ISGetIndices(selected_2, &selected_idx_2);CHKERRQ(ierr);

  for (kk=0,sid=0; kk<nselected_2; kk++,sid += 2) {
    PetscInt lid = selected_idx_2[kk];
    in.pointlist[sid]   = coords[lid];
    in.pointlist[sid+1] = coords[data_stride + lid];
    if (lid>=nFineLoc) nPlotPts++;
  }
  if (sid != 2*nselected_2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sid %D != 2*nselected_2 %D",sid,nselected_2);

  in.numberofsegments      = 0;
  in.numberofedges         = 0;
  in.numberofholes         = 0;
  in.numberofregions       = 0;
  in.trianglelist          = 0;
  in.segmentmarkerlist     = 0;
  in.pointattributelist    = 0;
  in.pointmarkerlist       = 0;
  in.triangleattributelist = 0;
  in.trianglearealist      = 0;
  in.segmentlist           = 0;
  in.holelist              = 0;
  in.regionlist            = 0;
  in.edgelist              = 0;
  in.edgemarkerlist        = 0;
  in.normlist              = 0;

  /* triangulate */
  mid.pointlist = 0;            /* Not needed if -N switch used. */
  /* Not needed if -N switch used or number of point attributes is zero: */
  mid.pointattributelist = 0;
  mid.pointmarkerlist    = 0; /* Not needed if -N or -B switch used. */
  mid.trianglelist       = 0;    /* Not needed if -E switch used. */
  /* Not needed if -E switch used or number of triangle attributes is zero: */
  mid.triangleattributelist = 0;
  mid.neighborlist          = 0; /* Needed only if -n switch used. */
  /* Needed only if segments are output (-p or -c) and -P not used: */
  mid.segmentlist = 0;
  /* Needed only if segments are output (-p or -c) and -P and -B not used: */
  mid.segmentmarkerlist = 0;
  mid.edgelist          = 0;    /* Needed only if -e switch used. */
  mid.edgemarkerlist    = 0; /* Needed if -e used and -B not used. */
  mid.numberoftriangles = 0;

  /* Triangulate the points.  Switches are chosen to read and write a  */
  /*   PSLG (p), preserve the convex hull (c), number everything from  */
  /*   zero (z), assign a regional attribute to each element (A), and  */
  /*   produce an edge list (e), a Voronoi diagram (v), and a triangle */
  /*   neighbor list (n).                                            */
  if (nselected_2 != 0) { /* inactive processor */
    char args[] = "npczQ"; /* c is needed ? */
    triangulate(args, &in, &mid, (struct triangulateio*) NULL);
    /* output .poly files for 'showme' */
    if (!PETSC_TRUE) {
      static int level = 1;
      FILE       *file; char fname[32];

      sprintf(fname,"C%d_%d.poly",level,rank); file = fopen(fname, "w");
      /*First line: <# of vertices> <dimension (must be 2)> <# of attributes> <# of boundary markers (0 or 1)>*/
      fprintf(file, "%d  %d  %d  %d\n",in.numberofpoints,2,0,0);
      /*Following lines: <vertex #> <x> <y> */
      for (kk=0,sid=0; kk<in.numberofpoints; kk++,sid += 2) {
        fprintf(file, "%d %e %e\n",kk,in.pointlist[sid],in.pointlist[sid+1]);
      }
      /*One line: <# of segments> <# of boundary markers (0 or 1)> */
      fprintf(file, "%d  %d\n",0,0);
      /*Following lines: <segment #> <endpoint> <endpoint> [boundary marker] */
      /* One line: <# of holes> */
      fprintf(file, "%d\n",0);
      /* Following lines: <hole #> <x> <y> */
      /* Optional line: <# of regional attributes and/or area constraints> */
      /* Optional following lines: <region #> <x> <y> <attribute> <maximum area> */
      fclose(file);

      /* elems */
      sprintf(fname,"C%d_%d.ele",level,rank); file = fopen(fname, "w");
      /* First line: <# of triangles> <nodes per triangle> <# of attributes> */
      fprintf(file, "%d %d %d\n",mid.numberoftriangles,3,0);
      /* Remaining lines: <triangle #> <node> <node> <node> ... [attributes] */
      for (kk=0,sid=0; kk<mid.numberoftriangles; kk++,sid += 3) {
        fprintf(file, "%d %d %d %d\n",kk,mid.trianglelist[sid],mid.trianglelist[sid+1],mid.trianglelist[sid+2]);
      }
      fclose(file);

      sprintf(fname,"C%d_%d.node",level,rank); file = fopen(fname, "w");
      /* First line: <# of vertices> <dimension (must be 2)> <# of attributes> <# of boundary markers (0 or 1)> */
      /* fprintf(file, "%d  %d  %d  %d\n",in.numberofpoints,2,0,0); */
      fprintf(file, "%d  %d  %d  %d\n",nPlotPts,2,0,0);
      /*Following lines: <vertex #> <x> <y> */
      for (kk=0,sid=0; kk<in.numberofpoints; kk++,sid+=2) {
        fprintf(file, "%d %e %e\n",kk,in.pointlist[sid],in.pointlist[sid+1]);
      }

      sid /= 2;
      for (jj=0; jj<nFineLoc; jj++) {
        PetscBool sel = PETSC_TRUE;
        for (kk=0; kk<nselected_2 && sel; kk++) {
          PetscInt lid = selected_idx_2[kk];
          if (lid == jj) sel = PETSC_FALSE;
        }
        if (sel) fprintf(file, "%d %e %e\n",sid++,coords[jj],coords[data_stride + jj]);
      }
      fclose(file);
      if (sid != nPlotPts) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sid %D != nPlotPts %D",sid,nPlotPts);
      level++;
    }
  }
#if defined PETSC_GAMG_USE_LOG
  ierr = PetscLogEventBegin(petsc_gamg_setup_events[FIND_V],0,0,0,0);CHKERRQ(ierr);
#endif
  { /* form P - setup some maps */
    PetscInt clid,mm,*nTri,*node_tri;

    ierr = PetscMalloc1(nselected_2, &node_tri);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected_2, &nTri);CHKERRQ(ierr);

    /* need list of triangles on node */
    for (kk=0; kk<nselected_2; kk++) nTri[kk] = 0;
    for (tid=0,kk=0; tid<mid.numberoftriangles; tid++) {
      for (jj=0; jj<3; jj++) {
        PetscInt cid = mid.trianglelist[kk++];
        if (nTri[cid] == 0) node_tri[cid] = tid;
        nTri[cid]++;
      }
    }
#define EPS 1.e-12
    /* find points and set prolongation */
    for (mm = clid = 0; mm < nFineLoc; mm++) {
      PetscBool ise;
      ierr = PetscCDEmptyAt(agg_lists_1,mm,&ise);CHKERRQ(ierr);
      if (!ise) {
        const PetscInt lid = mm;
        /* for (clid_iterator=0;clid_iterator<nselected_1;clid_iterator++) { */
        PetscScalar  AA[3][3];
        PetscBLASInt N=3,NRHS=1,LDA=3,IPIV[3],LDB=3,INFO;
        PetscCDPos   pos;
        ierr = PetscCDGetHeadPos(agg_lists_1,lid,&pos);CHKERRQ(ierr);
        while (pos) {
          PetscInt flid;
          ierr = PetscLLNGetID(pos, &flid);CHKERRQ(ierr);
          ierr = PetscCDGetNextPos(agg_lists_1,lid,&pos);CHKERRQ(ierr);

          if (flid < nFineLoc) {  /* could be a ghost */
            PetscInt       bestTID = -1; PetscReal best_alpha = 1.e10;
            const PetscInt fgid    = flid + myFine0;
            /* compute shape function for gid */
            const PetscReal fcoord[3] = {coords[flid],coords[data_stride+flid],1.0};
            PetscBool       haveit    =PETSC_FALSE; PetscScalar alpha[3]; PetscInt clids[3];

            /* look for it */
            for (tid = node_tri[clid], jj=0;
                 jj < 5 && !haveit && tid != -1;
                 jj++) {
              for (tt=0; tt<3; tt++) {
                PetscInt cid2 = mid.trianglelist[3*tid + tt];
                PetscInt lid2 = selected_idx_2[cid2];
                AA[tt][0] = coords[lid2]; AA[tt][1] = coords[data_stride + lid2]; AA[tt][2] = 1.0;
                clids[tt] = cid2; /* store for interp */
              }

              for (tt=0; tt<3; tt++) alpha[tt] = (PetscScalar)fcoord[tt];

              /* SUBROUTINE DGESV(N, NRHS, A, LDA, IPIV, B, LDB, INFO) */
              PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&N, &NRHS, (PetscScalar*)AA, &LDA, IPIV, alpha, &LDB, &INFO));
              {
                PetscBool have=PETSC_TRUE;  PetscReal lowest=1.e10;
                for (tt = 0, idx = 0; tt < 3; tt++) {
                  if (PetscRealPart(alpha[tt]) > (1.0+EPS) || PetscRealPart(alpha[tt]) < -EPS) have = PETSC_FALSE;
                  if (PetscRealPart(alpha[tt]) < lowest) {
                    lowest = PetscRealPart(alpha[tt]);
                    idx    = tt;
                  }
                }
                haveit = have;
              }
              tid = mid.neighborlist[3*tid + idx];
            }

            if (!haveit) {
              /* brute force */
              for (tid=0; tid<mid.numberoftriangles && !haveit; tid++) {
                for (tt=0; tt<3; tt++) {
                  PetscInt cid2 = mid.trianglelist[3*tid + tt];
                  PetscInt lid2 = selected_idx_2[cid2];
                  AA[tt][0] = coords[lid2]; AA[tt][1] = coords[data_stride + lid2]; AA[tt][2] = 1.0;
                  clids[tt] = cid2; /* store for interp */
                }
                for (tt=0; tt<3; tt++) alpha[tt] = fcoord[tt];
                /* SUBROUTINE DGESV(N, NRHS, A, LDA, IPIV, B, LDB, INFO) */
                PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&N, &NRHS, (PetscScalar*)AA, &LDA, IPIV, alpha, &LDB, &INFO));
                {
                  PetscBool have=PETSC_TRUE;  PetscReal worst=0.0, v;
                  for (tt=0; tt<3 && have; tt++) {
                    if (PetscRealPart(alpha[tt]) > 1.0+EPS || PetscRealPart(alpha[tt]) < -EPS) have=PETSC_FALSE;
                    if ((v=PetscAbs(PetscRealPart(alpha[tt])-0.5)) > worst) worst = v;
                  }
                  if (worst < best_alpha) {
                    best_alpha = worst; bestTID = tid;
                  }
                  haveit = have;
                }
              }
            }
            if (!haveit) {
              if (best_alpha > *a_worst_best) *a_worst_best = best_alpha;
              /* use best one */
              for (tt=0; tt<3; tt++) {
                PetscInt cid2 = mid.trianglelist[3*bestTID + tt];
                PetscInt lid2 = selected_idx_2[cid2];
                AA[tt][0] = coords[lid2]; AA[tt][1] = coords[data_stride + lid2]; AA[tt][2] = 1.0;
                clids[tt] = cid2; /* store for interp */
              }
              for (tt=0; tt<3; tt++) alpha[tt] = fcoord[tt];
              /* SUBROUTINE DGESV(N, NRHS, A, LDA, IPIV, B, LDB, INFO) */
              PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&N, &NRHS, (PetscScalar*)AA, &LDA, IPIV, alpha, &LDB, &INFO));
            }

            /* put in row of P */
            for (idx=0; idx<3; idx++) {
              PetscScalar shp = alpha[idx];
              if (PetscAbs(PetscRealPart(shp)) > 1.e-6) {
                PetscInt cgid = crsGID[clids[idx]];
                PetscInt jj   = cgid*bs, ii = fgid*bs; /* need to gloalize */
                for (tt=0; tt < bs; tt++, ii++, jj++) {
                  ierr = MatSetValues(a_Prol,1,&ii,1,&jj,&shp,INSERT_VALUES);CHKERRQ(ierr);
                }
              }
            }
          }
        } /* aggregates iterations */
        clid++;
      } /* a coarse agg */
    } /* for all fine nodes */

    ierr = ISRestoreIndices(selected_2, &selected_idx_2);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(a_Prol,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(a_Prol,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

    ierr = PetscFree(node_tri);CHKERRQ(ierr);
    ierr = PetscFree(nTri);CHKERRQ(ierr);
  }
#if defined PETSC_GAMG_USE_LOG
  ierr = PetscLogEventEnd(petsc_gamg_setup_events[FIND_V],0,0,0,0);CHKERRQ(ierr);
#endif
  free(mid.trianglelist);
  free(mid.neighborlist);
  ierr = PetscFree(in.pointlist);CHKERRQ(ierr);
  PetscFunctionReturn(0);
#else
  SETERRQ(PetscObjectComm((PetscObject)a_Prol),PETSC_ERR_PLIB,"configure with TRIANGLE to use geometric MG");
#endif
}
コード例 #2
0
ファイル: classical.c プロジェクト: ZJLi2013/petsc
PetscErrorCode PCGAMGProlongator_Classical_Direct(PC pc, const Mat A, const Mat G, PetscCoarsenData *agg_lists,Mat *P)
{
  PetscErrorCode    ierr;
  MPI_Comm          comm;
  PetscReal         *Amax_pos,*Amax_neg;
  Mat               lA,gA;                     /* on and off diagonal matrices */
  PetscInt          fn;                        /* fine local blocked sizes */
  PetscInt          cn;                        /* coarse local blocked sizes */
  PetscInt          gn;                        /* size of the off-diagonal fine vector */
  PetscInt          fs,fe;                     /* fine (row) ownership range*/
  PetscInt          cs,ce;                     /* coarse (column) ownership range */
  PetscInt          i,j;                       /* indices! */
  PetscBool         iscoarse;                  /* flag for determining if a node is coarse */
  PetscInt          *lcid,*gcid;               /* on and off-processor coarse unknown IDs */
  PetscInt          *lsparse,*gsparse;         /* on and off-processor sparsity patterns for prolongator */
  PetscScalar       pij;
  const PetscScalar *rval;
  const PetscInt    *rcol;
  PetscScalar       g_pos,g_neg,a_pos,a_neg,diag,invdiag,alpha,beta;
  Vec               F;   /* vec of coarse size */
  Vec               C;   /* vec of fine size */
  Vec               gF;  /* vec of off-diagonal fine size */
  MatType           mtype;
  PetscInt          c_indx;
  PetscScalar       c_scalar;
  PetscInt          ncols,col;
  PetscInt          row_f,row_c;
  PetscInt          cmax=0,idx;
  PetscScalar       *pvals;
  PetscInt          *pcols;
  PC_MG             *mg          = (PC_MG*)pc->data;
  PC_GAMG           *gamg        = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  comm = ((PetscObject)pc)->comm;
  ierr = MatGetOwnershipRange(A,&fs,&fe); CHKERRQ(ierr);
  fn = (fe - fs);

  ierr = MatGetVecs(A,&F,NULL);CHKERRQ(ierr);

  /* get the number of local unknowns and the indices of the local unknowns */

  ierr = PetscMalloc(sizeof(PetscInt)*fn,&lsparse);CHKERRQ(ierr);
  ierr = PetscMalloc(sizeof(PetscInt)*fn,&gsparse);CHKERRQ(ierr);
  ierr = PetscMalloc(sizeof(PetscInt)*fn,&lcid);CHKERRQ(ierr);
  ierr = PetscMalloc(sizeof(PetscReal)*fn,&Amax_pos);CHKERRQ(ierr);
  ierr = PetscMalloc(sizeof(PetscReal)*fn,&Amax_neg);CHKERRQ(ierr);

  /* count the number of coarse unknowns */
  cn = 0;
  for (i=0;i<fn;i++) {
    /* filter out singletons */
    ierr = PetscCDEmptyAt(agg_lists,i,&iscoarse); CHKERRQ(ierr);
    lcid[i] = -1;
    if (!iscoarse) {
      cn++;
    }
  }

   /* create the coarse vector */
  ierr = VecCreateMPI(comm,cn,PETSC_DECIDE,&C);CHKERRQ(ierr);
  ierr = VecGetOwnershipRange(C,&cs,&ce);CHKERRQ(ierr);

  /* construct a global vector indicating the global indices of the coarse unknowns */
  cn = 0;
  for (i=0;i<fn;i++) {
    ierr = PetscCDEmptyAt(agg_lists,i,&iscoarse); CHKERRQ(ierr);
    if (!iscoarse) {
      lcid[i] = cs+cn;
      cn++;
    } else {
      lcid[i] = -1;
    }
    *((PetscInt *)&c_scalar) = lcid[i];
    c_indx = fs+i;
    ierr = VecSetValues(F,1,&c_indx,&c_scalar,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = VecAssemblyBegin(F);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(F);CHKERRQ(ierr);

  /* determine the biggest off-diagonal entries in each row */
  for (i=fs;i<fe;i++) {
    Amax_pos[i-fs] = 0.;
    Amax_neg[i-fs] = 0.;
    ierr = MatGetRow(A,i,&ncols,&rcol,&rval);CHKERRQ(ierr);
    for(j=0;j<ncols;j++){
      if ((PetscRealPart(-rval[j]) > Amax_neg[i-fs]) && i != rcol[j]) Amax_neg[i-fs] = PetscAbsScalar(rval[j]);
      if ((PetscRealPart(rval[j])  > Amax_pos[i-fs]) && i != rcol[j]) Amax_pos[i-fs] = PetscAbsScalar(rval[j]);
    }
    if (ncols > cmax) cmax = ncols;
    ierr = MatRestoreRow(A,i,&ncols,&rcol,&rval);CHKERRQ(ierr);
  }
  ierr = PetscMalloc(sizeof(PetscInt)*cmax,&pcols);CHKERRQ(ierr);
  ierr = PetscMalloc(sizeof(PetscScalar)*cmax,&pvals);CHKERRQ(ierr);

  /* split the operator into two */
  ierr = PCGAMGClassicalGraphSplitting_Private(A,&lA,&gA);CHKERRQ(ierr);

  /* scatter to the ghost vector */
  ierr = PCGAMGClassicalCreateGhostVector_Private(A,&gF,NULL);CHKERRQ(ierr);
  ierr = PCGAMGClassicalGhost_Private(A,F,gF);CHKERRQ(ierr);

  if (gA) {
    ierr = VecGetSize(gF,&gn);CHKERRQ(ierr);
    ierr = PetscMalloc(sizeof(PetscInt)*gn,&gcid);CHKERRQ(ierr);
    for (i=0;i<gn;i++) {
      ierr = VecGetValues(gF,1,&i,&c_scalar);CHKERRQ(ierr);
      gcid[i] = *((PetscInt *)&c_scalar);
    }
  }

  ierr = VecDestroy(&F);CHKERRQ(ierr);
  ierr = VecDestroy(&gF);CHKERRQ(ierr);
  ierr = VecDestroy(&C);CHKERRQ(ierr);

  /* count the on and off processor sparsity patterns for the prolongator */
  for (i=0;i<fn;i++) {
    /* on */
    lsparse[i] = 0;
    gsparse[i] = 0;
    if (lcid[i] >= 0) {
      lsparse[i] = 1;
      gsparse[i] = 0;
    } else {
      ierr = MatGetRow(lA,i,&ncols,&rcol,&rval);CHKERRQ(ierr);
      for (j = 0;j < ncols;j++) {
        col = rcol[j];
        if (lcid[col] >= 0 && (PetscRealPart(rval[j]) > gamg->threshold*Amax_pos[i] || PetscRealPart(-rval[j]) > gamg->threshold*Amax_neg[i])) {
          lsparse[i] += 1;
        }
      }
      ierr = MatRestoreRow(lA,i,&ncols,&rcol,&rval);CHKERRQ(ierr);
      /* off */
      if (gA) {
        ierr = MatGetRow(gA,i,&ncols,&rcol,&rval);CHKERRQ(ierr);
        for (j = 0; j < ncols; j++) {
          col = rcol[j];
          if (gcid[col] >= 0 && (PetscRealPart(rval[j]) > gamg->threshold*Amax_pos[i] || PetscRealPart(-rval[j]) > gamg->threshold*Amax_neg[i])) {
            gsparse[i] += 1;
          }
        }
        ierr = MatRestoreRow(gA,i,&ncols,&rcol,&rval);CHKERRQ(ierr);
      }
    }
  }

  /* preallocate and create the prolongator */
  ierr = MatCreate(comm,P); CHKERRQ(ierr);
  ierr = MatGetType(G,&mtype);CHKERRQ(ierr);
  ierr = MatSetType(*P,mtype);CHKERRQ(ierr);

  ierr = MatSetSizes(*P,fn,cn,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*P,0,lsparse,0,gsparse);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(*P,0,lsparse);CHKERRQ(ierr);

  /* loop over local fine nodes -- get the diagonal, the sum of positive and negative strong and weak weights, and set up the row */
  for (i = 0;i < fn;i++) {
    /* determine on or off */
    row_f = i + fs;
    row_c = lcid[i];
    if (row_c >= 0) {
      pij = 1.;
      ierr = MatSetValues(*P,1,&row_f,1,&row_c,&pij,INSERT_VALUES);CHKERRQ(ierr);
    } else {
      g_pos = 0.;
      g_neg = 0.;
      a_pos = 0.;
      a_neg = 0.;
      diag = 0.;

      /* local connections */
      ierr = MatGetRow(lA,i,&ncols,&rcol,&rval);CHKERRQ(ierr);
      for (j = 0; j < ncols; j++) {
        col = rcol[j];
        if (lcid[col] >= 0 && (PetscRealPart(rval[j]) > gamg->threshold*Amax_pos[i] || PetscRealPart(-rval[j]) > gamg->threshold*Amax_neg[i])) {
          if (PetscRealPart(rval[j]) > 0.) {
            g_pos += rval[j];
          } else {
            g_neg += rval[j];
          }
        }
        if (col != i) {
          if (PetscRealPart(rval[j]) > 0.) {
            a_pos += rval[j];
          } else {
            a_neg += rval[j];
          }
        } else {
          diag = rval[j];
        }
      }
      ierr = MatRestoreRow(lA,i,&ncols,&rcol,&rval);CHKERRQ(ierr);

      /* ghosted connections */
      if (gA) {
        ierr = MatGetRow(gA,i,&ncols,&rcol,&rval);CHKERRQ(ierr);
        for (j = 0; j < ncols; j++) {
          col = rcol[j];
          if (gcid[col] >= 0 && (PetscRealPart(rval[j]) > gamg->threshold*Amax_pos[i] || PetscRealPart(-rval[j]) > gamg->threshold*Amax_neg[i])) {
            if (PetscRealPart(rval[j]) > 0.) {
              g_pos += rval[j];
            } else {
              g_neg += rval[j];
            }
          }
          if (PetscRealPart(rval[j]) > 0.) {
            a_pos += rval[j];
          } else {
            a_neg += rval[j];
          }
        }
        ierr = MatRestoreRow(gA,i,&ncols,&rcol,&rval);CHKERRQ(ierr);
      }

      if (g_neg == 0.) {
        alpha = 0.;
      } else {
        alpha = -a_neg/g_neg;
      }

      if (g_pos == 0.) {
        diag += a_pos;
        beta = 0.;
      } else {
        beta = -a_pos/g_pos;
      }
      if (diag == 0.) {
        invdiag = 0.;
      } else invdiag = 1. / diag;
      /* on */
      ierr = MatGetRow(lA,i,&ncols,&rcol,&rval);CHKERRQ(ierr);
      idx = 0;
      for (j = 0;j < ncols;j++) {
        col = rcol[j];
        if (lcid[col] >= 0 && (PetscRealPart(rval[j]) > gamg->threshold*Amax_pos[i] || PetscRealPart(-rval[j]) > gamg->threshold*Amax_neg[i])) {
          row_f = i + fs;
          row_c = lcid[col];
          /* set the values for on-processor ones */
          if (PetscRealPart(rval[j]) < 0.) {
            pij = rval[j]*alpha*invdiag;
          } else {
            pij = rval[j]*beta*invdiag;
          }
          if (PetscAbsScalar(pij) != 0.) {
            pvals[idx] = pij;
            pcols[idx] = row_c;
            idx++;
          }
        }
      }
      ierr = MatRestoreRow(lA,i,&ncols,&rcol,&rval);CHKERRQ(ierr);
      /* off */
      if (gA) {
        ierr = MatGetRow(gA,i,&ncols,&rcol,&rval);CHKERRQ(ierr);
        for (j = 0; j < ncols; j++) {
          col = rcol[j];
          if (gcid[col] >= 0 && (PetscRealPart(rval[j]) > gamg->threshold*Amax_pos[i] || PetscRealPart(-rval[j]) > gamg->threshold*Amax_neg[i])) {
            row_f = i + fs;
            row_c = gcid[col];
            /* set the values for on-processor ones */
            if (PetscRealPart(rval[j]) < 0.) {
              pij = rval[j]*alpha*invdiag;
            } else {
              pij = rval[j]*beta*invdiag;
            }
            if (PetscAbsScalar(pij) != 0.) {
              pvals[idx] = pij;
              pcols[idx] = row_c;
              idx++;
            }
          }
        }
        ierr = MatRestoreRow(gA,i,&ncols,&rcol,&rval);CHKERRQ(ierr);
      }
      ierr = MatSetValues(*P,1,&row_f,idx,pcols,pvals,INSERT_VALUES);CHKERRQ(ierr);
    }
  }

  ierr = MatAssemblyBegin(*P, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*P, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(lsparse);CHKERRQ(ierr);
  ierr = PetscFree(gsparse);CHKERRQ(ierr);
  ierr = PetscFree(pcols);CHKERRQ(ierr);
  ierr = PetscFree(pvals);CHKERRQ(ierr);
  ierr = PetscFree(Amax_pos);CHKERRQ(ierr);
  ierr = PetscFree(Amax_neg);CHKERRQ(ierr);
  ierr = PetscFree(lcid);CHKERRQ(ierr);
  if (gA) {ierr = PetscFree(gcid);CHKERRQ(ierr);}

  PetscFunctionReturn(0);
}
コード例 #3
0
ファイル: classical.c プロジェクト: ZJLi2013/petsc
PetscErrorCode PCGAMGProlongator_Classical_Standard(PC pc, const Mat A, const Mat G, PetscCoarsenData *agg_lists,Mat *P)
{
  PetscErrorCode    ierr;
  Mat               *lA;
  Vec               lv,v,cv;
  PetscScalar       *lcid;
  IS                lis;
  PetscInt          fs,fe,cs,ce,nl,i,j,k,li,lni,ci;
  VecScatter        lscat;
  PetscInt          fn,cn,cid,c_indx;
  PetscBool         iscoarse;
  PetscScalar       c_scalar;
  const PetscScalar *vcol;
  const PetscInt    *icol;
  const PetscInt    *gidx;
  PetscInt          ncols;
  PetscInt          *lsparse,*gsparse;
  MatType           mtype;
  PetscInt          maxcols;
  PetscReal         diag,jdiag,jwttotal;
  PetscScalar       *pvcol,vi;
  PetscInt          *picol;
  PetscInt          pncols;
  PetscScalar       *pcontrib,pentry,pjentry;
  /* PC_MG             *mg          = (PC_MG*)pc->data; */
  /* PC_GAMG           *gamg        = (PC_GAMG*)mg->innerctx; */

  PetscFunctionBegin;

  ierr = MatGetOwnershipRange(A,&fs,&fe);CHKERRQ(ierr);
  fn = fe-fs;
  ierr = MatGetVecs(A,NULL,&v);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,fe-fs,fs,1,&lis);CHKERRQ(ierr);
  /* increase the overlap by two to get neighbors of neighbors */
  ierr = MatIncreaseOverlap(A,1,&lis,2);CHKERRQ(ierr);
  ierr = ISSort(lis);CHKERRQ(ierr);
  /* get the local part of A */
  ierr = MatGetSubMatrices(A,1,&lis,&lis,MAT_INITIAL_MATRIX,&lA);CHKERRQ(ierr);
  /* build the scatter out of it */
  ierr = ISGetLocalSize(lis,&nl);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,nl,&lv);CHKERRQ(ierr);
  ierr = VecScatterCreate(v,lis,lv,NULL,&lscat);CHKERRQ(ierr);

  ierr = PetscMalloc(sizeof(PetscInt)*fn,&lsparse);CHKERRQ(ierr);
  ierr = PetscMalloc(sizeof(PetscInt)*fn,&gsparse);CHKERRQ(ierr);
  ierr = PetscMalloc(sizeof(PetscScalar)*nl,&pcontrib);CHKERRQ(ierr);

  /* create coarse vector */
  cn = 0;
  for (i=0;i<fn;i++) {
    ierr = PetscCDEmptyAt(agg_lists,i,&iscoarse);CHKERRQ(ierr);
    if (!iscoarse) {
      cn++;
    }
  }
  ierr = VecCreateMPI(PetscObjectComm((PetscObject)A),cn,PETSC_DECIDE,&cv);CHKERRQ(ierr);
  ierr = VecGetOwnershipRange(cv,&cs,&ce);CHKERRQ(ierr);
  cn = 0;
  for (i=0;i<fn;i++) {
    ierr = PetscCDEmptyAt(agg_lists,i,&iscoarse); CHKERRQ(ierr);
    if (!iscoarse) {
      cid = cs+cn;
      cn++;
    } else {
      cid = -1;
    }
    *(PetscInt*)&c_scalar = cid;
    c_indx = fs+i;
    ierr = VecSetValues(v,1,&c_indx,&c_scalar,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = VecScatterBegin(lscat,v,lv,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(lscat,v,lv,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* count to preallocate the prolongator */
  ierr = ISGetIndices(lis,&gidx);CHKERRQ(ierr);
  ierr = VecGetArray(lv,&lcid);CHKERRQ(ierr);
  maxcols = 0;
  /* count the number of unique contributing coarse cells for each fine */
  for (i=0;i<nl;i++) {
    pcontrib[i] = 0.;
    ierr = MatGetRow(lA[0],i,&ncols,&icol,NULL);CHKERRQ(ierr);
    if (gidx[i] >= fs && gidx[i] < fe) {
      li = gidx[i] - fs;
      lsparse[li] = 0;
      gsparse[li] = 0;
      cid = *(PetscInt*)&(lcid[i]);
      if (cid >= 0) {
        lsparse[li] = 1;
      } else {
        for (j=0;j<ncols;j++) {
          if (*(PetscInt*)&(lcid[icol[j]]) >= 0) {
            pcontrib[icol[j]] = 1.;
          } else {
            ci = icol[j];
            ierr = MatRestoreRow(lA[0],i,&ncols,&icol,NULL);CHKERRQ(ierr);
            ierr = MatGetRow(lA[0],ci,&ncols,&icol,NULL);CHKERRQ(ierr);
            for (k=0;k<ncols;k++) {
              if (*(PetscInt*)&(lcid[icol[k]]) >= 0) {
                pcontrib[icol[k]] = 1.;
              }
            }
            ierr = MatRestoreRow(lA[0],ci,&ncols,&icol,NULL);CHKERRQ(ierr);
            ierr = MatGetRow(lA[0],i,&ncols,&icol,NULL);CHKERRQ(ierr);
          }
        }
        for (j=0;j<ncols;j++) {
          if (*(PetscInt*)&(lcid[icol[j]]) >= 0 && pcontrib[icol[j]] != 0.) {
            lni = *(PetscInt*)&(lcid[icol[j]]);
            if (lni >= cs && lni < ce) {
              lsparse[li]++;
            } else {
              gsparse[li]++;
            }
            pcontrib[icol[j]] = 0.;
          } else {
            ci = icol[j];
            ierr = MatRestoreRow(lA[0],i,&ncols,&icol,NULL);CHKERRQ(ierr);
            ierr = MatGetRow(lA[0],ci,&ncols,&icol,NULL);CHKERRQ(ierr);
            for (k=0;k<ncols;k++) {
              if (*(PetscInt*)&(lcid[icol[k]]) >= 0 && pcontrib[icol[k]] != 0.) {
                lni = *(PetscInt*)&(lcid[icol[k]]);
                if (lni >= cs && lni < ce) {
                  lsparse[li]++;
                } else {
                  gsparse[li]++;
                }
                pcontrib[icol[k]] = 0.;
              }
            }
            ierr = MatRestoreRow(lA[0],ci,&ncols,&icol,NULL);CHKERRQ(ierr);
            ierr = MatGetRow(lA[0],i,&ncols,&icol,NULL);CHKERRQ(ierr);
          }
        }
      }
      if (lsparse[li] + gsparse[li] > maxcols) maxcols = lsparse[li]+gsparse[li];
    }
    ierr = MatRestoreRow(lA[0],i,&ncols,&icol,&vcol);CHKERRQ(ierr);
  }
  ierr = PetscMalloc(sizeof(PetscInt)*maxcols,&picol);CHKERRQ(ierr);
  ierr = PetscMalloc(sizeof(PetscScalar)*maxcols,&pvcol);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),P);CHKERRQ(ierr);
  ierr = MatGetType(A,&mtype);CHKERRQ(ierr);
  ierr = MatSetType(*P,mtype);CHKERRQ(ierr);
  ierr = MatSetSizes(*P,fn,cn,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*P,0,lsparse,0,gsparse);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(*P,0,lsparse);CHKERRQ(ierr);
  for (i=0;i<nl;i++) {
    diag = 0.;
    if (gidx[i] >= fs && gidx[i] < fe) {
      li = gidx[i] - fs;
      pncols=0;
      cid = *(PetscInt*)&(lcid[i]);
      if (cid >= 0) {
        pncols = 1;
        picol[0] = cid;
        pvcol[0] = 1.;
      } else {
        ierr = MatGetRow(lA[0],i,&ncols,&icol,&vcol);CHKERRQ(ierr);
        for (j=0;j<ncols;j++) {
          pentry = vcol[j];
          if (*(PetscInt*)&(lcid[icol[j]]) >= 0) {
            /* coarse neighbor */
            pcontrib[icol[j]] += pentry;
          } else if (icol[j] != i) {
            /* the neighbor is a strongly connected fine node */
            ci = icol[j];
            vi = vcol[j];
            ierr = MatRestoreRow(lA[0],i,&ncols,&icol,&vcol);CHKERRQ(ierr);
            ierr = MatGetRow(lA[0],ci,&ncols,&icol,&vcol);CHKERRQ(ierr);
            jwttotal=0.;
            jdiag = 0.;
            for (k=0;k<ncols;k++) {
              if (ci == icol[k]) {
                jdiag = PetscRealPart(vcol[k]);
              }
            }
            for (k=0;k<ncols;k++) {
              if (*(PetscInt*)&(lcid[icol[k]]) >= 0 && jdiag*PetscRealPart(vcol[k]) < 0.) {
                pjentry = vcol[k];
                jwttotal += PetscRealPart(pjentry);
              }
            }
            if (jwttotal != 0.) {
              jwttotal = PetscRealPart(vi)/jwttotal;
              for (k=0;k<ncols;k++) {
                if (*(PetscInt*)&(lcid[icol[k]]) >= 0 && jdiag*PetscRealPart(vcol[k]) < 0.) {
                  pjentry = vcol[k]*jwttotal;
                  pcontrib[icol[k]] += pjentry;
                }
              }
            } else {
              diag += PetscRealPart(vi);
            }
            ierr = MatRestoreRow(lA[0],ci,&ncols,&icol,&vcol);CHKERRQ(ierr);
            ierr = MatGetRow(lA[0],i,&ncols,&icol,&vcol);CHKERRQ(ierr);
          } else {
            diag += PetscRealPart(vcol[j]);
          }
        }
        if (diag != 0.) {
          diag = 1./diag;
          for (j=0;j<ncols;j++) {
            if (*(PetscInt*)&(lcid[icol[j]]) >= 0 && pcontrib[icol[j]] != 0.) {
              /* the neighbor is a coarse node */
              if (PetscAbsScalar(pcontrib[icol[j]]) > 0.0) {
                lni = *(PetscInt*)&(lcid[icol[j]]);
                pvcol[pncols] = -pcontrib[icol[j]]*diag;
                picol[pncols] = lni;
                pncols++;
              }
              pcontrib[icol[j]] = 0.;
            } else {
              /* the neighbor is a strongly connected fine node */
              ci = icol[j];
              ierr = MatRestoreRow(lA[0],i,&ncols,&icol,&vcol);CHKERRQ(ierr);
              ierr = MatGetRow(lA[0],ci,&ncols,&icol,&vcol);CHKERRQ(ierr);
              for (k=0;k<ncols;k++) {
                if (*(PetscInt*)&(lcid[icol[k]]) >= 0 && pcontrib[icol[k]] != 0.) {
                  if (PetscAbsScalar(pcontrib[icol[k]]) > 0.0) {
                    lni = *(PetscInt*)&(lcid[icol[k]]);
                    pvcol[pncols] = -pcontrib[icol[k]]*diag;
                    picol[pncols] = lni;
                    pncols++;
                  }
                  pcontrib[icol[k]] = 0.;
                }
              }
              ierr = MatRestoreRow(lA[0],ci,&ncols,&icol,&vcol);CHKERRQ(ierr);
              ierr = MatGetRow(lA[0],i,&ncols,&icol,&vcol);CHKERRQ(ierr);
            }
            pcontrib[icol[j]] = 0.;
          }
          ierr = MatRestoreRow(lA[0],i,&ncols,&icol,&vcol);CHKERRQ(ierr);
        }
      }
      ci = gidx[i];
      li = gidx[i] - fs;
      if (pncols > 0) {
        ierr = MatSetValues(*P,1,&ci,pncols,picol,pvcol,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  ierr = ISRestoreIndices(lis,&gidx);CHKERRQ(ierr);
  ierr = VecRestoreArray(lv,&lcid);CHKERRQ(ierr);

  ierr = PetscFree(pcontrib);CHKERRQ(ierr);
  ierr = PetscFree(picol);CHKERRQ(ierr);
  ierr = PetscFree(pvcol);CHKERRQ(ierr);
  ierr = PetscFree(lsparse);CHKERRQ(ierr);
  ierr = PetscFree(gsparse);CHKERRQ(ierr);
  ierr = ISDestroy(&lis);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&lA);CHKERRQ(ierr);
  ierr = VecDestroy(&lv);CHKERRQ(ierr);
  ierr = VecDestroy(&cv);CHKERRQ(ierr);
  ierr = VecDestroy(&v);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&lscat);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*P, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*P, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /*
  Mat Pold;
  ierr = PCGAMGProlongator_Classical(pc,A,G,agg_lists,&Pold);CHKERRQ(ierr);
  ierr = MatView(Pold,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = MatView(*P,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = MatDestroy(&Pold);CHKERRQ(ierr);
   */
  PetscFunctionReturn(0);
}
コード例 #4
0
ファイル: hem.c プロジェクト: erdc-cm/petsc-dev
PetscErrorCode heavyEdgeMatchAgg(IS perm,Mat a_Gmat,PetscInt verbose,PetscCoarsenData **a_locals_llist)
{
  PetscErrorCode   ierr;
  PetscBool        isMPI;
  MPI_Comm         wcomm = ((PetscObject)a_Gmat)->comm;
  PetscInt         sub_it,kk,n,ix,*idx,*ii,iter,Iend,my0;
  PetscMPIInt      rank,size;
  const PetscInt   nloc = a_Gmat->rmap->n,n_iter=6; /* need to figure out how to stop this */
  PetscInt         *lid_cprowID,*lid_gid;
  PetscBool        *lid_matched;
  Mat_SeqAIJ       *matA, *matB=0;
  Mat_MPIAIJ       *mpimat=0;
  PetscScalar      one=1.;
  PetscCoarsenData *agg_llists = PETSC_NULL,*deleted_list = PETSC_NULL;
  Mat              cMat,tMat,P;
  MatScalar        *ap;
  PetscMPIInt      tag1,tag2;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(wcomm, &rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(wcomm, &size);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(a_Gmat, &my0, &Iend);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(wcomm, &tag1);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(wcomm, &tag2);CHKERRQ(ierr);

  ierr = PetscMalloc(nloc*sizeof(PetscInt), &lid_gid);CHKERRQ(ierr); /* explicit array needed */
  ierr = PetscMalloc(nloc*sizeof(PetscInt), &lid_cprowID);CHKERRQ(ierr);
  ierr = PetscMalloc(nloc*sizeof(PetscBool), &lid_matched);CHKERRQ(ierr);

  ierr = PetscCDCreate(nloc, &agg_llists);CHKERRQ(ierr);
  /* ierr = PetscCDSetChuckSize(agg_llists, nloc+1);CHKERRQ(ierr); */
  *a_locals_llist = agg_llists;
  ierr = PetscCDCreate(size, &deleted_list);CHKERRQ(ierr);
  ierr = PetscCDSetChuckSize(deleted_list, 100);CHKERRQ(ierr);
  /* setup 'lid_gid' for scatters and add self to all lists */
  for (kk=0;kk<nloc;kk++) {
    lid_gid[kk] = kk + my0;
    ierr = PetscCDAppendID(agg_llists, kk, my0+kk);CHKERRQ(ierr);
  }

  /* make a copy of the graph, this gets destroyed in iterates */
  ierr = MatDuplicate(a_Gmat,MAT_COPY_VALUES,&cMat);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)a_Gmat, MATMPIAIJ, &isMPI);CHKERRQ(ierr);
  iter = 0;
  while(iter++ < n_iter) {
    PetscScalar    *cpcol_gid,*cpcol_max_ew,*cpcol_max_pe,*lid_max_ew;
    PetscBool      *cpcol_matched;
    PetscMPIInt    *cpcol_pe,proc;
    Vec            locMaxEdge,locMaxPE,ghostMaxEdge,ghostMaxPE;
    PetscInt       nEdges,n_nz_row,jj;
    Edge           *Edges;
    PetscInt       gid;
    const PetscInt *perm_ix, n_sub_its = 120;

    /* get submatrices of cMat */
    if (isMPI) {
      mpimat = (Mat_MPIAIJ*)cMat->data;
      matA = (Mat_SeqAIJ*)mpimat->A->data;
      matB = (Mat_SeqAIJ*)mpimat->B->data;
      /* force compressed storage of B */
      matB->compressedrow.check = PETSC_TRUE;
      ierr = MatCheckCompressedRow(mpimat->B,&matB->compressedrow,matB->i,cMat->rmap->n,-1.0);CHKERRQ(ierr);
      assert(matB->compressedrow.use);
    } else {
      matA = (Mat_SeqAIJ*)cMat->data;
    }
    assert(matA && !matA->compressedrow.use);
    assert(matB==0 || matB->compressedrow.use);

    /* set max edge on nodes */
    ierr = MatGetVecs(cMat, &locMaxEdge, 0);CHKERRQ(ierr);
    ierr = MatGetVecs(cMat, &locMaxPE, 0);CHKERRQ(ierr);

    /* get 'cpcol_pe' & 'cpcol_gid' & init. 'cpcol_matched' using 'mpimat->lvec' */
    if (mpimat) {
      Vec         vec; 
      PetscScalar vval;

      ierr = MatGetVecs(cMat, &vec, 0);CHKERRQ(ierr);
      /* cpcol_pe */
      vval = (PetscScalar)(rank);
      for (kk=0,gid=my0;kk<nloc;kk++,gid++) {
        ierr = VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr); /* set with GID */
      }
      ierr = VecAssemblyBegin(vec);CHKERRQ(ierr);
      ierr = VecAssemblyEnd(vec);CHKERRQ(ierr);
      ierr = VecScatterBegin(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecGetArray(mpimat->lvec, &cpcol_gid);CHKERRQ(ierr); /* get proc ID in 'cpcol_gid' */
      ierr = VecGetLocalSize(mpimat->lvec, &n);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscInt), &cpcol_pe);CHKERRQ(ierr);
      for (kk=0;kk<n;kk++) cpcol_pe[kk] = (PetscMPIInt)PetscRealPart(cpcol_gid[kk]);
      ierr = VecRestoreArray(mpimat->lvec, &cpcol_gid);CHKERRQ(ierr);

      /* cpcol_gid */
      for (kk=0,gid=my0;kk<nloc;kk++,gid++) {
        vval = (PetscScalar)(gid);
        ierr = VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr); /* set with GID */
      }
      ierr = VecAssemblyBegin(vec);CHKERRQ(ierr);
      ierr = VecAssemblyEnd(vec);CHKERRQ(ierr);
      ierr = VecScatterBegin(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecDestroy(&vec);CHKERRQ(ierr);
      ierr = VecGetArray(mpimat->lvec, &cpcol_gid);CHKERRQ(ierr); /* get proc ID in 'cpcol_gid' */

      /* cpcol_matched */
      ierr = VecGetLocalSize(mpimat->lvec, &n);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscBool), &cpcol_matched);CHKERRQ(ierr);
      for (kk=0;kk<n;kk++) cpcol_matched[kk] = PETSC_FALSE;
    }

    /* need an inverse map - locals */
    for (kk=0;kk<nloc;kk++) lid_cprowID[kk] = -1;
    /* set index into compressed row 'lid_cprowID' */
    if (matB) {
      ii = matB->compressedrow.i;
      for (ix=0; ix<matB->compressedrow.nrows; ix++) {
        lid_cprowID[matB->compressedrow.rindex[ix]] = ix;
      }
    }

    /* get removed IS, use '' */
    /* if (iter==1) { */
    /*   PetscInt *lid_rem,idx; */
    /*   ierr = PetscMalloc(nloc*sizeof(PetscInt), &lid_rem);CHKERRQ(ierr); */
    /*   for (kk=idx=0;kk<nloc;kk++){ */
    /*     PetscInt nn,lid=kk; */
    /*     ii = matA->i; nn = ii[lid+1] - ii[lid]; */
    /*     if ((ix=lid_cprowID[lid]) != -1) { /\* if I have any ghost neighbors *\/ */
    /*       ii = matB->compressedrow.i; */
    /*       nn += ii[ix+1] - ii[ix]; */
    /*     } */
    /*     if (nn < 2) { */
    /*       lid_rem[idx++] = kk + my0; */
    /*     } */
    /*   } */
    /*   ierr = PetscCDSetRemovedIS(agg_llists, wcomm, idx, lid_rem);CHKERRQ(ierr); */
    /*   ierr = PetscFree(lid_rem);CHKERRQ(ierr); */
    /* } */

    /* compute 'locMaxEdge' & 'locMaxPE', and create list of edges, count edges' */
    for (nEdges=0,kk=0,gid=my0;kk<nloc;kk++,gid++){
      PetscReal   max_e = 0., tt;
      PetscScalar vval;
      PetscInt    lid = kk;
      PetscMPIInt max_pe=rank,pe;
      ii = matA->i; n = ii[lid+1] - ii[lid]; idx = matA->j + ii[lid];
      ap = matA->a + ii[lid];
      for (jj=0; jj<n; jj++) {
        PetscInt lidj = idx[jj];
        if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]);
        if (lidj > lid) nEdges++;
      }
      if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
        ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
        ap = matB->a + ii[ix];
        idx = matB->j + ii[ix];
        for (jj=0 ; jj<n ; jj++) {
          if ((tt=PetscRealPart(ap[jj])) > max_e) max_e = tt;
          nEdges++;
          if ((pe=cpcol_pe[idx[jj]]) > max_pe) max_pe = pe;
        }
      }
      vval = max_e;
      ierr = VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr);

      vval = (PetscScalar)max_pe;
      ierr = VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = VecAssemblyBegin(locMaxEdge);CHKERRQ(ierr);
    ierr = VecAssemblyEnd(locMaxEdge);CHKERRQ(ierr);
    ierr = VecAssemblyBegin(locMaxPE);CHKERRQ(ierr);
    ierr = VecAssemblyEnd(locMaxPE);CHKERRQ(ierr);

    /* get 'cpcol_max_ew' & 'cpcol_max_pe' */
    if (mpimat) {
      ierr = VecDuplicate(mpimat->lvec, &ghostMaxEdge);CHKERRQ(ierr);
      ierr = VecScatterBegin(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecGetArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr);

      ierr = VecDuplicate(mpimat->lvec, &ghostMaxPE);CHKERRQ(ierr);
      ierr = VecScatterBegin(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr =   VecScatterEnd(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecGetArray(ghostMaxPE, &cpcol_max_pe);CHKERRQ(ierr);
    }

    /* setup sorted list of edges */
    ierr = PetscMalloc(nEdges*sizeof(Edge), &Edges);CHKERRQ(ierr);
    ierr = ISGetIndices(perm, &perm_ix);CHKERRQ(ierr);
    for (nEdges=n_nz_row=kk=0;kk<nloc;kk++){
      PetscInt nn, lid = perm_ix[kk];
      ii = matA->i; nn = n = ii[lid+1] - ii[lid]; idx = matA->j + ii[lid];
      ap = matA->a + ii[lid];
      for (jj=0; jj<n; jj++) {
        PetscInt lidj = idx[jj];        assert(PetscRealPart(ap[jj])>0.);
        if (lidj > lid) {
          Edges[nEdges].lid0 = lid;
          Edges[nEdges].gid1 = lidj + my0;
          Edges[nEdges].cpid1 = -1;
          Edges[nEdges].weight = PetscRealPart(ap[jj]);
          nEdges++;
        }
      }
      if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
        ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
        ap = matB->a + ii[ix];
        idx = matB->j + ii[ix];
        nn += n;
        for (jj=0 ; jj<n ; jj++) {
          assert(PetscRealPart(ap[jj])>0.);
          Edges[nEdges].lid0 = lid;
          Edges[nEdges].gid1 = (PetscInt)PetscRealPart(cpcol_gid[idx[jj]]);
          Edges[nEdges].cpid1 = idx[jj];
          Edges[nEdges].weight = PetscRealPart(ap[jj]);
          nEdges++;
        }
      }
      if (nn > 1) n_nz_row++;
      else if (iter == 1){
        /* should select this because it is technically in the MIS but lets not */
        ierr = PetscCDRemoveAll(agg_llists, lid);CHKERRQ(ierr);
      }
    }
    ierr = ISRestoreIndices(perm,&perm_ix);CHKERRQ(ierr);

    qsort(Edges, nEdges, sizeof(Edge), gamg_hem_compare);

    /* projection matrix */
    ierr = MatCreateAIJ(wcomm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE, 1, 0, 1, 0, &P);CHKERRQ(ierr);

    /* clear matched flags */
    for (kk=0;kk<nloc;kk++) lid_matched[kk] = PETSC_FALSE;
    /* process - communicate - process */
    for (sub_it=0;sub_it<n_sub_its;sub_it++){
      PetscInt nactive_edges;

      ierr = VecGetArray(locMaxEdge, &lid_max_ew);CHKERRQ(ierr);
      for (kk=nactive_edges=0;kk<nEdges;kk++){
        /* HEM */
        const Edge *e = &Edges[kk];
        const PetscInt lid0=e->lid0,gid1=e->gid1,cpid1=e->cpid1,gid0=lid0+my0,lid1=gid1-my0;
        PetscBool isOK = PETSC_TRUE;

        /* skip if either (local) vertex is done already */
        if (lid_matched[lid0] || (gid1>=my0 && gid1<Iend && lid_matched[gid1-my0])) {
          continue;
        }
        /* skip if ghost vertex is done */
        if (cpid1 != -1 && cpcol_matched[cpid1]) {
          continue;
        }

        nactive_edges++;
        /* skip if I have a bigger edge someplace (lid_max_ew gets updated) */
        if (PetscRealPart(lid_max_ew[lid0]) > e->weight + 1.e-12) {
          continue;
        }

        if (cpid1 == -1) {
          if (PetscRealPart(lid_max_ew[lid1]) > e->weight + 1.e-12) {
            continue;
          }
        } else {
          /* see if edge might get matched on other proc */
          PetscReal g_max_e = PetscRealPart(cpcol_max_ew[cpid1]);
          if (g_max_e > e->weight + 1.e-12) {
            continue;
          } else if (e->weight > g_max_e - 1.e-12 && (PetscMPIInt)PetscRealPart(cpcol_max_pe[cpid1]) > rank) {
            /* check for max_e == to this edge and larger processor that will deal with this */
            continue;
          }
        }

        /* check ghost for v0 */
        if (isOK){
          PetscReal max_e,ew;
          if ((ix=lid_cprowID[lid0]) != -1) { /* if I have any ghost neighbors */
            ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
            ap = matB->a + ii[ix];
            idx = matB->j + ii[ix];
            for (jj=0 ; jj<n && isOK; jj++) {
              PetscInt lidj = idx[jj];
              if (cpcol_matched[lidj]) continue;
              ew = PetscRealPart(ap[jj]); max_e = PetscRealPart(cpcol_max_ew[lidj]);
              /* check for max_e == to this edge and larger processor that will deal with this */
              if (ew > max_e - 1.e-12 && ew > PetscRealPart(lid_max_ew[lid0]) - 1.e-12 && (PetscMPIInt)PetscRealPart(cpcol_max_pe[lidj]) > rank){
                isOK = PETSC_FALSE;
              }
            }
          }

          /* for v1 */
          if (cpid1 == -1 && isOK){
            if ((ix=lid_cprowID[lid1]) != -1) { /* if I have any ghost neighbors */
              ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
              ap = matB->a + ii[ix];
              idx = matB->j + ii[ix];
              for (jj=0 ; jj<n && isOK ; jj++) {
                PetscInt lidj = idx[jj];
                if (cpcol_matched[lidj]) continue;
                ew = PetscRealPart(ap[jj]); max_e = PetscRealPart(cpcol_max_ew[lidj]);
                /* check for max_e == to this edge and larger processor that will deal with this */
                if (ew > max_e - 1.e-12 && ew > PetscRealPart(lid_max_ew[lid1]) - 1.e-12 && (PetscMPIInt)PetscRealPart(cpcol_max_pe[lidj]) > rank) {
                  isOK = PETSC_FALSE;
                }
              }
            }
          }
        }

        /* do it */
        if (isOK){
          if (cpid1 == -1) {
            lid_matched[lid1] = PETSC_TRUE;  /* keep track of what we've done this round */
            ierr = PetscCDAppendRemove(agg_llists, lid0, lid1);CHKERRQ(ierr);
          } else if (sub_it != n_sub_its-1) {
            /* add gid1 to list of ghost deleted by me -- I need their children */
            proc = cpcol_pe[cpid1];
            cpcol_matched[cpid1] = PETSC_TRUE; /* messing with VecGetArray array -- needed??? */
            ierr = PetscCDAppendID(deleted_list, proc, cpid1);CHKERRQ(ierr); /* cache to send messages */
            ierr = PetscCDAppendID(deleted_list, proc, lid0);CHKERRQ(ierr);
          } else {
            continue;
          }
          lid_matched[lid0] = PETSC_TRUE; /* keep track of what we've done this round */
          /* set projection */
          ierr = MatSetValues(P,1,&gid0,1,&gid0,&one,INSERT_VALUES);CHKERRQ(ierr);
          ierr = MatSetValues(P,1,&gid1,1,&gid0,&one,INSERT_VALUES);CHKERRQ(ierr);
        } /* matched */
      } /* edge loop */

      /* deal with deleted ghost on first pass */
      if (size>1 && sub_it != n_sub_its-1){
        PetscCDPos  pos;  PetscBool ise = PETSC_FALSE;
        PetscInt    nSend1, **sbuffs1,nSend2;
#define REQ_BF_SIZE 100
        MPI_Request *sreqs2[REQ_BF_SIZE],*rreqs2[REQ_BF_SIZE];
        MPI_Status  status;

        /* send request */
        for (proc=0,nSend1=0;proc<size;proc++){
          ierr = PetscCDEmptyAt(deleted_list,proc,&ise);CHKERRQ(ierr);
          if (!ise) nSend1++;
        }
        ierr = PetscMalloc(nSend1*sizeof(PetscInt*), &sbuffs1);CHKERRQ(ierr);
        /* ierr = PetscMalloc4(nSend1, PetscInt*, sbuffs1, nSend1, PetscInt*, rbuffs1, nSend1, MPI_Request*, sreqs1, nSend1, MPI_Request*, rreqs1);CHKERRQ(ierr); */
        /* PetscFree4(sbuffs1,rbuffs1,sreqs1,rreqs1); */
        for (proc=0,nSend1=0;proc<size;proc++){
          /* count ghosts */
          ierr = PetscCDSizeAt(deleted_list,proc,&n);CHKERRQ(ierr);
          if (n>0){
#define CHUNCK_SIZE 100
            PetscInt    *sbuff,*pt;
            MPI_Request *request;
            assert(n%2==0);
            n /= 2;
            ierr = PetscMalloc((2 + 2*n + n*CHUNCK_SIZE)*sizeof(PetscInt) + 2*sizeof(MPI_Request), &sbuff);CHKERRQ(ierr);
            /* PetscMalloc4(2+2*n,PetscInt,sbuffs1[nSend1],n*CHUNCK_SIZE,PetscInt,rbuffs1[nSend1],1,MPI_Request,rreqs2[nSend1],1,MPI_Request,sreqs2[nSend1]); */
            /* save requests */
            sbuffs1[nSend1] = sbuff;
            request = (MPI_Request*)sbuff;
            sbuff = pt = (PetscInt*)(request+1);
            *pt++ = n; *pt++ = rank;

            ierr = PetscCDGetHeadPos(deleted_list,proc,&pos);CHKERRQ(ierr);
            while(pos){
              PetscInt lid0, cpid, gid;
              ierr = PetscLLNGetID(pos, &cpid);CHKERRQ(ierr);
              gid = (PetscInt)PetscRealPart(cpcol_gid[cpid]);
              ierr = PetscCDGetNextPos(deleted_list,proc,&pos);CHKERRQ(ierr);
              ierr = PetscLLNGetID(pos, &lid0);CHKERRQ(ierr);
              ierr = PetscCDGetNextPos(deleted_list,proc,&pos);CHKERRQ(ierr);
              *pt++ = gid; *pt++ = lid0;
            }
            /* send request tag1 [n, proc, n*[gid1,lid0] ] */
            ierr = MPI_Isend(sbuff, 2*n+2, MPIU_INT, proc, tag1, wcomm, request);CHKERRQ(ierr);
            /* post recieve */
            request = (MPI_Request*)pt;
            rreqs2[nSend1] = request; /* cache recv request */
            pt = (PetscInt*)(request+1);
            ierr = MPI_Irecv(pt, n*CHUNCK_SIZE, MPIU_INT, proc, tag2, wcomm, request);CHKERRQ(ierr);
            /* clear list */
            ierr = PetscCDRemoveAll(deleted_list, proc);CHKERRQ(ierr);
            nSend1++;
          }
        }
        /* recieve requests, send response, clear lists */
        kk = nactive_edges;
        ierr = MPI_Allreduce(&kk,&nactive_edges,1,MPIU_INT,MPI_SUM,wcomm);CHKERRQ(ierr); /* not correct syncronization and global */
        nSend2 = 0;
        while(1){
#define BF_SZ 10000
          PetscMPIInt flag,count;
          PetscInt    rbuff[BF_SZ],*pt,*pt2,*pt3,count2,*sbuff,count3;
          MPI_Request *request;
          ierr = MPI_Iprobe(MPI_ANY_SOURCE, tag1, wcomm, &flag, &status);CHKERRQ(ierr);
          if (!flag) break;
          ierr = MPI_Get_count(&status, MPIU_INT, &count);CHKERRQ(ierr);
          if (count > BF_SZ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"buffer too small for recieve: %d",count);
          proc = status.MPI_SOURCE;
          /* recieve request tag1 [n, proc, n*[gid1,lid0] ] */
          ierr = MPI_Recv(rbuff, count, MPIU_INT, proc, tag1, wcomm, &status);CHKERRQ(ierr);
          /* count sends */
          pt = rbuff; count3 = count2 = 0;
          n = *pt++; kk = *pt++;           assert(kk==proc);
          while(n--){
            PetscInt gid1=*pt++, lid1=gid1-my0; kk=*pt++;  assert(lid1>=0 && lid1<nloc);
            if (lid_matched[lid1]){
              PetscPrintf(PETSC_COMM_SELF,"\t *** [%d]%s %d) ERROR recieved deleted gid %d, deleted by (lid) %d from proc %d\n",rank,__FUNCT__,sub_it,gid1,kk);
              PetscSleep(1);
            }
            assert(!lid_matched[lid1]);
            lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */
            ierr = PetscCDSizeAt(agg_llists, lid1, &kk);CHKERRQ(ierr);
            count2 += kk + 2;
            count3++; /* number of verts requested (n) */
          }
          assert(pt-rbuff==count);
          if (count2 > count3*CHUNCK_SIZE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Irecv will be too small: %d",count2);
          /* send tag2 *[lid0, n, n*[gid] ] */
          ierr = PetscMalloc(count2*sizeof(PetscInt) + sizeof(MPI_Request), &sbuff);CHKERRQ(ierr);
          request = (MPI_Request*)sbuff;
          sreqs2[nSend2++] = request; /* cache request */
          if (nSend2==REQ_BF_SIZE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"buffer too small for requests: %d",nSend2);
          pt2 = sbuff = (PetscInt*)(request+1);
          pt  = rbuff;
          n = *pt++; kk = *pt++;           assert(kk==proc);
          while(n--){
            /* read [n, proc, n*[gid1,lid0] */
            PetscInt gid1=*pt++, lid1=gid1-my0, lid0=*pt++;   assert(lid1>=0 && lid1<nloc);
            /* write [lid0, n, n*[gid] ] */
            *pt2++ = lid0;
            pt3 = pt2++; /* save pointer for later */
            /* for (pos=PetscCDGetHeadPos(agg_llists,lid1) ; pos ; pos=PetscCDGetNextPos(agg_llists,lid1,pos)){ */
            ierr = PetscCDGetHeadPos(agg_llists,lid1,&pos);CHKERRQ(ierr);
            while(pos){
              PetscInt gid;
              ierr = PetscLLNGetID(pos, &gid);CHKERRQ(ierr);
              ierr = PetscCDGetNextPos(agg_llists,lid1,&pos);CHKERRQ(ierr);
              *pt2++ = gid;
            }
            *pt3 = (pt2-pt3)-1;
            /* clear list */
            ierr = PetscCDRemoveAll(agg_llists, lid1);CHKERRQ(ierr);
          }
          assert(pt2-sbuff==count2); assert(pt-rbuff==count);
          /* send requested data tag2 *[lid0, n, n*[gid1] ] */
          ierr = MPI_Isend(sbuff, count2, MPIU_INT, proc, tag2, wcomm, request);CHKERRQ(ierr);
        }

        /* recieve tag2 *[lid0, n, n*[gid] ] */
        for (kk=0;kk<nSend1;kk++){
          PetscMPIInt count;
          MPI_Request *request;
          PetscInt    *pt, *pt2;
          request = rreqs2[kk]; /* no need to free -- buffer is in 'sbuffs1' */
          ierr = MPI_Wait(request, &status);CHKERRQ(ierr);
          ierr = MPI_Get_count(&status, MPIU_INT, &count);CHKERRQ(ierr);
          pt = pt2 = (PetscInt*)(request+1);
          while(pt-pt2 < count){
            PetscInt lid0 = *pt++, n = *pt++;           assert(lid0>=0 && lid0<nloc);
            while(n--){
              PetscInt gid1 = *pt++;
              ierr = PetscCDAppendID(agg_llists, lid0, gid1);CHKERRQ(ierr);
            }
          }
          assert(pt-pt2==count);
        }

        /* wait for tag1 isends */
        while(nSend1--){
          MPI_Request *request;
          request = (MPI_Request*)sbuffs1[nSend1];
          ierr = MPI_Wait(request, &status);CHKERRQ(ierr);
          ierr = PetscFree(request);CHKERRQ(ierr);
        }
        ierr = PetscFree(sbuffs1);CHKERRQ(ierr);

        /* wait for tag2 isends */
        while(nSend2--){
          MPI_Request *request = sreqs2[nSend2];
          ierr = MPI_Wait(request, &status);CHKERRQ(ierr);
          ierr = PetscFree(request);CHKERRQ(ierr);
        }

        ierr = VecRestoreArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr);
        ierr = VecRestoreArray(ghostMaxPE, &cpcol_max_pe);CHKERRQ(ierr);

        /* get 'cpcol_matched' - use locMaxPE, ghostMaxEdge, cpcol_max_ew */
        for (kk=0,gid=my0;kk<nloc;kk++,gid++) {
          PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0;
          ierr = VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr); /* set with GID */
        }
        ierr = VecAssemblyBegin(locMaxPE);CHKERRQ(ierr);
        ierr = VecAssemblyEnd(locMaxPE);CHKERRQ(ierr);
        ierr = VecScatterBegin(mpimat->Mvctx,locMaxPE,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr =   VecScatterEnd(mpimat->Mvctx,locMaxPE,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr = VecGetArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr);
        ierr = VecGetLocalSize(mpimat->lvec, &n);CHKERRQ(ierr);
        for (kk=0;kk<n;kk++) {
          cpcol_matched[kk] = (PetscBool)(PetscRealPart(cpcol_max_ew[kk]) != 0.0);
        }

        ierr = VecRestoreArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr);
      } /* size > 1 */

      /* compute 'locMaxEdge' */
      ierr = VecRestoreArray(locMaxEdge, &lid_max_ew);CHKERRQ(ierr);
      for (kk=0,gid=my0;kk<nloc;kk++,gid++){
        PetscReal   max_e = 0.,tt;
        PetscScalar vval;
        PetscInt    lid = kk;
        if (lid_matched[lid]) vval = 0.;
        else {
          ii = matA->i; n = ii[lid+1] - ii[lid]; idx = matA->j + ii[lid];
          ap = matA->a + ii[lid];
          for (jj=0; jj<n; jj++) {
            PetscInt lidj = idx[jj];
            if (lid_matched[lidj]) continue; /* this is new - can change local max */
            if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]);
          }
          if (lid_cprowID && (ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
            ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
            ap = matB->a + ii[ix];
            idx = matB->j + ii[ix];
            for (jj=0 ; jj<n ; jj++) {
              PetscInt lidj = idx[jj];
              if (cpcol_matched[lidj]) continue;
              if ((tt=PetscRealPart(ap[jj])) > max_e) max_e = tt;
            }
          }
        }
        vval = (PetscScalar)max_e;
        ierr = VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr); /* set with GID */
      }
      ierr = VecAssemblyBegin(locMaxEdge);CHKERRQ(ierr);
      ierr = VecAssemblyEnd(locMaxEdge);CHKERRQ(ierr);

      if (size>1 && sub_it != n_sub_its-1){
        /* compute 'cpcol_max_ew' */
        ierr = VecScatterBegin(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr =   VecScatterEnd(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr = VecGetArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr);
        ierr = VecGetArray(locMaxEdge, &lid_max_ew);CHKERRQ(ierr);

        /* compute 'cpcol_max_pe' */
        for (kk=0,gid=my0;kk<nloc;kk++,gid++){
          PetscInt    lid = kk;
          PetscReal   ew,v1_max_e,v0_max_e=PetscRealPart(lid_max_ew[lid]);
          PetscScalar vval;
          PetscMPIInt max_pe=rank,pe;
          if (lid_matched[lid]) vval = (PetscScalar)rank;
          else if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
            ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
            ap = matB->a + ii[ix];
            idx = matB->j + ii[ix];
            for (jj=0 ; jj<n ; jj++) {
              PetscInt lidj = idx[jj];
              if (cpcol_matched[lidj]) continue;
              ew = PetscRealPart(ap[jj]); v1_max_e = PetscRealPart(cpcol_max_ew[lidj]);
              /* get max pe that has a max_e == to this edge w */
              if ((pe=cpcol_pe[idx[jj]]) > max_pe && ew > v1_max_e - 1.e-12 && ew > v0_max_e - 1.e-12) max_pe = pe;
              assert(ew < v0_max_e + 1.e-12 && ew < v1_max_e + 1.e-12);
            }
            vval = (PetscScalar)max_pe;
          }
          ierr = VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr);
        }
        ierr = VecAssemblyBegin(locMaxPE);CHKERRQ(ierr);
        ierr = VecAssemblyEnd(locMaxPE);CHKERRQ(ierr);

        ierr = VecScatterBegin(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr =   VecScatterEnd(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr = VecGetArray(ghostMaxPE, &cpcol_max_pe);CHKERRQ(ierr);
        ierr = VecRestoreArray(locMaxEdge, &lid_max_ew);CHKERRQ(ierr);
      } /* deal with deleted ghost */
      if (verbose>2) PetscPrintf(wcomm,"\t[%d]%s %d.%d: %d active edges.\n",
                                rank,__FUNCT__,iter,sub_it,nactive_edges);
      if (!nactive_edges) break;
    } /* sub_it loop */

    /* clean up iteration */
    ierr = PetscFree(Edges);CHKERRQ(ierr);
    if (mpimat){
      ierr = VecRestoreArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr);
      ierr = VecDestroy(&ghostMaxEdge);CHKERRQ(ierr);
      ierr = VecRestoreArray(ghostMaxPE, &cpcol_max_pe);CHKERRQ(ierr);
      ierr = VecDestroy(&ghostMaxPE);CHKERRQ(ierr);
      ierr = PetscFree(cpcol_pe);CHKERRQ(ierr);
      ierr = PetscFree(cpcol_matched);CHKERRQ(ierr);
    }

    ierr = VecDestroy(&locMaxEdge);CHKERRQ(ierr);
    ierr = VecDestroy(&locMaxPE);CHKERRQ(ierr);

    if (mpimat){
      ierr = VecRestoreArray(mpimat->lvec, &cpcol_gid);CHKERRQ(ierr);
    }

    /* create next G if needed */
    if (iter == n_iter) { /* hard wired test - need to look at full surrounded nodes or something */
      ierr = MatDestroy(&P);CHKERRQ(ierr);
      ierr = MatDestroy(&cMat);CHKERRQ(ierr);
      break;
    } else {
      Vec diag;
      /* add identity for unmatched vertices so they stay alive */
      for (kk=0,gid=my0;kk<nloc;kk++,gid++){
        if (!lid_matched[kk]) {
          gid = kk+my0;
          ierr = MatGetRow(cMat,gid,&n,0,0);CHKERRQ(ierr);
          if (n>1){
            ierr = MatSetValues(P,1,&gid,1,&gid,&one,INSERT_VALUES);CHKERRQ(ierr);
          }
          ierr = MatRestoreRow(cMat,gid,&n,0,0);CHKERRQ(ierr);
        }
      }
      ierr = MatAssemblyBegin(P,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(P,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

      /* project to make new graph with colapsed edges */
      ierr = MatPtAP(cMat,P,MAT_INITIAL_MATRIX,1.0,&tMat);CHKERRQ(ierr);
      ierr = MatDestroy(&P);CHKERRQ(ierr);
      ierr = MatDestroy(&cMat);CHKERRQ(ierr);
      cMat = tMat;
      ierr = MatGetVecs(cMat, &diag, 0);CHKERRQ(ierr);
      ierr = MatGetDiagonal(cMat, diag);CHKERRQ(ierr); /* effectively PCJACOBI */
      ierr = VecReciprocal(diag);CHKERRQ(ierr);
      ierr = VecSqrtAbs(diag);CHKERRQ(ierr);
      ierr = MatDiagonalScale(cMat, diag, diag);CHKERRQ(ierr);
      ierr = VecDestroy(&diag);CHKERRQ(ierr);
    }
  } /* coarsen iterator */

  /* make fake matrix */
  if (size>1){
    Mat        mat;
    PetscCDPos pos;
    PetscInt   gid, NN, MM, jj = 0, mxsz = 0;

    for (kk=0;kk<nloc;kk++){
      ierr = PetscCDSizeAt(agg_llists, kk, &jj);CHKERRQ(ierr);
      if (jj > mxsz)  mxsz = jj;
    }
    ierr = MatGetSize(a_Gmat, &MM, &NN);CHKERRQ(ierr);
    if (mxsz > MM-nloc) mxsz = MM-nloc;

    ierr = MatCreateAIJ(wcomm, nloc, nloc,PETSC_DETERMINE, PETSC_DETERMINE,0, 0, mxsz, 0, &mat);CHKERRQ(ierr);

    /* */
    for (kk=0,gid=my0;kk<nloc;kk++,gid++){
      /* for (pos=PetscCDGetHeadPos(agg_llists,kk) ; pos ; pos=PetscCDGetNextPos(agg_llists,kk,pos)){ */
      ierr = PetscCDGetHeadPos(agg_llists,kk,&pos);CHKERRQ(ierr);
      while(pos){
        PetscInt gid1;
        ierr = PetscLLNGetID(pos, &gid1);CHKERRQ(ierr);
        ierr = PetscCDGetNextPos(agg_llists,kk,&pos);CHKERRQ(ierr);

        if (gid1 < my0 || gid1 >= my0+nloc) {
          ierr = MatSetValues(mat,1,&gid,1,&gid1,&one,ADD_VALUES);CHKERRQ(ierr);
        }
      }
    }
    ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

    ierr = PetscCDSetMat(agg_llists, mat);CHKERRQ(ierr);
  }

  ierr = PetscFree(lid_cprowID);CHKERRQ(ierr);
  ierr = PetscFree(lid_gid);CHKERRQ(ierr);
  ierr = PetscFree(lid_matched);CHKERRQ(ierr);
  ierr = PetscCDDestroy(deleted_list);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}