예제 #1
0
파일: mis.c 프로젝트: erdc-cm/petsc-dev
/*
   maxIndSetAgg - Greedy maximal independent set (MIS) pass over the graph stored in Gmat that
   builds one aggregate list per selected vertex.

   Input Parameters:
+  perm        - order in which the local vertices are visited; its entries are used as local row IDs
.  Gmat        - graph matrix, either MATMPIAIJ or MATSEQAIJ (the latter is only asserted)
.  strict_aggs - PETSC_TRUE:  lists exist for local vertices only and store global IDs;
                 PETSC_FALSE: lists exist for local vertices and ghosts, and store local indices
                 (a ghost is stored as nloc + its compressed column ID in the off-diagonal block)
-  verbose     - 0: silent, 1: print the local counts, anything else: print globally summed counts

   Output Parameter:
.  a_locals_llist - if non-NULL, receives the PetscCoarsenData holding the aggregate lists

   Notes:
   Vertex states are carried as PetscScalar so that they can be moved with the matrix scatter
   (mpimat->Mvctx).  A selected vertex stores its global ID as its state.
   A vertex with fewer than two entries in its diagonal-block row and no entries in its
   off-diagonal-block row is flagged as removed and is neither selected nor aggregated.
*/
PetscErrorCode maxIndSetAgg(IS perm,Mat Gmat,PetscBool strict_aggs,PetscInt verbose,PetscCoarsenData **a_locals_llist)
{
  PetscErrorCode   ierr;
  PetscBool        isMPI;
  Mat_SeqAIJ       *matA, *matB = 0;
  MPI_Comm         wcomm = ((PetscObject)Gmat)->comm;
  Vec              locState, ghostState;
  PetscInt         num_fine_ghosts,kk,n,ix,j,*idx,*ii,iter,Iend,my0,nremoved;
  Mat_MPIAIJ       *mpimat = 0;
  PetscScalar      *cpcol_gid,*cpcol_state;
  PetscMPIInt      mype,npe;
  const PetscInt   *perm_ix;
  PetscInt         nDone, nselected = 0;
  const PetscInt   nloc = Gmat->rmap->n;
  PetscInt         *lid_cprowID, *lid_gid;
  PetscBool        *lid_removed;
  PetscScalar      *lid_parent_gid = PETSC_NULL; /* only used for strict aggs */
  PetscScalar      *lid_state;
  PetscCoarsenData *agg_lists;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(wcomm, &mype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(wcomm, &npe);CHKERRQ(ierr);

  /* get submatrices: matA is the diagonal (local) block, matB the off-diagonal (ghost) block */
  ierr = PetscObjectTypeCompare((PetscObject)Gmat, MATMPIAIJ, &isMPI);CHKERRQ(ierr);
  if (isMPI) {
    mpimat = (Mat_MPIAIJ*)Gmat->data;
    matA = (Mat_SeqAIJ*)mpimat->A->data;
    matB = (Mat_SeqAIJ*)mpimat->B->data;
    /* force compressed storage of B */
    matB->compressedrow.check = PETSC_TRUE;
    ierr = MatCheckCompressedRow(mpimat->B,&matB->compressedrow,matB->i,Gmat->rmap->n,-1.0);CHKERRQ(ierr);
    assert(matB->compressedrow.use);
  } else {
    PetscBool      isAIJ;
    ierr = PetscObjectTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isAIJ);CHKERRQ(ierr);
    assert(isAIJ);
    matA = (Mat_SeqAIJ*)Gmat->data;
  }
  assert(matA && !matA->compressedrow.use);
  assert(matB==0 || matB->compressedrow.use);
  /* get vector used as the source of all scatters to the ghost layout */
  ierr = MatGetVecs(Gmat, &locState, 0);CHKERRQ(ierr);

  ierr = MatGetOwnershipRange(Gmat,&my0,&Iend);CHKERRQ(ierr);

  if (mpimat) {
    PetscInt gid;
    /* scatter every vertex's global ID so that lvec holds the GID of each ghost column */
    for (kk=0,gid=my0;kk<nloc;kk++,gid++) {
      PetscScalar v = (PetscScalar)(gid);
      ierr = VecSetValues(locState, 1, &gid, &v, INSERT_VALUES);CHKERRQ(ierr); /* set with GID */
    }
    ierr = VecAssemblyBegin(locState);CHKERRQ(ierr);
    ierr = VecAssemblyEnd(locState);CHKERRQ(ierr);
    ierr = VecScatterBegin(mpimat->Mvctx,locState,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr =   VecScatterEnd(mpimat->Mvctx,locState,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    /* ghost GIDs stay checked out in 'cpcol_gid' for the whole MIS loop; restored near the end */
    ierr = VecGetArray(mpimat->lvec, &cpcol_gid);CHKERRQ(ierr);
    ierr = VecDuplicate(mpimat->lvec, &ghostState);CHKERRQ(ierr); /* need 2nd compressed col. of off proc data */
    ierr = VecGetLocalSize(mpimat->lvec, &num_fine_ghosts);CHKERRQ(ierr);
    ierr = VecSet(ghostState, (PetscScalar)((PetscReal)NOT_DONE));CHKERRQ(ierr); /* set with UNKNOWN state */
  } else num_fine_ghosts = 0;

  ierr = PetscMalloc(nloc*sizeof(PetscInt), &lid_cprowID);CHKERRQ(ierr);
  ierr = PetscMalloc((nloc+1)*sizeof(PetscInt), &lid_gid);CHKERRQ(ierr); /* explicit array needed */
  ierr = PetscMalloc(nloc*sizeof(PetscBool), &lid_removed);CHKERRQ(ierr); /* explicit array needed */
  if (strict_aggs) {
    ierr = PetscMalloc((nloc+1)*sizeof(PetscScalar), &lid_parent_gid);CHKERRQ(ierr);
  }
  ierr = PetscMalloc((nloc+1)*sizeof(PetscScalar), &lid_state);CHKERRQ(ierr);

  /* has ghost nodes for !strict and uses local indexing (yuck) */
  ierr = PetscCDCreate(strict_aggs ? nloc : num_fine_ghosts+nloc, &agg_lists);CHKERRQ(ierr);
  if (a_locals_llist) *a_locals_llist = agg_lists;

  /* need an inverse map - locals */
  for (kk=0;kk<nloc;kk++) {
    lid_cprowID[kk] = -1; lid_removed[kk] = PETSC_FALSE;
    if (strict_aggs) {
      lid_parent_gid[kk] = -1.0;
    }
    lid_gid[kk] = kk + my0;
    lid_state[kk] =  (PetscScalar)((PetscReal)NOT_DONE);
  }
  /* set index into compressed row 'lid_cprowID': local row -> row of compressed B, or -1 if the row has no ghosts */
  if (matB) {
    for (ix=0; ix<matB->compressedrow.nrows; ix++) {
      PetscInt lid = matB->compressedrow.rindex[ix];
      lid_cprowID[lid] = ix;
    }
  }
  /* MIS */
  iter = nremoved = nDone = 0;
  ierr = ISGetIndices(perm, &perm_ix);CHKERRQ(ierr);
  /* the condition is always true; the loop is left through the 'break' statements at its bottom */
  while (nDone < nloc || PETSC_TRUE) { /* asynchronous not implemented */
    iter++;
    if (mpimat) {
      ierr = VecGetArray(ghostState, &cpcol_state);CHKERRQ(ierr);
    }
    /* check all vertices */
    for (kk=0;kk<nloc;kk++){
      PetscInt lid = perm_ix[kk];
      NState state = (NState)PetscRealPart(lid_state[lid]);
      if (lid_removed[lid]) continue;
      if (state == NOT_DONE) {
        /* parallel test, delete if selected ghost */
        PetscBool isOK = PETSC_TRUE;
        if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
          ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
          idx = matB->j + ii[ix];
          for (j=0 ; j<n ; j++) {
            PetscInt cpid = idx[j]; /* compressed row ID in B mat */
            PetscInt gid = (PetscInt)PetscRealPart(cpcol_gid[cpid]);
            NState statej = (NState)PetscRealPart(cpcol_state[cpid]);
            /* an undecided ghost with a larger GID has priority: defer this vertex to a later sweep */
            if (statej == NOT_DONE && gid >= Iend) { /* should be (pe>mype), use gid as pe proxy */
              isOK = PETSC_FALSE; /* can not delete */
              break;
            } else assert(!IS_SELECTED(statej)); /* lid is now deleted, do it */
          }
        } /* parallel test */
        if (isOK){ /* select or remove this vertex */
          nDone++;
          /* check for singleton */
          ii = matA->i; n = ii[lid+1] - ii[lid];
          if (n < 2) {
            /* if I have any ghost adj then not a sing */
            ix = lid_cprowID[lid];
            if (ix==-1 || (matB->compressedrow.i[ix+1]-matB->compressedrow.i[ix])==0){
              nremoved++;
              lid_removed[lid] = PETSC_TRUE;
              /* should select this because it is technically in the MIS but lets not */
              /* lid_state[lid] = (PetscScalar)(lid+my0); */
              continue; /* one local adj (me) and no ghost - singleton */
            }
          }
          /* SELECTED state encoded with global index */
          lid_state[lid] = (PetscScalar)(lid+my0); /* needed???? */
          nselected++;
          /* the selected vertex heads its own list (GID for strict aggregates, local ID otherwise) */
          if (strict_aggs) {
            ierr = PetscCDAppendID(agg_lists, lid, lid+my0);CHKERRQ(ierr);
          } else {
            ierr = PetscCDAppendID(agg_lists, lid, lid);CHKERRQ(ierr);
          }
          /* delete local adj */
          idx = matA->j + ii[lid];
          for (j=0; j<n; j++) {
            PetscInt lidj = idx[j];
            NState statej = (NState)PetscRealPart(lid_state[lidj]);
            if (statej == NOT_DONE){
              nDone++;
              /* id_llist[lidj] = id_llist[lid]; id_llist[lid] = lidj; */ /* insert 'lidj' into head of llist */
              if (strict_aggs) {
                ierr = PetscCDAppendID(agg_lists, lid, lidj+my0);CHKERRQ(ierr);
              } else {
                ierr = PetscCDAppendID(agg_lists, lid, lidj);CHKERRQ(ierr);
              }
              lid_state[lidj] = (PetscScalar)(PetscReal)DELETED;  /* delete this */
            }
          }

          /* delete ghost adj of lid - deleted ghost done later for strict_aggs */
          if (!strict_aggs) {
            if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
              ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
              idx = matB->j + ii[ix];
              for (j=0 ; j<n ; j++) {
                PetscInt cpid = idx[j]; /* compressed row ID in B mat */
                NState statej = (NState)PetscRealPart(cpcol_state[cpid]);        assert(!IS_SELECTED(statej));
                if (statej == NOT_DONE) {
                  /* cpcol_state[cpid] = (PetscScalar)DELETED; this should happen later ... */
                  /* id_llist[lidj] = id_llist[lid]; id_llist[lid] = lidj; */ /* insert 'lidj' into head of llist */
                  ierr = PetscCDAppendID(agg_lists, lid, nloc+cpid);CHKERRQ(ierr);
                }
              }
            }
          }
        } /* selected */
      } /* not done vertex */
    } /* vertex loop */

    /* update ghost states and count todos */
    if (mpimat) {
      ierr = VecRestoreArray(ghostState, &cpcol_state);CHKERRQ(ierr);
      /* put lid state in 'locState' */
      ierr = VecSetValues(locState, nloc, lid_gid, lid_state, INSERT_VALUES);CHKERRQ(ierr);
      ierr = VecAssemblyBegin(locState);CHKERRQ(ierr);
      ierr = VecAssemblyEnd(locState);CHKERRQ(ierr);
      /* scatter states, check for done */
      ierr = VecScatterBegin(mpimat->Mvctx,locState,ghostState,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr =   VecScatterEnd(mpimat->Mvctx,locState,ghostState,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      /* delete locals from selected ghosts */
      ierr = VecGetArray(ghostState, &cpcol_state);CHKERRQ(ierr);
      ii = matB->compressedrow.i;
      for (ix=0; ix<matB->compressedrow.nrows; ix++) {
        PetscInt lid = matB->compressedrow.rindex[ix]; /* local boundary node */
        NState   state = (NState)PetscRealPart(lid_state[lid]);
        if (state == NOT_DONE) {
          /* look at ghosts */
          n = ii[ix+1] - ii[ix];
          idx = matB->j + ii[ix];
          for (j=0 ; j<n ; j++) {
            PetscInt cpid = idx[j]; /* compressed row ID in B mat */
            NState statej = (NState)PetscRealPart(cpcol_state[cpid]);
            if (IS_SELECTED(statej)) { /* lid is now deleted, do it */
              nDone++;
              lid_state[lid] = (PetscScalar)(PetscReal)DELETED; /* delete this */
              if (!strict_aggs) {
                PetscInt lidj = nloc + cpid;
                /* id_llist[lid] = id_llist[lidj]; id_llist[lidj] = lid; */ /* insert 'lid' into head of ghost llist */
                ierr = PetscCDAppendID(agg_lists, lidj, lid);CHKERRQ(ierr);
              } else {
                PetscInt sgid = (PetscInt)PetscRealPart(cpcol_gid[cpid]);
                lid_parent_gid[lid] = (PetscScalar)sgid; /* keep track of proc that I belong to */
              }
              break; /* the first selected ghost found claims this vertex */
            }
          }
        }
      }
      ierr = VecRestoreArray(ghostState, &cpcol_state);CHKERRQ(ierr);

      /* all done? */
      {
        PetscInt t1, t2;
        t1 = nloc - nDone; assert(t1>=0);
        ierr = MPI_Allreduce(&t1, &t2, 1, MPIU_INT, MPI_SUM, wcomm);CHKERRQ(ierr); /* synchronous version */
        if (t2 == 0) break;
      }
    } else break; /* all done */
  } /* outer parallel MIS loop */
  ierr = ISRestoreIndices(perm,&perm_ix);CHKERRQ(ierr);

  if (verbose) {
    /* PetscInt arguments are printed with %D so the output is also correct for 64-bit indices;
       the rank (PetscMPIInt) keeps %d */
    if (verbose == 1) {
      ierr = PetscPrintf(wcomm,"\t[%d]%s removed %D of %D vertices.  %D selected.\n",mype,__FUNCT__,nremoved,nloc,nselected);CHKERRQ(ierr);
    } else {
      /* reuse n, kk and j as scratch: global removed count, global row count, global selected count */
      ierr = MPI_Allreduce(&nremoved, &n, 1, MPIU_INT, MPI_SUM, wcomm);CHKERRQ(ierr);
      ierr = MatGetSize(Gmat, &kk, &j);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&nselected, &j, 1, MPIU_INT, MPI_SUM, wcomm);CHKERRQ(ierr);
      ierr = PetscPrintf(wcomm,"\t[%d]%s removed %D of %D vertices. (%D local)  %D selected.\n",mype,__FUNCT__,n,kk,nremoved,j);CHKERRQ(ierr);
    }
  }

  /* tell adj who my lid_parent_gid vertices belong to - fill in agg_lists selected ghost lists */
  if (strict_aggs && matB) {
    PetscScalar *cpcol_sel_gid;
    PetscInt cpid,*icpcol_gid;

    /* copy the ghost GIDs out of lvec so that its array can be handed back before lvec is reused */
    ierr = PetscMalloc(num_fine_ghosts*sizeof(PetscInt), &icpcol_gid);CHKERRQ(ierr);
    for (cpid=0; cpid<num_fine_ghosts; cpid++) icpcol_gid[cpid] = (PetscInt)PetscRealPart(cpcol_gid[cpid]);
    /* pair the VecGetArray() made during setup with its restore before scattering into lvec again */
    ierr = VecRestoreArray(mpimat->lvec, &cpcol_gid);CHKERRQ(ierr);

    /* get proc of deleted ghost */
    ierr = VecSetValues(locState, nloc, lid_gid, lid_parent_gid, INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecAssemblyBegin(locState);CHKERRQ(ierr);
    ierr = VecAssemblyEnd(locState);CHKERRQ(ierr);
    ierr = VecScatterBegin(mpimat->Mvctx,locState,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr =   VecScatterEnd(mpimat->Mvctx,locState,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mpimat->lvec, &cpcol_sel_gid);CHKERRQ(ierr); /* has pe that owns ghost */
    for (cpid=0; cpid<num_fine_ghosts; cpid++) {
      PetscInt sgid = (PetscInt)PetscRealPart(cpcol_sel_gid[cpid]);
      PetscInt gid = icpcol_gid[cpid];
      if (sgid >= my0 && sgid < Iend) { /* I own this deleted */
        PetscInt slid = sgid - my0;
        /* id_llist[lidj] = id_llist[lid]; id_llist[lid] = lidj; */ /* insert 'lidj' into head of llist */
        ierr = PetscCDAppendID(agg_lists, slid, gid);CHKERRQ(ierr);
        assert(IS_SELECTED((NState)PetscRealPart(lid_state[slid])));
      }
    }
    ierr = VecRestoreArray(mpimat->lvec, &cpcol_sel_gid);CHKERRQ(ierr);
    ierr = PetscFree(icpcol_gid);CHKERRQ(ierr);
  } else if (matB) {
    ierr = VecRestoreArray(mpimat->lvec, &cpcol_gid);CHKERRQ(ierr);
  }

  /* cache IS of removed nodes, use 'lid_gid' */
  /* for (kk=n=0,ix=my0;kk<nloc;kk++,ix++) { */
  /*   if (lid_removed[kk]) lid_gid[n++] = ix; */
  /* } */
  /* assert(n==nremoved); */
  /* ierr = PetscCDSetRemovedIS(agg_lists, wcomm, n, lid_gid);CHKERRQ(ierr); */

  ierr = PetscFree(lid_cprowID);CHKERRQ(ierr);
  ierr = PetscFree(lid_gid);CHKERRQ(ierr);
  ierr = PetscFree(lid_removed);CHKERRQ(ierr);
  if (strict_aggs) {
    ierr = PetscFree(lid_parent_gid);CHKERRQ(ierr);
  }
  ierr = PetscFree(lid_state);CHKERRQ(ierr);

  if (mpimat){
    ierr = VecDestroy(&ghostState);CHKERRQ(ierr);
  }
  ierr = VecDestroy(&locState);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
예제 #2
0
파일: mis.c 프로젝트: haubentaucher/petsc
/*
   maxIndSetAgg - Greedy maximal independent set (MIS) pass over the graph stored in Gmat that
   builds one aggregate list per selected vertex.

   Input Parameters:
+  perm        - order in which the local vertices are visited; its entries are used as local row IDs
.  Gmat        - graph matrix; handled as MATMPIAIJ when the type matches, otherwise its data is
                 treated as Mat_SeqAIJ (the MATSEQAIJ comparison result is not checked here)
-  strict_aggs - PETSC_TRUE:  lists exist for local vertices only and store global IDs;
                 PETSC_FALSE: lists exist for local vertices and ghosts, and store local indices
                 (a ghost is stored as nloc + its compressed column ID in the off-diagonal block)

   Output Parameter:
.  a_locals_llist - if non-NULL, receives the PetscCoarsenData holding the aggregate lists

   Notes:
   Vertex states are plain PetscInt values; a selected vertex stores its global ID as its state.
   Local values are pushed to the ghost columns with PetscSFBcast on a star forest built from
   mpimat->garray.
   A vertex with fewer than two entries in its diagonal-block row and no entries in its
   off-diagonal-block row is flagged as removed and is neither selected nor aggregated.
*/
PetscErrorCode maxIndSetAgg(IS perm,Mat Gmat,PetscBool strict_aggs,PetscCoarsenData **a_locals_llist)
{
    PetscErrorCode   ierr;
    Mat_SeqAIJ       *matA,*matB=NULL;
    Mat_MPIAIJ       *mpimat=NULL;
    MPI_Comm         comm;
    PetscInt         num_fine_ghosts,kk,n,ix,j,*idx,*ii,iter,Iend,my0,nremoved,gid,lid,cpid,lidj,sgid,t1,t2,slid,nDone,nselected=0,state,statej;
    PetscInt         *cpcol_gid,*cpcol_state,*lid_cprowID,*lid_gid,*cpcol_sel_gid,*icpcol_gid,*lid_state,*lid_parent_gid=NULL;
    PetscBool        *lid_removed;
    PetscBool        isMPI,isAIJ,isOK;
    const PetscInt   *perm_ix;
    const PetscInt   nloc = Gmat->rmap->n;
    PetscCoarsenData *agg_lists;
    PetscLayout      layout;
    PetscSF          sf;

    PetscFunctionBegin;
    ierr = PetscObjectGetComm((PetscObject)Gmat,&comm);
    CHKERRQ(ierr);

    /* get submatrices: matA is the diagonal (local) block, matB the off-diagonal (ghost) block */
    ierr = PetscObjectTypeCompare((PetscObject)Gmat,MATMPIAIJ,&isMPI);
    CHKERRQ(ierr);
    if (isMPI) {
        mpimat = (Mat_MPIAIJ*)Gmat->data;
        matA   = (Mat_SeqAIJ*)mpimat->A->data;
        matB   = (Mat_SeqAIJ*)mpimat->B->data;
        /* force compressed storage of B */
        ierr   = MatCheckCompressedRow(mpimat->B,matB->nonzerorowcnt,&matB->compressedrow,matB->i,Gmat->rmap->n,-1.0);
        CHKERRQ(ierr);
    } else {
        /* NOTE(review): isAIJ is computed but never tested; the cast below assumes a SeqAIJ matrix */
        ierr = PetscObjectTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isAIJ);
        CHKERRQ(ierr);
        matA = (Mat_SeqAIJ*)Gmat->data;
    }
    ierr = MatGetOwnershipRange(Gmat,&my0,&Iend);
    CHKERRQ(ierr);
    ierr = PetscMalloc1(nloc,&lid_gid);
    CHKERRQ(ierr); /* explicit array needed */
    if (mpimat) {
        /* lid_gid[k] = global ID of local vertex k; it is the root data of the first broadcast */
        for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
            lid_gid[kk] = gid;
        }
        ierr = VecGetLocalSize(mpimat->lvec, &num_fine_ghosts);
        CHKERRQ(ierr);
        ierr = PetscMalloc1(num_fine_ghosts,&cpcol_gid);
        CHKERRQ(ierr);
        ierr = PetscMalloc1(num_fine_ghosts,&cpcol_state);
        CHKERRQ(ierr);
        /* star forest with one leaf per ghost column; the remote roots are given by the global
           column indices in mpimat->garray, located through the layout returned by MatGetLayouts */
        ierr = PetscSFCreate(PetscObjectComm((PetscObject)Gmat),&sf);
        CHKERRQ(ierr);
        ierr = MatGetLayouts(Gmat,&layout,NULL);
        CHKERRQ(ierr);
        ierr = PetscSFSetGraphLayout(sf,layout,num_fine_ghosts,NULL,PETSC_COPY_VALUES,mpimat->garray);
        CHKERRQ(ierr);
        /* cpcol_gid[c] receives the global ID of ghost column c */
        ierr = PetscSFBcastBegin(sf,MPIU_INT,lid_gid,cpcol_gid);
        CHKERRQ(ierr);
        ierr = PetscSFBcastEnd(sf,MPIU_INT,lid_gid,cpcol_gid);
        CHKERRQ(ierr);
        /* every ghost starts out undecided */
        for (kk=0; kk<num_fine_ghosts; kk++) {
            cpcol_state[kk]=MIS_NOT_DONE;
        }
    } else num_fine_ghosts = 0;

    ierr = PetscMalloc1(nloc, &lid_cprowID);
    CHKERRQ(ierr);
    ierr = PetscMalloc1(nloc, &lid_removed);
    CHKERRQ(ierr); /* explicit array needed */
    if (strict_aggs) {
        ierr = PetscMalloc1(nloc,&lid_parent_gid);
        CHKERRQ(ierr);
    }
    ierr = PetscMalloc1(nloc,&lid_state);
    CHKERRQ(ierr);

    /* has ghost nodes for !strict and uses local indexing (yuck) */
    ierr = PetscCDCreate(strict_aggs ? nloc : num_fine_ghosts+nloc, &agg_lists);
    CHKERRQ(ierr);
    if (a_locals_llist) *a_locals_llist = agg_lists;

    /* need an inverse map - locals */
    for (kk=0; kk<nloc; kk++) {
        lid_cprowID[kk] = -1;
        lid_removed[kk] = PETSC_FALSE;
        if (strict_aggs) {
            /* the floating literal is converted to PetscInt -1; presumably "no parent assigned yet" */
            lid_parent_gid[kk] = -1.0;
        }
        lid_state[kk] = MIS_NOT_DONE;
    }
    /* set index into compressed row 'lid_cprowID': local row -> row of compressed B, or -1 if the row has no ghosts */
    if (matB) {
        for (ix=0; ix<matB->compressedrow.nrows; ix++) {
            lid = matB->compressedrow.rindex[ix];
            lid_cprowID[lid] = ix;
        }
    }
    /* MIS */
    iter = nremoved = nDone = 0;
    ierr = ISGetIndices(perm, &perm_ix);
    CHKERRQ(ierr);
    /* the condition is always true; the loop is left through the 'break' statements at its bottom */
    while (nDone < nloc || PETSC_TRUE) { /* asynchronous not implemented */
        iter++;
        /* check all vertices */
        for (kk=0; kk<nloc; kk++) {
            lid   = perm_ix[kk];
            state = lid_state[lid];
            if (lid_removed[lid]) continue;
            if (state == MIS_NOT_DONE) {
                /* parallel test, delete if selected ghost */
                isOK = PETSC_TRUE;
                if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
                    ii  = matB->compressedrow.i;
                    n = ii[ix+1] - ii[ix];
                    idx = matB->j + ii[ix];
                    for (j=0; j<n; j++) {
                        cpid   = idx[j]; /* compressed row ID in B mat */
                        gid    = cpcol_gid[cpid];
                        statej = cpcol_state[cpid];
                        /* an undecided ghost with a larger GID has priority: defer this vertex to a later sweep */
                        if (statej == MIS_NOT_DONE && gid >= Iend) { /* should be (pe>rank), use gid as pe proxy */
                            isOK = PETSC_FALSE; /* can not delete */
                            break;
                        }
                    }
                } /* parallel test */
                if (isOK) { /* select or remove this vertex */
                    nDone++;
                    /* check for singleton */
                    ii = matA->i;
                    n = ii[lid+1] - ii[lid];
                    if (n < 2) {
                        /* if I have any ghost adj then not a sing */
                        ix = lid_cprowID[lid];
                        if (ix==-1 || (matB->compressedrow.i[ix+1]-matB->compressedrow.i[ix])==0) {
                            nremoved++;
                            lid_removed[lid] = PETSC_TRUE;
                            /* should select this because it is technically in the MIS but lets not */
                            continue; /* one local adj (me) and no ghost - singleton */
                        }
                    }
                    /* SELECTED state encoded with global index */
                    lid_state[lid] = lid+my0; /* needed???? */
                    nselected++;
                    /* the selected vertex heads its own list (GID for strict aggregates, local ID otherwise) */
                    if (strict_aggs) {
                        ierr = PetscCDAppendID(agg_lists, lid, lid+my0);
                        CHKERRQ(ierr);
                    } else {
                        ierr = PetscCDAppendID(agg_lists, lid, lid);
                        CHKERRQ(ierr);
                    }
                    /* delete local adj */
                    idx = matA->j + ii[lid];
                    for (j=0; j<n; j++) {
                        lidj   = idx[j];
                        statej = lid_state[lidj];
                        if (statej == MIS_NOT_DONE) {
                            nDone++;
                            if (strict_aggs) {
                                ierr = PetscCDAppendID(agg_lists, lid, lidj+my0);
                                CHKERRQ(ierr);
                            } else {
                                ierr = PetscCDAppendID(agg_lists, lid, lidj);
                                CHKERRQ(ierr);
                            }
                            lid_state[lidj] = MIS_DELETED;  /* delete this */
                        }
                    }
                    /* delete ghost adj of lid - deleted ghost done later for strict_aggs */
                    if (!strict_aggs) {
                        if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
                            ii  = matB->compressedrow.i;
                            n = ii[ix+1] - ii[ix];
                            idx = matB->j + ii[ix];
                            for (j=0; j<n; j++) {
                                cpid   = idx[j]; /* compressed row ID in B mat */
                                statej = cpcol_state[cpid];
                                if (statej == MIS_NOT_DONE) {
                                    /* ghosts are stored after the nloc local entries */
                                    ierr = PetscCDAppendID(agg_lists, lid, nloc+cpid);
                                    CHKERRQ(ierr);
                                }
                            }
                        }
                    }
                } /* selected */
            } /* not done vertex */
        } /* vertex loop */

        /* update ghost states and count todos */
        if (mpimat) {
            /* scatter states, check for done */
            ierr = PetscSFBcastBegin(sf,MPIU_INT,lid_state,cpcol_state);
            CHKERRQ(ierr);
            ierr = PetscSFBcastEnd(sf,MPIU_INT,lid_state,cpcol_state);
            CHKERRQ(ierr);
            /* delete undecided boundary vertices that are adjacent to a ghost that is now selected */
            ii   = matB->compressedrow.i;
            for (ix=0; ix<matB->compressedrow.nrows; ix++) {
                lid   = matB->compressedrow.rindex[ix]; /* local boundary node */
                state = lid_state[lid];
                if (state == MIS_NOT_DONE) {
                    /* look at ghosts */
                    n   = ii[ix+1] - ii[ix];
                    idx = matB->j + ii[ix];
                    for (j=0; j<n; j++) {
                        cpid   = idx[j]; /* compressed row ID in B mat */
                        statej = cpcol_state[cpid];
                        if (MIS_IS_SELECTED(statej)) { /* lid is now deleted, do it */
                            nDone++;
                            lid_state[lid] = MIS_DELETED; /* delete this */
                            if (!strict_aggs) {
                                lidj = nloc + cpid;
                                ierr = PetscCDAppendID(agg_lists, lidj, lid);
                                CHKERRQ(ierr);
                            } else {
                                sgid = cpcol_gid[cpid];
                                lid_parent_gid[lid] = sgid; /* keep track of proc that I belong to */
                            }
                            break; /* the first selected ghost found claims this vertex */
                        }
                    }
                }
            }
            /* all done? sum the number of still-undecided local vertices over all ranks */
            t1   = nloc - nDone;
            ierr = MPI_Allreduce(&t1, &t2, 1, MPIU_INT, MPI_SUM, comm);
            CHKERRQ(ierr); /* synchronous version */
            if (t2 == 0) break;
        } else break; /* all done */
    } /* outer parallel MIS loop */
    ierr = ISRestoreIndices(perm,&perm_ix);
    CHKERRQ(ierr);
    ierr = PetscInfo3(Gmat,"\t removed %D of %D vertices.  %D selected.\n",nremoved,nloc,nselected);
    CHKERRQ(ierr);

    /* tell adj who my lid_parent_gid vertices belong to - fill in agg_lists selected ghost lists */
    if (strict_aggs && matB) {
        /* need to copy this to free buffer -- should do this globally */
        ierr = PetscMalloc1(num_fine_ghosts, &cpcol_sel_gid);
        CHKERRQ(ierr);
        ierr = PetscMalloc1(num_fine_ghosts, &icpcol_gid);
        CHKERRQ(ierr);
        for (cpid=0; cpid<num_fine_ghosts; cpid++) icpcol_gid[cpid] = cpcol_gid[cpid];

        /* get proc of deleted ghost: cpcol_sel_gid[c] receives the parent GID recorded by the owner of ghost c */
        ierr = PetscSFBcastBegin(sf,MPIU_INT,lid_parent_gid,cpcol_sel_gid);
        CHKERRQ(ierr);
        ierr = PetscSFBcastEnd(sf,MPIU_INT,lid_parent_gid,cpcol_sel_gid);
        CHKERRQ(ierr);
        for (cpid=0; cpid<num_fine_ghosts; cpid++) {
            sgid = cpcol_sel_gid[cpid];
            gid  = icpcol_gid[cpid];
            if (sgid >= my0 && sgid < Iend) { /* I own this deleted */
                slid = sgid - my0;
                /* append the ghost's global ID to the list of the local vertex it was assigned to */
                ierr = PetscCDAppendID(agg_lists, slid, gid);
                CHKERRQ(ierr);
            }
        }
        ierr = PetscFree(icpcol_gid);
        CHKERRQ(ierr);
        ierr = PetscFree(cpcol_sel_gid);
        CHKERRQ(ierr);
    }
    /* release the parallel-only resources */
    if (mpimat) {
        ierr = PetscSFDestroy(&sf);
        CHKERRQ(ierr);
        ierr = PetscFree(cpcol_gid);
        CHKERRQ(ierr);
        ierr = PetscFree(cpcol_state);
        CHKERRQ(ierr);
    }
    ierr = PetscFree(lid_cprowID);
    CHKERRQ(ierr);
    ierr = PetscFree(lid_gid);
    CHKERRQ(ierr);
    ierr = PetscFree(lid_removed);
    CHKERRQ(ierr);
    if (strict_aggs) {
        ierr = PetscFree(lid_parent_gid);
        CHKERRQ(ierr);
    }
    ierr = PetscFree(lid_state);
    CHKERRQ(ierr);
    PetscFunctionReturn(0);
}
예제 #3
0
파일: hem.c 프로젝트: erdc-cm/petsc-dev
PetscErrorCode heavyEdgeMatchAgg(IS perm,Mat a_Gmat,PetscInt verbose,PetscCoarsenData **a_locals_llist)
{
  PetscErrorCode   ierr;
  PetscBool        isMPI;
  MPI_Comm         wcomm = ((PetscObject)a_Gmat)->comm;
  PetscInt         sub_it,kk,n,ix,*idx,*ii,iter,Iend,my0;
  PetscMPIInt      rank,size;
  const PetscInt   nloc = a_Gmat->rmap->n,n_iter=6; /* need to figure out how to stop this */
  PetscInt         *lid_cprowID,*lid_gid;
  PetscBool        *lid_matched;
  Mat_SeqAIJ       *matA, *matB=0;
  Mat_MPIAIJ       *mpimat=0;
  PetscScalar      one=1.;
  PetscCoarsenData *agg_llists = PETSC_NULL,*deleted_list = PETSC_NULL;
  Mat              cMat,tMat,P;
  MatScalar        *ap;
  PetscMPIInt      tag1,tag2;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(wcomm, &rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(wcomm, &size);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(a_Gmat, &my0, &Iend);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(wcomm, &tag1);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(wcomm, &tag2);CHKERRQ(ierr);

  ierr = PetscMalloc(nloc*sizeof(PetscInt), &lid_gid);CHKERRQ(ierr); /* explicit array needed */
  ierr = PetscMalloc(nloc*sizeof(PetscInt), &lid_cprowID);CHKERRQ(ierr);
  ierr = PetscMalloc(nloc*sizeof(PetscBool), &lid_matched);CHKERRQ(ierr);

  ierr = PetscCDCreate(nloc, &agg_llists);CHKERRQ(ierr);
  /* ierr = PetscCDSetChuckSize(agg_llists, nloc+1);CHKERRQ(ierr); */
  *a_locals_llist = agg_llists;
  ierr = PetscCDCreate(size, &deleted_list);CHKERRQ(ierr);
  ierr = PetscCDSetChuckSize(deleted_list, 100);CHKERRQ(ierr);
  /* setup 'lid_gid' for scatters and add self to all lists */
  for (kk=0;kk<nloc;kk++) {
    lid_gid[kk] = kk + my0;
    ierr = PetscCDAppendID(agg_llists, kk, my0+kk);CHKERRQ(ierr);
  }

  /* make a copy of the graph, this gets destroyed in iterates */
  ierr = MatDuplicate(a_Gmat,MAT_COPY_VALUES,&cMat);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)a_Gmat, MATMPIAIJ, &isMPI);CHKERRQ(ierr);
  iter = 0;
  while(iter++ < n_iter) {
    PetscScalar    *cpcol_gid,*cpcol_max_ew,*cpcol_max_pe,*lid_max_ew;
    PetscBool      *cpcol_matched;
    PetscMPIInt    *cpcol_pe,proc;
    Vec            locMaxEdge,locMaxPE,ghostMaxEdge,ghostMaxPE;
    PetscInt       nEdges,n_nz_row,jj;
    Edge           *Edges;
    PetscInt       gid;
    const PetscInt *perm_ix, n_sub_its = 120;

    /* get submatrices of cMat */
    if (isMPI) {
      mpimat = (Mat_MPIAIJ*)cMat->data;
      matA = (Mat_SeqAIJ*)mpimat->A->data;
      matB = (Mat_SeqAIJ*)mpimat->B->data;
      /* force compressed storage of B */
      matB->compressedrow.check = PETSC_TRUE;
      ierr = MatCheckCompressedRow(mpimat->B,&matB->compressedrow,matB->i,cMat->rmap->n,-1.0);CHKERRQ(ierr);
      assert(matB->compressedrow.use);
    } else {
      matA = (Mat_SeqAIJ*)cMat->data;
    }
    assert(matA && !matA->compressedrow.use);
    assert(matB==0 || matB->compressedrow.use);

    /* set max edge on nodes */
    ierr = MatGetVecs(cMat, &locMaxEdge, 0);CHKERRQ(ierr);
    ierr = MatGetVecs(cMat, &locMaxPE, 0);CHKERRQ(ierr);

    /* get 'cpcol_pe' & 'cpcol_gid' & init. 'cpcol_matched' using 'mpimat->lvec' */
    if (mpimat) {
      Vec         vec; 
      PetscScalar vval;

      ierr = MatGetVecs(cMat, &vec, 0);CHKERRQ(ierr);
      /* cpcol_pe */
      vval = (PetscScalar)(rank);
      for (kk=0,gid=my0;kk<nloc;kk++,gid++) {
        ierr = VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr); /* set with GID */
      }
      ierr = VecAssemblyBegin(vec);CHKERRQ(ierr);
      ierr = VecAssemblyEnd(vec);CHKERRQ(ierr);
      ierr = VecScatterBegin(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecGetArray(mpimat->lvec, &cpcol_gid);CHKERRQ(ierr); /* get proc ID in 'cpcol_gid' */
      ierr = VecGetLocalSize(mpimat->lvec, &n);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscInt), &cpcol_pe);CHKERRQ(ierr);
      for (kk=0;kk<n;kk++) cpcol_pe[kk] = (PetscMPIInt)PetscRealPart(cpcol_gid[kk]);
      ierr = VecRestoreArray(mpimat->lvec, &cpcol_gid);CHKERRQ(ierr);

      /* cpcol_gid */
      for (kk=0,gid=my0;kk<nloc;kk++,gid++) {
        vval = (PetscScalar)(gid);
        ierr = VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr); /* set with GID */
      }
      ierr = VecAssemblyBegin(vec);CHKERRQ(ierr);
      ierr = VecAssemblyEnd(vec);CHKERRQ(ierr);
      ierr = VecScatterBegin(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecDestroy(&vec);CHKERRQ(ierr);
      ierr = VecGetArray(mpimat->lvec, &cpcol_gid);CHKERRQ(ierr); /* get proc ID in 'cpcol_gid' */

      /* cpcol_matched */
      ierr = VecGetLocalSize(mpimat->lvec, &n);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscBool), &cpcol_matched);CHKERRQ(ierr);
      for (kk=0;kk<n;kk++) cpcol_matched[kk] = PETSC_FALSE;
    }

    /* need an inverse map - locals */
    for (kk=0;kk<nloc;kk++) lid_cprowID[kk] = -1;
    /* set index into compressed row 'lid_cprowID' */
    if (matB) {
      ii = matB->compressedrow.i;
      for (ix=0; ix<matB->compressedrow.nrows; ix++) {
        lid_cprowID[matB->compressedrow.rindex[ix]] = ix;
      }
    }

    /* get removed IS, use '' */
    /* if (iter==1) { */
    /*   PetscInt *lid_rem,idx; */
    /*   ierr = PetscMalloc(nloc*sizeof(PetscInt), &lid_rem);CHKERRQ(ierr); */
    /*   for (kk=idx=0;kk<nloc;kk++){ */
    /*     PetscInt nn,lid=kk; */
    /*     ii = matA->i; nn = ii[lid+1] - ii[lid]; */
    /*     if ((ix=lid_cprowID[lid]) != -1) { /\* if I have any ghost neighbors *\/ */
    /*       ii = matB->compressedrow.i; */
    /*       nn += ii[ix+1] - ii[ix]; */
    /*     } */
    /*     if (nn < 2) { */
    /*       lid_rem[idx++] = kk + my0; */
    /*     } */
    /*   } */
    /*   ierr = PetscCDSetRemovedIS(agg_llists, wcomm, idx, lid_rem);CHKERRQ(ierr); */
    /*   ierr = PetscFree(lid_rem);CHKERRQ(ierr); */
    /* } */

    /* compute 'locMaxEdge' & 'locMaxPE', and create list of edges, count edges' */
    for (nEdges=0,kk=0,gid=my0;kk<nloc;kk++,gid++){
      PetscReal   max_e = 0., tt;
      PetscScalar vval;
      PetscInt    lid = kk;
      PetscMPIInt max_pe=rank,pe;
      ii = matA->i; n = ii[lid+1] - ii[lid]; idx = matA->j + ii[lid];
      ap = matA->a + ii[lid];
      for (jj=0; jj<n; jj++) {
        PetscInt lidj = idx[jj];
        if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]);
        if (lidj > lid) nEdges++;
      }
      if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
        ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
        ap = matB->a + ii[ix];
        idx = matB->j + ii[ix];
        for (jj=0 ; jj<n ; jj++) {
          if ((tt=PetscRealPart(ap[jj])) > max_e) max_e = tt;
          nEdges++;
          if ((pe=cpcol_pe[idx[jj]]) > max_pe) max_pe = pe;
        }
      }
      vval = max_e;
      ierr = VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr);

      vval = (PetscScalar)max_pe;
      ierr = VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = VecAssemblyBegin(locMaxEdge);CHKERRQ(ierr);
    ierr = VecAssemblyEnd(locMaxEdge);CHKERRQ(ierr);
    ierr = VecAssemblyBegin(locMaxPE);CHKERRQ(ierr);
    ierr = VecAssemblyEnd(locMaxPE);CHKERRQ(ierr);

    /* get 'cpcol_max_ew' & 'cpcol_max_pe' */
    if (mpimat) {
      ierr = VecDuplicate(mpimat->lvec, &ghostMaxEdge);CHKERRQ(ierr);
      ierr = VecScatterBegin(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecGetArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr);

      ierr = VecDuplicate(mpimat->lvec, &ghostMaxPE);CHKERRQ(ierr);
      ierr = VecScatterBegin(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr =   VecScatterEnd(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecGetArray(ghostMaxPE, &cpcol_max_pe);CHKERRQ(ierr);
    }

    /* setup sorted list of edges */
    ierr = PetscMalloc(nEdges*sizeof(Edge), &Edges);CHKERRQ(ierr);
    ierr = ISGetIndices(perm, &perm_ix);CHKERRQ(ierr);
    for (nEdges=n_nz_row=kk=0;kk<nloc;kk++){
      PetscInt nn, lid = perm_ix[kk];
      ii = matA->i; nn = n = ii[lid+1] - ii[lid]; idx = matA->j + ii[lid];
      ap = matA->a + ii[lid];
      for (jj=0; jj<n; jj++) {
        PetscInt lidj = idx[jj];        assert(PetscRealPart(ap[jj])>0.);
        if (lidj > lid) {
          Edges[nEdges].lid0 = lid;
          Edges[nEdges].gid1 = lidj + my0;
          Edges[nEdges].cpid1 = -1;
          Edges[nEdges].weight = PetscRealPart(ap[jj]);
          nEdges++;
        }
      }
      if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
        ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
        ap = matB->a + ii[ix];
        idx = matB->j + ii[ix];
        nn += n;
        for (jj=0 ; jj<n ; jj++) {
          assert(PetscRealPart(ap[jj])>0.);
          Edges[nEdges].lid0 = lid;
          Edges[nEdges].gid1 = (PetscInt)PetscRealPart(cpcol_gid[idx[jj]]);
          Edges[nEdges].cpid1 = idx[jj];
          Edges[nEdges].weight = PetscRealPart(ap[jj]);
          nEdges++;
        }
      }
      if (nn > 1) n_nz_row++;
      else if (iter == 1){
        /* should select this because it is technically in the MIS but lets not */
        ierr = PetscCDRemoveAll(agg_llists, lid);CHKERRQ(ierr);
      }
    }
    ierr = ISRestoreIndices(perm,&perm_ix);CHKERRQ(ierr);

    qsort(Edges, nEdges, sizeof(Edge), gamg_hem_compare);

    /* projection matrix */
    ierr = MatCreateAIJ(wcomm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE, 1, 0, 1, 0, &P);CHKERRQ(ierr);

    /* clear matched flags */
    for (kk=0;kk<nloc;kk++) lid_matched[kk] = PETSC_FALSE;
    /* process - communicate - process */
    for (sub_it=0;sub_it<n_sub_its;sub_it++){
      PetscInt nactive_edges;

      ierr = VecGetArray(locMaxEdge, &lid_max_ew);CHKERRQ(ierr);
      for (kk=nactive_edges=0;kk<nEdges;kk++){
        /* HEM */
        const Edge *e = &Edges[kk];
        const PetscInt lid0=e->lid0,gid1=e->gid1,cpid1=e->cpid1,gid0=lid0+my0,lid1=gid1-my0;
        PetscBool isOK = PETSC_TRUE;

        /* skip if either (local) vertex is done already */
        if (lid_matched[lid0] || (gid1>=my0 && gid1<Iend && lid_matched[gid1-my0])) {
          continue;
        }
        /* skip if ghost vertex is done */
        if (cpid1 != -1 && cpcol_matched[cpid1]) {
          continue;
        }

        nactive_edges++;
        /* skip if I have a bigger edge someplace (lid_max_ew gets updated) */
        if (PetscRealPart(lid_max_ew[lid0]) > e->weight + 1.e-12) {
          continue;
        }

        if (cpid1 == -1) {
          if (PetscRealPart(lid_max_ew[lid1]) > e->weight + 1.e-12) {
            continue;
          }
        } else {
          /* see if edge might get matched on other proc */
          PetscReal g_max_e = PetscRealPart(cpcol_max_ew[cpid1]);
          if (g_max_e > e->weight + 1.e-12) {
            continue;
          } else if (e->weight > g_max_e - 1.e-12 && (PetscMPIInt)PetscRealPart(cpcol_max_pe[cpid1]) > rank) {
            /* check for max_e == to this edge and larger processor that will deal with this */
            continue;
          }
        }

        /* check ghost for v0 */
        if (isOK){
          PetscReal max_e,ew;
          if ((ix=lid_cprowID[lid0]) != -1) { /* if I have any ghost neighbors */
            ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
            ap = matB->a + ii[ix];
            idx = matB->j + ii[ix];
            for (jj=0 ; jj<n && isOK; jj++) {
              PetscInt lidj = idx[jj];
              if (cpcol_matched[lidj]) continue;
              ew = PetscRealPart(ap[jj]); max_e = PetscRealPart(cpcol_max_ew[lidj]);
              /* check for max_e == to this edge and larger processor that will deal with this */
              if (ew > max_e - 1.e-12 && ew > PetscRealPart(lid_max_ew[lid0]) - 1.e-12 && (PetscMPIInt)PetscRealPart(cpcol_max_pe[lidj]) > rank){
                isOK = PETSC_FALSE;
              }
            }
          }

          /* for v1 */
          if (cpid1 == -1 && isOK){
            if ((ix=lid_cprowID[lid1]) != -1) { /* if I have any ghost neighbors */
              ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
              ap = matB->a + ii[ix];
              idx = matB->j + ii[ix];
              for (jj=0 ; jj<n && isOK ; jj++) {
                PetscInt lidj = idx[jj];
                if (cpcol_matched[lidj]) continue;
                ew = PetscRealPart(ap[jj]); max_e = PetscRealPart(cpcol_max_ew[lidj]);
                /* check for max_e == to this edge and larger processor that will deal with this */
                if (ew > max_e - 1.e-12 && ew > PetscRealPart(lid_max_ew[lid1]) - 1.e-12 && (PetscMPIInt)PetscRealPart(cpcol_max_pe[lidj]) > rank) {
                  isOK = PETSC_FALSE;
                }
              }
            }
          }
        }

        /* do it */
        if (isOK){
          if (cpid1 == -1) {
            lid_matched[lid1] = PETSC_TRUE;  /* keep track of what we've done this round */
            ierr = PetscCDAppendRemove(agg_llists, lid0, lid1);CHKERRQ(ierr);
          } else if (sub_it != n_sub_its-1) {
            /* add gid1 to list of ghost deleted by me -- I need their children */
            proc = cpcol_pe[cpid1];
            cpcol_matched[cpid1] = PETSC_TRUE; /* messing with VecGetArray array -- needed??? */
            ierr = PetscCDAppendID(deleted_list, proc, cpid1);CHKERRQ(ierr); /* cache to send messages */
            ierr = PetscCDAppendID(deleted_list, proc, lid0);CHKERRQ(ierr);
          } else {
            continue;
          }
          lid_matched[lid0] = PETSC_TRUE; /* keep track of what we've done this round */
          /* set projection */
          ierr = MatSetValues(P,1,&gid0,1,&gid0,&one,INSERT_VALUES);CHKERRQ(ierr);
          ierr = MatSetValues(P,1,&gid1,1,&gid0,&one,INSERT_VALUES);CHKERRQ(ierr);
        } /* matched */
      } /* edge loop */

      /* deal with deleted ghost on first pass */
      if (size>1 && sub_it != n_sub_its-1){
        PetscCDPos  pos;  PetscBool ise = PETSC_FALSE;
        PetscInt    nSend1, **sbuffs1,nSend2;
#define REQ_BF_SIZE 100
        MPI_Request *sreqs2[REQ_BF_SIZE],*rreqs2[REQ_BF_SIZE];
        MPI_Status  status;

        /* send request */
        for (proc=0,nSend1=0;proc<size;proc++){
          ierr = PetscCDEmptyAt(deleted_list,proc,&ise);CHKERRQ(ierr);
          if (!ise) nSend1++;
        }
        ierr = PetscMalloc(nSend1*sizeof(PetscInt*), &sbuffs1);CHKERRQ(ierr);
        /* ierr = PetscMalloc4(nSend1, PetscInt*, sbuffs1, nSend1, PetscInt*, rbuffs1, nSend1, MPI_Request*, sreqs1, nSend1, MPI_Request*, rreqs1);CHKERRQ(ierr); */
        /* PetscFree4(sbuffs1,rbuffs1,sreqs1,rreqs1); */
        for (proc=0,nSend1=0;proc<size;proc++){
          /* count ghosts */
          ierr = PetscCDSizeAt(deleted_list,proc,&n);CHKERRQ(ierr);
          if (n>0){
#define CHUNCK_SIZE 100
            PetscInt    *sbuff,*pt;
            MPI_Request *request;
            assert(n%2==0);
            n /= 2;
            ierr = PetscMalloc((2 + 2*n + n*CHUNCK_SIZE)*sizeof(PetscInt) + 2*sizeof(MPI_Request), &sbuff);CHKERRQ(ierr);
            /* PetscMalloc4(2+2*n,PetscInt,sbuffs1[nSend1],n*CHUNCK_SIZE,PetscInt,rbuffs1[nSend1],1,MPI_Request,rreqs2[nSend1],1,MPI_Request,sreqs2[nSend1]); */
            /* save requests */
            sbuffs1[nSend1] = sbuff;
            request = (MPI_Request*)sbuff;
            sbuff = pt = (PetscInt*)(request+1);
            *pt++ = n; *pt++ = rank;

            ierr = PetscCDGetHeadPos(deleted_list,proc,&pos);CHKERRQ(ierr);
            while(pos){
              PetscInt lid0, cpid, gid;
              ierr = PetscLLNGetID(pos, &cpid);CHKERRQ(ierr);
              gid = (PetscInt)PetscRealPart(cpcol_gid[cpid]);
              ierr = PetscCDGetNextPos(deleted_list,proc,&pos);CHKERRQ(ierr);
              ierr = PetscLLNGetID(pos, &lid0);CHKERRQ(ierr);
              ierr = PetscCDGetNextPos(deleted_list,proc,&pos);CHKERRQ(ierr);
              *pt++ = gid; *pt++ = lid0;
            }
            /* send request tag1 [n, proc, n*[gid1,lid0] ] */
            ierr = MPI_Isend(sbuff, 2*n+2, MPIU_INT, proc, tag1, wcomm, request);CHKERRQ(ierr);
            /* post receive */
            request = (MPI_Request*)pt;
            rreqs2[nSend1] = request; /* cache recv request */
            pt = (PetscInt*)(request+1);
            ierr = MPI_Irecv(pt, n*CHUNCK_SIZE, MPIU_INT, proc, tag2, wcomm, request);CHKERRQ(ierr);
            /* clear list */
            ierr = PetscCDRemoveAll(deleted_list, proc);CHKERRQ(ierr);
            nSend1++;
          }
        }
        /* receive requests, send response, clear lists */
        kk = nactive_edges;
        ierr = MPI_Allreduce(&kk,&nactive_edges,1,MPIU_INT,MPI_SUM,wcomm);CHKERRQ(ierr); /* not correct syncronization and global */
        nSend2 = 0;
        while(1){
#define BF_SZ 10000
          PetscMPIInt flag,count;
          PetscInt    rbuff[BF_SZ],*pt,*pt2,*pt3,count2,*sbuff,count3;
          MPI_Request *request;
          ierr = MPI_Iprobe(MPI_ANY_SOURCE, tag1, wcomm, &flag, &status);CHKERRQ(ierr);
          if (!flag) break;
          ierr = MPI_Get_count(&status, MPIU_INT, &count);CHKERRQ(ierr);
          if (count > BF_SZ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"buffer too small for recieve: %d",count);
          proc = status.MPI_SOURCE;
          /* receive request tag1 [n, proc, n*[gid1,lid0] ] */
          ierr = MPI_Recv(rbuff, count, MPIU_INT, proc, tag1, wcomm, &status);CHKERRQ(ierr);
          /* count sends */
          pt = rbuff; count3 = count2 = 0;
          n = *pt++; kk = *pt++;           assert(kk==proc);
          while(n--){
            PetscInt gid1=*pt++, lid1=gid1-my0; kk=*pt++;  assert(lid1>=0 && lid1<nloc);
            if (lid_matched[lid1]){
              PetscPrintf(PETSC_COMM_SELF,"\t *** [%d]%s %d) ERROR recieved deleted gid %d, deleted by (lid) %d from proc %d\n",rank,__FUNCT__,sub_it,gid1,kk);
              PetscSleep(1);
            }
            assert(!lid_matched[lid1]);
            lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */
            ierr = PetscCDSizeAt(agg_llists, lid1, &kk);CHKERRQ(ierr);
            count2 += kk + 2;
            count3++; /* number of verts requested (n) */
          }
          assert(pt-rbuff==count);
          if (count2 > count3*CHUNCK_SIZE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Irecv will be too small: %d",count2);
          /* send tag2 *[lid0, n, n*[gid] ] */
          ierr = PetscMalloc(count2*sizeof(PetscInt) + sizeof(MPI_Request), &sbuff);CHKERRQ(ierr);
          request = (MPI_Request*)sbuff;
          sreqs2[nSend2++] = request; /* cache request */
          if (nSend2==REQ_BF_SIZE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"buffer too small for requests: %d",nSend2);
          pt2 = sbuff = (PetscInt*)(request+1);
          pt  = rbuff;
          n = *pt++; kk = *pt++;           assert(kk==proc);
          while(n--){
            /* read [n, proc, n*[gid1,lid0] ] */
            PetscInt gid1=*pt++, lid1=gid1-my0, lid0=*pt++;   assert(lid1>=0 && lid1<nloc);
            /* write [lid0, n, n*[gid] ] */
            *pt2++ = lid0;
            pt3 = pt2++; /* save pointer for later */
            /* for (pos=PetscCDGetHeadPos(agg_llists,lid1) ; pos ; pos=PetscCDGetNextPos(agg_llists,lid1,pos)){ */
            ierr = PetscCDGetHeadPos(agg_llists,lid1,&pos);CHKERRQ(ierr);
            while(pos){
              PetscInt gid;
              ierr = PetscLLNGetID(pos, &gid);CHKERRQ(ierr);
              ierr = PetscCDGetNextPos(agg_llists,lid1,&pos);CHKERRQ(ierr);
              *pt2++ = gid;
            }
            *pt3 = (pt2-pt3)-1;
            /* clear list */
            ierr = PetscCDRemoveAll(agg_llists, lid1);CHKERRQ(ierr);
          }
          assert(pt2-sbuff==count2); assert(pt-rbuff==count);
          /* send requested data tag2 *[lid0, n, n*[gid1] ] */
          ierr = MPI_Isend(sbuff, count2, MPIU_INT, proc, tag2, wcomm, request);CHKERRQ(ierr);
        }

        /* receive tag2 *[lid0, n, n*[gid] ] */
        for (kk=0;kk<nSend1;kk++){
          PetscMPIInt count;
          MPI_Request *request;
          PetscInt    *pt, *pt2;
          request = rreqs2[kk]; /* no need to free -- buffer is in 'sbuffs1' */
          ierr = MPI_Wait(request, &status);CHKERRQ(ierr);
          ierr = MPI_Get_count(&status, MPIU_INT, &count);CHKERRQ(ierr);
          pt = pt2 = (PetscInt*)(request+1);
          while(pt-pt2 < count){
            PetscInt lid0 = *pt++, n = *pt++;           assert(lid0>=0 && lid0<nloc);
            while(n--){
              PetscInt gid1 = *pt++;
              ierr = PetscCDAppendID(agg_llists, lid0, gid1);CHKERRQ(ierr);
            }
          }
          assert(pt-pt2==count);
        }

        /* wait for tag1 isends */
        while(nSend1--){
          MPI_Request *request;
          request = (MPI_Request*)sbuffs1[nSend1];
          ierr = MPI_Wait(request, &status);CHKERRQ(ierr);
          ierr = PetscFree(request);CHKERRQ(ierr);
        }
        ierr = PetscFree(sbuffs1);CHKERRQ(ierr);

        /* wait for tag2 isends */
        while(nSend2--){
          MPI_Request *request = sreqs2[nSend2];
          ierr = MPI_Wait(request, &status);CHKERRQ(ierr);
          ierr = PetscFree(request);CHKERRQ(ierr);
        }

        ierr = VecRestoreArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr);
        ierr = VecRestoreArray(ghostMaxPE, &cpcol_max_pe);CHKERRQ(ierr);

        /* get 'cpcol_matched' - use locMaxPE, ghostMaxEdge, cpcol_max_ew */
        for (kk=0,gid=my0;kk<nloc;kk++,gid++) {
          PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0;
          ierr = VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr); /* set with GID */
        }
        ierr = VecAssemblyBegin(locMaxPE);CHKERRQ(ierr);
        ierr = VecAssemblyEnd(locMaxPE);CHKERRQ(ierr);
        ierr = VecScatterBegin(mpimat->Mvctx,locMaxPE,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr =   VecScatterEnd(mpimat->Mvctx,locMaxPE,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr = VecGetArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr);
        ierr = VecGetLocalSize(mpimat->lvec, &n);CHKERRQ(ierr);
        for (kk=0;kk<n;kk++) {
          cpcol_matched[kk] = (PetscBool)(PetscRealPart(cpcol_max_ew[kk]) != 0.0);
        }

        ierr = VecRestoreArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr);
      } /* size > 1 */

      /* compute 'locMaxEdge' */
      ierr = VecRestoreArray(locMaxEdge, &lid_max_ew);CHKERRQ(ierr);
      for (kk=0,gid=my0;kk<nloc;kk++,gid++){
        PetscReal   max_e = 0.,tt;
        PetscScalar vval;
        PetscInt    lid = kk;
        if (lid_matched[lid]) vval = 0.;
        else {
          ii = matA->i; n = ii[lid+1] - ii[lid]; idx = matA->j + ii[lid];
          ap = matA->a + ii[lid];
          for (jj=0; jj<n; jj++) {
            PetscInt lidj = idx[jj];
            if (lid_matched[lidj]) continue; /* this is new - can change local max */
            if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]);
          }
          if (lid_cprowID && (ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
            ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
            ap = matB->a + ii[ix];
            idx = matB->j + ii[ix];
            for (jj=0 ; jj<n ; jj++) {
              PetscInt lidj = idx[jj];
              if (cpcol_matched[lidj]) continue;
              if ((tt=PetscRealPart(ap[jj])) > max_e) max_e = tt;
            }
          }
        }
        vval = (PetscScalar)max_e;
        ierr = VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr); /* set with GID */
      }
      ierr = VecAssemblyBegin(locMaxEdge);CHKERRQ(ierr);
      ierr = VecAssemblyEnd(locMaxEdge);CHKERRQ(ierr);

      if (size>1 && sub_it != n_sub_its-1){
        /* compute 'cpcol_max_ew' */
        ierr = VecScatterBegin(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr =   VecScatterEnd(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr = VecGetArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr);
        ierr = VecGetArray(locMaxEdge, &lid_max_ew);CHKERRQ(ierr);

        /* compute 'cpcol_max_pe' */
        for (kk=0,gid=my0;kk<nloc;kk++,gid++){
          PetscInt    lid = kk;
          PetscReal   ew,v1_max_e,v0_max_e=PetscRealPart(lid_max_ew[lid]);
          PetscScalar vval;
          PetscMPIInt max_pe=rank,pe;
          if (lid_matched[lid]) vval = (PetscScalar)rank;
          else if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
            ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
            ap = matB->a + ii[ix];
            idx = matB->j + ii[ix];
            for (jj=0 ; jj<n ; jj++) {
              PetscInt lidj = idx[jj];
              if (cpcol_matched[lidj]) continue;
              ew = PetscRealPart(ap[jj]); v1_max_e = PetscRealPart(cpcol_max_ew[lidj]);
              /* get max pe that has a max_e == to this edge w */
              if ((pe=cpcol_pe[idx[jj]]) > max_pe && ew > v1_max_e - 1.e-12 && ew > v0_max_e - 1.e-12) max_pe = pe;
              assert(ew < v0_max_e + 1.e-12 && ew < v1_max_e + 1.e-12);
            }
            vval = (PetscScalar)max_pe;
          }
          ierr = VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr);
        }
        ierr = VecAssemblyBegin(locMaxPE);CHKERRQ(ierr);
        ierr = VecAssemblyEnd(locMaxPE);CHKERRQ(ierr);

        ierr = VecScatterBegin(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr =   VecScatterEnd(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr = VecGetArray(ghostMaxPE, &cpcol_max_pe);CHKERRQ(ierr);
        ierr = VecRestoreArray(locMaxEdge, &lid_max_ew);CHKERRQ(ierr);
      } /* deal with deleted ghost */
      if (verbose>2) PetscPrintf(wcomm,"\t[%d]%s %d.%d: %d active edges.\n",
                                rank,__FUNCT__,iter,sub_it,nactive_edges);
      if (!nactive_edges) break;
    } /* sub_it loop */

    /* clean up iteration */
    ierr = PetscFree(Edges);CHKERRQ(ierr);
    if (mpimat){
      ierr = VecRestoreArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr);
      ierr = VecDestroy(&ghostMaxEdge);CHKERRQ(ierr);
      ierr = VecRestoreArray(ghostMaxPE, &cpcol_max_pe);CHKERRQ(ierr);
      ierr = VecDestroy(&ghostMaxPE);CHKERRQ(ierr);
      ierr = PetscFree(cpcol_pe);CHKERRQ(ierr);
      ierr = PetscFree(cpcol_matched);CHKERRQ(ierr);
    }

    ierr = VecDestroy(&locMaxEdge);CHKERRQ(ierr);
    ierr = VecDestroy(&locMaxPE);CHKERRQ(ierr);

    if (mpimat){
      ierr = VecRestoreArray(mpimat->lvec, &cpcol_gid);CHKERRQ(ierr);
    }

    /* create next G if needed */
    if (iter == n_iter) { /* hard wired test - need to look at full surrounded nodes or something */
      ierr = MatDestroy(&P);CHKERRQ(ierr);
      ierr = MatDestroy(&cMat);CHKERRQ(ierr);
      break;
    } else {
      Vec diag;
      /* add identity for unmatched vertices so they stay alive */
      for (kk=0,gid=my0;kk<nloc;kk++,gid++){
        if (!lid_matched[kk]) {
          gid = kk+my0;
          ierr = MatGetRow(cMat,gid,&n,0,0);CHKERRQ(ierr);
          if (n>1){
            ierr = MatSetValues(P,1,&gid,1,&gid,&one,INSERT_VALUES);CHKERRQ(ierr);
          }
          ierr = MatRestoreRow(cMat,gid,&n,0,0);CHKERRQ(ierr);
        }
      }
      ierr = MatAssemblyBegin(P,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(P,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

      /* project to make new graph with collapsed edges */
      ierr = MatPtAP(cMat,P,MAT_INITIAL_MATRIX,1.0,&tMat);CHKERRQ(ierr);
      ierr = MatDestroy(&P);CHKERRQ(ierr);
      ierr = MatDestroy(&cMat);CHKERRQ(ierr);
      cMat = tMat;
      ierr = MatGetVecs(cMat, &diag, 0);CHKERRQ(ierr);
      ierr = MatGetDiagonal(cMat, diag);CHKERRQ(ierr); /* effectively PCJACOBI */
      ierr = VecReciprocal(diag);CHKERRQ(ierr);
      ierr = VecSqrtAbs(diag);CHKERRQ(ierr);
      ierr = MatDiagonalScale(cMat, diag, diag);CHKERRQ(ierr);
      ierr = VecDestroy(&diag);CHKERRQ(ierr);
    }
  } /* coarsen iterator */

  /* make fake matrix */
  if (size>1){
    Mat        mat;
    PetscCDPos pos;
    PetscInt   gid, NN, MM, jj = 0, mxsz = 0;

    for (kk=0;kk<nloc;kk++){
      ierr = PetscCDSizeAt(agg_llists, kk, &jj);CHKERRQ(ierr);
      if (jj > mxsz)  mxsz = jj;
    }
    ierr = MatGetSize(a_Gmat, &MM, &NN);CHKERRQ(ierr);
    if (mxsz > MM-nloc) mxsz = MM-nloc;

    ierr = MatCreateAIJ(wcomm, nloc, nloc,PETSC_DETERMINE, PETSC_DETERMINE,0, 0, mxsz, 0, &mat);CHKERRQ(ierr);

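    /* for each local list, add entry (gid,gid1) for every member gid1 outside the local range [my0, my0+nloc) */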
    for (kk=0,gid=my0;kk<nloc;kk++,gid++){
      /* for (pos=PetscCDGetHeadPos(agg_llists,kk) ; pos ; pos=PetscCDGetNextPos(agg_llists,kk,pos)){ */
      ierr = PetscCDGetHeadPos(agg_llists,kk,&pos);CHKERRQ(ierr);
      while(pos){
        PetscInt gid1;
        ierr = PetscLLNGetID(pos, &gid1);CHKERRQ(ierr);
        ierr = PetscCDGetNextPos(agg_llists,kk,&pos);CHKERRQ(ierr);

        if (gid1 < my0 || gid1 >= my0+nloc) {
          ierr = MatSetValues(mat,1,&gid,1,&gid1,&one,ADD_VALUES);CHKERRQ(ierr);
        }
      }
    }
    ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

    ierr = PetscCDSetMat(agg_llists, mat);CHKERRQ(ierr);
  }

  ierr = PetscFree(lid_cprowID);CHKERRQ(ierr);
  ierr = PetscFree(lid_gid);CHKERRQ(ierr);
  ierr = PetscFree(lid_matched);CHKERRQ(ierr);
  ierr = PetscCDDestroy(deleted_list);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}