/*
   MatIncreaseOverlap_MPISBAIJ - Applies ov rounds of overlap increase to the
   is_max index sets in is[].

   Input Parameters:
+  C      - the matrix; its column count (C->cmap->N) and block size
            (C->rmap->bs) drive the index conversion
.  is_max - number of index sets in is[]
.  is     - index sets; every entry is destroyed and then refilled by
            ISExpandIndicesGeneral() before returning
-  ov     - number of overlap rounds; must be non-negative

   Notes:
   The work is done on block-compressed copies (is_new) of the index sets.
   Each round is one call to MatIncreaseOverlap_MPISBAIJ_Once().
*/
PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat C,PetscInt is_max,IS is[],PetscInt ov)
{
  PetscErrorCode ierr;
  PetscInt       i,N=C->cmap->N, bs=C->rmap->bs;
  IS             *is_new;

  PetscFunctionBegin;
  /* Validate before anything is allocated: SETERRQ returns immediately, so
     checking after PetscMalloc()/ISCompressIndicesGeneral() would leak is_new
     and the compressed index sets. */
  if (ov < 0){ SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified\n");}

  ierr = PetscMalloc(is_max*sizeof(IS),&is_new);CHKERRQ(ierr);
  /* Convert the indices into block format */
  ierr = ISCompressIndicesGeneral(N,bs,is_max,is,is_new);CHKERRQ(ierr);

  /* One pass per requested level of overlap */
  for (i=0; i<ov; ++i) {
    ierr = MatIncreaseOverlap_MPISBAIJ_Once(C,is_max,is_new);CHKERRQ(ierr);
  }

  /* Drop the caller's original index sets; they are replaced just below */
  for (i=0; i<is_max; i++) {
    ierr = ISDestroy(is[i]);CHKERRQ(ierr);
  }
  /* Presumably the inverse of the compression above: block indices in is_new
     are expanded back into is[] */
  ierr = ISExpandIndicesGeneral(N,bs,is_max,is_new,is);CHKERRQ(ierr);

  /* Release the block-format work copies */
  for (i=0; i<is_max; i++) {
    ierr = ISDestroy(is_new[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree(is_new);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   MatIncreaseOverlap_MPISBAIJ - Applies ov rounds of overlap increase to the
   is_max index sets in is[].

   Input Parameters:
+  C      - the matrix (its data is accessed as Mat_MPISBAIJ)
.  is_max - number of index sets in is[]
.  is     - index sets; every entry is destroyed and then refilled by
            ISExpandIndicesGeneral() before returning
-  ov     - number of overlap rounds; must be non-negative

   Options Database Keys:
.  -IncreaseOverlap_old - use the previous non-scalable implementation
                          (repeated MatIncreaseOverlap_MPISBAIJ_Once())

   Notes:
   The work is done on block-compressed copies (is_new) of the index sets.
   The default path performs, per round: (1) extraction of submatrices for the
   current index sets, (2) a row search via MatIncreaseOverlap_MPIBAIJ_Once(),
   and (3) a column search that adds every block row of the submatrix that has
   at least one stored entry.
*/
PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat C,PetscInt is_max,IS is[],PetscInt ov)
{
  PetscErrorCode ierr;
  PetscInt       i,N=C->cmap->N, bs=C->rmap->bs,M=C->rmap->N,Mbs=M/bs,*nidx,isz,iov;
  IS             *is_new,*is_row;
  Mat            *submats;
  Mat_MPISBAIJ   *c=(Mat_MPISBAIJ*)C->data;
  Mat_SeqSBAIJ   *asub_i;
  PetscBT        table;
  PetscInt       *ai,brow,nz,nis,l,nmax,nstages_local,nstages,max_no,pos;
  const PetscInt *idx;
  PetscBool      flg,*allcolumns,*allrows;

  PetscFunctionBegin;
  /* Validate before anything is allocated: SETERRQ returns immediately, so
     checking after the allocation/compression below would leak is_new. */
  if (ov < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified\n");

  ierr = PetscMalloc1(is_max,&is_new); CHKERRQ(ierr);
  /* Convert the indices into block format */
  ierr = ISCompressIndicesGeneral(N,C->rmap->n,bs,is_max,is,is_new); CHKERRQ(ierr);

  /* ----- previous non-scalable implementation ----- */
  flg  = PETSC_FALSE;
  ierr = PetscOptionsHasName(NULL,NULL, "-IncreaseOverlap_old", &flg); CHKERRQ(ierr);
  if (flg) { /* previous non-scalable implementation */
    printf("use previous non-scalable implementation...\n");
    for (i=0; i<ov; ++i) {
      ierr = MatIncreaseOverlap_MPISBAIJ_Once(C,is_max,is_new); CHKERRQ(ierr);
    }
  } else { /* implementation using modified BAIJ routines */

    /* nidx collects the grown index list; table marks block indices already present */
    ierr = PetscMalloc1(Mbs+1,&nidx); CHKERRQ(ierr);
    ierr = PetscBTCreate(Mbs,&table); CHKERRQ(ierr); /* for column search */
    ierr = PetscMalloc2(is_max+1,&allcolumns,is_max+1,&allrows); CHKERRQ(ierr);

    /* Create is_row.
       Sized is_max+1 (like the other scratch arrays here) so that is_row[0],
       which is always written and later destroyed, exists even when is_max is 0. */
    ierr = PetscMalloc1(is_max+1,&is_row); CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,Mbs,0,1,&is_row[0]); CHKERRQ(ierr);
    allrows[0] = PETSC_TRUE;
    for (i=1; i<is_max; i++) {
      is_row[i]  = is_row[0]; /* reuse is_row[0] */
      allrows[i] = PETSC_TRUE;
    }

    /* Allocate memory to hold all the submatrices - Modified from MatGetSubMatrices_MPIBAIJ() */
    ierr = PetscMalloc1(is_max+1,&submats); CHKERRQ(ierr);

    /* Check for special case: each processor gets entire matrix columns */
    for (i=0; i<is_max; i++) {
      ierr = ISIdentity(is_new[i],&flg); CHKERRQ(ierr);
      ierr = ISGetLocalSize(is_new[i],&isz); CHKERRQ(ierr);
      if (flg && isz == Mbs) {
        allcolumns[i] = PETSC_TRUE;
      } else {
        allcolumns[i] = PETSC_FALSE;
      }
    }

    /* Determine the number of stages through which submatrices are done.
       Guard the divisor: c->Nbs == 0 would otherwise divide by zero. */
    if (!c->Nbs) nmax = 20*1000000 / sizeof(PetscInt);
    else         nmax = 20*1000000 / (c->Nbs * sizeof(PetscInt));
    if (!nmax) nmax = 1;
    nstages_local = is_max/nmax + ((is_max % nmax) ? 1 : 0);

    /* Make sure every processor loops through the nstages */
    ierr = MPIU_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)C)); CHKERRQ(ierr);

    for (iov=0; iov<ov; ++iov) {
      /* 1) Get submats for column search */
      for (i=0,pos=0; i<nstages; i++) {
        /* At most nmax index sets per stage; once all are consumed the
           remaining stages are still executed with max_no = 0 */
        if (pos+nmax <= is_max) max_no = nmax;
        else if (pos == is_max) max_no = 0;
        else                    max_no = is_max-pos;
        c->ijonly = PETSC_TRUE;
        ierr = MatGetSubMatrices_MPIBAIJ_local(C,max_no,is_row+pos,is_new+pos,MAT_INITIAL_MATRIX,allrows+pos,allcolumns+pos,submats+pos); CHKERRQ(ierr);
        pos += max_no;
      }

      /* 2) Row search */
      ierr = MatIncreaseOverlap_MPIBAIJ_Once(C,is_max,is_new); CHKERRQ(ierr);

      /* 3) Column search */
      for (i=0; i<is_max; i++) {
        asub_i = (Mat_SeqSBAIJ*)submats[i]->data;
        ai     = asub_i->i;

        /* put is_new obtained from MatIncreaseOverlap_MPIBAIJ() to table */
        ierr = PetscBTMemzero(Mbs,table); CHKERRQ(ierr);

        ierr = ISGetIndices(is_new[i],&idx); CHKERRQ(ierr);
        ierr = ISGetLocalSize(is_new[i],&nis); CHKERRQ(ierr);
        for (l=0; l<nis; l++) {
          ierr    = PetscBTSet(table,idx[l]); CHKERRQ(ierr);
          nidx[l] = idx[l];
        }
        isz = nis;

        /* add column entries to table: every block row with at least one
           stored entry (ai[brow+1] > ai[brow]) that is not yet marked is appended */
        for (brow=0; brow<Mbs; brow++) {
          nz = ai[brow+1] - ai[brow];
          if (nz) {
            if (!PetscBTLookupSet(table,brow)) nidx[isz++] = brow;
          }
        }
        ierr = ISRestoreIndices(is_new[i],&idx); CHKERRQ(ierr);
        ierr = ISDestroy(&is_new[i]); CHKERRQ(ierr);

        /* create updated is_new */
        ierr = ISCreateGeneral(PETSC_COMM_SELF,isz,nidx,PETSC_COPY_VALUES,is_new+i); CHKERRQ(ierr);
      }

      /* Free tmp spaces */
      for (i=0; i<is_max; i++) {
        ierr = MatDestroy(&submats[i]); CHKERRQ(ierr);
      }
    }

    ierr = PetscFree2(allcolumns,allrows); CHKERRQ(ierr);
    ierr = PetscBTDestroy(&table); CHKERRQ(ierr);
    ierr = PetscFree(submats); CHKERRQ(ierr);
    /* Only is_row[0] owns an IS; the other entries alias it */
    ierr = ISDestroy(&is_row[0]); CHKERRQ(ierr);
    ierr = PetscFree(is_row); CHKERRQ(ierr);
    ierr = PetscFree(nidx); CHKERRQ(ierr);
  }

  /* Drop the caller's original index sets; they are replaced just below */
  for (i=0; i<is_max; i++) {
    ierr = ISDestroy(&is[i]); CHKERRQ(ierr);
  }
  /* Presumably the inverse of the compression above: block indices in is_new
     are expanded back into is[] */
  ierr = ISExpandIndicesGeneral(N,N,bs,is_max,is_new,is); CHKERRQ(ierr);

  /* Release the block-format work copies */
  for (i=0; i<is_max; i++) {
    ierr = ISDestroy(&is_new[i]); CHKERRQ(ierr);
  }
  ierr = PetscFree(is_new); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}