/*@C
   PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank

   Not Collective

   Input Arguments:
+  sf - star forest
-  unit - data type for each node

   Output Arguments:
+  localtypes - types describing part of local leaf buffer referencing each remote rank
-  remotetypes - types describing part of remote root buffer referenced for each remote rank

   Level: developer

.seealso: PetscSFSetGraph(), PetscSFView()
@*/
static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf,MPI_Datatype unit,const MPI_Datatype **localtypes,const MPI_Datatype **remotetypes)
{
  PetscSF_Window    *w = (PetscSF_Window*)sf->data;
  PetscErrorCode    ierr;
  PetscSFDataLink   link;
  PetscInt          i,nranks;
  const PetscInt    *roffset,*rmine,*rremote;
  const PetscMPIInt *ranks;

  PetscFunctionBegin;
  /* Look for types in cache */
  for (link=w->link; link; link=link->next) {
    PetscBool match;
    ierr = MPIPetsc_Type_compare(unit,link->unit,&match);CHKERRQ(ierr);
    if (match) {
      *localtypes  = link->mine;
      *remotetypes = link->remote;
      PetscFunctionReturn(0);
    }
  }

  /* Create new composite types for each send rank */
  ierr = PetscSFGetRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr);
  ierr = PetscMalloc(sizeof(*link),&link);CHKERRQ(ierr);
  ierr = MPI_Type_dup(unit,&link->unit);CHKERRQ(ierr);
  ierr = PetscMalloc2(nranks,&link->mine,nranks,&link->remote);CHKERRQ(ierr);
  for (i=0; i<nranks; i++) {
    PETSC_UNUSED PetscInt rcount = roffset[i+1] - roffset[i];
    PetscMPIInt           *rmine,*rremote;
#if !defined(PETSC_USE_64BIT_INDICES)
    rmine   = sf->rmine   + sf->roffset[i];
    rremote = sf->rremote + sf->roffset[i];
#else
    PetscInt j;
    ierr = PetscMalloc2(rcount,&rmine,rcount,&rremote);CHKERRQ(ierr);
    for (j=0; j<rcount; j++) {
      ierr = PetscMPIIntCast(sf->rmine[sf->roffset[i]+j],rmine+j);CHKERRQ(ierr);
      ierr = PetscMPIIntCast(sf->rremote[sf->roffset[i]+j],rremote+j);CHKERRQ(ierr);
    }
#endif
    ierr = MPI_Type_create_indexed_block(rcount,1,rmine,link->unit,&link->mine[i]);CHKERRQ(ierr);
    ierr = MPI_Type_create_indexed_block(rcount,1,rremote,link->unit,&link->remote[i]);CHKERRQ(ierr);
#if defined(PETSC_USE_64BIT_INDICES)
    ierr = PetscFree2(rmine,rremote);CHKERRQ(ierr);
#endif
    ierr = MPI_Type_commit(&link->mine[i]);CHKERRQ(ierr);
    ierr = MPI_Type_commit(&link->remote[i]);CHKERRQ(ierr);
  }
  link->next = w->link;
  w->link    = link;

  *localtypes  = link->mine;
  *remotetypes = link->remote;
  PetscFunctionReturn(0);
}
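/* Illustrative sketch (not part of the PetscSF implementation above): how
   MPI_Type_create_indexed_block selects scattered entries of a buffer, which is what the
   cached link->mine/link->remote types do for the leaf and root buffers.  The indices and
   the MPI_DOUBLE unit are made-up example values. */
static PetscErrorCode ExampleIndexedBlockType(void)
{
  PetscMPIInt    idx[3] = {0,2,5};   /* pick entries 0, 2, and 5 of a double buffer */
  MPI_Datatype   seltype;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* One MPI_DOUBLE at each listed displacement; blocklength 1 matches the unit type */
  ierr = MPI_Type_create_indexed_block(3,1,idx,MPI_DOUBLE,&seltype);CHKERRQ(ierr);
  ierr = MPI_Type_commit(&seltype);CHKERRQ(ierr);
  /* seltype could now serve as origin or target datatype in MPI_Get/MPI_Put/MPI_Accumulate */
  ierr = MPI_Type_free(&seltype);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}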
/* Sketch of an alternative preallocation scheme: count nonzeros per point by splitting each
   cell contribution among the processes that visit a point (1/visits[f] per visit). */
PetscErrorCode DMPlexPreallocateOperator_2(DM dm, PetscInt bs, PetscSection section, PetscSection sectionGlobal, PetscInt dnz[], PetscInt onz[], PetscInt dnzu[], PetscInt onzu[], Mat A, PetscBool fillMatrix)
{
  PetscSF            sf;
  PetscInt          *tmpClosure,*lvisits,*visits,*cellmat,*owner,*ldnz,*lonz;
  PetscInt           dim,depth,maxConeSize,maxSupportSize,maxClosureSize,npoints;
  PetscInt           c,cStart,cEnd,pStart,pEnd,p,supportSize,nranks;
  const PetscInt    *roffset,*rmine,*rremote;
  const PetscMPIInt *ranks;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  ierr = DMGetPointSF(dm, &sf);CHKERRQ(ierr);
  ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr);
  ierr = DMPlexGetDepth(dm, &depth);CHKERRQ(ierr);
  ierr = DMPlexGetMaxSizes(dm, &maxConeSize, &maxSupportSize);CHKERRQ(ierr);
  /* Factor 2 leaves room for the (point, orientation) pairs returned by DMPlexGetTransitiveClosure() */
  maxClosureSize = 2*PetscMax(PetscPowInt(maxConeSize,depth+1),PetscPowInt(maxSupportSize,depth+1));
  ierr = PetscSectionGetChart(section, &pStart, &pEnd);CHKERRQ(ierr);
  npoints = pEnd - pStart;
  ierr = PetscMalloc3(maxClosureSize,&tmpClosure,npoints,&lvisits,npoints,&visits);CHKERRQ(ierr);
  ierr = PetscMemzero(lvisits,npoints*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(visits,npoints*sizeof(PetscInt));CHKERRQ(ierr);
  /* Count how many local cells touch each point */
  ierr = DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd);CHKERRQ(ierr);
  for (c=cStart; c<cEnd; c++) {
    PetscInt *support = tmpClosure;

    ierr = DMPlexGetTransitiveClosure(dm, c, PETSC_FALSE, &supportSize, (PetscInt**)&support);CHKERRQ(ierr);
    for (p=0; p<supportSize; p++) lvisits[support[2*p]-pStart]++;
  }
  /* Sum the local counts at the owners, then broadcast the global counts back to the ghosts */
  ierr = PetscSFReduceBegin(sf,MPIU_INT,lvisits,visits,MPI_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd  (sf,MPIU_INT,lvisits,visits,MPI_SUM);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,visits,lvisits);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd  (sf,MPIU_INT,visits,lvisits);CHKERRQ(ierr);
  /* Ownership information, needed to decide between diagonal and off-diagonal counts */
  ierr = PetscSFGetRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(maxClosureSize*maxClosureSize,&cellmat,npoints,&owner);CHKERRQ(ierr);
  for (c=cStart; c<cEnd; c++) {
    ierr = PetscMemzero(cellmat,maxClosureSize*maxClosureSize*sizeof(PetscInt));CHKERRQ(ierr);
    /* Depth-first walk of the transitive closure.
       At each leaf frame f of the transitive closure that we see, add 1/visits[f] to each pair (p,q) not yet marked as done in cellmat.
       This contribution is added to ldnz if the owning ranks of p and q match, to lonz otherwise. */
  }
  /* Accumulate the per-point local counts at the owners */
  ierr = PetscSFReduceBegin(sf,MPIU_INT,ldnz,dnz,MPI_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd  (sf,MPIU_INT,ldnz,dnz,MPI_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(sf,MPIU_INT,lonz,onz,MPI_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd  (sf,MPIU_INT,lonz,onz,MPI_SUM);CHKERRQ(ierr);
  ierr = PetscFree2(cellmat,owner);CHKERRQ(ierr);
  ierr = PetscFree3(tmpClosure,lvisits,visits);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
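/* Illustrative sketch (not part of the sketch above) of how the owner[] array could be
   filled from the point SF: every point defaults to the local rank, and leaves (ghost
   points) take the rank of the remote root that owns them.  The function name and the
   exact placement of this step are assumptions. */
static PetscErrorCode ExampleFillPointOwners(DM dm, PetscInt pStart, PetscInt pEnd, PetscInt owner[])
{
  PetscSF            sf;
  PetscMPIInt        rank;
  PetscInt           nleaves,l;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)dm),&rank);CHKERRQ(ierr);
  ierr = DMGetPointSF(dm,&sf);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
  for (l=pStart; l<pEnd; l++) owner[l-pStart] = rank;   /* locally owned by default */
  for (l=0; l<nleaves; l++) {                           /* ghosts are owned by their root rank */
    PetscInt point = ilocal ? ilocal[l] : l;
    owner[point-pStart] = iremote[l].rank;
  }
  PetscFunctionReturn(0);
}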
static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscSF_Window     *w = (PetscSF_Window*)sf->data;
  PetscErrorCode     ierr;
  PetscInt           i,nranks;
  const PetscMPIInt  *ranks;
  const MPI_Datatype *mine,*remote;
  MPI_Win            win;

  PetscFunctionBegin;
  ierr = PetscSFGetRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr);
  ierr = PetscSFGetWindow(sf,unit,(void*)rootdata,PETSC_TRUE,MPI_MODE_NOPUT|MPI_MODE_NOPRECEDE,MPI_MODE_NOPUT,0,&win);CHKERRQ(ierr);
  for (i=0; i<nranks; i++) {
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);CHKERRQ(ierr);}
    ierr = MPI_Get(leafdata,1,mine[i],ranks[i],0,1,remote[i],win);CHKERRQ(ierr);
    if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {ierr = MPI_Win_unlock(ranks[i],win);CHKERRQ(ierr);}
  }
  PetscFunctionReturn(0);
}
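/* Usage sketch (assumed caller-side code, not part of the window implementation): a
   PetscSFBcastBegin/End pair that moves root values to the leaves; with the window
   communication type, the MPI_Get calls above perform the actual transfers.  Buffer
   sizes and values are illustrative. */
static PetscErrorCode ExampleBcastRootsToLeaves(PetscSF sf, PetscInt nroots, PetscInt nleaves)
{
  PetscInt       *rootdata,*leafdata,i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(nroots,&rootdata,nleaves,&leafdata);CHKERRQ(ierr);
  for (i=0; i<nroots; i++) rootdata[i] = 100+i;                            /* arbitrary example values */
  ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);   /* may overlap with computation */
  ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);     /* leafdata now holds each owning root's value */
  ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}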
static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode     ierr;
  PetscInt           i,nranks;
  const PetscMPIInt  *ranks;
  const MPI_Datatype *mine,*remote;
  MPI_Win            win;

  PetscFunctionBegin;
  ierr = PetscSFGetRanks(sf,&nranks,&ranks,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);CHKERRQ(ierr);
  ierr = PetscSFWindowOpTranslate(&op);CHKERRQ(ierr);
  ierr = PetscSFGetWindow(sf,unit,rootdata,PETSC_FALSE,0,0,0,&win);CHKERRQ(ierr);
  for (i=0; i<nranks; i++) {
    /* Fetch the current root values into leafupdate, then accumulate the leaf contribution,
       under an exclusive lock so the pair is atomic with respect to other ranks */
    ierr = MPI_Win_lock(MPI_LOCK_EXCLUSIVE,ranks[i],0,win);CHKERRQ(ierr);
    ierr = MPI_Get(leafupdate,1,mine[i],ranks[i],0,1,remote[i],win);CHKERRQ(ierr);
    ierr = MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],0,1,remote[i],op,win);CHKERRQ(ierr);
    ierr = MPI_Win_unlock(ranks[i],win);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
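/* Usage sketch (assumed caller-side code): fetch-and-op returns, for every leaf, the root
   value as it was before that leaf's contribution was added, which is the usual way to
   hand out globally unique offsets.  The counts below are made-up example values. */
static PetscErrorCode ExampleFetchAndAdd(PetscSF sf, PetscInt nroots, PetscInt nleaves)
{
  PetscInt       *rootdata,*leafdata,*leafupdate,i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc3(nroots,&rootdata,nleaves,&leafdata,nleaves,&leafupdate);CHKERRQ(ierr);
  ierr = PetscMemzero(rootdata,nroots*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nleaves; i++) leafdata[i] = 1;       /* each leaf requests one slot */
  ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPI_SUM);CHKERRQ(ierr);
  ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPI_SUM);CHKERRQ(ierr);
  /* leafupdate[i] now holds the root's previous count, i.e. this leaf's offset */
  ierr = PetscFree3(rootdata,leafdata,leafupdate);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}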
PetscErrorCode PetscSFCreateSectionSF(PetscSF sf, PetscSection section, PetscSF *sectionSF)
{
  PetscInt           numRanks;
  const PetscMPIInt *ranks;
  const PetscInt    *rankOffsets, *localPoints, *remotePoints;
  PetscInt           numPoints, numIndices = 0;
  PetscInt          *remoteOffsets;
  PetscInt          *localIndices;
  PetscSFNode       *remoteIndices;
  PetscInt           i, r, ind;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  ierr = PetscSFGetRanks(sf, &numRanks, &ranks, &rankOffsets, &localPoints, &remotePoints); CHKERRQ(ierr);
  numPoints = rankOffsets[numRanks];
  for (i = 0; i < numPoints; ++i) {
    PetscInt dof;

    ierr = PetscSectionGetDof(section, localPoints[i], &dof); CHKERRQ(ierr);
    numIndices += dof;
  }
  /* Communicate offsets for ghosted points */
#if 0
  PetscInt *localOffsets;
  ierr = PetscMalloc2(numPoints,PetscInt,&localOffsets,numPoints,PetscInt,&remoteOffsets); CHKERRQ(ierr);
  for (i = 0; i < numPoints; ++i) {
    ierr = PetscSectionGetOffset(section, localPoints[i], &localOffsets[i]); CHKERRQ(ierr);
  }
  ierr = PetscSFBcastBegin(sf, MPIU_INT, localOffsets, remoteOffsets); CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf, MPIU_INT, localOffsets, remoteOffsets); CHKERRQ(ierr);
  for (i = 0; i < numPoints; ++i) {
    ierr = PetscSynchronizedPrintf(((PetscObject) sf)->comm, "remoteOffsets[%d]: %d\n", i, remoteOffsets[i]); CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((section->atlasLayout.pEnd - section->atlasLayout.pStart) * sizeof(PetscInt), &remoteOffsets); CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf, MPIU_INT, &section->atlasOff[-section->atlasLayout.pStart], &remoteOffsets[-section->atlasLayout.pStart]); CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf, MPIU_INT, &section->atlasOff[-section->atlasLayout.pStart], &remoteOffsets[-section->atlasLayout.pStart]); CHKERRQ(ierr);
  for (i = section->atlasLayout.pStart; i < section->atlasLayout.pEnd; ++i) {
    ierr = PetscSynchronizedPrintf(((PetscObject) sf)->comm, "remoteOffsets[%d]: %d\n", i, remoteOffsets[i-section->atlasLayout.pStart]); CHKERRQ(ierr);
  }
#endif
  ierr = PetscSynchronizedFlush(((PetscObject) sf)->comm); CHKERRQ(ierr);
  ierr = PetscMalloc(numIndices * sizeof(PetscInt), &localIndices); CHKERRQ(ierr);
  ierr = PetscMalloc(numIndices * sizeof(PetscSFNode), &remoteIndices); CHKERRQ(ierr);
  /* Create new index graph */
  for (r = 0, ind = 0; r < numRanks; ++r) {
    PetscInt rank = ranks[r];

    for (i = rankOffsets[r]; i < rankOffsets[r+1]; ++i) {
      PetscInt localPoint   = localPoints[i];
      PetscInt remoteOffset = remoteOffsets[localPoint-section->atlasLayout.pStart];
      PetscInt localOffset, dof, d;

      ierr = PetscSectionGetOffset(section, localPoint, &localOffset); CHKERRQ(ierr);
      ierr = PetscSectionGetDof(section, localPoint, &dof); CHKERRQ(ierr);
      for (d = 0; d < dof; ++d, ++ind) {
        localIndices[ind]        = localOffset+d;
        remoteIndices[ind].rank  = rank;
        remoteIndices[ind].index = remoteOffset+d;
      }
    }
  }
  ierr = PetscFree(remoteOffsets); CHKERRQ(ierr);
  if (numIndices != ind) SETERRQ2(((PetscObject) sf)->comm, PETSC_ERR_PLIB, "Inconsistency in indices, %d should be %d", ind, numIndices);
  ierr = PetscSFCreate(((PetscObject) sf)->comm, sectionSF); CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sectionSF, numIndices, numIndices, localIndices, PETSC_OWN_POINTER, remoteIndices, PETSC_OWN_POINTER); CHKERRQ(ierr);
  ierr = PetscSFView(*sectionSF, NULL); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
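/* Usage sketch (assumed caller-side code, not from the function above): once the section SF
   relates individual degrees of freedom instead of mesh points, a single broadcast updates
   every ghost dof.  Here globalArray is assumed to hold the owned dof values on each rank
   and localArray is the local storage whose ghost entries get filled; both names are
   illustrative. */
static PetscErrorCode ExampleUpdateGhostDofs(PetscSF sectionSF, const PetscScalar globalArray[], PetscScalar localArray[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSFBcastBegin(sectionSF,MPIU_SCALAR,globalArray,localArray);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sectionSF,MPIU_SCALAR,globalArray,localArray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}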