int main(int argc, char *argv[])
{
    int rank, num_of_processes;
    int i;
    MPI_Comm comm = MPI_COMM_WORLD;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(comm, &num_of_processes);
    MPI_Comm_rank(comm, &rank);

    int localsum = 0;
    int globalsum = 0;
    int expectedsum = 0;

    if (rank == 0) {
        printf("Checking mpi_scan(sum)... (if you see no output then you are good)\n");
    }

    localsum = do_something(rank, 2);
    globalsum = 0;
    MPI_Scan(&localsum, &globalsum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

    /* Count up to my rank and verify that this matches the result of the scan */
    expectedsum = 0;
    for (i = 0; i < rank + 1; i++) {
        expectedsum = expectedsum + do_something(i, 2);
    }
    if (globalsum != expectedsum) {
        printf("ERROR: Expected %d got %d [rank:%d]\n", expectedsum, globalsum, rank);
    }

    MPI_Finalize();
    return 0;
}
int main(int argc, char** argv)
{
    int rank, size;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int sendbuf[1];
    int recvbuf[1];
    int finalbuf[size];

    /* Prefix product of the ranks (shifted by one so rank 0 contributes 1) */
    sendbuf[0] = rank + 1;
    MPI_Scan(sendbuf, recvbuf, 1, MPI_INT, MPI_PROD, MPI_COMM_WORLD);

    /* Collect every rank's partial product on rank 0 and print them */
    MPI_Gather(recvbuf, 1, MPI_INT, finalbuf, 1, MPI_INT, 0, MPI_COMM_WORLD);
    if (rank == 0) {
        for (int i = 0; i < size; i++) {
            printf("%d: Result %d: %d\n", rank, i, finalbuf[i]);
        }
    }

    MPI_Finalize();
    return 0;
}
/*
 * Compute an array that contains the cumulative sum of objects
 * on each processor.
 *
 * Memory for the vtxdist array is allocated here,
 * but must be freed by the calling routine.
 */
int Zoltan_Get_Distribution(ZZ *zz, int **vtxdist)
{
  int ierr = ZOLTAN_OK, num_obj;
  char *yo = "Zoltan_Get_Distribution";

  num_obj = zz->Get_Num_Obj(zz->Get_Num_Obj_Data, &ierr);
  if (ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN){
    /* Return error code */
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error in Get_Num_Obj.");
    return (ierr);
  }

  *vtxdist = (int *) ZOLTAN_MALLOC((zz->Num_Proc+1)*sizeof(int));
  if (num_obj>0){
    if (!(*vtxdist)){
      /* Not enough memory */
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Out of memory.");
      return ZOLTAN_MEMERR;
    }
  }

  /* Construct *vtxdist[i] = the number of objects on all procs < i. */
  /* Scan to compute partial sums of the number of objs */
  MPI_Scan(&num_obj, *vtxdist, 1, MPI_INT, MPI_SUM, zz->Communicator);
  /* Gather data from all procs */
  MPI_Allgather(&((*vtxdist)[0]), 1, MPI_INT,
                &((*vtxdist)[1]), 1, MPI_INT, zz->Communicator);
  (*vtxdist)[0] = 0;

  return ZOLTAN_OK;
}
Bool MPIStream_SetOffset( Stream* stream, SizeT sizeToWrite, MPI_Comm communicator )
{
    MPI_Offset offset = 0;
    int rank;
    int nproc;
    unsigned int localSizeToWrite;
    unsigned int sizePartialSum;

    if ( stream->_file == NULL ) {
        return False;
    }
    if ( stream->_file->type != MPIFile_Type ) {
        return False;
    }

    MPI_Comm_rank( communicator, &rank );
    MPI_Comm_size( communicator, &nproc );

    /* Sum up the individual sizeToWrites for processors lower than this one */
    localSizeToWrite = sizeToWrite;
    MPI_Scan( &localSizeToWrite, &sizePartialSum, 1, MPI_UNSIGNED, MPI_SUM, communicator );

    /* Now, just subtract the sizeToWrite of the current processor to get our start point */
    offset = sizePartialSum - localSizeToWrite;

    MPI_File_seek( *(MPI_File*)stream->_file->fileHandle, offset, MPI_SEEK_SET );

    return True;
}
/*
 * Class:     mpi_Intracomm
 * Method:    Scan
 * Signature: (Ljava/lang/Object;ILjava/lang/Object;IILmpi/Datatype;Lmpi/Op;)V
 */
JNIEXPORT void JNICALL Java_mpi_Intracomm_scan(JNIEnv *env, jobject jthis,
                                               jobject sendbuf, jint sendoffset,
                                               jobject recvbuf, jint recvoffset,
                                               jint count, jobject type, jobject op)
{
    MPI_Comm mpi_comm =
        (MPI_Comm)((*env)->GetLongField(env, jthis, ompi_java.CommhandleID));
    MPI_Datatype mpi_type =
        (MPI_Datatype)((*env)->GetLongField(env, type, ompi_java.DatatypehandleID));
    int baseType = (*env)->GetIntField(env, type, ompi_java.DatatypebaseTypeID);
    void *sendptr, *recvptr;
    void *sbufbase, *rbufbase;

    ompi_java_clearFreeList(env);

    recvptr = ompi_java_getBufPtr(&rbufbase, env, recvbuf, baseType, recvoffset);
    sendptr = ompi_java_getBufPtr(&sbufbase, env, sendbuf, baseType, sendoffset);

    MPI_Scan(sendptr, recvptr, count, mpi_type,
             (MPI_Op)((*env)->GetLongField(env, op, ompi_java.OphandleID)),
             mpi_comm);

    ompi_java_releaseBufPtr(env, sendbuf, sbufbase, baseType);
    ompi_java_releaseBufPtr(env, recvbuf, rbufbase, baseType);
}
int SAMRAI_MPI::Scan(
   void* sendbuf,
   void* recvbuf,
   int count,
   Datatype datatype,
   Op op) const
{
#ifndef HAVE_MPI
   NULL_USE(sendbuf);
   NULL_USE(recvbuf);
   NULL_USE(count);
   NULL_USE(datatype);
   NULL_USE(op);
#endif
   int rval = MPI_SUCCESS;
   if (!s_mpi_is_initialized) {
      TBOX_ERROR("SAMRAI_MPI::Scan is a no-op without run-time MPI!");
   }
#ifdef HAVE_MPI
   else {
      rval = MPI_Scan(sendbuf, recvbuf, count, datatype, op, d_comm);
   }
#endif
   return rval;
}
int main(int argc, char *argv[])
{
    int root = 0;
    int processCount;
    int currentRank;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &processCount);
    MPI_Comm_rank(MPI_COMM_WORLD, &currentRank);

    int reduce  = currentRank;
    int reduce2 = currentRank;
    int reduce3 = 0;

    /* Inclusive prefix sum: rank i receives 0 + 1 + ... + i */
    MPI_Scan(&currentRank, &reduce, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    printf("Scan: process %d: reduce = %d\n", currentRank, reduce);

    /* Exclusive prefix sum: rank i receives 0 + 1 + ... + (i-1); the result is undefined on rank 0 */
    MPI_Exscan(&currentRank, &reduce2, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    printf("Exscan: process %d: reduce = %d\n", currentRank, reduce2);

    /* Full reduction: only the root receives the total */
    MPI_Reduce(&currentRank, &reduce3, 1, MPI_INT, MPI_SUM, root, MPI_COMM_WORLD);
    if (currentRank == 0)
        printf("Reduce: process %d: reduce = %d\n", currentRank, reduce3);

    MPI_Finalize();
    return 0;
}
/* Completes particle distribution */
void finishParticlesInitialization(uint64_t n, particle_t *p)
{
  double x_coord, y_coord, rel_x, rel_y, cos_theta, cos_phi, r1_sq, r2_sq, base_charge, ID;
  uint64_t x, pi, cumulative_count;

  /* Prefix sum of particle counts gives this rank's first global particle ID */
  MPI_Scan(&n, &cumulative_count, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
  ID = (double) (cumulative_count - n + 1);

  int my_ID;
  MPI_Comm_rank(MPI_COMM_WORLD, &my_ID);

  for (pi=0; pi<n; pi++) {
    x_coord = p[pi].x;
    y_coord = p[pi].y;
    rel_x = fmod(x_coord,1.0);
    rel_y = fmod(y_coord,1.0);
    x = (uint64_t) x_coord;
    r1_sq = rel_y * rel_y + rel_x * rel_x;
    r2_sq = rel_y * rel_y + (1.0-rel_x) * (1.0-rel_x);
    cos_theta = rel_x/sqrt(r1_sq);
    cos_phi = (1.0-rel_x)/sqrt(r2_sq);
    base_charge = 1.0 / ((DT*DT) * Q * (cos_theta/r1_sq + cos_phi/r2_sq));

    p[pi].v_x = 0.0;
    p[pi].v_y = ((double) p[pi].m) / DT;
    /* this particle charge assures movement in positive x-direction */
    p[pi].q = (x%2 == 0) ? (2*p[pi].k+1)*base_charge : -1.0 * (2*p[pi].k+1)*base_charge;
    p[pi].x0 = x_coord;
    p[pi].y0 = y_coord;
    p[pi].ID = ID;
    ID += 1.0;
  }
}
int partial_sum_to_all(int in)
{
  int out = in;
#ifdef HAVE_MPI
  /* Inclusive prefix sum over mycomm; without MPI the local value is returned unchanged */
  MPI_Scan(&in, &out, 1, MPI_INT, MPI_SUM, mycomm);
#endif
  return out;
}
FORT_DLL_SPEC void FORT_CALL mpi_scan_ ( void*v1, void*v2, MPI_Fint *v3, MPI_Fint *v4, MPI_Fint *v5, MPI_Fint *v6, MPI_Fint *ierr )
{
#ifndef HAVE_MPI_F_INIT_WORKS_WITH_C
    if (MPIR_F_NeedInit){ mpirinitf_(); MPIR_F_NeedInit = 0; }
#endif

    if (v1 == MPIR_F_MPI_IN_PLACE) v1 = MPI_IN_PLACE;
    *ierr = MPI_Scan( v1, v2, *v3, (MPI_Datatype)(*v4), *v5, (MPI_Comm)(*v6) );
}
value caml_mpi_scan_int(value data, value op, value comm)
{
  long d = Long_val(data);
  long r;
  MPI_Scan(&d, &r, 1, MPI_LONG, reduce_intop[Int_val(op)], Comm_val(comm));
  return Val_long(r);
}
value caml_mpi_scan_float(value data, value op, value comm)
{
  double d = Double_val(data), r;
  MPI_Scan(&d, &r, 1, MPI_DOUBLE, reduce_floatop[Int_val(op)], Comm_val(comm));
  return copy_double(r);
}
FC_FUNC( mpi_scan , MPI_SCAN )
     ( void *sendbuf, void *recvbuf, int *count,
       int *datatype, int *op, int *comm, int *ierror)
{
  *ierror = MPI_Scan( sendbuf, recvbuf, *count, *datatype, *op, *comm);
}
static void scan(const communicator& comm, const T& in, T& out, const Op&)
{
  MPI_Scan(Datatype::address(const_cast<T&>(in)),
           Datatype::address(out),
           Datatype::count(in),
           Datatype::datatype(),
           detail::mpi_op<Op>::get(),
           comm);
}
void dummy_operations::run_collective_dummy_operations()
{
  int rank, size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  // Run Broadcast
  {
    int x;
    MPI_Comm_rank(MPI_COMM_WORLD, &x);
    MPI_Bcast(&x, 1, MPI_INT, 0, MPI_COMM_WORLD);
  }

  // Run Allgather.
  {
    int x, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &x);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    std::vector<int> rcv(size);
    MPI_Allgather(&x, 1, MPI_INT, &rcv[0], 1, MPI_INT, MPI_COMM_WORLD);
  }

  // Run Allreduce.
  {
    int x;
    MPI_Comm_rank(MPI_COMM_WORLD, &x);
    int y = 0;
    MPI_Allreduce(&x, &y, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
  }

  // Dummy Prefix Sum
  {
    int x = 1;
    int y = 0;
    MPI_Scan(&x, &y, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
  }

  // Run Alltoallv.
  {
    std::vector<int> snd(size);
    std::vector<int> rcv(size);
    std::vector<int> scounts(size, 1);
    std::vector<int> rcounts(size, 1);
    std::vector<int> sdispls(size);
    std::vector<int> rdispls(size);
    for (int i = 0, iend = sdispls.size(); i < iend; ++i) {
      sdispls[i] = rdispls[i] = i;
    }
    MPI_Alltoallv(&snd[0], &scounts[0], &sdispls[0], MPI_INT,
                  &rcv[0], &rcounts[0], &rdispls[0], MPI_INT, MPI_COMM_WORLD);
  }
}
HYPRE_Int
hypre_MPI_Scan( void               *sendbuf,
                void               *recvbuf,
                HYPRE_Int           count,
                hypre_MPI_Datatype  datatype,
                hypre_MPI_Op        op,
                hypre_MPI_Comm      comm )
{
   return (HYPRE_Int) MPI_Scan(sendbuf, recvbuf, (hypre_int)count, datatype, op, comm);
}
/**
 * @brief Wrapper around MPI_Scan
 *
 * We check the error code to detect MPI errors and use the default communicator
 * MPI_COMM_WORLD.
 *
 * @param sendbuf Buffer that is being sent
 * @param recvbuf Buffer to receive in
 * @param count Number of elements to be sent
 * @param datatype MPI datatype of the elements
 * @param op Global reduce operation
 */
inline void MyMPI_Scan(void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype,
                       MPI_Op op) {
    MPIGlobal::commtimer.start();
    int status = MPI_Scan(sendbuf, recvbuf, count, datatype, op, MPI_COMM_WORLD);
    if(status != MPI_SUCCESS) {
        std::cerr << "Error during MPI_Scan!" << std::endl;
        my_exit();
    }
    MPIGlobal::commtimer.stop();
}
value caml_mpi_scan_floatarray(value data, value result, value op, value comm)
{
  mlsize_t len = Wosize_val(data) / Double_wosize;
  double * d = caml_mpi_input_floatarray(data, len);
  double * res = caml_mpi_output_floatarray(result, len);
  MPI_Scan(d, res, len, MPI_DOUBLE,
           reduce_floatop[Int_val(op)], Comm_val(comm));
  caml_mpi_free_floatarray(d);
  caml_mpi_commit_floatarray(res, result, len);
  return Val_unit;
}
/** Based on shared node communication list, compute FEM_NODE FEM_GLOBALNO and FEM_NODE_PRIMARY */
CDECL void FEM_Make_node_globalno(int fem_mesh, FEM_Comm_t comm_context)
{
  const char *caller="FEM_Make_node_globalno"; FEMAPI(caller);
  FEM_Mesh *m=FEM_Mesh_lookup(fem_mesh,caller);
  int n, nNo=m->node.size();
  const IDXL_Side &shared=m->node.shared;
  CkVec<int> globalNo(nNo);
  CkVec<char> nodePrimary(nNo);

  // Figure out how each of our nodes is shared
  int nLocal=0;
  for (n=0;n<nNo;n++) {
    switch (commState(n,shared)) {
    case comm_unshared:
      nodePrimary[n]=0;
      globalNo[n]=nLocal++;
      break;
    case comm_shared:
      nodePrimary[n]=0;
      globalNo[n]=-1; // will be filled in during sendsum, below
      break;
    case comm_primary:
      nodePrimary[n]=1;
      globalNo[n]=nLocal++;
      break;
    };
  }

  // Compute global numbers across processors
  //  as the sum of local (unshared and primary) nodes:
  MPI_Comm comm=(MPI_Comm)comm_context;
  int firstGlobal=0; // global number of first local element
  MPI_Scan(&nLocal,&firstGlobal, 1,MPI_INT, MPI_SUM,comm);
  firstGlobal-=nLocal; /* sum of all locals before me, but *not* including */
  for (n=0;n<nNo;n++) {
    if (globalNo[n]==-1) globalNo[n]=0;
    else globalNo[n]+=firstGlobal;
  }

  // Get globalNo for shared nodes, by copying from primary.
  IDXL_Layout_t l=IDXL_Layout_create(IDXL_INT,1);
  IDXL_Comm_t c=IDXL_Comm_begin(72173841,comm_context);
  IDXL_Comm_sendsum(c,FEM_Comm_shared(fem_mesh,FEM_NODE),l,&globalNo[0]);
  IDXL_Comm_wait(c);
  IDXL_Layout_destroy(l);

  // Copy globalNo and primary into fem
  FEM_Mesh_set_data(fem_mesh,FEM_NODE, FEM_GLOBALNO,
    &globalNo[0], 0,nNo, FEM_INDEX_0,1);
  FEM_Mesh_set_data(fem_mesh,FEM_NODE, FEM_NODE_PRIMARY,
    &nodePrimary[0], 0,nNo, FEM_BYTE,1);
}
/*@C
   ISColoringGetIS - Extracts index sets from the coloring context

   Collective on ISColoring

   Input Parameter:
.  iscoloring - the coloring context

   Output Parameters:
+  nn - number of index sets in the coloring context
-  is - array of index sets

   Level: advanced

.seealso: ISColoringRestoreIS(), ISColoringView()
@*/
PetscErrorCode ISColoringGetIS(ISColoring iscoloring,PetscInt *nn,IS *isis[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidPointer(iscoloring,1);

  if (nn) *nn = iscoloring->n;
  if (isis) {
    if (!iscoloring->is) {
      PetscInt        *mcolors,**ii,nc = iscoloring->n,i,base, n = iscoloring->N;
      ISColoringValue *colors = iscoloring->colors;
      IS              *is;

#if defined(PETSC_USE_DEBUG)
      for (i=0; i<n; i++) {
        if (((PetscInt)colors[i]) >= nc) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Coloring is out of range: index %d value %d number colors %d",(int)i,(int)colors[i],(int)nc);
      }
#endif

      /* generate the lists of nodes for each color */
      ierr = PetscMalloc(nc*sizeof(PetscInt),&mcolors);CHKERRQ(ierr);
      ierr = PetscMemzero(mcolors,nc*sizeof(PetscInt));CHKERRQ(ierr);
      for (i=0; i<n; i++) mcolors[colors[i]]++;

      ierr = PetscMalloc(nc*sizeof(PetscInt*),&ii);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscInt),&ii[0]);CHKERRQ(ierr);
      for (i=1; i<nc; i++) ii[i] = ii[i-1] + mcolors[i-1];
      ierr = PetscMemzero(mcolors,nc*sizeof(PetscInt));CHKERRQ(ierr);

      if (iscoloring->ctype == IS_COLORING_GLOBAL) {
        ierr  = MPI_Scan(&iscoloring->N,&base,1,MPIU_INT,MPI_SUM,iscoloring->comm);CHKERRQ(ierr);
        base -= iscoloring->N;
        for (i=0; i<n; i++) ii[colors[i]][mcolors[colors[i]]++] = i + base; /* global idx */
      } else if (iscoloring->ctype == IS_COLORING_GHOSTED) {
        for (i=0; i<n; i++) ii[colors[i]][mcolors[colors[i]]++] = i;        /* local idx */
      } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this ISColoringType type");

      ierr = PetscMalloc(nc*sizeof(IS),&is);CHKERRQ(ierr);
      for (i=0; i<nc; i++) {
        ierr = ISCreateGeneral(iscoloring->comm,mcolors[i],ii[i],PETSC_COPY_VALUES,is+i);CHKERRQ(ierr);
      }

      iscoloring->is = is;
      ierr = PetscFree(ii[0]);CHKERRQ(ierr);
      ierr = PetscFree(ii);CHKERRQ(ierr);
      ierr = PetscFree(mcolors);CHKERRQ(ierr);
    }
    *isis = iscoloring->is;
  }
  PetscFunctionReturn(0);
}
size_t findUniqueGidsCommon(
  size_t num_keys,
  int num_gid,
  ZOLTAN_ID_PTR ddkeys,
  char *ddnewgids,
  MPI_Comm mpicomm
)
{
  int num_lid = 0;  // Local IDs not needed
  int debug_level = 0;
  int num_user = sizeof(gno_t);

  Zoltan_DD_Struct *dd = NULL;
  Zoltan_DD_Create(&dd, mpicomm, num_gid, num_lid, num_user, num_keys, debug_level);

  ZOLTAN_ID_PTR ddnotneeded = NULL;  // Local IDs not needed
  Zoltan_DD_Update(dd, ddkeys, ddnotneeded, ddnewgids, NULL, int(num_keys));

  //////////
  // Insert unique GIDs for DD entries in User data here.

  // Get value of first gid on this rank
  ssize_t nDDEntries = (ssize_t)(dd->nodecnt);
  ssize_t firstIdx;
  MPI_Scan(&nDDEntries, &firstIdx, 1, MPI_LONG_LONG, MPI_SUM, mpicomm);
  firstIdx -= nDDEntries;  // do not include this rank's entries in prefix sum

  // Loop over all directory entries, updating their userdata with updated gid
  DD_NodeIdx cnt = 0;
  for (DD_NodeIdx i = 0; i < dd->nodelistlen; i++) {
    DD_Node *ptr = &(dd->nodelist[i]);
    if (!(ptr->free)) {
      char *userchar = (char*)(ptr->gid + (dd->gid_length + dd->lid_length));
      gno_t *newgid = (gno_t*) userchar;
      *newgid = gno_t(firstIdx + cnt);
      cnt++;
    }
  }

  ///////////
  // Retrieve the global numbers and put in the result gids vector
  Zoltan_DD_Find(dd, ddkeys, ddnotneeded, ddnewgids, NULL, int(num_keys), NULL);

  Zoltan_DD_Destroy(&dd);

  ssize_t nUnique = 0;
  MPI_Allreduce(&nDDEntries, &nUnique, 1, MPI_LONG_LONG, MPI_SUM, mpicomm);

  return size_t(nUnique);
}
void MpiComm<Ordinal>::scan(
  const ValueTypeReductionOp<Ordinal,char> &reductOp,
  const Ordinal bytes, const char sendBuffer[], char scanReducts[]
  ) const
{
  TEUCHOS_COMM_TIME_MONITOR(
    "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::scan(...)"
    );
  MpiReductionOpSetter op(mpiReductionOp(rcp(&reductOp,false)));
  MPI_Scan(
    const_cast<char*>(sendBuffer), scanReducts, bytes, MPI_CHAR, op.mpi_op(),
    *rawMpiComm_
    );
}
/*@C
  PetscObjectsListGetGlobalNumbering - computes a global numbering
  of PetscObjects living on subcommunicators of a given communicator.

  Collective on comm.

  Input Parameters:
+ comm    - MPI_Comm
. len     - local length of objlist
- objlist - a list of PETSc objects living on subcomms of comm and containing this comm rank
            (subcomm ordering is assumed to be deadlock-free)

  Output Parameters:
+ count     - global number of distinct subcommunicators on objlist (may be > len)
- numbering - global numbers of objlist entries (allocated by user)

  Level: developer

  Concepts: MPI subcomm^numbering
@*/
PetscErrorCode PetscObjectsListGetGlobalNumbering(MPI_Comm comm, PetscInt len, PetscObject *objlist, PetscInt *count, PetscInt *numbering)
{
  PetscErrorCode ierr;
  PetscInt       i, roots, offset;
  PetscMPIInt    size, rank;

  PetscFunctionBegin;
  PetscValidPointer(objlist,3);
  if (!count && !numbering) PetscFunctionReturn(0);

  ierr  = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr  = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  roots = 0;
  for (i = 0; i < len; ++i) {
    PetscMPIInt srank;
    ierr = MPI_Comm_rank(objlist[i]->comm, &srank);CHKERRQ(ierr);
    /* Am I the root of the i-th subcomm? */
    if (!srank) ++roots;
  }
  if (count) {
    /* Obtain the sum of all roots -- the global number of distinct subcomms. */
    ierr = MPI_Allreduce(&roots,count,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  }
  if (numbering) {
    /* Introduce a global numbering for subcomms, initially known only by subcomm roots. */
    /*
      At each subcomm root, number all of the subcomms it owns locally
      and make the numbering global by calculating the shift among all of the roots.
      The roots are ordered using the comm ordering.
    */
    ierr    = MPI_Scan(&roots,&offset,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    offset -= roots;
    /* Now we are ready to broadcast global subcomm numbers within each subcomm. */
    /*
      This is where the deadlock-free ordering of the subcomms is assumed:
      broadcast is collective on the subcomm.
    */
    roots = 0;
    for (i = 0; i < len; ++i) {
      PetscMPIInt srank;
      numbering[i] = offset + roots; /* only meaningful if !srank. */

      ierr = MPI_Comm_rank(objlist[i]->comm, &srank);CHKERRQ(ierr);
      ierr = MPI_Bcast(numbering+i,1,MPIU_INT,0,objlist[i]->comm);CHKERRQ(ierr);
      if (!srank) ++roots;
    }
  }
  PetscFunctionReturn(0);
}
void calc_global_node_ids(long long * globalNodeIds,
                          bool * nodeIsOwned,
                          long long numNodes,
                          long long num_node_comm_maps,
                          long long * node_cmap_node_cnts,
                          long long * node_comm_proc_ids,
                          long long * * comm_node_ids,
                          int rank)
/*******************************************************************************/
{
  for(long long i = 0; i < numNodes; i++){
    globalNodeIds[i] = 1l;
    nodeIsOwned[i] = true;
  }
  for(long long j = 0; j < num_node_comm_maps; j++) {
    for(long long k = 0; k < node_cmap_node_cnts[j]; k++){
      if(node_comm_proc_ids[j] < rank){
        globalNodeIds[comm_node_ids[j][k]-1] = -1;
        nodeIsOwned[comm_node_ids[j][k]-1] = false;
      }
    }
  }

  long long num_unique_nodes = 0;
  for(long long i = 0; i < numNodes; i++)
    if(globalNodeIds[i] == 1l)
      num_unique_nodes++;

  /* Prefix sum of owned-node counts gives this rank's first global node id */
  long long start_id = 0;
#ifdef HAVE_MPI
  MPI_Scan(&num_unique_nodes, &start_id, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD);
  start_id -= num_unique_nodes;
#endif

  int num_assigned = 0;
  for(long long i = 0; i < numNodes; i++)
    if(globalNodeIds[i] == 1l){
      globalNodeIds[i] = num_assigned + start_id;
      num_assigned++;
    }

  //Conforms global nodal ids
  Conform_Boundary_IDS(comm_node_ids, node_cmap_node_cnts, node_comm_proc_ids,
                       globalNodeIds, num_node_comm_maps, rank);
}
value caml_mpi_scan_intarray(value data, value result, value op, value comm)
{
  mlsize_t len = Wosize_val(data);

  /* Decode data at all nodes in place */
  caml_mpi_decode_intarray(data, len);
  /* Do the scan */
  MPI_Scan(&Field(data, 0), &Field(result, 0), len, MPI_LONG,
           reduce_intop[Int_val(op)], Comm_val(comm));
  /* Re-encode data at all nodes in place */
  caml_mpi_encode_intarray(data, len);
  /* Encode result */
  caml_mpi_encode_intarray(result, len);
  return Val_unit;
}
int main(int argc, char **argv)
{
  int nproc, id, a1 = 0, a2 = 0;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &nproc);
  MPI_Comm_rank(MPI_COMM_WORLD, &id);

  /* Inclusive prefix sum of the ranks */
  a1 = id;
  MPI_Scan(&a1, &a2, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
  printf("my id: %d my a1: %d my a2: %d\n", id, a1, a2);

  MPI_Finalize();
  return 0;
}
/*@
    ISPartitioningToNumbering - Takes an ISPartitioning and on each processor
    generates an IS that contains a new global node number for each index based
    on the partitioning.

    Collective on IS

    Input Parameters:
.   partitioning - a partitioning as generated by MatPartitioningApply()

    Output Parameter:
.   is - on each processor the index set that defines the global numbers
         (in the new numbering) for all the nodes currently (before the partitioning)
         on that processor

    Level: advanced

.seealso: MatPartitioningCreate(), AOCreateBasic(), ISPartitioningCount()

@*/
PetscErrorCode ISPartitioningToNumbering(IS part,IS *is)
{
  MPI_Comm       comm;
  PetscInt       i,np,npt,n,*starts = NULL,*sums = NULL,*lsizes = NULL,*newi = NULL;
  const PetscInt *indices = NULL;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)part,&comm);CHKERRQ(ierr);

  /* count the number of partitions, i.e., virtual processors */
  ierr = ISGetLocalSize(part,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(part,&indices);CHKERRQ(ierr);
  np   = 0;
  for (i=0; i<n; i++) np = PetscMax(np,indices[i]);
  ierr = MPI_Allreduce(&np,&npt,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
  np   = npt+1; /* so that it looks like an MPI_Comm_size output */

  /*
       lsizes - number of elements of each partition on this particular processor
       sums   - total number of "previous" nodes for any particular partition
       starts - global number of first element in each partition on this processor
  */
  ierr = PetscMalloc3(np,&lsizes,np,&starts,np,&sums);CHKERRQ(ierr);
  ierr = PetscMemzero(lsizes,np*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) lsizes[indices[i]]++;
  ierr = MPI_Allreduce(lsizes,sums,np,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  ierr = MPI_Scan(lsizes,starts,np,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  for (i=0; i<np; i++) starts[i] -= lsizes[i];
  for (i=1; i<np; i++) {
    sums[i]   += sums[i-1];
    starts[i] += sums[i-1];
  }

  /* For each local index give it the new global number */
  ierr = PetscMalloc1(n,&newi);CHKERRQ(ierr);
  for (i=0; i<n; i++) newi[i] = starts[indices[i]]++;
  ierr = PetscFree3(lsizes,starts,sums);CHKERRQ(ierr);

  ierr = ISRestoreIndices(part,&indices);CHKERRQ(ierr);
  ierr = ISCreateGeneral(comm,n,newi,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
  ierr = ISSetPermutation(*is);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode ISInvertPermutation_General(IS is,PetscInt nlocal,IS *isout)
{
  IS_General     *sub = (IS_General*)is->data;
  PetscInt       i,*ii,n,nstart;
  const PetscInt *idx = sub->idx;
  PetscMPIInt    size;
  IS             istmp,nistmp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutGetLocalSize(is->map, &n);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)is),&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscMalloc1(n,&ii);CHKERRQ(ierr);
    for (i=0; i<n; i++) ii[idx[i]] = i;
    ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ii,PETSC_OWN_POINTER,isout);CHKERRQ(ierr);
    ierr = ISSetPermutation(*isout);CHKERRQ(ierr);
  } else {
    /* crude, nonscalable get entire IS on each processor */
    if (nlocal == PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Do not yet support nlocal of PETSC_DECIDE");
    ierr = ISAllGather(is,&istmp);CHKERRQ(ierr);
    ierr = ISSetPermutation(istmp);CHKERRQ(ierr);
    ierr = ISInvertPermutation(istmp,PETSC_DECIDE,&nistmp);CHKERRQ(ierr);
    ierr = ISDestroy(&istmp);CHKERRQ(ierr);
    /* get the part we need */
    ierr = MPI_Scan(&nlocal,&nstart,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)is));CHKERRQ(ierr);
#if defined(PETSC_USE_DEBUG)
    {
      PetscInt    N;
      PetscMPIInt rank;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)is),&rank);CHKERRQ(ierr);
      ierr = PetscLayoutGetSize(is->map, &N);CHKERRQ(ierr);
      if (rank == size-1) {
        if (nstart != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of nlocal lengths %d != total IS length %d",nstart,N);
      }
    }
#endif
    nstart -= nlocal;
    ierr    = ISGetIndices(nistmp,&idx);CHKERRQ(ierr);
    ierr    = ISCreateGeneral(PetscObjectComm((PetscObject)is),nlocal,idx+nstart,PETSC_COPY_VALUES,isout);CHKERRQ(ierr);
    ierr    = ISRestoreIndices(nistmp,&idx);CHKERRQ(ierr);
    ierr    = ISDestroy(&nistmp);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
void WriteCombinedParallelFile(ug::BinaryBuffer &buffer, std::string strFilename, pcl::ProcessCommunicator pc)
{
	MPI_Status status;
	MPI_Comm m_mpiComm = pc.get_mpi_communicator();
	MPI_File fh;

	bool bFirst = pc.get_proc_id(0) == pcl::ProcRank();

	char filename[1024];
	strcpy(filename, strFilename.c_str());
	if(MPI_File_open(m_mpiComm, filename, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh))
		UG_THROW("could not open "<<filename);

	int mySize = buffer.write_pos();
	int myNextOffset = 0;
	MPI_Scan(&mySize, &myNextOffset, 1, MPI_INT, MPI_SUM, m_mpiComm);

	std::vector<int> allNextOffsets;
	allNextOffsets.resize(pc.size(), 0);
	//else allNextOffsets.resize(1);

	myNextOffset += (pc.size()+1)*sizeof(int);
	MPI_Gather(&myNextOffset, 1, MPI_INT, &allNextOffsets[0], 1, MPI_INT, pc.get_proc_id(0), m_mpiComm);

	if(bFirst)
	{
		int numProcs = pcl::NumProcs();
		MPI_File_write(fh, &numProcs, sizeof(numProcs), MPI_BYTE, &status);
		for(size_t i=0; i<allNextOffsets.size(); i++)
		{
//			UG_LOG("allNextOffsets[" << i << "] = " << allNextOffsets[i] << "\n");
			MPI_File_write(fh, &allNextOffsets[i], sizeof(allNextOffsets[i]), MPI_BYTE, &status);
		}
	}

	int myOffset = myNextOffset - buffer.write_pos();
	MPI_File_seek(fh, myOffset, MPI_SEEK_SET);

//	UG_LOG_ALL_PROCS("MySize = " << mySize << "\n" << " myOffset = " << myOffset << "\n");
//	UG_LOG_ALL_PROCS("buffer.write_pos() = " << buffer.write_pos() << "\n" << "(pc.size()+1)*sizeof(size_t) = " << (pc.size()+1)*sizeof(size_t) << "\n");

	MPI_File_write(fh, buffer.buffer(), buffer.write_pos(), MPI_BYTE, &status);
	MPI_File_close(&fh);
}
int main(int argc,char **argv)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       n,*ispetsc,*isapp,start,N,i;
  AO             ao;

  ierr = PetscInitialize(&argc,&argv,(char*)0,help);CHKERRQ(ierr);
  ierr = PetscOptionsGetInt(NULL,"-n",&n,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
  n    = rank + 2;
  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);

  /* create the orderings */
  ierr = PetscMalloc2(n,&ispetsc,n,&isapp);CHKERRQ(ierr);

  ierr   = MPI_Scan(&n,&start,1,MPIU_INT,MPI_SUM,PETSC_COMM_WORLD);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,PETSC_COMM_WORLD);CHKERRQ(ierr);
  start -= n;

  for (i=0; i<n; i++) {
    ispetsc[i] = start + i;
    isapp[i]   = N - start - i - 1;
  }

  /* create the application ordering */
  ierr = AOCreateBasic(PETSC_COMM_WORLD,n,isapp,ispetsc,&ao);CHKERRQ(ierr);
  ierr = AOView(ao,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

  /* check the mapping */
  ierr = AOPetscToApplication(ao,n,ispetsc);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    if (ispetsc[i] != isapp[i]) {
      ierr = PetscPrintf(PETSC_COMM_WORLD,"[%d] Problem with mapping %D to %D\n",rank,i,ispetsc[i]);
    }
  }

  ierr = PetscFree2(ispetsc,isapp);CHKERRQ(ierr);
  ierr = AODestroy(&ao);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return 0;
}