void CmiOutOfMemory(int nBytes)
{ /* We're out of memory: free the liferaft memory so the error path
     itself has room to work, then abort with a diagnostic. */
  char errMsg[200];
  if (memory_lifeRaft) free(memory_lifeRaft);
  if (nBytes > 0)
    snprintf(errMsg, sizeof(errMsg),
             "Could not malloc() %d bytes--are we out of memory? (used: %.3fMB)",
             nBytes, CmiMemoryUsage()/1000000.0);
  else
    snprintf(errMsg, sizeof(errMsg),
             "Could not malloc()--are we out of memory? (used: %.3fMB)",
             CmiMemoryUsage()/1000000.0);
  CmiAbort(errMsg);
}
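/* Usage sketch (not from the original source): a hypothetical allocation
   wrapper that reports failure through CmiOutOfMemory(), which aborts.
   malloc_or_abort is an illustrative name, not part of the Converse API. */
static void *malloc_or_abort(size_t size)
{
  void *p = malloc(size);
  if (p == NULL) CmiOutOfMemory((int)size);  /* does not return */
  return p;
}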
unsigned long memusage(const char **source)
{
  unsigned long memtotal = 0;
  const char *s = "ERROR";
  /* Prefer the interposed allocator's own accounting, if it is active. */
  if (!CmiMemoryIs(CMI_MEMORY_IS_OS)) {
    memtotal = CmiMemoryUsage(); s = "CmiMemoryUsage";
  }
#if CMK_BLUEGENEQ
  if (!memtotal) { memtotal = memusage_bgq(); s = "Kernel_GetMemorySize on BG/Q"; }
#endif
#if CMK_BLUEGENEP
  if (!memtotal) { memtotal = memusage_bgp(); s = "mallinfo on BG/P"; }
#endif
#if defined(WIN32) && !defined(__CYGWIN__)
  if (!memtotal) { memtotal = CmiMemoryUsage(); s = "GetProcessMemoryInfo"; }
#endif
  /* Fall through a chain of platform-specific sources until one reports
     a nonzero total. */
  if (!memtotal) { memtotal = memusage_proc_self_stat(); s = "/proc/self/stat"; }
  if (!memtotal) { memtotal = memusage_mstats();         s = "mstats"; }
  if (!memtotal) { memtotal = memusage_mallinfo();       s = "mallinfo"; }
  if (!memtotal) { memtotal = memusageinit::memusage_sbrk(); s = "sbrk"; }
  if (!memtotal) { memtotal = memusage_ps();             s = "ps"; }
  if (!memtotal) { memtotal = CmiMemoryUsage();          s = "CmiMemoryUsage"; }
  if (!memtotal) s = "nothing";
  if (source) *source = s;
  return memtotal;
}
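/* Usage sketch (not from the original source): query the current footprint
   and report which fallback actually supplied the number. */
void print_memusage(void)
{
  const char *source;
  unsigned long bytes = memusage(&source);
  printf("memory usage: %.3f MB (measured via %s)\n", bytes/1048576.0, source);
}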
int FEM_Mesh_Parallel_broadcast(int fem_mesh, int masterRank, FEM_Comm_t comm_context)
{
  int myRank;
  MPI_Comm_rank((MPI_Comm)comm_context, &myRank);
  printf("[%d] FEM_Mesh_Parallel_broadcast called for mesh %d\n", myRank, fem_mesh);
  int new_mesh;
  if (myRank == masterRank) {
    /* I am the master: I have the element connectivity data and need
       to send it to everybody. */
    printf("[%d] Memory usage on vp 0 at the beginning of partition %lu\n",
           CkMyPe(), (unsigned long)CmiMemoryUsage());
    new_mesh = FEM_master_parallel_part(fem_mesh, masterRank, comm_context);
  } else {
    new_mesh = FEM_slave_parallel_part(fem_mesh, masterRank, comm_context);
  }
  /* Temporary barrier to keep stuff from falling apart. */
  MPI_Barrier((MPI_Comm)comm_context);
  printf("[%d] Partitioned mesh number %d\n", myRank, new_mesh);
  return new_mesh;
}
void CqsDequeue(Queue q, void **resp)
{
#ifdef ADAPT_SCHED_MEM
  /* Added by Isaac for testing purposes: under memory pressure, bump the
     priority of memory-critical entry methods before dequeuing. */
  if ((q->length > 1) &&
      (CmiMemoryUsage() > schedAdaptMemThresholdMB*1024*1024)) {
    /* CqsIncreasePriorityForEntryMethod(q, 153); */
    CqsIncreasePriorityForMemCriticalEntries(q);
  }
#endif
  if (q->length == 0) { *resp = 0; return; }
  if (q->negprioq.heapnext > 1)
    { *resp = CqsPrioqDequeue(&(q->negprioq)); q->length--; return; }
  if (q->zeroprio.head != q->zeroprio.tail)
    { *resp = CqsDeqDequeue(&(q->zeroprio)); q->length--; return; }
  if (q->posprioq.heapnext > 1)
    { *resp = CqsPrioqDequeue(&(q->posprioq)); q->length--; return; }
  *resp = 0;
}
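/* Usage sketch (not from the original source): drain a queue in priority
   order. CqsDequeue() stores NULL once the queue is empty; handle() is a
   placeholder for the caller's dispatch routine. */
extern void handle(void *msg);

void drain_queue(Queue q)
{
  void *msg;
  for (;;) {
    CqsDequeue(q, &msg);     /* negative, then zero, then positive priority */
    if (msg == NULL) break;  /* queue drained */
    handle(msg);
  }
}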
int FEM_master_parallel_part(int fem_mesh, int masterRank, FEM_Comm_t comm_context)
{
  const char *caller = "FEM_Create_connmsa";
  FEMAPI(caller);
  FEM_chunk *c = FEM_chunk::get(caller);
  FEM_Mesh *m = c->lookup(fem_mesh, caller);
  m->setAbsoluteGlobalno();
  int nelem = m->nElems();
  int numChunks;
  MPI_Comm_size((MPI_Comm)comm_context, &numChunks);
  printf("master -> number of elements %d\n", nelem);
  DEBUG(m->print(0));

  /* Load the connectivity information into the eptr and eind data
     structures. They will be read by the slave chunks and used to call
     ParMETIS. */
  MSA1DINT eptrMSA(nelem, numChunks);
  MSA1DINT eindMSA(nelem*10, numChunks);
  /* After the MSA arrays have been created and loaded with connectivity
     data, tell the slaves about them. */
  struct conndata data;
  data.nelem = nelem;
  data.nnode = m->node.size();
  data.arr1 = eptrMSA;
  data.arr2 = eindMSA;
  MPI_Bcast_pup(data, masterRank, (MPI_Comm)comm_context);

  eptrMSA.enroll(numChunks);
  eindMSA.enroll(numChunks);
  MSA1DINT::Write wPtr = eptrMSA.getInitialWrite();
  MSA1DINT::Write wInd = eindMSA.getInitialWrite();
  int indcount = 0, ptrcount = 0;
  for (int t = 0; t < m->elem.size(); t++) {
    if (m->elem.has(t)) {
      FEM_Elem &k = m->elem[t];
      for (int e = 0; e < k.size(); e++) {
        wPtr.set(ptrcount) = indcount;
        ptrcount++;
        for (int n = 0; n < k.getNodesPer(); n++) {
          wInd.set(indcount) = k.getConn(e, n);
          indcount++;
        }
      }
    }
  }
  wPtr.set(ptrcount) = indcount;
  printf("master -> ptrcount %d indcount %d sizeof(MSA1DINT) %zu sizeof(MSA1DINTLIST) %zu memory %lu\n",
         ptrcount, indcount, sizeof(MSA1DINT), sizeof(MSA1DINTLIST),
         (unsigned long)CmiMemoryUsage());

  /* Break up the mesh so that each chunk gets the same number of elements,
     plus the nodes corresponding to those elements. This is not the
     partition itself: it just distributes the data, so that when the
     partition is computed with ParMETIS the requests for data do not all
     go to chunk 0. After partitioning, each chunk can then send its
     element and node data to the chunks that need it. */
  FEM_Mesh *mesh_array = FEM_break_mesh(m, ptrcount, numChunks);
  /* Send the broken-up meshes to the different chunks. */
  sendBrokenMeshes(mesh_array, comm_context);
  delete [] mesh_array;
  FEM_Mesh mypiece;
  MPI_Recv_pup(mypiece, masterRank, MESH_CHUNK_TAG, (MPI_Comm)comm_context);

  /* Call ParMETIS. */
  double parStartTime = CkWallTimer();
  MSA1DINT::Read rPtr = wPtr.syncToRead();
  MSA1DINT::Read rInd = wInd.syncToRead();
  printf("starting FEM_call_parmetis\n");
  struct partconndata *partdata = FEM_call_parmetis(data.nelem, rPtr, rInd, comm_context);
  printf("done with parmetis %lu FEM_Mesh %zu in %.6lf\n",
         (unsigned long)CmiMemoryUsage(), sizeof(FEM_Mesh),
         CkWallTimer()-parStartTime);

  double dataArrangeStartTime = CkWallTimer();
  /* Set up an MSA to store the partitions to which a node belongs.
     A node can belong to multiple partitions. */
  int totalNodes = m->node.size();
  MSA1DINTLIST nodepart(totalNodes, numChunks);
  MPI_Bcast_pup(nodepart, masterRank, (MPI_Comm)comm_context);
  nodepart.enroll(numChunks);
  MSA1DINTLIST::Accum nodepartAcc = nodepart.getInitialAccum();
  FEM_write_nodepart(nodepartAcc, partdata, (MPI_Comm)comm_context);
  printf("Creating mapping of node to partition took %.6lf\n",
         CkWallTimer()-dataArrangeStartTime);
  dataArrangeStartTime = CkWallTimer();
  MSA1DINTLIST::Read nodepartRead = nodepartAcc.syncToRead();

  /* Set up an MSA to store the nodes that belong to a partition. */
  MSA1DNODELIST part2node(numChunks, numChunks);
  MPI_Bcast_pup(part2node, masterRank, (MPI_Comm)comm_context);
  part2node.enroll(numChunks);
  MSA1DNODELIST::Accum part2nodeAcc = part2node.getInitialAccum();
  FEM_write_part2node(nodepartRead, part2nodeAcc, partdata, (MPI_Comm)comm_context);

  /* Get the list of elements and nodes that belong to this partition. */
  MSA1DNODELIST::Read rPart2node = part2nodeAcc.syncToRead();
  NodeList lnodes = rPart2node.get(masterRank);
  lnodes.uniquify();
  // IntList lelems = part2elem.get(masterRank);

  printf("Creating mapping of partition to node took %.6lf\n",
         CkWallTimer()-dataArrangeStartTime);
  printf("Time spent doing +=ElemList %.6lf\n", elemlistaccTime);
  dataArrangeStartTime = CkWallTimer();

  /* Build an MSA of FEM_Mesh, with each index containing the mesh for
     that chunk. */
  MSA1DFEMMESH part2mesh(numChunks, numChunks);
  MPI_Bcast_pup(part2mesh, masterRank, (MPI_Comm)comm_context);
  part2mesh.enroll(numChunks);
  MSA1DFEMMESH::Accum aPart2mesh = part2mesh.getInitialAccum();
  FEM_write_part2mesh(aPart2mesh, partdata, &data, nodepartRead,
                      numChunks, masterRank, &mypiece);

  /* Get your mesh, consisting of elements and nodes, out of the mesh MSA. */
  MSA1DFEMMESH::Read rPart2mesh = aPart2mesh.syncToRead();
  MeshElem me = rPart2mesh.get(masterRank);
  //printf("[%d] Number of elements in my partitioned mesh %d number of nodes %d\n",masterRank,me.m->nElems(),me.m->node.size());
  DEBUG(printf("[%d] Memory usage on vp 0 close to max %lu\n",
               CkMyPe(), (unsigned long)CmiMemoryUsage()));

  /* Free up the eptr and eind MSA arrays stored in data. */
  delete &rPtr;
  delete &rInd;
  data.arr1.FreeMem();
  data.arr2.FreeMem();
  nodepart.FreeMem();
  DEBUG(printf("[%d] Memory usage on vp 0 after FreeMem %lu\n",
               CkMyPe(), (unsigned long)CmiMemoryUsage()));

  addIDXLists(me.m, lnodes, masterRank);
  part2node.FreeMem();
  DEBUG(printf("[%d] Memory usage on vp 0 after addIDXL %lu\n",
               CkMyPe(), (unsigned long)CmiMemoryUsage()));

  /* Broadcast the user data to all the meshes. */
  DEBUG(printf("[%d] Length of udata vector in master %d\n",
               masterRank, m->udata.size()));
  MPI_Bcast_pup(m->udata, masterRank, (MPI_Comm)comm_context);
  me.m->udata = m->udata;
  delete partdata;
  printf("[%d] Data Arrangement took %.6lf\n",
         masterRank, CkWallTimer()-dataArrangeStartTime);

  /* Collect the ghost data and send it to all the chunks. */
  struct ghostdata *gdata = gatherGhosts();
  DEBUG(printf("[%d] number of ghost layers %d\n", masterRank, gdata->numLayers));
  MPI_Bcast_pup(*gdata, masterRank, (MPI_Comm)comm_context);

  /* Make ghosts for this mesh. */
  printf("[%d] Starting to generate %d ghost layers\n", masterRank, gdata->numLayers);
  double _startTime = CkWallTimer();
  makeGhosts(me.m, (MPI_Comm)comm_context, masterRank, gdata->numLayers, gdata->layers);
  delete gdata;
  printf("[%d] Ghost generation took %.6lf\n", masterRank, CkWallTimer()-_startTime);

  me.m->becomeGetting();
  FEM_chunk *chunk = FEM_chunk::get("FEM_Mesh_Parallel_broadcast");
  int tempMeshNo = chunk->meshes.put(me.m);
  int new_mesh = FEM_Mesh_copy(tempMeshNo);

  FEM_Mesh *nmesh = c->lookup(new_mesh, "master_parallel_broadcast");
  DEBUG(printf("[%d] Length of udata vector in master new_mesh %d\n",
               masterRank, nmesh->udata.size()));

  part2mesh.FreeMem();
  printf("[%d] Max Memory usage on vp 0 at end of parallel partition %lu\n",
         CkMyPe(), (unsigned long)CmiMaxMemoryUsage());

  return new_mesh;
}
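/* Usage sketch (not from the original source): the MSA phase discipline the
   partitioner above relies on. Every chunk enrolls once; writes go through a
   Write handle; syncToRead() is the collective phase flip that makes the data
   readable. Sizes and values here are illustrative. */
void msa_lifecycle_sketch(int numChunks)
{
  MSA1DINT arr(100, numChunks);        /* created on every chunk */
  arr.enroll(numChunks);               /* collective: all chunks enroll */
  MSA1DINT::Write w = arr.getInitialWrite();
  w.set(0) = 42;                       /* exclusive-write phase */
  MSA1DINT::Read r = w.syncToRead();   /* flip to read phase */
  int v = r.get(0);                    /* now safe to read from any chunk */
  printf("arr[0] = %d\n", v);
  arr.FreeMem();                       /* release, as the code above does */
}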
void CentralLB::LoadBalance()
{
#if CMK_LBDB_ON
  int proc;
  const int clients = CkNumPes();

#if ! USE_REDUCTION
  // build data
  buildStats();
#else
  for (proc = 0; proc < clients; proc++) statsMsgsList[proc] = NULL;
#endif

  theLbdb->ResetAdaptive();
  if (!_lb_args.samePeSpeed()) statsData->normalize_speed();

  if (_lb_args.debug())
    CmiPrintf("\nCharmLB> %s: PE [%d] step %d starting at %f Memory: %f MB\n",
              lbname, cur_ld_balancer, step(), start_lb_time,
              CmiMemoryUsage()/(1024.0*1024.0));

  // if we are in simulation mode read data
  if (LBSimulation::doSimulation) simulationRead();

  char *availVector = LBDatabaseObj()->availVector();
  for (proc = 0; proc < clients; proc++)
    statsData->procs[proc].available = (CmiBool)availVector[proc];

  preprocess(statsData);

  // CkPrintf("Before Calling Strategy\n");

  if (_lb_args.printSummary()) {
    LBInfo info(clients);
    // not take comm data
    info.getInfo(statsData, clients, 0);
    LBRealType mLoad, mCpuLoad, totalLoad;
    info.getSummary(mLoad, mCpuLoad, totalLoad);
    int nmsgs, nbytes;
    statsData->computeNonlocalComm(nmsgs, nbytes);
    CkPrintf("[%d] Load Summary (before LB): max (with bg load): %f max (obj only): %f average: %f at step %d nonlocal: %d msgs %.2fKB.\n",
             CkMyPe(), mLoad, mCpuLoad, totalLoad/clients, step(), nmsgs, 1.0*nbytes/1024);
    // if (_lb_args.debug() > 1) {
    //   for (int i=0; i<statsData->n_objs; i++)
    //     CmiPrintf("[%d] %.10f %.10f\n", i, statsData->objData[i].minWall, statsData->objData[i].maxWall);
    // }
  }

#if CMK_REPLAYSYSTEM
  LDHandle *loadBalancer_pointers;
  if (_replaySystem) {
    loadBalancer_pointers = (LDHandle*)malloc(CkNumPes()*sizeof(LDHandle));
    for (int i=0; i<statsData->n_objs; ++i)
      loadBalancer_pointers[statsData->from_proc[i]] = statsData->objData[i].handle.omhandle.ldb;
  }
#endif

  LBMigrateMsg* migrateMsg = Strategy(statsData);

#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
  migrateMsg->step = step();
#endif

#if CMK_REPLAYSYSTEM
  CpdHandleLBMessage(&migrateMsg);
  if (_replaySystem) {
    for (int i=0; i<migrateMsg->n_moves; ++i)
      migrateMsg->moves[i].obj.omhandle.ldb = loadBalancer_pointers[migrateMsg->moves[i].from_pe];
    free(loadBalancer_pointers);
  }
#endif

  LBDatabaseObj()->get_avail_vector(migrateMsg->avail_vector);
  migrateMsg->next_lb = LBDatabaseObj()->new_lbbalancer();

  // if this is the step at which we need to dump the database
  simulationWrite();

  // calculate predicted load
  // very time consuming, so this only happens when debugging is on
  if (_lb_args.printSummary()) {
    LBInfo info(clients);
    // not take comm data
    getPredictedLoadWithMsg(statsData, clients, migrateMsg, info, 0);
    LBRealType mLoad, mCpuLoad, totalLoad;
    info.getSummary(mLoad, mCpuLoad, totalLoad);
    int nmsgs, nbytes;
    statsData->computeNonlocalComm(nmsgs, nbytes);
    CkPrintf("[%d] Load Summary (after LB): max (with bg load): %f max (obj only): %f average: %f at step %d nonlocal: %d msgs %.2fKB useMem: %.2fKB.\n",
             CkMyPe(), mLoad, mCpuLoad, totalLoad/clients, step(), nmsgs,
             1.0*nbytes/1024, (1.0*useMem())/1024);
    for (int i=0; i<clients; i++)
      migrateMsg->expectedLoad[i] = info.peLoads[i];
  }

  DEBUGF(("[%d]calling recv migration\n",CkMyPe()));

#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
  lbDecisionCount++;
  migrateMsg->lbDecisionCount = lbDecisionCount;
#endif

  envelope *env = UsrToEnv(migrateMsg);
  if (1) {
    // broadcast
    thisProxy.ReceiveMigration(migrateMsg);
  } else {
    // split the migration for each processor
    for (int p=0; p<CkNumPes(); p++) {
      LBMigrateMsg *m = extractMigrateMsg(migrateMsg, p);
      thisProxy[p].ReceiveMigration(m);
    }
    delete migrateMsg;
  }

  // Zero out data structures for next cycle
  // CkPrintf("zeroing out data\n");
  statsData->clear();
  stats_msg_count = 0;
#endif
}
int main(int argc, char **argv)
{
  int my_id;            /* process id */
  int p;                /* number of processes */
  char *message;        /* storage for the message */
  int i, j, k, msg_size;
  MPI_Status status;    /* return status for receive */
  float elapsed_time_msec;
  float bandwidth;
  char *sndbuf, *recvbuf;
  /* Memory counters are unsigned long so they match the
     MPI_UNSIGNED_LONG reductions below. */
  unsigned long memory_before, memory_after;
  unsigned long memory_diff, local_memory_max;
  unsigned long memory_min_small, memory_max_small,
                memory_min_medium, memory_max_medium,
                memory_min_normal, memory_max_normal,
                memory_min_large, memory_max_large;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &my_id);
  MPI_Comm_size(MPI_COMM_WORLD, &p);

  if (argc < 2) {
    fprintf(stderr, "need msg size as params\n");
    goto EXIT;
  }
  if (sscanf(argv[1], "%d", &msg_size) < 1) {
    fprintf(stderr, "need msg size as params\n");
    goto EXIT;
  }
  message = (char*)malloc(msg_size);
  if (argc > 2) sscanf(argv[2], "%d", &max_msgs);

  /* don't start timer until everybody is ok */
  MPI_Barrier(MPI_COMM_WORLD);

  sndbuf = (char *)malloc(msg_size * sizeof(char) * p);
  recvbuf = (char *)malloc(msg_size * sizeof(char) * p);
  for (j = 0; j < p; j++)
    memset(sndbuf + j*msg_size, hash(my_id, j), msg_size);
  memset(recvbuf, 0, msg_size * p);

  if (my_id == 0) { Create_Timers(1); }

  // Test Long
  if (1) {
    // warm up, not instrumented
    for (i = 0; i < max_msgs; i++) {
      MPI_Alltoall_long(sndbuf, msg_size, MPI_CHAR, recvbuf, msg_size, MPI_CHAR, MPI_COMM_WORLD);
    }
    memset(recvbuf, 0, msg_size * p);
    MPI_Barrier(MPI_COMM_WORLD);
    CmiResetMaxMemory();
    memory_before = CmiMemoryUsage();  // initial memory usage
    MPI_Barrier(MPI_COMM_WORLD);
    if (my_id == 0) { Start_Timer(0, ITIMER_REAL); }
    for (i = 0; i < max_msgs; i++) {
      MPI_Alltoall_long(sndbuf, msg_size, MPI_CHAR, recvbuf, msg_size, MPI_CHAR, MPI_COMM_WORLD);
    }
    MPI_Barrier(MPI_COMM_WORLD);
    memory_after = CmiMemoryUsage();
    if (CmiMaxMemoryUsage() < memory_before) local_memory_max = 0;
    else local_memory_max = CmiMaxMemoryUsage() - memory_before;
    // Reduce MAX here
    assert(MPI_SUCCESS == MPI_Reduce(&local_memory_max, &memory_max_large, 1, MPI_UNSIGNED_LONG, MPI_MAX, 0, MPI_COMM_WORLD));
    assert(MPI_SUCCESS == MPI_Reduce(&local_memory_max, &memory_min_large, 1, MPI_UNSIGNED_LONG, MPI_MIN, 0, MPI_COMM_WORLD));
    if (my_id == 0)
      printf("Large Mem Max Usage=%8lu Kb\tMin Usage=%8lu Kb\tVP=%d\tMsgSize=%d\n",
             memory_max_large / 1024, memory_min_large / 1024, p, msg_size);
    for (j = 0; j < p; j++)
      for (k = 0; k < msg_size; k++)
        assert(*(recvbuf + j*msg_size + k) == hash(j, my_id));
  }

  // Test Short
#if 0
  {
    // warm up, not instrumented
    for (i = 0; i < max_msgs; i++) {
      MPI_Alltoall_short(sndbuf, msg_size, MPI_CHAR, recvbuf, msg_size, MPI_CHAR, MPI_COMM_WORLD);
    }
    memset(recvbuf, 0, msg_size * p);
    MPI_Barrier(MPI_COMM_WORLD);
    CmiResetMaxMemory();
    memory_before = CmiMemoryUsage();  // initial memory usage
    MPI_Barrier(MPI_COMM_WORLD);
    if (my_id == 0) { Start_Timer(0, ITIMER_REAL); }
    for (i = 0; i < max_msgs; i++) {
      MPI_Alltoall_short(sndbuf, msg_size, MPI_CHAR, recvbuf, msg_size, MPI_CHAR, MPI_COMM_WORLD);
    }
    MPI_Barrier(MPI_COMM_WORLD);
    memory_after = CmiMemoryUsage();
    if (CmiMaxMemoryUsage() < memory_before) local_memory_max = 0;
    else local_memory_max = CmiMaxMemoryUsage() - memory_before;
    // Reduce MAX here
    assert(MPI_SUCCESS == MPI_Reduce(&local_memory_max, &memory_max_small, 1, MPI_UNSIGNED_LONG, MPI_MAX, 0, MPI_COMM_WORLD));
    assert(MPI_SUCCESS == MPI_Reduce(&local_memory_max, &memory_min_small, 1, MPI_UNSIGNED_LONG, MPI_MIN, 0, MPI_COMM_WORLD));
    if (my_id == 0)
      printf("Small Mem Max Usage=%8lu Kb\tMin Usage=%8lu Kb\tVP=%d\tMsgSize=%d\n",
             memory_max_small / 1024, memory_min_small / 1024, p, msg_size);
    for (j = 0; j < p; j++)
      for (k = 0; k < msg_size; k++)
        assert(*(recvbuf + j*msg_size + k) == hash(j, my_id));
  }
#endif

  // Test Medium
  if (1) {
    // warm up, not instrumented
    for (i = 0; i < max_msgs; i++) {
      MPI_Alltoall_medium(sndbuf, msg_size, MPI_CHAR, recvbuf, msg_size, MPI_CHAR, MPI_COMM_WORLD);
    }
    memset(recvbuf, 0, msg_size * p);
    MPI_Barrier(MPI_COMM_WORLD);
    CmiResetMaxMemory();
    memory_before = CmiMemoryUsage();  // initial memory usage
    MPI_Barrier(MPI_COMM_WORLD);
    if (my_id == 0) { Start_Timer(0, ITIMER_REAL); }
    for (i = 0; i < max_msgs; i++) {
      MPI_Alltoall_medium(sndbuf, msg_size, MPI_CHAR, recvbuf, msg_size, MPI_CHAR, MPI_COMM_WORLD);
    }
    MPI_Barrier(MPI_COMM_WORLD);
    memory_after = CmiMemoryUsage();
    if (CmiMaxMemoryUsage() < memory_before) local_memory_max = 0;
    else local_memory_max = CmiMaxMemoryUsage() - memory_before;
    // Reduce MAX here
    assert(MPI_SUCCESS == MPI_Reduce(&local_memory_max, &memory_max_medium, 1, MPI_UNSIGNED_LONG, MPI_MAX, 0, MPI_COMM_WORLD));
    assert(MPI_SUCCESS == MPI_Reduce(&local_memory_max, &memory_min_medium, 1, MPI_UNSIGNED_LONG, MPI_MIN, 0, MPI_COMM_WORLD));
    if (my_id == 0)
      printf("Med Mem Max Usage=%8lu Kb\tMin Usage=%8lu Kb\tVP=%d\tMsgSize=%d\n",
             memory_max_medium / 1024, memory_min_medium / 1024, p, msg_size);
    for (j = 0; j < p; j++)
      for (k = 0; k < msg_size; k++)
        assert(*(recvbuf + j*msg_size + k) == hash(j, my_id));
  }

  // Test standard version
  {
    // warm up, not instrumented
    for (i = 0; i < max_msgs; i++) {
      MPI_Alltoall(sndbuf, msg_size, MPI_CHAR, recvbuf, msg_size, MPI_CHAR, MPI_COMM_WORLD);
    }
    memset(recvbuf, 0, msg_size * p);
    MPI_Barrier(MPI_COMM_WORLD);
    CmiResetMaxMemory();
    memory_before = CmiMemoryUsage();  // initial memory usage
    MPI_Barrier(MPI_COMM_WORLD);
    if (my_id == 0) { Start_Timer(0, ITIMER_REAL); }
    for (i = 0; i < max_msgs; i++) {
      MPI_Alltoall(sndbuf, msg_size, MPI_CHAR, recvbuf, msg_size, MPI_CHAR, MPI_COMM_WORLD);
    }
    MPI_Barrier(MPI_COMM_WORLD);
    memory_after = CmiMemoryUsage();
    if (CmiMaxMemoryUsage() < memory_before) local_memory_max = 0;
    else local_memory_max = CmiMaxMemoryUsage() - memory_before;
    // Reduce MAX here
    assert(MPI_SUCCESS == MPI_Reduce(&local_memory_max, &memory_max_normal, 1, MPI_UNSIGNED_LONG, MPI_MAX, 0, MPI_COMM_WORLD));
    assert(MPI_SUCCESS == MPI_Reduce(&local_memory_max, &memory_min_normal, 1, MPI_UNSIGNED_LONG, MPI_MIN, 0, MPI_COMM_WORLD));
    if (my_id == 0)
      printf("Norm Mem Max Usage=%8lu Kb\tMin Usage=%8lu Kb\tVP=%d\tMsgSize=%d\n",
             memory_max_normal / 1024, memory_min_normal / 1024, p, msg_size);
    for (j = 0; j < p; j++)
      for (k = 0; k < msg_size; k++)
        assert(*(recvbuf + j*msg_size + k) == hash(j, my_id));
  }

  if (my_id == 0) printf("\n");
  free(message);
  free(sndbuf);
  free(recvbuf);

 EXIT:
  MPI_Finalize();
  return 0;
}
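/* Usage sketch (not from the original source): the measurement bracket each
   test above repeats, condensed into a helper. run_collective is a
   caller-supplied placeholder for the timed loop; the result is meaningful
   on rank 0 only. */
static unsigned long measure_peak_growth(void (*run_collective)(void))
{
  unsigned long before, peak, local_max, global_max = 0;
  CmiResetMaxMemory();                 /* clear the high-water mark */
  before = CmiMemoryUsage();           /* baseline footprint */
  run_collective();                    /* the operation being measured */
  peak = CmiMaxMemoryUsage();          /* high-water mark since reset */
  local_max = (peak < before) ? 0 : peak - before;
  MPI_Reduce(&local_max, &global_max, 1, MPI_UNSIGNED_LONG,
             MPI_MAX, 0, MPI_COMM_WORLD);
  return global_max;
}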
void mpi_info_memory(void)
{
  CkPrintf("Memory %lu\n", (unsigned long)CmiMemoryUsage());
}