Example 1
void CmiOutOfMemory(int nBytes)
{ /* We're out of memory: free up the liferaft memory and abort */
  char errMsg[200];
  if (memory_lifeRaft) free(memory_lifeRaft);
  if (nBytes>0) sprintf(errMsg,"Could not malloc() %d bytes--are we out of memory? (used: %.3fMB)",nBytes,CmiMemoryUsage()/1000000.0);
  else sprintf(errMsg,"Could not malloc()--are we out of memory? (used: %.3fMB)", CmiMemoryUsage()/1000000.0);
  CmiAbort(errMsg);
}
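
A minimal sketch of the same reporting logic, added here for illustration (it is not part of the original source): the only change is bounding the message with snprintf. It assumes the CmiMemoryUsage() and CmiAbort() calls shown above.

/* Hedged sketch: like CmiOutOfMemory above, but the message cannot
   overrun the buffer because snprintf bounds the write. */
static void reportOutOfMemory(int nBytes)
{
  char errMsg[200];
  if (nBytes > 0)
    snprintf(errMsg, sizeof(errMsg),
             "Could not malloc() %d bytes--are we out of memory? (used: %.3f MB)",
             nBytes, CmiMemoryUsage() / 1000000.0);
  else
    snprintf(errMsg, sizeof(errMsg),
             "Could not malloc()--are we out of memory? (used: %.3f MB)",
             CmiMemoryUsage() / 1000000.0);
  CmiAbort(errMsg);
}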
Example 2
unsigned long memusage(const char **source) {

  unsigned long memtotal = 0;
  const char* s = "ERROR";

  if ( ! CmiMemoryIs(CMI_MEMORY_IS_OS) ) {
    memtotal = CmiMemoryUsage();  s = "CmiMemoryUsage";
  }

#if CMK_BLUEGENEQ
  if( ! memtotal) { memtotal = memusage_bgq(); s="Kernel_GetMemorySize on BG/Q"; }
#endif

#if CMK_BLUEGENEP
  if( ! memtotal) { memtotal = memusage_bgp(); s="mallinfo on BG/P"; }
#endif

#if defined(WIN32) && !defined(__CYGWIN__)
  if ( ! memtotal ) {
    memtotal = CmiMemoryUsage();  s = "GetProcessMemoryInfo";
  }
#endif

  if ( ! memtotal ) {
    memtotal = memusage_proc_self_stat();  s = "/proc/self/stat";
  }

  if ( ! memtotal ) { memtotal = memusage_mstats(); s = "mstats"; }

  if ( ! memtotal ) { memtotal = memusage_mallinfo(); s = "mallinfo"; }

  if ( ! memtotal ) { memtotal = memusageinit::memusage_sbrk(); s = "sbrk"; }

  if ( ! memtotal ) { memtotal = memusage_ps(); s = "ps"; }

  if ( ! memtotal ) { memtotal = CmiMemoryUsage();  s = "CmiMemoryUsage"; }

  if ( ! memtotal ) s = "nothing";

  if ( source ) *source = s;

  return memtotal;

}
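
A short usage sketch (not from the original source) showing how memusage() might be called: the source argument reports which fallback in the chain above actually supplied the value. CmiPrintf is assumed to be available, as in Example 6.

/* Hedged usage sketch: query memusage() and report which source answered. */
void printMemusage(void)
{
  const char *source;                       /* filled in by memusage() */
  unsigned long bytes = memusage(&source);  /* walks the fallback chain above */
  CmiPrintf("memory in use: %.3f MB (reported by %s)\n",
            bytes / (1024.0 * 1024.0), source);
}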
Example 3
int FEM_Mesh_Parallel_broadcast(int fem_mesh,int masterRank,FEM_Comm_t comm_context){
  int myRank;
  MPI_Comm_rank((MPI_Comm)comm_context,&myRank);
  printf("[%d] FEM_Mesh_Parallel_broadcast called for mesh %d\n",myRank,fem_mesh);
  int new_mesh;
  if(myRank == masterRank){
    //I am the master: I have the element connectivity data and need
    //to send it to everybody
    printf("[%d] Memory usage on vp 0 at the beginning of partition %lu \n",CkMyPe(),(unsigned long)CmiMemoryUsage());
    new_mesh=FEM_master_parallel_part(fem_mesh,masterRank,comm_context);
		
  }else{
    new_mesh=FEM_slave_parallel_part(fem_mesh,masterRank,comm_context);
  }
  //temp to keep stuff from falling apart
  MPI_Barrier((MPI_Comm)comm_context);
  printf("[%d] Partitioned mesh number %d \n",myRank,new_mesh);
  return new_mesh;
}
Example 4
void CqsDequeue(Queue q, void **resp)
{
#ifdef ADAPT_SCHED_MEM
    /* Added by Isaac for testing purposes: */
    if((q->length > 1) && (CmiMemoryUsage() > schedAdaptMemThresholdMB*1024*1024) ){
	/* CqsIncreasePriorityForEntryMethod(q, 153); */
	CqsIncreasePriorityForMemCriticalEntries(q); 
    }
#endif
    
  if (q->length==0) 
    { *resp = 0; return; }
  if (q->negprioq.heapnext>1)
    { *resp = CqsPrioqDequeue(&(q->negprioq)); q->length--; return; }
  if (q->zeroprio.head != q->zeroprio.tail)
    { *resp = CqsDeqDequeue(&(q->zeroprio)); q->length--; return; }
  if (q->posprioq.heapnext>1)
    { *resp = CqsPrioqDequeue(&(q->posprioq)); q->length--; return; }
  *resp = 0; return;
}
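
The threshold test inside CqsDequeue compares the current usage in bytes against a limit configured in megabytes. A standalone sketch of that conversion (the helper name is ours; schedAdaptMemThresholdMB is the configuration variable from the example):

/* Hedged sketch: nonzero when current usage exceeds a limit given in MB.
   Unsigned long arithmetic avoids overflowing int for limits above ~2047 MB. */
static int overMemThresholdMB(unsigned long thresholdMB)
{
  return CmiMemoryUsage() > thresholdMB * 1024UL * 1024UL;
}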
Example 5
int FEM_master_parallel_part(int fem_mesh,int masterRank,FEM_Comm_t comm_context){
  const char *caller="FEM_Create_connmsa"; 
  FEMAPI(caller);
  FEM_chunk *c=FEM_chunk::get(caller);
  FEM_Mesh *m=c->lookup(fem_mesh,caller);
  m->setAbsoluteGlobalno();
  int nelem = m->nElems();
  int numChunks;
  MPI_Comm_size((MPI_Comm)comm_context,&numChunks);
  printf("master -> number of elements %d \n",nelem);
  DEBUG(m->print(0));


  /* Load the connectivity information into the eptr and
     eind data structures. They will be read by the slave
     chunks and used to call parmetis. */
  MSA1DINT eptrMSA(nelem,numChunks);
  MSA1DINT eindMSA(nelem*10,numChunks);
  /*
    after the msa array has been created and loaded with connectivity data
    tell the slaves about the msa array 
  */
  struct conndata data;
  data.nelem = nelem;
  data.nnode = m->node.size();
  data.arr1 = eptrMSA;
  data.arr2 = eindMSA;
  MPI_Bcast_pup(data,masterRank,(MPI_Comm)comm_context);

  eptrMSA.enroll(numChunks);
  eindMSA.enroll(numChunks);
  MSA1DINT::Write wPtr = eptrMSA.getInitialWrite();
  MSA1DINT::Write wInd = eindMSA.getInitialWrite();
  int indcount=0,ptrcount=0;
  for(int t=0;t<m->elem.size();t++){
    if(m->elem.has(t)){
      FEM_Elem &k=m->elem[t];
      for(int e=0;e<k.size();e++){
        wPtr.set(ptrcount)=indcount;
        ptrcount++;
        for(int n=0;n<k.getNodesPer();n++){
          wInd.set(indcount)=k.getConn(e,n);
          indcount++;
        }
      }
    }
  }
  wPtr.set(ptrcount) = indcount;
  printf("master -> ptrcount %d indcount %d sizeof(MSA1DINT) %d sizeof(MSA1DINTLIST) %d memory %d\n",ptrcount,indcount,sizeof(MSA1DINT),sizeof(MSA1DINTLIST),CmiMemoryUsage());
  /*
    Break up the mesh so that each chunk gets the same number of elements
    and the nodes corresponding to those elements. This is not the partition:
    it just distributes the data, so that when the partition is done using parmetis
    all the requests for data do not go to chunk 0. Instead, after partitioning,
    each chunk can send the element and node data to the chunks that will need it.
  */
  FEM_Mesh *mesh_array=FEM_break_mesh(m,ptrcount,numChunks);
  /*
    Send the broken up meshes to the different chunks. 
  */
  sendBrokenMeshes(mesh_array,comm_context);
  delete [] mesh_array;
  FEM_Mesh mypiece;
  MPI_Recv_pup(mypiece,masterRank,MESH_CHUNK_TAG,(MPI_Comm)comm_context);
	
  /*
    call parmetis
  */
  double  parStartTime = CkWallTimer();
  MSA1DINT::Read rPtr = wPtr.syncToRead();
  MSA1DINT::Read rInd = wInd.syncToRead();
  printf("starting FEM_call_parmetis \n");
  struct partconndata *partdata = FEM_call_parmetis(data.nelem, rPtr, rInd, comm_context);

  printf("done with parmetis %d FEM_Mesh %d in %.6lf \n",CmiMemoryUsage(),sizeof(FEM_Mesh),CkWallTimer()-parStartTime);
	
	double dataArrangeStartTime = CkWallTimer();
  /*
    Set up a msa to store the partitions to which a node belongs.
    A node can belong to multiple partitions.
  */
  int totalNodes = m->node.size();
  MSA1DINTLIST nodepart(totalNodes,numChunks);
  MPI_Bcast_pup(nodepart,masterRank,(MPI_Comm)comm_context);
  nodepart.enroll(numChunks);
  MSA1DINTLIST::Accum nodepartAcc = nodepart.getInitialAccum();
	
  FEM_write_nodepart(nodepartAcc,partdata,(MPI_Comm)comm_context);
  printf("Creating mapping of node to partition took %.6lf\n",CkWallTimer()-dataArrangeStartTime);
  dataArrangeStartTime = CkWallTimer();
  MSA1DINTLIST::Read nodepartRead = nodepartAcc.syncToRead();
	
  /*
    Set up a msa to store the nodes that belong to a partition
  */
  MSA1DNODELIST part2node(numChunks,numChunks);
  MPI_Bcast_pup(part2node,masterRank,(MPI_Comm)comm_context);
  part2node.enroll(numChunks);
  MSA1DNODELIST::Accum part2nodeAcc = part2node.getInitialAccum();

  FEM_write_part2node(nodepartRead, part2nodeAcc, partdata, (MPI_Comm)comm_context);

	
  /*
    Get the list of elements and nodes that belong to this partition
  */
  MSA1DNODELIST::Read rPart2node = part2nodeAcc.syncToRead();
  NodeList lnodes = rPart2node.get(masterRank);
  lnodes.uniquify();
//  IntList lelems = part2elem.get(masterRank);
	

	printf("Creating mapping of  partition to node took %.6lf\n",CkWallTimer()-dataArrangeStartTime);
  printf("Time spent doing +=ElemList %.6lf \n",elemlistaccTime);
	dataArrangeStartTime = CkWallTimer();

  /*
    Build an MSA of FEM_Mesh, with each index containing the mesh for that  chunk
  */
  MSA1DFEMMESH part2mesh(numChunks,numChunks);
  MPI_Bcast_pup(part2mesh,masterRank,(MPI_Comm)comm_context);
  part2mesh.enroll(numChunks);
  MSA1DFEMMESH::Accum aPart2mesh = part2mesh.getInitialAccum();

  FEM_write_part2mesh(aPart2mesh,partdata, &data,nodepartRead,numChunks,masterRank,&mypiece);
  /*
    Get your mesh consisting of elements and nodes out of the mesh MSA
  */
  MSA1DFEMMESH::Read rPart2mesh = aPart2mesh.syncToRead();
  MeshElem me = rPart2mesh.get(masterRank);
  //printf("[%d] Number of elements in my partitioned mesh %d number of nodes %d \n",masterRank,me.m->nElems(),me.m->node.size());
	
  DEBUG(printf("[%d] Memory usage on vp 0 close to max %d \n",CkMyPe(),CmiMemoryUsage()));
	//Free up the eptr and eind MSA arrays stored in data
  delete &rPtr;
  delete &rInd;
  data.arr1.FreeMem();
  data.arr2.FreeMem();
  nodepart.FreeMem();
  DEBUG(printf("[%d] Memory usage on vp 0 after FreeMem %d \n",CkMyPe(),CmiMemoryUsage()));
	
  addIDXLists(me.m,lnodes,masterRank);
	
  part2node.FreeMem();
  DEBUG(printf("[%d] Memory usage on vp 0 after addIDXL %lu \n",CkMyPe(),(unsigned long)CmiMemoryUsage()));
	
  /*
    Broadcast  the user data to all the meshes
  */
  DEBUG(printf("[%d] Length of udata vector in master %d \n",masterRank,m->udata.size()));
  MPI_Bcast_pup(m->udata,masterRank,(MPI_Comm)comm_context);
  me.m->udata = m->udata;
	
	
  delete partdata;
  
	printf("[%d] Data Arrangement took %.6lf \n",masterRank,CkWallTimer()-dataArrangeStartTime);
	
	/*
    collect the ghost data and send it to all the chunks.
  */
  struct ghostdata *gdata = gatherGhosts();
  DEBUG(printf("[%d] number of ghost layers %d \n",masterRank,gdata->numLayers));
  MPI_Bcast_pup(*gdata,masterRank,(MPI_Comm)comm_context);

  /*
    make ghosts for this mesh
  */
  printf("[%d] Starting to generate number of ghost layers %d \n",masterRank,gdata->numLayers);
	double _startTime = CkWallTimer();
  makeGhosts(me.m,(MPI_Comm)comm_context,masterRank,gdata->numLayers,gdata->layers);
  delete gdata;
	
	printf("[%d] Ghost generation took %.6lf \n",masterRank,CkWallTimer()-_startTime);
	
  me.m->becomeGetting();
  FEM_chunk *chunk = FEM_chunk::get("FEM_Mesh_Parallel_broadcast");
  int tempMeshNo = chunk->meshes.put(me.m);
  int new_mesh = FEM_Mesh_copy(tempMeshNo);
	
  FEM_Mesh *nmesh = c->lookup(new_mesh,"master_parallel_broadcast");
  DEBUG(printf("[%d] Length of udata vector in master new_mesh %d \n",masterRank,nmesh->udata.size()));
	
  part2mesh.FreeMem();
  printf("[%d] Max Memory usage on vp 0 at end of parallel partition %lu \n",CkMyPe(),(unsigned long)CmiMaxMemoryUsage());
		
  return new_mesh;
}
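
FEM_master_parallel_part logs memory at several checkpoints with near-identical printf lines. A small hedged helper (the name is ours, not from the source) that captures the pattern:

/* Hedged sketch: one labeled memory-usage line, as printed repeatedly above. */
static void reportMemUsage(const char *where)
{
  printf("[%d] Memory usage on vp 0 %s %lu \n",
         CkMyPe(), where, (unsigned long)CmiMemoryUsage());
}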
Example 6
void CentralLB::LoadBalance()
{
#if CMK_LBDB_ON
  int proc;
  const int clients = CkNumPes();

#if ! USE_REDUCTION
  // build data
  buildStats();
#else
  for (proc = 0; proc < clients; proc++) statsMsgsList[proc] = NULL;
#endif

  theLbdb->ResetAdaptive();
  if (!_lb_args.samePeSpeed()) statsData->normalize_speed();

  if (_lb_args.debug()) 
      CmiPrintf("\nCharmLB> %s: PE [%d] step %d starting at %f Memory: %f MB\n",
		  lbname, cur_ld_balancer, step(), start_lb_time,
		  CmiMemoryUsage()/(1024.0*1024.0));

  // if we are in simulation mode read data
  if (LBSimulation::doSimulation) simulationRead();

  char *availVector = LBDatabaseObj()->availVector();
  for(proc = 0; proc < clients; proc++)
      statsData->procs[proc].available = (CmiBool)availVector[proc];

  preprocess(statsData);

//    CkPrintf("Before Calling Strategy\n");

  if (_lb_args.printSummary()) {
      LBInfo info(clients);
        // not take comm data
      info.getInfo(statsData, clients, 0);
      LBRealType mLoad, mCpuLoad, totalLoad;
      info.getSummary(mLoad, mCpuLoad, totalLoad);
      int nmsgs, nbytes;
      statsData->computeNonlocalComm(nmsgs, nbytes);
      CkPrintf("[%d] Load Summary (before LB): max (with bg load): %f max (obj only): %f average: %f at step %d nonlocal: %d msgs %.2fKB.\n", CkMyPe(), mLoad, mCpuLoad, totalLoad/clients, step(), nmsgs, 1.0*nbytes/1024);
//      if (_lb_args.debug() > 1) {
//        for (int i=0; i<statsData->n_objs; i++)
//          CmiPrintf("[%d] %.10f %.10f\n", i, statsData->objData[i].minWall, statsData->objData[i].maxWall);
//      }
  }

#if CMK_REPLAYSYSTEM
  LDHandle *loadBalancer_pointers;
  if (_replaySystem) {
    loadBalancer_pointers = (LDHandle*)malloc(CkNumPes()*sizeof(LDHandle));
    for (int i=0; i<statsData->n_objs; ++i) loadBalancer_pointers[statsData->from_proc[i]] = statsData->objData[i].handle.omhandle.ldb;
  }
#endif
  
  LBMigrateMsg* migrateMsg = Strategy(statsData);
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
	migrateMsg->step = step();
#endif

#if CMK_REPLAYSYSTEM
  CpdHandleLBMessage(&migrateMsg);
  if (_replaySystem) {
    for (int i=0; i<migrateMsg->n_moves; ++i) migrateMsg->moves[i].obj.omhandle.ldb = loadBalancer_pointers[migrateMsg->moves[i].from_pe];
    free(loadBalancer_pointers);
  }
#endif
  
  LBDatabaseObj()->get_avail_vector(migrateMsg->avail_vector);
  migrateMsg->next_lb = LBDatabaseObj()->new_lbbalancer();

  // if this is the step at which we need to dump the database
  simulationWrite();

//  calculate predicted load
//  very time consuming though, so it only happens when debugging is on
  if (_lb_args.printSummary()) {
      LBInfo info(clients);
        // not take comm data
      getPredictedLoadWithMsg(statsData, clients, migrateMsg, info, 0);
      LBRealType mLoad, mCpuLoad, totalLoad;
      info.getSummary(mLoad, mCpuLoad, totalLoad);
      int nmsgs, nbytes;
      statsData->computeNonlocalComm(nmsgs, nbytes);
      CkPrintf("[%d] Load Summary (after LB): max (with bg load): %f max (obj only): %f average: %f at step %d nonlocal: %d msgs %.2fKB useMem: %.2fKB.\n", CkMyPe(), mLoad, mCpuLoad, totalLoad/clients, step(), nmsgs, 1.0*nbytes/1024, (1.0*useMem())/1024);
      for (int i=0; i<clients; i++)
        migrateMsg->expectedLoad[i] = info.peLoads[i];
  }

  DEBUGF(("[%d]calling recv migration\n",CkMyPe()));
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_)) 
    lbDecisionCount++;
    migrateMsg->lbDecisionCount = lbDecisionCount;
#endif

  envelope *env = UsrToEnv(migrateMsg);
  if (1) {
      // broadcast
    thisProxy.ReceiveMigration(migrateMsg);
  }
  else {
    // split the migration for each processor
    for (int p=0; p<CkNumPes(); p++) {
      LBMigrateMsg *m = extractMigrateMsg(migrateMsg, p);
      thisProxy[p].ReceiveMigration(m);
    }
    delete migrateMsg;
  }

  // Zero out data structures for next cycle
  // CkPrintf("zeroing out data\n");
  statsData->clear();
  stats_msg_count=0;
#endif
}
Example 7
int main(int argc, char **argv){
  int my_id;		/* process id */
  int p;		/* number of processes */
  char* message;	/* storage for the message */
  int i, j, k, msg_size;
  MPI_Status status;	/* return status for receive */
  float elapsed_time_msec;
  float bandwidth;
  char *sndbuf, *recvbuf;
  unsigned long memory_before, memory_after;	/* CmiMemoryUsage() snapshots */
  unsigned long memory_diff, local_memory_max;
  unsigned long memory_min_small, memory_max_small, memory_min_medium, memory_max_medium, memory_min_normal, memory_max_normal, memory_min_large, memory_max_large;
  
  MPI_Init( &argc, &argv );
  MPI_Comm_rank( MPI_COMM_WORLD, &my_id );
  MPI_Comm_size( MPI_COMM_WORLD, &p );
  
  if (argc < 2) {
    fprintf (stderr, "need msg size as params\n");
    goto EXIT;
  }
  
  if(sscanf (argv[1], "%d", &msg_size) < 1){
    fprintf (stderr, "need msg size as params\n");
    goto EXIT;
  }
  message = (char*)malloc (msg_size);

  if(argc>2) 
    sscanf (argv[2], "%d", &max_msgs);


  /* don't start timer until everybody is ok */
  MPI_Barrier(MPI_COMM_WORLD); 
  
  if( my_id == 0 ){
    int flag=0;
  }    
  sndbuf = (char *)malloc(msg_size * sizeof(char) * p);
  recvbuf = (char *)malloc(msg_size * sizeof(char) * p);

  for(j=0;j<p;j++)
	memset(sndbuf+j*msg_size,hash(my_id,j),msg_size);
  memset(recvbuf,0,msg_size*p);
  
  
  
  if(my_id == 0){
	Create_Timers (1);
  }

  // Test Long
  if(1){
	// warm up, not instrumented
	for(i=0; i<max_msgs; i++) {
	  MPI_Alltoall_long(sndbuf, msg_size, MPI_CHAR, recvbuf, msg_size, MPI_CHAR, MPI_COMM_WORLD);
	}

	memset(recvbuf,0,msg_size*p);
	MPI_Barrier(MPI_COMM_WORLD); 
	CmiResetMaxMemory();
	memory_before = CmiMemoryUsage();  // initial memory usage
	MPI_Barrier(MPI_COMM_WORLD); 

	if(my_id == 0){
	  Start_Timer (0, ITIMER_REAL); 
	}
	for(i=0; i<max_msgs; i++) {
	  MPI_Alltoall_long(sndbuf, msg_size, MPI_CHAR, recvbuf, msg_size, MPI_CHAR, MPI_COMM_WORLD);
	}
	MPI_Barrier(MPI_COMM_WORLD);
	memory_after = CmiMemoryUsage();

	if (CmiMaxMemoryUsage() < memory_before)  
	  local_memory_max = 0;
	else
	  local_memory_max = CmiMaxMemoryUsage() - memory_before;

	// Reduce MAX here
	assert(MPI_SUCCESS==MPI_Reduce(&local_memory_max, &memory_max_large, 1, MPI_UNSIGNED_LONG, MPI_MAX, 0, MPI_COMM_WORLD));
	assert(MPI_SUCCESS==MPI_Reduce(&local_memory_max, &memory_min_large, 1, MPI_UNSIGNED_LONG, MPI_MIN, 0, MPI_COMM_WORLD));

	if(my_id==0)printf("Large Mem Max Usage=%8d Kb\tMin Usage=%8d Kb\tVP=%d\tMsgSize=%d\n", (memory_max_large) / 1024, (memory_min_large) / 1024, p, msg_size);

 	for(j=0;j<p;j++)
	  for(k=0;k<msg_size;k++)
		assert(*(recvbuf+j*msg_size+k) == hash(j,my_id) );

  }


  // Test Short
#if 0
  {
	// warm up, not instrumented
	for(i=0; i<max_msgs; i++) {
	  MPI_Alltoall_short(sndbuf, msg_size, MPI_CHAR, recvbuf, msg_size, MPI_CHAR, MPI_COMM_WORLD);
	}

	memset(recvbuf,0,msg_size*p);
	MPI_Barrier(MPI_COMM_WORLD); 
	CmiResetMaxMemory();
	memory_before = CmiMemoryUsage();  // initial memory usage
	MPI_Barrier(MPI_COMM_WORLD); 

	if(my_id == 0){
	  Start_Timer (0, ITIMER_REAL); 
	}
	for(i=0; i<max_msgs; i++) {
	  MPI_Alltoall_short(sndbuf, msg_size, MPI_CHAR, recvbuf, msg_size, MPI_CHAR, MPI_COMM_WORLD);
	}
	MPI_Barrier(MPI_COMM_WORLD);
	memory_after = CmiMemoryUsage();

	if (CmiMaxMemoryUsage() < memory_before)  
	  local_memory_max = 0;
	else
	  local_memory_max = CmiMaxMemoryUsage() - memory_before;

	// Reduce MAX here
	assert(MPI_SUCCESS==MPI_Reduce(&local_memory_max, &memory_max_small, 1, MPI_UNSIGNED_LONG, MPI_MAX, 0, MPI_COMM_WORLD));
	assert(MPI_SUCCESS==MPI_Reduce(&local_memory_max, &memory_min_small, 1, MPI_UNSIGNED_LONG, MPI_MIN, 0, MPI_COMM_WORLD));

	if(my_id==0)printf("Small Mem Max Usage=%8d Kb\tMin Usage=%8d Kb\tVP=%d\tMsgSize=%d\n", (memory_max_small) / 1024, (memory_min_small) / 1024, p, msg_size);

	for(j=0;j<p;j++)
	  for(k=0;k<msg_size;k++)
		assert(*(recvbuf+j*msg_size+k) == hash(j,my_id) );
  }
#endif

  // Test Medium
  if(1){
	// warm up, not instrumented
	for(i=0; i<max_msgs; i++) {
	  MPI_Alltoall_medium(sndbuf, msg_size, MPI_CHAR, recvbuf, msg_size, MPI_CHAR, MPI_COMM_WORLD);
	}

	memset(recvbuf,0,msg_size*p);
	MPI_Barrier(MPI_COMM_WORLD); 
	CmiResetMaxMemory();
	memory_before = CmiMemoryUsage();  // initial memory usage
	MPI_Barrier(MPI_COMM_WORLD); 

	if(my_id == 0){
	  Start_Timer (0, ITIMER_REAL); 
	}
	for(i=0; i<max_msgs; i++) {
	  MPI_Alltoall_medium(sndbuf, msg_size, MPI_CHAR, recvbuf, msg_size, MPI_CHAR, MPI_COMM_WORLD);
	}
	MPI_Barrier(MPI_COMM_WORLD);
	memory_after = CmiMemoryUsage();

	if (CmiMaxMemoryUsage() < memory_before)  
	  local_memory_max = 0;
	else
	  local_memory_max = CmiMaxMemoryUsage() - memory_before;

	// Reduce MAX here
	assert(MPI_SUCCESS==MPI_Reduce(&local_memory_max, &memory_max_medium, 1, MPI_UNSIGNED_LONG, MPI_MAX, 0, MPI_COMM_WORLD));
	assert(MPI_SUCCESS==MPI_Reduce(&local_memory_max, &memory_min_medium, 1, MPI_UNSIGNED_LONG, MPI_MIN, 0, MPI_COMM_WORLD));

	if(my_id==0)	printf("Med   Mem Max Usage=%8lu Kb\tMin Usage=%8lu Kb\tVP=%d\tMsgSize=%d\n", (memory_max_medium) / 1024, (memory_min_medium) / 1024, p, msg_size);

	for(j=0;j<p;j++)
	  for(k=0;k<msg_size;k++)
		assert(*(recvbuf+j*msg_size+k) == hash(j,my_id) );
  }

  // Test standard version
  {
	// warm up, not instrumented
	for(i=0; i<max_msgs; i++) {
	  MPI_Alltoall(sndbuf, msg_size, MPI_CHAR, recvbuf, msg_size, MPI_CHAR, MPI_COMM_WORLD);
	}
	
	memset(recvbuf,0,msg_size*p);
	MPI_Barrier(MPI_COMM_WORLD); 
	CmiResetMaxMemory();
	memory_before = CmiMemoryUsage();  // initial memory usage
	MPI_Barrier(MPI_COMM_WORLD); 

	if(my_id == 0){
	  Start_Timer (0, ITIMER_REAL); 
	}
	for(i=0; i<max_msgs; i++) {
	  MPI_Alltoall(sndbuf, msg_size, MPI_CHAR, recvbuf, msg_size, MPI_CHAR, MPI_COMM_WORLD);
	}
	MPI_Barrier(MPI_COMM_WORLD);
	memory_after = CmiMemoryUsage();

	if (CmiMaxMemoryUsage() < memory_before)  
	  local_memory_max = 0;
	else
	  local_memory_max = CmiMaxMemoryUsage() - memory_before;

	// Reduce MAX here
	assert(MPI_SUCCESS==MPI_Reduce(&local_memory_max, &memory_max_normal, 1, MPI_UNSIGNED_LONG, MPI_MAX, 0, MPI_COMM_WORLD));
	assert(MPI_SUCCESS==MPI_Reduce(&local_memory_max, &memory_min_normal, 1, MPI_UNSIGNED_LONG, MPI_MIN, 0, MPI_COMM_WORLD));

	
	if(my_id==0)  printf("Norm  Mem Max Usage=%8lu Kb\tMin Usage=%8lu Kb\tVP=%d\tMsgSize=%d\n", (memory_max_normal) / 1024, (memory_min_normal) / 1024, p, msg_size);

	for(j=0;j<p;j++)
	  for(k=0;k<msg_size;k++)
		assert(*(recvbuf+j*msg_size+k) == hash(j,my_id) );
  }

  if(my_id==0) printf("\n");


  free(sndbuf);
  free(recvbuf);
  
 EXIT:
  MPI_Finalize();
}
Example 8
void mpi_info_memory(){
  CkPrintf("Memory %ld\n", CmiMemoryUsage());
}
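
For human-readable logs the same value is often scaled to megabytes, as Example 6 does; a hedged variant (not from the original source):

/* Hedged sketch: report usage in MB rather than raw bytes. */
void mpi_info_memory_mb(void)
{
  CkPrintf("Memory %.2f MB\n", CmiMemoryUsage() / (1024.0 * 1024.0));
}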