Example #1
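/* Check a completed MPI request: if it was a receive, write the matching
 * receive trace record, then deactivate a persistent request or free a
 * non-persistent one. */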
void vt_check_request(uint64_t* time, struct VTRequest* req, MPI_Status *status)
{
  if (!req ||
      ((req->flags & ERF_IS_PERSISTENT) && !(req->flags & ERF_IS_ACTIVE)))
    return;

  /* if receive request, write receive trace record */
  if ((req->flags & ERF_RECV) &&
      (status->MPI_SOURCE != MPI_PROC_NULL) && 
      (status->MPI_SOURCE != MPI_ANY_SOURCE))
  {
    int count, sz;
    PMPI_Type_size(req->datatype, &sz);
    PMPI_Get_count(status, req->datatype, &count);
    vt_mpi_recv(time, VT_RANK_TO_PE(status->MPI_SOURCE, req->comm),
                VT_COMM_ID(req->comm), status->MPI_TAG, count * sz);
  }
      
  if (req->flags & ERF_IS_PERSISTENT)
    {
      /* if persistent request, set it to inactive
         and, if requested, delete the request */
      req->flags &= ~ERF_IS_ACTIVE;
      if (req->flags & ERF_DEALLOCATE) vt_request_free(req);
    }
  else
    {
      /* if non-persistent request, always delete the request */
      vt_request_free(req);
    }
}
Example #2
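/* Variant of the above that takes a record_event flag and additionally writes
 * an I/O end record for requests flagged ERF_IO before deactivating or
 * freeing the request. */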
void vt_check_request(uint64_t* time, struct VTRequest* req, MPI_Status *status,
                      uint8_t record_event)
{
  if (!req ||
      ((req->flags & ERF_IS_PERSISTENT) && !(req->flags & ERF_IS_ACTIVE)))
    return;

  /* if receive request, write receive trace record */
  if (record_event &&
      (req->flags & ERF_RECV) &&
      (status->MPI_SOURCE != MPI_PROC_NULL) && 
      (status->MPI_SOURCE != MPI_ANY_SOURCE))
  {
    VT_MPI_INT count, sz;
    PMPI_Type_size(req->datatype, &sz);
    PMPI_Get_count(status, req->datatype, &count);
    vt_mpi_recv(VT_CURRENT_THREAD, time,
                VT_RANK_TO_PE(status->MPI_SOURCE, req->comm),
                VT_COMM_ID(req->comm), status->MPI_TAG, count * sz);
  }

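  /* if I/O request, write I/O end trace record */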
  if (record_event && (req->flags & ERF_IO))
  {
    VT_MPI_INT count, sz;
    PMPI_Type_size(req->datatype, &sz);
    PMPI_Get_count(status, req->datatype, &count);
    if (count == MPI_UNDEFINED)
      count = 0;
    vt_ioend(VT_CURRENT_THREAD, time, req->fileid, req->matchingid,
             req->handleid, req->fileop, (uint64_t)count * (uint64_t)sz);
  }

  if (req->flags & ERF_IS_PERSISTENT)
    {
      /* if persistent request, set it to inactive
         and, if requested, delete the request */
      req->flags &= ~ERF_IS_ACTIVE;
      if (req->flags & ERF_DEALLOCATE) vt_request_free(req);
    }
  else
    {
      /* if non-persistent request, always delete the request */
      vt_request_free(req);
    }
}
Example #3
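/* Progress callback: test the posted OOB receive requests; for each completed
 * one, copy the payload into a temporary buffer, pass it to do_recv(), then
 * restart the receive and put it back on the request list. */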
static int oshmem_mkey_recv_cb(void)
{
    MPI_Status status;
    int flag;
    int n;
    int rc;
    opal_buffer_t *msg;
    int32_t size;
    void *tmp_buf;
    oob_comm_request_t *r;

    n = 0;
    r = (oob_comm_request_t *)opal_list_get_first(&memheap_oob.req_list);
    assert(r);
    while(r != (oob_comm_request_t *)opal_list_get_end(&memheap_oob.req_list)) {
        my_MPI_Test(&r->recv_req, &flag, &status);
        if (OPAL_LIKELY(0 == flag)) {
            return n;
        }
        PMPI_Get_count(&status, MPI_BYTE, &size);
        MEMHEAP_VERBOSE(5, "OOB request from PE: %d, size %d", status.MPI_SOURCE, size);
        n++;
        opal_list_remove_first(&memheap_oob.req_list);

        /* to avoid deadlock we must be able to restart the request
         * independently of processing it, so the data is copied to
         * a temporary buffer first
         */
        tmp_buf = malloc(size);
        if (NULL == tmp_buf) {
            MEMHEAP_ERROR("not enough memory");
            ORTE_ERROR_LOG(0);
            return n;
        } else {
            memcpy(tmp_buf, (void*)&r->buf, size);
            msg = OBJ_NEW(opal_buffer_t);
            if (NULL == msg) {
                MEMHEAP_ERROR("not enough memory");
                ORTE_ERROR_LOG(0);
                free(tmp_buf);
                return n;
            }
            opal_dss.load(msg, (void*)tmp_buf, size);

            /*
             * Send the reply before posting the receive request again to limit
             * the recursion depth to the number of receive requests.
             * The send can call opal_progress, which calls this function again;
             * if the recv request were already restarted, the stack depth would
             * be proportional to the number of job ranks.
             */
            do_recv(status.MPI_SOURCE, msg);
            OBJ_RELEASE(msg);
        }

        rc = PMPI_Start(&r->recv_req);
        if (MPI_SUCCESS != rc) {
            MEMHEAP_ERROR("Failed to post recv request %d", rc);
            ORTE_ERROR_LOG(rc);
            return n;
        }
        opal_list_append(&memheap_oob.req_list, &r->super);


        r = (oob_comm_request_t *)opal_list_get_first(&memheap_oob.req_list);
        assert(r);
    }

    return 1;
}
Example #4
void ipm_unify_callsite_ids()
{
  /* 1. build the local cstable 
     2. send it to root where it is merged and unified
     3. root sends out mapping table
     4. apply maps to callgraph 
  */
  cstable_t local, remote, merged;
  MPI_Status status;
  int max_ncs, max_id, gmax_id;
  int *idmap;
  int match, found;
  int i, j;

#if 0

  callgraph_to_cstable(ipm_callgraph, &local);
  max_id = callgraph_maxid(ipm_callgraph)+1;
  //print_cstable(stderr, &local);    
 
  IPM_REDUCE( &local.ncs, &max_ncs, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
  IPM_ALLREDUCE( &max_id, &gmax_id, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
  idmap = (int*)IPM_CALLOC(gmax_id, sizeof(int));

  if( task.taskid==0 ) {
    FILE *mapfile;
    char fname[MAXSIZE_FILENAME];
    
    sprintf(fname, "%s.map.txt", task.fname);
    mapfile=fopen(fname, "w");

    init_cstable(&merged, max_ncs);
    init_cstable(&remote, max_ncs);
    /* copy local into merged */
    merge_cstables(&merged, &local, idmap);
    
    for( i=1; i<task.ntasks; i++ )
      {
	clear_cstable(&remote);
	
	IPM_RECV(remote.csids, merged.ncs, MPI_INT, i, 0, MPI_COMM_WORLD, &status); 
	PMPI_Get_count( &status, MPI_INT, &(remote.ncs) );
	IPM_RECV(remote.cstable, remote.ncs*MAXSIZE_CALLSTACKDEPTH*sizeof(void*), 
		  MPI_BYTE, i, 0, MPI_COMM_WORLD, &status );
	
	/* clear ids, merge remote and send map back  */
	for( j=0; j<gmax_id; j++ ) idmap[j]=0;
	merge_cstables(&merged, &remote, idmap);
	IPM_SEND(idmap, gmax_id, MPI_INT, i, 0, MPI_COMM_WORLD);

	
	for( j=0; j<gmax_id; j++ ) {
	  if( idmap[j] && idmap[j]!=j ) {
            fprintf(mapfile, "rank %d: %d -> %d\n", i, j, idmap[j]);
	  }
	}
      }
    
    fclose(mapfile);
  } 
  else 
    {
      IPM_SEND(local.csids, local.ncs, MPI_INT, 0, 0, MPI_COMM_WORLD );
      IPM_SEND(local.cstable, local.ncs*MAXSIZE_CALLSTACKDEPTH*sizeof(void*),
		MPI_BYTE, 0, 0, MPI_COMM_WORLD);

      for( j=0; j<gmax_id; j++ ) idmap[j]=0;      
      IPM_RECV(idmap, gmax_id, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
      remap_callsites( ipm_htable, idmap, gmax_id);
    }
#endif
}
Example #5
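/* Merge per-task callsite statistics at the collector rank: non-collector
 * tasks send their records, and the collector receives each message (sized
 * via PMPI_Probe/PMPI_Get_count) and inserts the records into the global
 * hash tables. */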
int
mpiPi_mergeResults ()
{
  int ac;
  callsite_stats_t **av;
  int totalCount = 0;
  int maxCount = 0;
  int retval = 1, sendval;

  /* gather local task data */
  h_gather_data (mpiPi.task_callsite_stats, &ac, (void ***) &av);

  /* determine size of space necessary on collector */
  PMPI_Allreduce (&ac, &totalCount, 1, MPI_INT, MPI_SUM, mpiPi.comm);
  PMPI_Reduce (&ac, &maxCount, 1, MPI_INT, MPI_MAX, mpiPi.collectorRank,
	       mpiPi.comm);

  if (totalCount < 1)
    {
      mpiPi_msg_warn
	("Collector found no records to merge. Omitting report.\n");
      return 0;
    }

  /* gather global data at collector */
  if (mpiPi.rank == mpiPi.collectorRank)
    {
      int i;
      int ndx = 0;

#ifdef ENABLE_BFD
      if (mpiPi.appFullName != NULL)
	{
	  if (open_bfd_executable (mpiPi.appFullName) == 0)
	    mpiPi.do_lookup = 0;
	}
#elif defined(USE_LIBDWARF)
      if (mpiPi.appFullName != NULL)
	{
	  if (open_dwarf_executable (mpiPi.appFullName) == 0)
	    mpiPi.do_lookup = 0;
	}
#endif
#if defined(ENABLE_BFD) || defined(USE_LIBDWARF)
      else
	{
	  mpiPi_msg_warn ("Failed to open executable\n");
	  mpiPi.do_lookup = 0;
	}
#endif
      /* convert data to src line; merge, if nec */
      mpiPi.global_callsite_stats = h_open (mpiPi.tableSize,
					    mpiPi_callsite_stats_src_hashkey,
					    mpiPi_callsite_stats_src_comparator);
      mpiPi.global_callsite_stats_agg = h_open (mpiPi.tableSize,
						mpiPi_callsite_stats_src_id_hashkey,
						mpiPi_callsite_stats_src_id_comparator);
      if (callsite_pc_cache == NULL)
	{
	  callsite_pc_cache = h_open (mpiPi.tableSize,
				      callsite_pc_cache_hashkey,
				      callsite_pc_cache_comparator);
	}
      if (callsite_src_id_cache == NULL)
	{
	  callsite_src_id_cache = h_open (mpiPi.tableSize,
					  callsite_src_id_cache_hashkey,
					  callsite_src_id_cache_comparator);
	}
      /* Try to allocate space for max count of callsite info from all tasks  */
      mpiPi.rawCallsiteData =
	(callsite_stats_t *) calloc (maxCount, sizeof (callsite_stats_t));
      if (mpiPi.rawCallsiteData == NULL)
	{
	  mpiPi_msg_warn
	    ("Failed to allocate memory to collect callsite info");
	  retval = 0;
	}

      /* Clear global_mpi_time and global_mpi_size before accumulation in mpiPi_insert_callsite_records */
      mpiPi.global_mpi_time = 0.0;
      mpiPi.global_mpi_size = 0.0;

      if (retval == 1)
	{
	  /* Insert collector callsite data into global and task-specific hash tables */
	  for (ndx = 0; ndx < ac; ndx++)
	    {
	      mpiPi_insert_callsite_records (av[ndx]);
	    }
	  ndx = 0;
	  for (i = 1; i < mpiPi.size; i++)	/* n-1 */
	    {
	      MPI_Status status;
	      int count;
	      int j;

	      /* okay in any order */
	      PMPI_Probe (MPI_ANY_SOURCE, mpiPi.tag, mpiPi.comm, &status);
	      PMPI_Get_count (&status, MPI_CHAR, &count);
	      PMPI_Recv (&(mpiPi.rawCallsiteData[ndx]), count, MPI_CHAR,
			 status.MPI_SOURCE, mpiPi.tag, mpiPi.comm, &status);
	      count /= sizeof (callsite_stats_t);


	      for (j = 0; j < count; j++)
		{
		  mpiPi_insert_callsite_records (&(mpiPi.rawCallsiteData[j]));
		}
	    }
	  free (mpiPi.rawCallsiteData);
	}
    }
  else
    {
      int ndx;
      char *sbuf = (char *) malloc (ac * sizeof (callsite_stats_t));
      for (ndx = 0; ndx < ac; ndx++)
	{
	  bcopy (av[ndx],
		 &(sbuf[ndx * sizeof (callsite_stats_t)]),
		 sizeof (callsite_stats_t));
	}
      PMPI_Send (sbuf, ac * sizeof (callsite_stats_t),
		 MPI_CHAR, mpiPi.collectorRank, mpiPi.tag, mpiPi.comm);
      free (sbuf);
    }
  if (mpiPi.rank == mpiPi.collectorRank && retval == 1)
    {
      if (mpiPi.collective_report == 0)
	mpiPi_msg_debug
	  ("MEMORY : Allocated for global_callsite_stats     : %13ld\n",
	   h_count (mpiPi.global_callsite_stats) * sizeof (callsite_stats_t));
      mpiPi_msg_debug
	("MEMORY : Allocated for global_callsite_stats_agg : %13ld\n",
	 h_count (mpiPi.global_callsite_stats_agg) *
	 sizeof (callsite_stats_t));
    }

  /* TODO: need to free all these pointers as well. */
  free (av);

  if (mpiPi.rank == mpiPi.collectorRank)
    {
      if (mpiPi.do_lookup == 1)
	{
#ifdef ENABLE_BFD
	  /* clean up */
	  close_bfd_executable ();
#elif defined(USE_LIBDWARF)
	  close_dwarf_executable ();
#endif
	}
    }

  /*  Quadrics MPI does not appear to support MPI_IN_PLACE   */
  sendval = retval;
  PMPI_Allreduce (&sendval, &retval, 1, MPI_INT, MPI_MIN, mpiPi.comm);
  return retval;
}
Example #6
File: mpiPi.c  Project: saxena/mpip
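/* Collector side: receive one message of pt2pt records from each sending rank
 * and store it in mpiPi.accumulatedPt2ptData, indexed by source rank. */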
void
mpiPi_recv_pt2pt_stats(int ac, pt2pt_stats_t** av)
{
  int i;
  int pt2pt_size = sizeof(pt2pt_stats_t);
  int nsenders = 0;

  /* Count number of senders, receiver will wait for them */
  /* i = 0 is copied locally */
  for(i = 1; i < mpiPi.size; i++)
    {
      if (mpiPi.accumulatedPt2ptCounts[i])
	nsenders++;
    }

  mpiPi_msg_debug("(%d) Waiting for %d senders\n",mpiPi.rank,nsenders);

  /* Allocate a pointer for each rank */
  mpiPi.accumulatedPt2ptData = (pt2pt_stats_t **) calloc (mpiPi.size, sizeof(pt2pt_stats_t*));
  if (mpiPi.accumulatedPt2ptData == NULL)
    {
      mpiPi_msg_warn
	("Failed to allocate memory to collect point to point info");
      assert(0);
    }

  /* Copy Data for collector rank */
  if (ac)
    {
      mpiPi.accumulatedPt2ptData[mpiPi.rank] = *av;
    }

  i = 0;
  /* Insert pt2pt data into aggregate array indexed by rank */
  while(i < nsenders)
    {
      MPI_Status status;
      int count;
      pt2pt_stats_t* ptp;
      unsigned src_rank;

      /* okay in any order */
      PMPI_Probe (MPI_ANY_SOURCE, mpiPi.tag, mpiPi.comm, &status);
      PMPI_Get_count (&status, MPI_CHAR, &count);
      src_rank = status.MPI_SOURCE;

      /* Allocate space for count number of pt2pt_stat_t structs */
      ptp = (pt2pt_stats_t*) calloc(count, pt2pt_size);

      mpiPi_msg_debug("(%d): Receiving %d bytes in pt2pt records from %u...\n",
                      mpiPi.rank, count, src_rank);

      PMPI_Recv (ptp, count, MPI_CHAR, src_rank,
		 mpiPi.tag, mpiPi.comm, &status);

      mpiPi_msg_debug("(%d): Received\n",mpiPi.rank);

      count /= pt2pt_size;

      assert(src_rank < mpiPi.size);
      assert(mpiPi.accumulatedPt2ptCounts[src_rank] == count);

      mpiPi.accumulatedPt2ptData[src_rank] = ptp;

      i++;
    }

}
Example #7
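/* Profiling-interface wrapper: forward MPI_Get_count to the PMPI entry point. */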
int MPI_Get_count(MPI_Status * status, MPI_Datatype datatype, int *count)
{
  return PMPI_Get_count(status, datatype, count);
}
Example #8
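/* Same pattern: call PMPI_Get_count and return its error code. */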
int MPI_Get_count(MPI_Status *status, MPI_Datatype datatype, int *count)
{
    int err;
    err = PMPI_Get_count(status, datatype, count);
    return err;
}