Example #1
int
mpiPi_query_pc (void *pc, char **filename, char **functname, int *lineno)
{
  int rc = 0;
  callsite_pc_cache_entry_t key;
  callsite_pc_cache_entry_t *csp;
  char addr_buf[24];

  key.pc = pc;
  /* do we have a cache entry for this pc? If so, use entry */
  if (h_search (callsite_pc_cache, &key, (void **) &csp) == NULL)
    {
      /* no cache entry: create, lookup, and insert */
      csp =
	(callsite_pc_cache_entry_t *)
	malloc (sizeof (callsite_pc_cache_entry_t));
      csp->pc = pc;
#if defined(ENABLE_BFD) || defined(USE_LIBDWARF)
      if (mpiP_find_src_loc (pc, filename, lineno, functname) == 0)
	{
	  if (*filename == NULL || strcmp (*filename, "??") == 0)
	    *filename = "[unknown]";

	  if (*functname == NULL)
	    *functname = "[unknown]";

	  mpiPi_msg_debug
	    ("Successful Source lookup for [%s]: %s, %d, %s\n",
	     mpiP_format_address (pc, addr_buf), *filename, *lineno,
	     *functname);

	  csp->filename = strdup (*filename);
	  csp->functname = strdup (*functname);
	  csp->line = *lineno;
	}
      else
	{
	  mpiPi_msg_debug ("Unsuccessful Source lookup for [%s]\n",
			   mpiP_format_address (pc, addr_buf));
	  csp->filename = strdup ("[unknown]");
	  csp->functname = strdup ("[unknown]");
	  csp->line = 0;
	}
#else /* !(ENABLE_BFD || USE_LIBDWARF) */
      csp->filename = strdup ("[unknown]");
      csp->functname = strdup ("[unknown]");
      csp->line = 0;
#endif
      h_insert (callsite_pc_cache, csp);
    }

  *filename = csp->filename;
  *functname = csp->functname;
  *lineno = csp->line;

  if (*lineno == 0)
    rc = 1;			/* use this value to indicate a failed lookup */

  return rc;
}
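A minimal caller sketch (not part of mpiP), assuming callsite_pc_cache has already been opened by the tool's initialization code; report_pc is a hypothetical helper:

/* Hypothetical usage sketch: resolve one program counter and log the
   result via the same debug channel the library uses. */
static void
report_pc (void *pc)
{
  char *file = NULL;
  char *func = NULL;
  int line = 0;
  char addr_buf[24];

  if (mpiPi_query_pc (pc, &file, &func, &line) == 0)
    mpiPi_msg_debug ("%s resolved to %s:%d in %s\n",
		     mpiP_format_address (pc, addr_buf), file, line, func);
  else
    mpiPi_msg_debug ("%s could not be resolved to a source line\n",
		     mpiP_format_address (pc, addr_buf));
}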
Example #2
/* take a callstats record (the pc), determine the source file and line
   if possible, and assign a callsite id.
 */
int
mpiPi_query_src (callsite_stats_t * p)
{
  int i;
  callsite_src_id_cache_entry_t key;
  callsite_src_id_cache_entry_t *csp;
  assert (p);

  /* Because multiple pcs can map to the same source line, we must
     check that mapping here. If the lookup came back unknown, we
     assign different ids. */
  bzero (&key, sizeof (callsite_src_id_cache_entry_t));

  for (i = 0; (i < MPIP_CALLSITE_STACK_DEPTH) && (p->pc[i] != NULL); i++)
    {
      if (mpiPi.do_lookup == 1)
	mpiPi_query_pc (p->pc[i], &(p->filename[i]), &(p->functname[i]),
			&(p->lineno[i]));
      else
	{
	  p->filename[i] = strdup ("[unknown]");
	  p->functname[i] = strdup ("[unknown]");
	  p->lineno[i] = 0;
	}

      key.filename[i] = p->filename[i];
      key.functname[i] = p->functname[i];
      key.line[i] = p->lineno[i];
      key.pc[i] = p->pc[i];
    }

  /* lookup/generate an ID based on the callstack, not just the callsite pc */
  if (h_search (callsite_src_id_cache, &key, (void **) &csp) == NULL)
    {
      /* create a new entry, and assign an id based on callstack */
      csp =
	(callsite_src_id_cache_entry_t *)
	malloc (sizeof (callsite_src_id_cache_entry_t));
      bzero (csp, sizeof (callsite_src_id_cache_entry_t));

      for (i = 0; (i < MPIP_CALLSITE_STACK_DEPTH) && (p->pc[i] != NULL); i++)
	{
	  csp->filename[i] = strdup (key.filename[i]);
	  csp->functname[i] = strdup (key.functname[i]);
	  csp->line[i] = key.line[i];
	  csp->pc[i] = p->pc[i];
	}
      csp->id = callsite_src_id_counter++;
      csp->op = p->op;
      h_insert (callsite_src_id_cache, csp);
    }

  /* assign ID to this record */
  p->csid = csp->id;
  return p->csid;
}
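A hypothetical usage sketch: once a record's pc stack has been captured, mpiPi_query_src fills in the source information and returns the assigned callsite id. assign_id_sketch is illustrative only:

static void
assign_id_sketch (callsite_stats_t *rec)
{
  /* Fills rec->filename/functname/lineno and rec->csid as a side effect. */
  int csid = mpiPi_query_src (rec);

  mpiPi_msg_debug ("callsite %d -> %s:%d in %s\n",
		   csid, rec->filename[0], rec->lineno[0],
		   rec->functname[0]);
}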
Example #3
/*  Aggregate individual MPI call data by iterating through call sites.  */
static int
mpiPi_insert_MPI_records ()
{
  callsite_stats_t *csp = NULL;
  int i, ac;
  callsite_stats_t **av;
  callsite_stats_t *p;

  if (mpiPi.rank == mpiPi.collectorRank)
    {
      /*  Open hash table for MPI call data.  */
      mpiPi.global_MPI_stats_agg = h_open (mpiPi.tableSize,
					   mpiPi_callsite_stats_MPI_id_hashkey,
					   mpiPi_callsite_stats_op_comparator);

      /*  Get individual call data.  */
      h_gather_data (mpiPi.global_callsite_stats_agg, &ac, (void ***) &av);

      /*  Sort by MPI op.  */
      qsort (av, ac, sizeof (void *), callsite_sort_by_MPI_op);

      /*  For each call site, add call site info to hash table entry for MPI op, independent of rank.  */
      for (i = 0; i < ac; i++)
	{
	  p = av[i];

	  /* Check if there is already an entry for the MPI op. */
	  if (NULL ==
	      h_search (mpiPi.global_MPI_stats_agg, p, (void **) &csp))
	    {
	      callsite_stats_t *newp = NULL;
	      newp = (callsite_stats_t *) malloc (sizeof (callsite_stats_t));
	      memcpy (newp, p, sizeof (callsite_stats_t));
	      newp->rank = -1;
	      newp->csid = p->op - mpiPi_BASE;

	      /* insert new record into global */
	      h_insert (mpiPi.global_MPI_stats_agg, newp);
	    }
	  else
	    {
	      mpiPi_merge_individual_callsite_records (csp, p);
	    }
	}
    }

  return 1;
}
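The table opened above hashes and compares records by MPI operation, so every callsite for the same op collapses onto one entry. A sketch of the comparator shape that implies; the real mpiPi_callsite_stats_op_comparator may differ:

/* Assumed comparator shape: records compare equal when they refer to
   the same MPI operation, regardless of callsite or rank. */
static int
op_only_comparator_sketch (const void *a, const void *b)
{
  const callsite_stats_t *x = (const callsite_stats_t *) a;
  const callsite_stats_t *y = (const callsite_stats_t *) b;

  if (x->op > y->op)
    return 1;
  if (x->op < y->op)
    return -1;
  return 0;
}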
Example #4
void
mpiPi_update_callsite_stats (unsigned op, unsigned rank, void **pc,
			     double dur, double sendSize, double ioSize)
{
  int i;
  callsite_stats_t *csp = NULL;
  callsite_stats_t key;

  if (!mpiPi.enabled)
    return;

  assert (mpiPi.task_callsite_stats != NULL);
  assert (dur >= 0);


  key.op = op;
  key.rank = rank;
  key.cookie = MPIP_CALLSITE_STATS_COOKIE;
  for (i = 0; i < MPIP_CALLSITE_STACK_DEPTH; i++)
    {
      key.pc[i] = pc[i];
    }

  if (NULL == h_search (mpiPi.task_callsite_stats, &key, (void **) &csp))
    {
      /* create and insert */
      csp = (callsite_stats_t *) malloc (sizeof (callsite_stats_t));
      bzero (csp, sizeof (callsite_stats_t));
      csp->op = op;
      csp->rank = rank;
      for (i = 0; i < MPIP_CALLSITE_STACK_DEPTH; i++)
	{
	  csp->pc[i] = pc[i];
	}
      csp->cookie = MPIP_CALLSITE_STATS_COOKIE;
      csp->minDur = DBL_MAX;
      csp->minDataSent = DBL_MAX;
      csp->minIO = DBL_MAX;
      csp->arbitraryMessageCount = 0;
      h_insert (mpiPi.task_callsite_stats, csp);
    }
  /* ASSUME: csp cannot be deleted from list */
  csp->count++;
  csp->cumulativeTime += dur;
  assert (csp->cumulativeTime >= 0);
  csp->cumulativeTimeSquared += (dur * dur);
  assert (csp->cumulativeTimeSquared >= 0);
  csp->maxDur = max (csp->maxDur, dur);
  csp->minDur = min (csp->minDur, dur);
  csp->cumulativeDataSent += sendSize;
  csp->cumulativeIO += ioSize;

  csp->maxDataSent = max (csp->maxDataSent, sendSize);
  csp->minDataSent = min (csp->minDataSent, sendSize);

  csp->maxIO = max (csp->maxIO, ioSize);
  csp->minIO = min (csp->minIO, ioSize);


  if (mpiPi.messageCountThreshold > -1
      && sendSize >= (double) mpiPi.messageCountThreshold)
    csp->arbitraryMessageCount++;

#if 0
  mpiPi_msg_debug ("mpiPi.messageCountThreshold is %d\n",
		   mpiPi.messageCountThreshold);
  mpiPi_msg_debug ("sendSize is %f\n", sendSize);
  mpiPi_msg_debug ("csp->arbitraryMessageCount is %lld\n",
		   csp->arbitraryMessageCount);
#endif

  return;
}
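A hypothetical wrapper sketch showing how a profiled MPI call might feed this routine: time the operation through the PMPI interface, compute the bytes sent, and record one sample. The op code and captured call stack (pc_stack) are placeholders, not the real mpiP wrapper logic:

#include <mpi.h>

/* Sketch only: time one send and hand the measurement to
   mpiPi_update_callsite_stats. */
static int
traced_send_sketch (void *buf, int count, MPI_Datatype dt, int dest,
		    int tag, MPI_Comm comm, unsigned op, void **pc_stack)
{
  double t0, t1;
  int typesize, rank, rc;

  PMPI_Comm_rank (comm, &rank);
  PMPI_Type_size (dt, &typesize);

  t0 = MPI_Wtime ();
  rc = PMPI_Send (buf, count, dt, dest, tag, comm);
  t1 = MPI_Wtime ();

  /* One sample: op code, rank, captured stack, duration, bytes sent, no I/O. */
  mpiPi_update_callsite_stats (op, (unsigned) rank, pc_stack,
			       t1 - t0, (double) count * typesize, 0.0);
  return rc;
}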
Example #5
static int
mpiPi_insert_callsite_records (callsite_stats_t * p)
{
  callsite_stats_t *csp = NULL;

  mpiPi_query_src (p);		/* sets the file/line in p */

  /* If an entry exists, accumulate into it; otherwise insert a new one.
     This is specifically for optimizations that map multiple PCs to
     one src line. We aggregate across ranks after this.

     The collective_report reporting approach does not aggregate
     individual process callsite information at the collector process.
   */
  if (mpiPi.collective_report == 0)
    {
      if (NULL == h_search (mpiPi.global_callsite_stats, p, (void **) &csp))
	{
	  int j;
	  callsite_stats_t *newp = NULL;
	  newp = (callsite_stats_t *) malloc (sizeof (callsite_stats_t));
	  bzero (newp, sizeof (callsite_stats_t));
	  newp->op = p->op;
	  newp->rank = p->rank;
	  for (j = 0; j < MPIP_CALLSITE_STACK_DEPTH; j++)
	    {
	      newp->pc[j] = p->pc[j];
	      newp->filename[j] = p->filename[j];
	      newp->functname[j] = p->functname[j];
	      newp->lineno[j] = p->lineno[j];
	    }
	  newp->csid = p->csid;
	  newp->count = p->count;
	  newp->cumulativeTime = p->cumulativeTime;
	  newp->cumulativeTimeSquared = p->cumulativeTimeSquared;
	  newp->maxDur = p->maxDur;
	  newp->minDur = p->minDur;
	  newp->maxDataSent = p->maxDataSent;
	  newp->minDataSent = p->minDataSent;
	  newp->cumulativeDataSent = p->cumulativeDataSent;
	  newp->maxIO = p->maxIO;
	  newp->minIO = p->minIO;
	  newp->cumulativeIO = p->cumulativeIO;
	  newp->arbitraryMessageCount = p->arbitraryMessageCount;
	  newp->cookie = MPIP_CALLSITE_STATS_COOKIE;

	  /* insert new record into global */
	  h_insert (mpiPi.global_callsite_stats, newp);
	}
      else
	{
	  csp->count += p->count;
	  csp->cumulativeTime += p->cumulativeTime;
	  assert (csp->cumulativeTime >= 0);
	  csp->cumulativeTimeSquared += p->cumulativeTimeSquared;
	  assert (csp->cumulativeTimeSquared >= 0);
	  csp->maxDur = max (csp->maxDur, p->maxDur);
	  csp->minDur = min (csp->minDur, p->minDur);
	  csp->maxDataSent = max (csp->maxDataSent, p->maxDataSent);
	  csp->minDataSent = min (csp->minDataSent, p->minDataSent);
	  csp->cumulativeDataSent += p->cumulativeDataSent;
	  csp->maxIO = max (csp->maxIO, p->maxIO);
	  csp->minIO = min (csp->minIO, p->minIO);
	  csp->cumulativeIO += p->cumulativeIO;
	  csp->arbitraryMessageCount += p->arbitraryMessageCount;
	}
    }

  /* Collect aggregate callsite summary information independent of rank. */
  if (NULL == h_search (mpiPi.global_callsite_stats_agg, p, (void **) &csp))
    {
      int j;
      callsite_stats_t *newp = NULL;
      newp = (callsite_stats_t *) malloc (sizeof (callsite_stats_t));
      bzero (newp, sizeof (callsite_stats_t));
      newp->op = p->op;
      newp->rank = -1;
      for (j = 0; j < MPIP_CALLSITE_STACK_DEPTH; j++)
	{
	  newp->pc[j] = p->pc[j];
	  newp->filename[j] = p->filename[j];
	  newp->functname[j] = p->functname[j];
	  newp->lineno[j] = p->lineno[j];
	}
      newp->csid = p->csid;
      newp->count = p->count;
      newp->cumulativeTime = p->cumulativeTime;
      newp->cumulativeTimeSquared = p->cumulativeTimeSquared;
      newp->maxDur = p->maxDur;
      newp->minDur = p->minDur;
      newp->maxDataSent = p->maxDataSent;
      newp->minDataSent = p->minDataSent;
      newp->cumulativeDataSent = p->cumulativeDataSent;
      newp->cumulativeIO = p->cumulativeIO;
      newp->maxIO = p->maxIO;
      newp->minIO = p->minIO;
      newp->cookie = MPIP_CALLSITE_STATS_COOKIE;

      if (mpiPi.calcCOV)
	{
	  newp->siteData = (double *) malloc (mpiPi.size * sizeof (double));
	  newp->siteData[0] = p->cumulativeTime;
	  newp->siteDataIdx = 1;
	}

      /* insert new record into global */
      h_insert (mpiPi.global_callsite_stats_agg, newp);
    }
  else
    {
      csp->count += p->count;
      csp->cumulativeTime += p->cumulativeTime;
      assert (csp->cumulativeTime >= 0);
      csp->cumulativeTimeSquared += p->cumulativeTimeSquared;
      assert (csp->cumulativeTimeSquared >= 0);
      csp->maxDur = max (csp->maxDur, p->maxDur);
      csp->minDur = min (csp->minDur, p->minDur);
      csp->maxDataSent = max (csp->maxDataSent, p->maxDataSent);
      csp->minDataSent = min (csp->minDataSent, p->minDataSent);
      csp->cumulativeDataSent += p->cumulativeDataSent;
      csp->maxIO = max (csp->maxIO, p->maxIO);
      csp->minIO = min (csp->minIO, p->minIO);
      csp->cumulativeIO += p->cumulativeIO;

      if (mpiPi.calcCOV)
	{
	  csp->siteData[csp->siteDataIdx] = p->cumulativeTime;
	  csp->siteDataIdx += 1;
	}
    }

  /* Do global accumulation while we are iterating through individual callsites */
  mpiPi.global_task_info[p->rank].mpi_time += p->cumulativeTime;

  mpiPi.global_mpi_time += p->cumulativeTime;
  assert (mpiPi.global_mpi_time >= 0);
  mpiPi.global_mpi_size += p->cumulativeDataSent;
  mpiPi.global_mpi_io += p->cumulativeIO;
  if (p->cumulativeTime > 0)
    mpiPi.global_time_callsite_count++;

  if (p->cumulativeDataSent > 0)
    {
      mpiPi.global_mpi_msize_threshold_count += p->arbitraryMessageCount;
      mpiPi.global_mpi_sent_count += p->count;
    }

  return 1;
}
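The calcCOV branch above stores each rank's cumulative time in siteData so a coefficient of variation can be reported later. A sketch of that reduction (standard deviation over mean), assuming siteDataIdx counts the stored entries; the actual mpiP report code may differ:

#include <math.h>

/* Sketch only: reduce the per-rank cumulative times in siteData to a
   coefficient of variation. */
static double
site_cov_sketch (const callsite_stats_t *csp)
{
  int i;
  double sum = 0.0, sumsq = 0.0, mean, var;

  if (csp->siteDataIdx < 2)
    return 0.0;

  for (i = 0; i < csp->siteDataIdx; i++)
    sum += csp->siteData[i];
  mean = sum / csp->siteDataIdx;

  for (i = 0; i < csp->siteDataIdx; i++)
    sumsq += (csp->siteData[i] - mean) * (csp->siteData[i] - mean);
  var = sumsq / (csp->siteDataIdx - 1);

  return mean > 0.0 ? sqrt (var) / mean : 0.0;
}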
Example #6
static int
mpiPi_insert_callsite_records (callsite_stats_t * p)
{
  callsite_stats_t *csp = NULL;

  mpiPi_query_src (p);		/* sets the file/line in p */

  /* If an entry exists, accumulate into it; otherwise insert a new one.
     This is specifically for optimizations that map multiple PCs to
     one src line. We aggregate across ranks after this.

     The collective_report reporting approach does not aggregate
     individual process callsite information at the collector process.
   */
  if (mpiPi.collective_report == 0)
    {
      if (NULL == h_search (mpiPi.global_callsite_stats, p, (void **) &csp))
	{
	  callsite_stats_t *newp = NULL;
	  newp = (callsite_stats_t *) malloc (sizeof (callsite_stats_t));

	  memcpy (newp, p, sizeof (callsite_stats_t));
	  /* insert new record into global */
	  h_insert (mpiPi.global_callsite_stats, newp);
	}
      else
	mpiPi_merge_individual_callsite_records (csp, p);
    }

  /* Collect aggregate callsite summary information independent of rank. */
  if (NULL == h_search (mpiPi.global_callsite_stats_agg, p, (void **) &csp))
    {
      callsite_stats_t *newp = NULL;
      newp = (callsite_stats_t *) malloc (sizeof (callsite_stats_t));

      memcpy (newp, p, sizeof (callsite_stats_t));
      newp->rank = -1;

      if (mpiPi.calcCOV)
	{
	  newp->siteData = (double *) malloc (mpiPi.size * sizeof (double));
	  newp->siteData[0] = p->cumulativeTime;
	  newp->siteDataIdx = 1;
	}

      /* insert new record into global */
      h_insert (mpiPi.global_callsite_stats_agg, newp);
    }
  else
    {
      mpiPi_merge_individual_callsite_records (csp, p);

      if (mpiPi.calcCOV)
	{
	  csp->siteData[csp->siteDataIdx] = p->cumulativeTime;
	  csp->siteDataIdx += 1;
	}
    }

  /* Do global accumulation while we are iterating through individual callsites */
  mpiPi.global_task_mpi_time[p->rank] += p->cumulativeTime;

  mpiPi.global_mpi_time += p->cumulativeTime;
  assert (mpiPi.global_mpi_time >= 0);
  mpiPi.global_mpi_size += p->cumulativeDataSent;
  mpiPi.global_mpi_io += p->cumulativeIO;
  mpiPi.global_mpi_rma += p->cumulativeRMA;
  if (p->cumulativeTime > 0)
    mpiPi.global_time_callsite_count++;

  if (p->cumulativeDataSent > 0)
    {
      mpiPi.global_mpi_msize_threshold_count += p->arbitraryMessageCount;
      mpiPi.global_mpi_sent_count += p->count;
    }

  return 1;
}
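This version delegates accumulation to mpiPi_merge_individual_callsite_records. A sketch of what such a merge routine looks like, based on the field-by-field accumulation spelled out in Example #5; the real implementation may differ:

/* Sketch only: accumulate one record into an existing entry, mirroring
   the per-field merge shown in Example #5. */
static void
merge_callsite_records_sketch (callsite_stats_t *dst,
			       const callsite_stats_t *src)
{
  dst->count += src->count;
  dst->cumulativeTime += src->cumulativeTime;
  dst->cumulativeTimeSquared += src->cumulativeTimeSquared;
  dst->maxDur = max (dst->maxDur, src->maxDur);
  dst->minDur = min (dst->minDur, src->minDur);
  dst->maxDataSent = max (dst->maxDataSent, src->maxDataSent);
  dst->minDataSent = min (dst->minDataSent, src->minDataSent);
  dst->cumulativeDataSent += src->cumulativeDataSent;
  dst->maxIO = max (dst->maxIO, src->maxIO);
  dst->minIO = min (dst->minIO, src->minIO);
  dst->cumulativeIO += src->cumulativeIO;
  dst->arbitraryMessageCount += src->arbitraryMessageCount;
}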