Ejemplo n.º 1
0
/*
 * Fold one per-process callsite record into the collector's global tables.
 *
 * In order:
 *   1. mpiPi_query_src (p) fills in the file/line fields of p.
 *   2. Unless mpiPi.collective_report is set, p is inserted into
 *      mpiPi.global_callsite_stats, or accumulated onto the entry that
 *      h_search finds there.
 *   3. p is inserted into / accumulated onto
 *      mpiPi.global_callsite_stats_agg, where new entries get rank -1.
 *      When mpiPi.calcCOV is set, the cumulativeTime of every contribution
 *      is also appended to the entry's siteData array.
 *   4. The global totals in mpiPi are updated from p.
 *
 * Returns 1 on success.  Returns 0 as soon as an allocation fails; in that
 * case table updates made before the failure are kept, nothing is inserted
 * for the failing table, and the global totals are not updated.
 */
static int
mpiPi_insert_callsite_records (callsite_stats_t * p)
{
  callsite_stats_t *csp = NULL;

  mpiPi_query_src (p);		/* sets the file/line in p */

  /* If exists, accumulate, otherwise insert. This is
     specifically for optimizations that have multiple PCs for
     one src line. We aggregate across rank after this.

     The collective_report reporting approach does not aggregate individual
     process callsite information at the collector process.
   */
  if (mpiPi.collective_report == 0)
    {
      if (NULL == h_search (mpiPi.global_callsite_stats, p, (void **) &csp))
        {
          int j;
          /* calloc zero-fills the record, so fields not copied below are 0. */
          callsite_stats_t *newp = calloc (1, sizeof *newp);

          if (newp == NULL)
            return 0;

          newp->op = p->op;
          newp->rank = p->rank;
          for (j = 0; j < MPIP_CALLSITE_STACK_DEPTH; j++)
            {
              newp->pc[j] = p->pc[j];
              newp->filename[j] = p->filename[j];
              newp->functname[j] = p->functname[j];
              newp->lineno[j] = p->lineno[j];
            }
          newp->csid = p->csid;
          newp->count = p->count;
          newp->cumulativeTime = p->cumulativeTime;
          newp->cumulativeTimeSquared = p->cumulativeTimeSquared;
          newp->maxDur = p->maxDur;
          newp->minDur = p->minDur;
          newp->maxDataSent = p->maxDataSent;
          newp->minDataSent = p->minDataSent;
          newp->cumulativeDataSent = p->cumulativeDataSent;
          newp->maxIO = p->maxIO;
          newp->minIO = p->minIO;
          newp->cumulativeIO = p->cumulativeIO;
          newp->arbitraryMessageCount = p->arbitraryMessageCount;
          newp->cookie = MPIP_CALLSITE_STATS_COOKIE;

          /* insert new record into global */
          h_insert (mpiPi.global_callsite_stats, newp);
        }
      else
        {
          csp->count += p->count;
          csp->cumulativeTime += p->cumulativeTime;
          assert (csp->cumulativeTime >= 0);
          csp->cumulativeTimeSquared += p->cumulativeTimeSquared;
          assert (csp->cumulativeTimeSquared >= 0);
          csp->maxDur = max (csp->maxDur, p->maxDur);
          csp->minDur = min (csp->minDur, p->minDur);
          csp->maxDataSent = max (csp->maxDataSent, p->maxDataSent);
          csp->minDataSent = min (csp->minDataSent, p->minDataSent);
          csp->cumulativeDataSent += p->cumulativeDataSent;
          csp->maxIO = max (csp->maxIO, p->maxIO);
          csp->minIO = min (csp->minIO, p->minIO);
          csp->cumulativeIO += p->cumulativeIO;
          csp->arbitraryMessageCount += p->arbitraryMessageCount;
        }
    }

  /* Collect aggregate callsite summary information independent of rank.
     NOTE(review): unlike the per-rank table above, arbitraryMessageCount is
     neither copied nor accumulated for aggregate entries; confirm that this
     is intended. */
  if (NULL == h_search (mpiPi.global_callsite_stats_agg, p, (void **) &csp))
    {
      int j;
      callsite_stats_t *newp = calloc (1, sizeof *newp);

      if (newp == NULL)
        return 0;

      newp->op = p->op;
      newp->rank = -1;
      for (j = 0; j < MPIP_CALLSITE_STACK_DEPTH; j++)
        {
          newp->pc[j] = p->pc[j];
          newp->filename[j] = p->filename[j];
          newp->functname[j] = p->functname[j];
          newp->lineno[j] = p->lineno[j];
        }
      newp->csid = p->csid;
      newp->count = p->count;
      newp->cumulativeTime = p->cumulativeTime;
      newp->cumulativeTimeSquared = p->cumulativeTimeSquared;
      newp->maxDur = p->maxDur;
      newp->minDur = p->minDur;
      newp->maxDataSent = p->maxDataSent;
      newp->minDataSent = p->minDataSent;
      newp->cumulativeDataSent = p->cumulativeDataSent;
      newp->cumulativeIO = p->cumulativeIO;
      newp->maxIO = p->maxIO;
      newp->minIO = p->minIO;
      newp->cookie = MPIP_CALLSITE_STATS_COOKIE;

      if (mpiPi.calcCOV)
        {
          /* One slot per contribution, sized by mpiPi.size. */
          newp->siteData = malloc (mpiPi.size * sizeof (double));
          if (newp->siteData == NULL)
            {
              /* The record was never inserted, so it is still ours to free. */
              free (newp);
              return 0;
            }
          newp->siteData[0] = p->cumulativeTime;
          newp->siteDataIdx = 1;
        }

      /* insert new record into global */
      h_insert (mpiPi.global_callsite_stats_agg, newp);
    }
  else
    {
      csp->count += p->count;
      csp->cumulativeTime += p->cumulativeTime;
      assert (csp->cumulativeTime >= 0);
      csp->cumulativeTimeSquared += p->cumulativeTimeSquared;
      assert (csp->cumulativeTimeSquared >= 0);
      csp->maxDur = max (csp->maxDur, p->maxDur);
      csp->minDur = min (csp->minDur, p->minDur);
      csp->maxDataSent = max (csp->maxDataSent, p->maxDataSent);
      csp->minDataSent = min (csp->minDataSent, p->minDataSent);
      csp->cumulativeDataSent += p->cumulativeDataSent;
      csp->maxIO = max (csp->maxIO, p->maxIO);
      csp->minIO = min (csp->minIO, p->minIO);
      csp->cumulativeIO += p->cumulativeIO;

      /* siteData was allocated with mpiPi.size slots; never write past the
         end, and skip entries that have no array at all.
         NOTE(review): presumably at most one contribution per rank is
         expected here; confirm, since extra contributions are now dropped
         from the COV data instead of overflowing the buffer. */
      if (mpiPi.calcCOV
          && csp->siteData != NULL && csp->siteDataIdx < mpiPi.size)
        {
          csp->siteData[csp->siteDataIdx] = p->cumulativeTime;
          csp->siteDataIdx += 1;
        }
    }

  /* Do global accumulation while we are iterating through individual callsites */
  mpiPi.global_task_info[p->rank].mpi_time += p->cumulativeTime;

  mpiPi.global_mpi_time += p->cumulativeTime;
  assert (mpiPi.global_mpi_time >= 0);
  mpiPi.global_mpi_size += p->cumulativeDataSent;
  mpiPi.global_mpi_io += p->cumulativeIO;
  if (p->cumulativeTime > 0)
    mpiPi.global_time_callsite_count++;

  if (p->cumulativeDataSent > 0)
    {
      mpiPi.global_mpi_msize_threshold_count += p->arbitraryMessageCount;
      mpiPi.global_mpi_sent_count += p->count;
    }

  return 1;
}
Ejemplo n.º 2
0
Archivo: mpiPi.c Proyecto: saxena/mpip
/*
 * Fold one per-process callsite record into the collector's global tables.
 *
 * In order:
 *   1. mpiPi_query_src (p) fills in the file/line fields of p.
 *   2. Unless mpiPi.collective_report is set, a byte copy of p is inserted
 *      into mpiPi.global_callsite_stats, or p is merged into the entry that
 *      h_search finds there.
 *   3. The same is done for mpiPi.global_callsite_stats_agg, where new
 *      entries get rank -1.  When mpiPi.calcCOV is set, the cumulativeTime
 *      of every contribution is also appended to the entry's siteData array.
 *   4. The global totals in mpiPi are updated from p.
 *
 * Returns 1 on success.  Returns 0 as soon as an allocation fails; in that
 * case table updates made before the failure are kept, nothing is inserted
 * for the failing table, and the global totals are not updated.
 */
static int
mpiPi_insert_callsite_records (callsite_stats_t * p)
{
  callsite_stats_t *csp = NULL;

  mpiPi_query_src (p);		/* sets the file/line in p */

  /* If exists, accumulate, otherwise insert. This is
     specifically for optimizations that have multiple PCs for
     one src line. We aggregate across rank after this.

     The collective_report reporting approach does not aggregate individual
     process callsite information at the collector process.
   */
  if (mpiPi.collective_report == 0)
    {
      if (NULL == h_search (mpiPi.global_callsite_stats, p, (void **) &csp))
        {
          callsite_stats_t *newp = malloc (sizeof *newp);

          if (newp == NULL)
            return 0;

          memcpy (newp, p, sizeof *newp);
          /* insert new record into global */
          h_insert (mpiPi.global_callsite_stats, newp);
        }
      else
        {
          mpiPi_merge_individual_callsite_records (csp, p);
        }
    }

  /* Collect aggregate callsite summary information independent of rank. */
  if (NULL == h_search (mpiPi.global_callsite_stats_agg, p, (void **) &csp))
    {
      callsite_stats_t *newp = malloc (sizeof *newp);

      if (newp == NULL)
        return 0;

      memcpy (newp, p, sizeof *newp);
      newp->rank = -1;

      if (mpiPi.calcCOV)
        {
          /* One slot per contribution, sized by mpiPi.size. */
          newp->siteData = malloc (mpiPi.size * sizeof (double));
          if (newp->siteData == NULL)
            {
              /* The record was never inserted, so it is still ours to free. */
              free (newp);
              return 0;
            }
          newp->siteData[0] = p->cumulativeTime;
          newp->siteDataIdx = 1;
        }

      /* insert new record into global */
      h_insert (mpiPi.global_callsite_stats_agg, newp);
    }
  else
    {
      mpiPi_merge_individual_callsite_records (csp, p);

      /* siteData was allocated with mpiPi.size slots; never write past the
         end, and skip entries that have no array at all.
         NOTE(review): presumably at most one contribution per rank is
         expected here; confirm, since extra contributions are now dropped
         from the COV data instead of overflowing the buffer. */
      if (mpiPi.calcCOV
          && csp->siteData != NULL && csp->siteDataIdx < mpiPi.size)
        {
          csp->siteData[csp->siteDataIdx] = p->cumulativeTime;
          csp->siteDataIdx += 1;
        }
    }

  /* Do global accumulation while we are iterating through individual callsites */
  mpiPi.global_task_mpi_time[p->rank] += p->cumulativeTime;

  mpiPi.global_mpi_time += p->cumulativeTime;
  assert (mpiPi.global_mpi_time >= 0);
  mpiPi.global_mpi_size += p->cumulativeDataSent;
  mpiPi.global_mpi_io += p->cumulativeIO;
  mpiPi.global_mpi_rma += p->cumulativeRMA;
  if (p->cumulativeTime > 0)
    mpiPi.global_time_callsite_count++;

  if (p->cumulativeDataSent > 0)
    {
      mpiPi.global_mpi_msize_threshold_count += p->arbitraryMessageCount;
      mpiPi.global_mpi_sent_count += p->count;
    }

  return 1;
}