/*
 * Copy the fields that per-rank and aggregate records initialise the same
 * way from src into dst, and stamp dst with the callsite cookie.
 *
 * rank and arbitraryMessageCount are intentionally left to the caller: the
 * per-rank table copies both, the aggregate table sets rank to -1 and leaves
 * arbitraryMessageCount at zero.
 */
static void
mpiPi_callsite_copy_shared (callsite_stats_t * dst,
                            const callsite_stats_t * src)
{
  int j;

  dst->op = src->op;
  for (j = 0; j < MPIP_CALLSITE_STACK_DEPTH; j++)
    {
      dst->pc[j] = src->pc[j];
      dst->filename[j] = src->filename[j];
      dst->functname[j] = src->functname[j];
      dst->lineno[j] = src->lineno[j];
    }
  dst->csid = src->csid;
  dst->count = src->count;
  dst->cumulativeTime = src->cumulativeTime;
  dst->cumulativeTimeSquared = src->cumulativeTimeSquared;
  dst->maxDur = src->maxDur;
  dst->minDur = src->minDur;
  dst->maxDataSent = src->maxDataSent;
  dst->minDataSent = src->minDataSent;
  dst->cumulativeDataSent = src->cumulativeDataSent;
  dst->maxIO = src->maxIO;
  dst->minIO = src->minIO;
  dst->cumulativeIO = src->cumulativeIO;
  dst->cookie = MPIP_CALLSITE_STATS_COOKIE;
}

/*
 * Fold the statistics of src into the existing record dst: sums for the
 * counters and cumulative values, max/min for the extrema.
 *
 * arbitraryMessageCount is not touched here; only the per-rank table
 * accumulates it, so that caller adds it separately.
 */
static void
mpiPi_callsite_accumulate_shared (callsite_stats_t * dst,
                                  const callsite_stats_t * src)
{
  dst->count += src->count;
  dst->cumulativeTime += src->cumulativeTime;
  assert (dst->cumulativeTime >= 0);
  dst->cumulativeTimeSquared += src->cumulativeTimeSquared;
  assert (dst->cumulativeTimeSquared >= 0);
  dst->maxDur = max (dst->maxDur, src->maxDur);
  dst->minDur = min (dst->minDur, src->minDur);
  dst->maxDataSent = max (dst->maxDataSent, src->maxDataSent);
  dst->minDataSent = min (dst->minDataSent, src->minDataSent);
  dst->cumulativeDataSent += src->cumulativeDataSent;
  dst->maxIO = max (dst->maxIO, src->maxIO);
  dst->minIO = min (dst->minIO, src->minIO);
  dst->cumulativeIO += src->cumulativeIO;
}

/*
 * Record one callsite's statistics in the global tables and totals.
 *
 * p is first passed to mpiPi_query_src(), which sets its file/line
 * information.  The record is then:
 *   - inserted into, or merged with, mpiPi.global_callsite_stats when
 *     mpiPi.collective_report is 0;
 *   - inserted into, or merged with, mpiPi.global_callsite_stats_agg
 *     (stored with rank -1), optionally appending p->cumulativeTime to the
 *     record's siteData array when mpiPi.calcCOV is set;
 *   - added to the global time/size/IO totals and counters.
 *
 * Returns 1 on success.  Returns 0 if a new record could not be allocated;
 * in that case the global totals are not updated, although the per-rank
 * table may already have been updated for this call.
 */
static int
mpiPi_insert_callsite_records (callsite_stats_t * p)
{
  callsite_stats_t *csp = NULL;

  mpiPi_query_src (p);		/* sets the file/line in p */

  /* If exists, accumulate, otherwise insert. This is
     specifically for optimizations that have multiple PCs for
     one src line. We aggregate across rank after this.

     The collective_report reporting approach does not aggregate individual
     process callsite information at the collector process. */
  if (mpiPi.collective_report == 0)
    {
      if (NULL == h_search (mpiPi.global_callsite_stats, p, (void **) &csp))
        {
          /* calloc replaces malloc + bzero and lets us detect failure
             before the record is written to. */
          callsite_stats_t *newp = calloc (1, sizeof *newp);

          if (newp == NULL)
            return 0;

          mpiPi_callsite_copy_shared (newp, p);
          newp->rank = p->rank;
          newp->arbitraryMessageCount = p->arbitraryMessageCount;

          /* insert new record into global */
          h_insert (mpiPi.global_callsite_stats, newp);
        }
      else
        {
          mpiPi_callsite_accumulate_shared (csp, p);
          csp->arbitraryMessageCount += p->arbitraryMessageCount;
        }
    }

  /* Collect aggregate callsite summary information independent of rank. */
  if (NULL == h_search (mpiPi.global_callsite_stats_agg, p, (void **) &csp))
    {
      callsite_stats_t *newp = calloc (1, sizeof *newp);

      if (newp == NULL)
        return 0;

      mpiPi_callsite_copy_shared (newp, p);
      newp->rank = -1;

      if (mpiPi.calcCOV)
        {
          /* One slot per task: mpiPi.size doubles. */
          newp->siteData = malloc (mpiPi.size * sizeof (double));
          if (newp->siteData == NULL)
            {
              free (newp);
              return 0;
            }
          newp->siteData[0] = p->cumulativeTime;
          newp->siteDataIdx = 1;
        }

      /* insert new record into global */
      h_insert (mpiPi.global_callsite_stats_agg, newp);
    }
  else
    {
      mpiPi_callsite_accumulate_shared (csp, p);

      /* siteData holds only mpiPi.size entries.  Several records can merge
         into one aggregate entry (multiple PCs for one source line), so
         refuse to write past the end of the array; surplus samples are
         dropped rather than corrupting the heap. */
      if (mpiPi.calcCOV && csp->siteData != NULL
          && csp->siteDataIdx < mpiPi.size)
        {
          csp->siteData[csp->siteDataIdx] = p->cumulativeTime;
          csp->siteDataIdx += 1;
        }
    }

  /* Do global accumulation while we are iterating through individual callsites */
  /* NOTE(review): assumes p->rank is a valid index into global_task_info
     -- confirm against the code that allocates that array. */
  mpiPi.global_task_info[p->rank].mpi_time += p->cumulativeTime;

  mpiPi.global_mpi_time += p->cumulativeTime;
  assert (mpiPi.global_mpi_time >= 0);
  mpiPi.global_mpi_size += p->cumulativeDataSent;
  mpiPi.global_mpi_io += p->cumulativeIO;

  if (p->cumulativeTime > 0)
    mpiPi.global_time_callsite_count++;

  if (p->cumulativeDataSent > 0)
    {
      mpiPi.global_mpi_msize_threshold_count += p->arbitraryMessageCount;
      mpiPi.global_mpi_sent_count += p->count;
    }

  return 1;
}
/*
 * Allocate a new callsite record holding a byte-for-byte copy of src.
 * Returns NULL if the allocation fails; the caller owns the result.
 */
static callsite_stats_t *
mpiPi_clone_callsite_record (const callsite_stats_t * src)
{
  callsite_stats_t *copy = malloc (sizeof *copy);

  if (copy != NULL)
    memcpy (copy, src, sizeof *copy);

  return copy;
}

/*
 * Record one callsite's statistics in the global tables and totals.
 *
 * p is first passed to mpiPi_query_src(), which sets its file/line
 * information.  The record is then:
 *   - copied into mpiPi.global_callsite_stats, or merged with the matching
 *     entry via mpiPi_merge_individual_callsite_records(), when
 *     mpiPi.collective_report is 0;
 *   - copied into mpiPi.global_callsite_stats_agg with rank set to -1, or
 *     merged with the matching entry, optionally appending
 *     p->cumulativeTime to the entry's siteData array when mpiPi.calcCOV
 *     is set;
 *   - added to the global time/size/IO/RMA totals and counters.
 *
 * Returns 1 on success.  Returns 0 if a new record could not be allocated;
 * in that case the global totals are not updated, although the per-rank
 * table may already have been updated for this call.
 */
static int
mpiPi_insert_callsite_records (callsite_stats_t * p)
{
  callsite_stats_t *csp = NULL;

  mpiPi_query_src (p);		/* sets the file/line in p */

  /* If exists, accumulate, otherwise insert. This is
     specifically for optimizations that have multiple PCs for
     one src line. We aggregate across rank after this.

     The collective_report reporting approach does not aggregate individual
     process callsite information at the collector process. */
  if (mpiPi.collective_report == 0)
    {
      if (NULL == h_search (mpiPi.global_callsite_stats, p, (void **) &csp))
        {
          callsite_stats_t *newp = mpiPi_clone_callsite_record (p);

          if (newp == NULL)
            return 0;

          /* insert new record into global */
          h_insert (mpiPi.global_callsite_stats, newp);
        }
      else
        {
          mpiPi_merge_individual_callsite_records (csp, p);
        }
    }

  /* Collect aggregate callsite summary information independent of rank. */
  if (NULL == h_search (mpiPi.global_callsite_stats_agg, p, (void **) &csp))
    {
      callsite_stats_t *newp = mpiPi_clone_callsite_record (p);

      if (newp == NULL)
        return 0;

      newp->rank = -1;

      if (mpiPi.calcCOV)
        {
          /* One slot per task: mpiPi.size doubles. */
          newp->siteData = malloc (mpiPi.size * sizeof (double));
          if (newp->siteData == NULL)
            {
              free (newp);
              return 0;
            }
          newp->siteData[0] = p->cumulativeTime;
          newp->siteDataIdx = 1;
        }

      /* insert new record into global */
      h_insert (mpiPi.global_callsite_stats_agg, newp);
    }
  else
    {
      mpiPi_merge_individual_callsite_records (csp, p);

      /* siteData holds only mpiPi.size entries.  Several records can merge
         into one aggregate entry (multiple PCs for one source line), so
         refuse to write past the end of the array; surplus samples are
         dropped rather than corrupting the heap. */
      if (mpiPi.calcCOV && csp->siteData != NULL
          && csp->siteDataIdx < mpiPi.size)
        {
          csp->siteData[csp->siteDataIdx] = p->cumulativeTime;
          csp->siteDataIdx += 1;
        }
    }

  /* Do global accumulation while we are iterating through individual callsites */
  /* NOTE(review): assumes p->rank is a valid index into
     global_task_mpi_time -- confirm against the code that allocates it. */
  mpiPi.global_task_mpi_time[p->rank] += p->cumulativeTime;

  mpiPi.global_mpi_time += p->cumulativeTime;
  assert (mpiPi.global_mpi_time >= 0);
  mpiPi.global_mpi_size += p->cumulativeDataSent;
  mpiPi.global_mpi_io += p->cumulativeIO;
  mpiPi.global_mpi_rma += p->cumulativeRMA;

  if (p->cumulativeTime > 0)
    mpiPi.global_time_callsite_count++;

  if (p->cumulativeDataSent > 0)
    {
      mpiPi.global_mpi_msize_threshold_count += p->arbitraryMessageCount;
      mpiPi.global_mpi_sent_count += p->count;
    }

  return 1;
}