int mpiPi_query_pc (void *pc, char **filename, char **functname, int *lineno) { int rc = 0; callsite_pc_cache_entry_t key; callsite_pc_cache_entry_t *csp; char addr_buf[24]; key.pc = pc; /* do we have a cache entry for this pc? If so, use entry */ if (h_search (callsite_pc_cache, &key, (void **) &csp) == NULL) { /* no cache entry: create, lookup, and insert */ csp = (callsite_pc_cache_entry_t *) malloc (sizeof (callsite_pc_cache_entry_t)); csp->pc = pc; #if defined(ENABLE_BFD) || defined(USE_LIBDWARF) if (mpiP_find_src_loc (pc, filename, lineno, functname) == 0) { if (*filename == NULL || strcmp (*filename, "??") == 0) *filename = "[unknown]"; if (*functname == NULL) *functname = "[unknown]"; mpiPi_msg_debug ("Successful Source lookup for [%s]: %s, %d, %s\n", mpiP_format_address (pc, addr_buf), *filename, *lineno, *functname); csp->filename = strdup (*filename); csp->functname = strdup (*functname); csp->line = *lineno; } else { mpiPi_msg_debug ("Unsuccessful Source lookup for [%s]\n", mpiP_format_address (pc, addr_buf)); csp->filename = strdup ("[unknown]"); csp->functname = strdup ("[unknown]"); csp->line = 0; } #else /* ! ENABLE_BFD || USE_LIBDWARF */ csp->filename = strdup ("[unknown]"); csp->functname = strdup ("[unknown]"); csp->line = 0; #endif h_insert (callsite_pc_cache, csp); } *filename = csp->filename; *functname = csp->functname; *lineno = csp->line; if (*lineno == 0) rc = 1; /* use this value to indicate a failed lookup */ return rc; }
/* Resolve every recorded frame of a callstats record to source file,
   function and line where possible, then give the record a callsite id.

   Frames are processed until the first NULL pc or until
   MPIP_CALLSITE_STACK_DEPTH frames have been handled.  When
   mpiPi.do_lookup is not 1 no lookup is attempted and each frame is
   marked "[unknown]" with line 0.

   The id is looked up in (or newly added to) callsite_src_id_cache using
   a key built from the whole call stack, not just the callsite pc.

   Returns the callsite id, which is also stored in p->csid.

   NOTE(review): the results of malloc() and strdup() are used without a
   NULL check in this routine.  */
int
mpiPi_query_src (callsite_stats_t * p)
{
  callsite_src_id_cache_entry_t key;
  callsite_src_id_cache_entry_t *entry;
  int frame;
  int nframes;

  assert (p);

  /* Multiple pcs can map to the same source line, so the key carries the
     resolved source information of every frame in addition to the pcs.
     Start from an all-zero key so unused frames compare equal.  */
  bzero (&key, sizeof (key));

  for (frame = 0; frame < MPIP_CALLSITE_STACK_DEPTH; frame++)
    {
      if (p->pc[frame] == NULL)
        break;

      if (mpiPi.do_lookup != 1)
        {
          p->filename[frame] = strdup ("[unknown]");
          p->functname[frame] = strdup ("[unknown]");
          p->lineno[frame] = 0;
        }
      else
        {
          mpiPi_query_pc (p->pc[frame], &(p->filename[frame]),
                          &(p->functname[frame]), &(p->lineno[frame]));
        }

      key.filename[frame] = p->filename[frame];
      key.functname[frame] = p->functname[frame];
      key.line[frame] = p->lineno[frame];
      key.pc[frame] = p->pc[frame];
    }
  nframes = frame;              /* number of frames actually present */

  /* lookup/generate an ID based on the callstack */
  if (h_search (callsite_src_id_cache, &key, (void **) &entry) == NULL)
    {
      /* first time this stack is seen: build a cache entry that owns
         its own copies of the strings and hand out the next id */
      entry =
        (callsite_src_id_cache_entry_t *) malloc (sizeof (*entry));
      bzero (entry, sizeof (*entry));

      for (frame = 0; frame < nframes; frame++)
        {
          entry->filename[frame] = strdup (key.filename[frame]);
          entry->functname[frame] = strdup (key.functname[frame]);
          entry->line[frame] = key.line[frame];
          entry->pc[frame] = p->pc[frame];
        }

      entry->id = callsite_src_id_counter++;
      entry->op = p->op;
      h_insert (callsite_src_id_cache, entry);
    }

  /* assign ID to this record */
  p->csid = entry->id;

  return p->csid;
}
/* Aggregate individual MPI call data by iterating through call sites. */ static int mpiPi_insert_MPI_records () { callsite_stats_t *csp = NULL; int i, ac; callsite_stats_t **av; callsite_stats_t *p; if (mpiPi.rank == mpiPi.collectorRank) { /* Open hash table for MPI call data. */ mpiPi.global_MPI_stats_agg = h_open (mpiPi.tableSize, mpiPi_callsite_stats_MPI_id_hashkey, mpiPi_callsite_stats_op_comparator); /* Get individual call data. */ h_gather_data (mpiPi.global_callsite_stats_agg, &ac, (void ***) &av); /* Sort by MPI op. */ qsort (av, ac, sizeof (void *), callsite_sort_by_MPI_op); /* For each call site, add call site info to hash table entry for MPI op, independent of rank. */ for (i = 0; i < ac; i++) { p = av[i]; /* Check if there is already an entry for the MPI op. */ if (NULL == h_search (mpiPi.global_MPI_stats_agg, p, (void **) &csp)) { callsite_stats_t *newp = NULL; newp = (callsite_stats_t *) malloc (sizeof (callsite_stats_t)); memcpy (newp, p, sizeof (callsite_stats_t)); newp->rank = -1; newp->csid = p->op - mpiPi_BASE; /* insert new record into global */ h_insert (mpiPi.global_MPI_stats_agg, newp); } else { mpiPi_merge_individual_callsite_records (csp, p); } } } return 1; }
void mpiPi_update_callsite_stats (unsigned op, unsigned rank, void **pc, double dur, double sendSize, double ioSize) { int i; callsite_stats_t *csp = NULL; callsite_stats_t key; if (!mpiPi.enabled) return; assert (mpiPi.task_callsite_stats != NULL); assert (dur >= 0); key.op = op; key.rank = rank; key.cookie = MPIP_CALLSITE_STATS_COOKIE; for (i = 0; i < MPIP_CALLSITE_STACK_DEPTH; i++) { key.pc[i] = pc[i]; } if (NULL == h_search (mpiPi.task_callsite_stats, &key, (void **) &csp)) { /* create and insert */ csp = (callsite_stats_t *) malloc (sizeof (callsite_stats_t)); bzero (csp, sizeof (callsite_stats_t)); csp->op = op; csp->rank = rank; for (i = 0; i < MPIP_CALLSITE_STACK_DEPTH; i++) { csp->pc[i] = pc[i]; } csp->cookie = MPIP_CALLSITE_STATS_COOKIE; csp->minDur = DBL_MAX; csp->minDataSent = DBL_MAX; csp->minIO = DBL_MAX; csp->arbitraryMessageCount = 0; h_insert (mpiPi.task_callsite_stats, csp); } /* ASSUME: csp cannot be deleted from list */ csp->count++; csp->cumulativeTime += dur; assert (csp->cumulativeTime >= 0); csp->cumulativeTimeSquared += (dur * dur); assert (csp->cumulativeTimeSquared >= 0); csp->maxDur = max (csp->maxDur, dur); csp->minDur = min (csp->minDur, dur); csp->cumulativeDataSent += sendSize; csp->cumulativeIO += ioSize; csp->maxDataSent = max (csp->maxDataSent, sendSize); csp->minDataSent = min (csp->minDataSent, sendSize); csp->maxIO = max (csp->maxIO, ioSize); csp->minIO = min (csp->minIO, ioSize); if (mpiPi.messageCountThreshold > -1 && sendSize >= (double) mpiPi.messageCountThreshold) csp->arbitraryMessageCount++; #if 0 mpiPi_msg_debug ("mpiPi.messageCountThreshold is %d\n", mpiPi.messageCountThreshold); mpiPi_msg_debug ("sendSize is %f\n", sendSize); mpiPi_msg_debug ("csp->arbitraryMessageCount is %lld\n", csp->arbitraryMessageCount); #endif return; }
static int mpiPi_insert_callsite_records (callsite_stats_t * p) { callsite_stats_t *csp = NULL; mpiPi_query_src (p); /* sets the file/line in p */ /* If exists, accumulate, otherwise insert. This is specifically for optimizations that have multiple PCs for one src line. We aggregate across rank after this. The collective_report reporting approach does not aggregate individual process callsite information at the collector process. */ if (mpiPi.collective_report == 0) { if (NULL == h_search (mpiPi.global_callsite_stats, p, (void **) &csp)) { int j; callsite_stats_t *newp = NULL; newp = (callsite_stats_t *) malloc (sizeof (callsite_stats_t)); bzero (newp, sizeof (callsite_stats_t)); newp->op = p->op; newp->rank = p->rank; for (j = 0; j < MPIP_CALLSITE_STACK_DEPTH; j++) { newp->pc[j] = p->pc[j]; newp->filename[j] = p->filename[j]; newp->functname[j] = p->functname[j]; newp->lineno[j] = p->lineno[j]; } newp->csid = p->csid; newp->count = p->count; newp->cumulativeTime = p->cumulativeTime; newp->cumulativeTimeSquared = p->cumulativeTimeSquared; newp->maxDur = p->maxDur; newp->minDur = p->minDur; newp->maxDataSent = p->maxDataSent; newp->minDataSent = p->minDataSent; newp->cumulativeDataSent = p->cumulativeDataSent; newp->maxIO = p->maxIO; newp->minIO = p->minIO; newp->cumulativeIO = p->cumulativeIO; newp->arbitraryMessageCount = p->arbitraryMessageCount; newp->cookie = MPIP_CALLSITE_STATS_COOKIE; /* insert new record into global */ h_insert (mpiPi.global_callsite_stats, newp); } else { csp->count += p->count; csp->cumulativeTime += p->cumulativeTime; assert (csp->cumulativeTime >= 0); csp->cumulativeTimeSquared += p->cumulativeTimeSquared; assert (csp->cumulativeTimeSquared >= 0); csp->maxDur = max (csp->maxDur, p->maxDur); csp->minDur = min (csp->minDur, p->minDur); csp->maxDataSent = max (csp->maxDataSent, p->maxDataSent); csp->minDataSent = min (csp->minDataSent, p->minDataSent); csp->cumulativeDataSent += p->cumulativeDataSent; csp->maxIO = max (csp->maxIO, 
p->maxIO); csp->minIO = min (csp->minIO, p->minIO); csp->cumulativeIO += p->cumulativeIO; csp->arbitraryMessageCount += p->arbitraryMessageCount; } } /* Collect aggregate callsite summary information indpendent of rank. */ if (NULL == h_search (mpiPi.global_callsite_stats_agg, p, (void **) &csp)) { int j; callsite_stats_t *newp = NULL; newp = (callsite_stats_t *) malloc (sizeof (callsite_stats_t)); bzero (newp, sizeof (callsite_stats_t)); newp->op = p->op; newp->rank = -1; for (j = 0; j < MPIP_CALLSITE_STACK_DEPTH; j++) { newp->pc[j] = p->pc[j]; newp->filename[j] = p->filename[j]; newp->functname[j] = p->functname[j]; newp->lineno[j] = p->lineno[j]; } newp->csid = p->csid; newp->count = p->count; newp->cumulativeTime = p->cumulativeTime; newp->cumulativeTimeSquared = p->cumulativeTimeSquared; newp->maxDur = p->maxDur; newp->minDur = p->minDur; newp->maxDataSent = p->maxDataSent; newp->minDataSent = p->minDataSent; newp->cumulativeDataSent = p->cumulativeDataSent; newp->cumulativeIO = p->cumulativeIO; newp->maxIO = p->maxIO; newp->minIO = p->minIO; newp->cookie = MPIP_CALLSITE_STATS_COOKIE; if (mpiPi.calcCOV) { newp->siteData = (double *) malloc (mpiPi.size * sizeof (double)); newp->siteData[0] = p->cumulativeTime; newp->siteDataIdx = 1; } /* insert new record into global */ h_insert (mpiPi.global_callsite_stats_agg, newp); } else { csp->count += p->count; csp->cumulativeTime += p->cumulativeTime; assert (csp->cumulativeTime >= 0); csp->cumulativeTimeSquared += p->cumulativeTimeSquared; assert (csp->cumulativeTimeSquared >= 0); csp->maxDur = max (csp->maxDur, p->maxDur); csp->minDur = min (csp->minDur, p->minDur); csp->maxDataSent = max (csp->maxDataSent, p->maxDataSent); csp->minDataSent = min (csp->minDataSent, p->minDataSent); csp->cumulativeDataSent += p->cumulativeDataSent; csp->maxIO = max (csp->maxIO, p->maxIO); csp->minIO = min (csp->minIO, p->minIO); csp->cumulativeIO += p->cumulativeIO; if (mpiPi.calcCOV) { csp->siteData[csp->siteDataIdx] = 
p->cumulativeTime; csp->siteDataIdx += 1; } } /* Do global accumulation while we are iterating through individual callsites */ mpiPi.global_task_info[p->rank].mpi_time += p->cumulativeTime; mpiPi.global_mpi_time += p->cumulativeTime; assert (mpiPi.global_mpi_time >= 0); mpiPi.global_mpi_size += p->cumulativeDataSent; mpiPi.global_mpi_io += p->cumulativeIO; if (p->cumulativeTime > 0) mpiPi.global_time_callsite_count++; if (p->cumulativeDataSent > 0) { mpiPi.global_mpi_msize_threshold_count += p->arbitraryMessageCount; mpiPi.global_mpi_sent_count += p->count; } return 1; }
/* Fold one per-task callsite record into the collector's global tables.

   The record is first resolved to source information and given a
   callsite id (mpiPi_query_src).  It is then

     - unless mpiPi.collective_report is set, copied into or merged onto
       mpiPi.global_callsite_stats.  Merging is specifically for
       optimizations that have multiple PCs for one source line;
       aggregation across rank happens afterwards.  The
       collective_report reporting approach does not aggregate
       individual process callsite information at the collector.
     - copied into or merged onto mpiPi.global_callsite_stats_agg, the
       rank-independent summary (rank is stored as -1).  With
       mpiPi.calcCOV set, the record's cumulativeTime is also appended
       to the summary's siteData array.
     - added to the global and per-task totals.

   Merging is delegated to mpiPi_merge_individual_callsite_records,
   which is defined elsewhere in this file.

   Returns 1 on success and 0 if a record or its siteData array could
   not be allocated; in that case the tables and totals reflect only the
   steps completed before the failure.

   NOTE(review): new records are shallow memcpy copies of p, so pointer
   members (file/function names, and siteData when calcCOV is off) are
   shared with p -- confirm p outlives the tables.  */
static int
mpiPi_insert_callsite_records (callsite_stats_t * p)
{
  callsite_stats_t *csp = NULL;

  mpiPi_query_src (p);          /* sets the file/line in p */

  /* Per-rank table: if exists, accumulate, otherwise insert. */
  if (mpiPi.collective_report == 0)
    {
      if (NULL == h_search (mpiPi.global_callsite_stats, p, (void **) &csp))
        {
          callsite_stats_t *newp = malloc (sizeof *newp);

          if (newp == NULL)
            return 0;
          memcpy (newp, p, sizeof *newp);

          /* insert new record into global */
          h_insert (mpiPi.global_callsite_stats, newp);
        }
      else
        {
          mpiPi_merge_individual_callsite_records (csp, p);
        }
    }

  /* Collect aggregate callsite summary information independent of rank. */
  if (NULL == h_search (mpiPi.global_callsite_stats_agg, p, (void **) &csp))
    {
      callsite_stats_t *newp = malloc (sizeof *newp);

      if (newp == NULL)
        return 0;
      memcpy (newp, p, sizeof *newp);
      newp->rank = -1;

      if (mpiPi.calcCOV)
        {
          /* room for mpiPi.size per-record times */
          newp->siteData = (double *) malloc (mpiPi.size * sizeof (double));
          if (newp->siteData == NULL)
            {
              free (newp);
              return 0;
            }
          newp->siteData[0] = p->cumulativeTime;
          newp->siteDataIdx = 1;
        }

      /* insert new record into global */
      h_insert (mpiPi.global_callsite_stats_agg, newp);
    }
  else
    {
      mpiPi_merge_individual_callsite_records (csp, p);

      /* siteData holds mpiPi.size entries; never write past its end
         even if more records than that are merged into one callsite */
      if (mpiPi.calcCOV && csp->siteDataIdx < mpiPi.size)
        {
          csp->siteData[csp->siteDataIdx] = p->cumulativeTime;
          csp->siteDataIdx += 1;
        }
    }

  /* Do global accumulation while we are iterating through individual
     callsites */
  mpiPi.global_task_mpi_time[p->rank] += p->cumulativeTime;

  mpiPi.global_mpi_time += p->cumulativeTime;
  assert (mpiPi.global_mpi_time >= 0);
  mpiPi.global_mpi_size += p->cumulativeDataSent;
  mpiPi.global_mpi_io += p->cumulativeIO;
  mpiPi.global_mpi_rma += p->cumulativeRMA;

  if (p->cumulativeTime > 0)
    mpiPi.global_time_callsite_count++;

  if (p->cumulativeDataSent > 0)
    {
      mpiPi.global_mpi_msize_threshold_count += p->arbitraryMessageCount;
      mpiPi.global_mpi_sent_count += p->count;
    }

  return 1;
}