/* Aggregate individual MPI call data by iterating through call sites. */ static int mpiPi_insert_MPI_records () { callsite_stats_t *csp = NULL; int i, ac; callsite_stats_t **av; callsite_stats_t *p; if (mpiPi.rank == mpiPi.collectorRank) { /* Open hash table for MPI call data. */ mpiPi.global_MPI_stats_agg = h_open (mpiPi.tableSize, mpiPi_callsite_stats_MPI_id_hashkey, mpiPi_callsite_stats_op_comparator); /* Get individual call data. */ h_gather_data (mpiPi.global_callsite_stats_agg, &ac, (void ***) &av); /* Sort by MPI op. */ qsort (av, ac, sizeof (void *), callsite_sort_by_MPI_op); /* For each call site, add call site info to hash table entry for MPI op, independent of rank. */ for (i = 0; i < ac; i++) { p = av[i]; /* Check if there is already an entry for the MPI op. */ if (NULL == h_search (mpiPi.global_MPI_stats_agg, p, (void **) &csp)) { callsite_stats_t *newp = NULL; newp = (callsite_stats_t *) malloc (sizeof (callsite_stats_t)); memcpy (newp, p, sizeof (callsite_stats_t)); newp->rank = -1; newp->csid = p->op - mpiPi_BASE; /* insert new record into global */ h_insert (mpiPi.global_MPI_stats_agg, newp); } else { mpiPi_merge_individual_callsite_records (csp, p); } } } return 1; }
int mpiPi_mergeResults () { int ac; callsite_stats_t **av; int totalCount = 0; int maxCount = 0; int retval = 1, sendval; /* gather local task data */ h_gather_data (mpiPi.task_callsite_stats, &ac, (void ***) &av); /* determine size of space necessary on collector */ PMPI_Allreduce (&ac, &totalCount, 1, MPI_INT, MPI_SUM, mpiPi.comm); PMPI_Reduce (&ac, &maxCount, 1, MPI_INT, MPI_MAX, mpiPi.collectorRank, mpiPi.comm); if (totalCount < 1) { mpiPi_msg_warn ("Collector found no records to merge. Omitting report.\n"); return 0; } /* gather global data at collector */ if (mpiPi.rank == mpiPi.collectorRank) { int i; int ndx = 0; #ifdef ENABLE_BFD if (mpiPi.appFullName != NULL) { if (open_bfd_executable (mpiPi.appFullName) == 0) mpiPi.do_lookup = 0; } #elif defined(USE_LIBDWARF) if (mpiPi.appFullName != NULL) { if (open_dwarf_executable (mpiPi.appFullName) == 0) mpiPi.do_lookup = 0; } #endif #if defined(ENABLE_BFD) || defined(USE_LIBDWARF) else { mpiPi_msg_warn ("Failed to open executable\n"); mpiPi.do_lookup = 0; } #endif /* convert data to src line; merge, if nec */ mpiPi.global_callsite_stats = h_open (mpiPi.tableSize, mpiPi_callsite_stats_src_hashkey, mpiPi_callsite_stats_src_comparator); mpiPi.global_callsite_stats_agg = h_open (mpiPi.tableSize, mpiPi_callsite_stats_src_id_hashkey, mpiPi_callsite_stats_src_id_comparator); if (callsite_pc_cache == NULL) { callsite_pc_cache = h_open (mpiPi.tableSize, callsite_pc_cache_hashkey, callsite_pc_cache_comparator); } if (callsite_src_id_cache == NULL) { callsite_src_id_cache = h_open (mpiPi.tableSize, callsite_src_id_cache_hashkey, callsite_src_id_cache_comparator); } /* Try to allocate space for max count of callsite info from all tasks */ mpiPi.rawCallsiteData = (callsite_stats_t *) calloc (maxCount, sizeof (callsite_stats_t)); if (mpiPi.rawCallsiteData == NULL) { mpiPi_msg_warn ("Failed to allocate memory to collect callsite info"); retval = 0; } /* Clear global_mpi_time and global_mpi_size before accumulation in mpiPi_insert_callsite_records */ mpiPi.global_mpi_time = 0.0; mpiPi.global_mpi_size = 0.0; if (retval == 1) { /* Insert collector callsite data into global and task-specific hash tables */ for (ndx = 0; ndx < ac; ndx++) { mpiPi_insert_callsite_records (av[ndx]); } ndx = 0; for (i = 1; i < mpiPi.size; i++) /* n-1 */ { MPI_Status status; int count; int j; /* okay in any order */ PMPI_Probe (MPI_ANY_SOURCE, mpiPi.tag, mpiPi.comm, &status); PMPI_Get_count (&status, MPI_CHAR, &count); PMPI_Recv (&(mpiPi.rawCallsiteData[ndx]), count, MPI_CHAR, status.MPI_SOURCE, mpiPi.tag, mpiPi.comm, &status); count /= sizeof (callsite_stats_t); for (j = 0; j < count; j++) { mpiPi_insert_callsite_records (&(mpiPi.rawCallsiteData[j])); } } free (mpiPi.rawCallsiteData); } } else { int ndx; char *sbuf = (char *) malloc (ac * sizeof (callsite_stats_t)); for (ndx = 0; ndx < ac; ndx++) { bcopy (av[ndx], &(sbuf[ndx * sizeof (callsite_stats_t)]), sizeof (callsite_stats_t)); } PMPI_Send (sbuf, ac * sizeof (callsite_stats_t), MPI_CHAR, mpiPi.collectorRank, mpiPi.tag, mpiPi.comm); free (sbuf); } if (mpiPi.rank == mpiPi.collectorRank && retval == 1) { if (mpiPi.collective_report == 0) mpiPi_msg_debug ("MEMORY : Allocated for global_callsite_stats : %13ld\n", h_count (mpiPi.global_callsite_stats) * sizeof (callsite_stats_t)); mpiPi_msg_debug ("MEMORY : Allocated for global_callsite_stats_agg : %13ld\n", h_count (mpiPi.global_callsite_stats_agg) * sizeof (callsite_stats_t)); } /* TODO: need to free all these pointers as well. */ free (av); if (mpiPi.rank == mpiPi.collectorRank) { if (mpiPi.do_lookup == 1) { #ifdef ENABLE_BFD /* clean up */ close_bfd_executable (); #elif defined(USE_LIBDWARF) close_dwarf_executable (); #endif } } /* Quadrics MPI does not appear to support MPI_IN_PLACE */ sendval = retval; PMPI_Allreduce (&sendval, &retval, 1, MPI_INT, MPI_MIN, mpiPi.comm); return retval; }
/* task level init - executed by each MPI task only once immediately after MPI_Init */ void mpiPi_init (char *appName) { if (time (&mpiPi.start_timeofday) == (time_t) - 1) { mpiPi_msg_warn ("Could not get time of day from time()\n"); } mpiPi.toolname = "mpiP"; mpiPi.comm = MPI_COMM_WORLD; mpiPi.tag = 9821; mpiPi.procID = getpid (); mpiPi.appName = strdup (appName); PMPI_Comm_rank (mpiPi.comm, &mpiPi.rank); PMPI_Comm_size (mpiPi.comm, &mpiPi.size); PMPI_Get_processor_name (mpiPi.hostname, &mpiPi.hostnamelen); mpiPi.stdout_ = stdout; mpiPi.stderr_ = stderr; mpiPi.lookup = mpiPi_lookup; mpiPi.enabled = 1; mpiPi.enabledCount = 1; mpiPi.cumulativeTime = 0.0; mpiPi.global_app_time = 0.0; mpiPi.global_mpi_time = 0.0; mpiPi.global_mpi_size = 0.0; mpiPi.global_mpi_io = 0.0; mpiPi.global_mpi_msize_threshold_count = 0; mpiPi.global_mpi_sent_count = 0; mpiPi.global_time_callsite_count = 0; mpiPi.global_task_info = NULL; char tmpfilename[64]; sprintf(tmpfilename,"%d.trace\0",mpiPi.rank); mpiPi.recfile = fopen(tmpfilename,"wb"); printf("Open Rec File %s !\n", tmpfilename); /* set some defaults values */ mpiPi.collectorRank = 0; mpiPi.tableSize = 256; mpiPi.stackDepth = 1; /* the value 2 includes parent wrapper function */ mpiPi.reportPrintThreshold = 0.0; mpiPi.baseNames = 0; mpiPi.reportFormat = MPIP_REPORT_SCI_FORMAT; mpiPi.calcCOV = 1; mpiPi.inAPIrtb = 0; mpiPi.do_lookup = 1; mpiPi.messageCountThreshold = -1; mpiPi.report_style = mpiPi_style_verbose; mpiPi.print_callsite_detail = 1; #ifdef COLLECTIVE_REPORT_DEFAULT mpiPi.collective_report = 1; #else mpiPi.collective_report = 0; #endif mpiPi_getenv (); mpiPi.task_callsite_stats = h_open (mpiPi.tableSize, mpiPi_callsite_stats_pc_hashkey, mpiPi_callsite_stats_pc_comparator); /* -- welcome msg only collector */ if (mpiPi.collectorRank == mpiPi.rank) { mpiPi_msg ("\n"); mpiPi_msg ("%s V%d.%d.%d (Build %s/%s)\n", mpiPi.toolname, mpiPi_vmajor, mpiPi_vminor, mpiPi_vpatch, mpiPi_vdate, mpiPi_vtime); mpiPi_msg ("Direct questions and errors to %s\n", MPIP_HELP_LIST); mpiPi_msg ("\n"); } mpiPi_msg_debug ("appName is %s\n", appName); mpiPi_msg_debug ("successful init on %d, %s\n", mpiPi.rank, mpiPi.hostname); if (mpiPi.enabled) { mpiPi_GETTIME (&mpiPi.startTime); } return; }
void mpiPi_init (char *appName) { if (time (&mpiPi.start_timeofday) == (time_t) - 1) { mpiPi_msg_warn ("Could not get time of day from time()\n"); } mpiPi.toolname = "mpiP"; mpiPi.comm = MPI_COMM_WORLD; mpiPi.tag = 9821; mpiPi.procID = getpid (); mpiPi.appName = strdup (appName); PMPI_Comm_rank (mpiPi.comm, &mpiPi.rank); PMPI_Comm_size (mpiPi.comm, &mpiPi.size); PMPI_Get_processor_name (mpiPi.hostname, &mpiPi.hostnamelen); mpiPi.stdout_ = stdout; mpiPi.stderr_ = stderr; mpiPi.lookup = mpiPi_lookup; mpiPi.enabled = 1; mpiPi.enabledCount = 1; mpiPi.cumulativeTime = 0.0; mpiPi.global_app_time = 0.0; mpiPi.global_mpi_time = 0.0; mpiPi.global_mpi_size = 0.0; mpiPi.global_mpi_io = 0.0; mpiPi.global_mpi_rma = 0.0; mpiPi.global_mpi_msize_threshold_count = 0; mpiPi.global_mpi_sent_count = 0; mpiPi.global_time_callsite_count = 0; mpiPi.global_task_hostnames = NULL; mpiPi.global_task_app_time = NULL; mpiPi.global_task_mpi_time = NULL; /* set some defaults values */ mpiPi.collectorRank = 0; mpiPi.tableSize = 256; mpiPi.reportPrintThreshold = 0.0; mpiPi.baseNames = 0; mpiPi.reportFormat = MPIP_REPORT_SCI_FORMAT; mpiPi.calcCOV = 1; mpiPi.inAPIrtb = 0; mpiPi.do_lookup = 1; mpiPi.messageCountThreshold = -1; if (DEFAULT_REPORT_FORMAT == mpiPi_style_concise) { mpiPi.report_style = mpiPi_style_concise; mpiPi.stackDepth = 0; mpiPi.print_callsite_detail = 0; } else // verbose default { mpiPi.report_style = mpiPi_style_verbose; mpiPi.stackDepth = 1; mpiPi.print_callsite_detail = 1; } #ifdef COLLECTIVE_REPORT_DEFAULT mpiPi.collective_report = 1; #else mpiPi.collective_report = 0; #endif mpiPi.disable_finalize_report = 0; mpiPi.do_collective_stats_report = 0; mpiPi.do_pt2pt_stats_report = 0; #ifdef SO_LOOKUP mpiPi.so_info = NULL; #endif mpiPi.do_pt2pt_detail_report = 0; mpiPi_getenv (); mpiPi.task_callsite_stats = h_open (mpiPi.tableSize, mpiPi_callsite_stats_pc_hashkey, mpiPi_callsite_stats_pc_comparator); mpiPi.accumulatedPt2ptCounts = NULL; mpiPi.accumulatedPt2ptData = NULL; if (mpiPi.do_collective_stats_report == 1) { init_histogram (&mpiPi.coll_comm_histogram, 7, 32, NULL); init_histogram (&mpiPi.coll_size_histogram, 7, 32, NULL); } if (mpiPi.do_pt2pt_stats_report == 1) { init_histogram (&mpiPi.pt2pt_comm_histogram, 7, 32, NULL); init_histogram (&mpiPi.pt2pt_size_histogram, 7, 32, NULL); if(mpiPi.do_pt2pt_detail_report == 1) { mpiPi.task_pt2pt_stats = h_open(mpiPi.tableSize, mpiPi_pt2pt_stats_rank_hashkey, mpiPi_pt2pt_stats_rank_comparator); } } /* -- welcome msg only collector */ if (mpiPi.collectorRank == mpiPi.rank) { mpiPi_msg (""); mpiPi_msg ("%s V%d.%d.%d (Build %s/%s)\n", mpiPi.toolname, mpiPi_vmajor, mpiPi_vminor, mpiPi_vpatch, mpiPi_vdate, mpiPi_vtime); mpiPi_msg ("Direct questions and errors to %s\n", MPIP_HELP_LIST); mpiPi_msg ("\n"); } mpiPi_msg_debug ("appName is %s\n", appName); mpiPi_msg_debug ("sizeof(callsite_stats_t) is %d\n", sizeof (callsite_stats_t)); mpiPi_msg_debug ("successful init on %d, %s\n", mpiPi.rank, mpiPi.hostname); if (mpiPi.enabled) { mpiPi_GETTIME (&mpiPi.startTime); } return; }