/*
 * getGCStats: fill *s with a snapshot of the RTS's accumulated GC and
 * timing statistics (allocation, residency, copy counts, and CPU/wall
 * time split into mutator vs. GC).
 *
 * BUG FIX: the address-of operators in the getProcessTimes() call had
 * been mangled into "¤t_cpu"/"¤t_elapsed" (an HTML-entity corruption of
 * "&current_cpu"/"&current_elapsed"), which does not compile.
 */
extern void getGCStats( GCStats *s )
{
    nat total_collections = 0;
    nat g;
    Time gc_cpu = 0;
    Time gc_elapsed = 0;
    Time current_elapsed = 0;
    Time current_cpu = 0;

    getProcessTimes(&current_cpu, &current_elapsed);

    /* EZY: static inline'ify these */
    for (g = 0; g < RtsFlags.GcFlags.generations; g++)
        total_collections += generations[g].collections;

    /* Sum per-generation GC times into a single total. */
    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
        gc_cpu     += GC_coll_cpu[g];
        gc_elapsed += GC_coll_elapsed[g];
    }

    /* Word counts are scaled to bytes via sizeof(W_). */
    s->bytes_allocated = GC_tot_alloc*(StgWord64)sizeof(W_);
    s->num_gcs = total_collections;
    s->num_byte_usage_samples = residency_samples;
    s->max_bytes_used = max_residency*sizeof(W_);
    s->cumulative_bytes_used = cumulative_residency*(StgWord64)sizeof(W_);
    s->peak_megabytes_allocated = (StgWord64)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L));
    s->bytes_copied = GC_tot_copied*(StgWord64)sizeof(W_);
    s->max_bytes_slop = max_slop*(StgWord64)sizeof(W_);
    s->current_bytes_used = current_residency*(StgWord64)sizeof(W_);
    s->current_bytes_slop = current_slop*(StgWord64)sizeof(W_);
    /*
    s->init_cpu_seconds = TimeToSecondsDbl(get_init_cpu());
    s->init_wall_seconds = TimeToSecondsDbl(get_init_elapsed());
    */
    /* Mutator time = everything since init that is not GC (and, when
       profiling, not retainer/heap-census profiling either). */
    s->mutator_cpu_seconds = TimeToSecondsDbl(current_cpu - end_init_cpu - gc_cpu - PROF_VAL(RP_tot_time + HC_tot_time));
    s->mutator_wall_seconds = TimeToSecondsDbl(current_elapsed - end_init_elapsed - gc_elapsed);
    s->gc_cpu_seconds = TimeToSecondsDbl(gc_cpu);
    s->gc_wall_seconds = TimeToSecondsDbl(gc_elapsed);
    /* EZY: Being consistent with incremental output, but maybe should also discount init */
    s->cpu_seconds = TimeToSecondsDbl(current_cpu);
    s->wall_seconds = TimeToSecondsDbl(current_elapsed - end_init_elapsed);
    s->par_tot_bytes_copied = GC_par_tot_copied*(StgWord64)sizeof(W_);
    s->par_max_bytes_copied = GC_par_max_copied*(StgWord64)sizeof(W_);
}
int main(int argc, char **argv) { if (argc != 4) { fputs("We need 3 arguments\n", stdout); return 1; } MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); MPI_Comm_size(MPI_COMM_WORLD, &mpi_world_size); rebuild_target = atoi(argv[1]); const char *store_dir = argv[2]; const char *data_file = argv[3]; int ntargets = mpi_world_size - 1; if (ntargets > MAX_STORAGE_TARGETS) return 1; if (rebuild_target < 0 || rebuild_target > ntargets) return 1; helper = 1; while (helper == rebuild_target) helper += 1; if (helper == rebuild_target) return 1; PROF_START(total); PROF_START(init); int last_run_fd = -1; RunData last_run; memset(&last_run, 0, sizeof(RunData)); if (mpi_rank == 0) { last_run_fd = open(data_file, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); read(last_run_fd, &last_run, sizeof(RunData)); } /* Create mapping from storage targets to ranks, and vice versa */ Target targetIDs[MAX_STORAGE_TARGETS] = {{0,0}}; Target targetID = {0,0}; if (mpi_rank != 0) { int store_fd = open(store_dir, O_DIRECTORY | O_RDONLY); int target_ID_fd = openat(store_fd, "targetNumID", O_RDONLY); char targetID_s[20] = {0}; read(target_ID_fd, targetID_s, sizeof(targetID_s)); close(target_ID_fd); close(store_fd); targetID.id = atoi(targetID_s); targetID.rank = mpi_rank; } MPI_Gather( &targetID, sizeof(Target), MPI_BYTE, targetIDs, sizeof(Target), MPI_BYTE, 0, MPI_COMM_WORLD); if (mpi_rank == 0) { if (last_run.ntargets != ntargets) { /* ERROR - new number of targets */ assert(0); } for (int i = 0; i < ntargets; i++) targetIDs[i] = targetIDs[i+1]; for (int i = 0; i < ntargets; i++) last_run.targetIDs[i].rank = -1; for (int i = 0; i < ntargets; i++) { Target target = targetIDs[i]; int j = 0; int found = 0; for (; j < ntargets; j++) if (last_run.targetIDs[j].id == target.id) { last_run.targetIDs[j] = target; found = 1; } if (!found) { /* ERROR - new target introduced */ printf(" > %d, %d\n", target.id, target.rank); assert(0); } } rank2st[0] = -1; for (int i = 0; i < ntargets; i++) { 
st2rank[i] = last_run.targetIDs[i].rank; rank2st[st2rank[i]] = i; } } MPI_Bcast(st2rank, sizeof(st2rank), MPI_BYTE, 0, MPI_COMM_WORLD); MPI_Bcast(rank2st, sizeof(rank2st), MPI_BYTE, 0, MPI_COMM_WORLD); PROF_END(init); if (mpi_rank == 0) printf("%d(rank=%d), %d(rank=%d)\n", rebuild_target, st2rank[rebuild_target], helper, st2rank[helper]); PROF_START(main_work); memset(&pr_sender, 0, sizeof(pr_sender)); if (mpi_rank != 0 && rank2st[mpi_rank] != rebuild_target) { PersistentDB *pdb = pdb_init(); pdb_iterate(pdb, do_file); pdb_term(pdb); if (rank2st[mpi_rank] == helper) { int dummy; MPI_Ssend((void*)&dummy, sizeof(dummy), MPI_BYTE, st2rank[rebuild_target], 0, MPI_COMM_WORLD); } pr_add_tmp_to_total(&pr_sample); pr_report_progress(&pr_sender, pr_sample); pr_report_done(&pr_sender); } else if (rank2st[mpi_rank] == rebuild_target) { int helper_rank = st2rank[helper]; MPI_Status stat; int count; FileInfo fi; MPI_Recv(&fi, sizeof(FileInfo), MPI_BYTE, helper_rank, 0, MPI_COMM_WORLD, &stat); MPI_Get_count(&stat, MPI_BYTE, &count); while (count == sizeof(FileInfo)) { char key[200]; MPI_Recv(key, sizeof(key), MPI_BYTE, helper_rank, 0, MPI_COMM_WORLD, &stat); int keylen; MPI_Get_count(&stat, MPI_BYTE, &keylen); key[keylen] = '\0'; do_file(key, keylen, &fi); MPI_Recv(&fi, sizeof(FileInfo), MPI_BYTE, helper_rank, 0, MPI_COMM_WORLD, &stat); MPI_Get_count(&stat, MPI_BYTE, &count); } pr_add_tmp_to_total(&pr_sample); pr_report_progress(&pr_sender, pr_sample); pr_report_done(&pr_sender); } else if (mpi_rank == 0) { printf("st - total files | data read | data written | disk I/O\n"); pr_receive_loop(ntargets-1); } PROF_END(main_work); PROF_END(total); if (mpi_rank == 0) { printf("Overall timings: \n"); printf("init | %9.2f ms\n", 1e3*PROF_VAL(init)); printf("main_work | %9.2f ms\n", 1e3*PROF_VAL(main_work)); printf("total | %9.2f ms\n", 1e3*PROF_VAL(total)); } MPI_Barrier(MPI_COMM_WORLD); char *iter = hs.corrupt; for (size_t i = 0; i < hs.corrupt_count; i++) { printf("Potentially corrupt 
chunk: '%s'\n", iter); iter += strlen(iter); } MPI_Finalize(); }
/*
 * mut_user_time_until: mutator CPU seconds accumulated up to tick count t,
 * i.e. t minus total GC time (and, under profiling, retainer-profile time).
 */
double mut_user_time_until( Ticks t )
{
    Ticks mutator_ticks = t - GC_tot_cpu - PROF_VAL(RP_tot_time);
    return TICK_TO_DBL(mutator_ticks);
}
/*
 * stat_exit: emit the end-of-run GC/runtime statistics report and free the
 * per-generation GC timing arrays. Depending on RtsFlags.GcFlags.giveStats
 * this prints nothing (NO_GC_STATS), a one-line summary (ONELINE_GC_STATS),
 * or the full summary / verbose report.
 */
void stat_exit (void)
{
    generation *gen;
    Time gc_cpu = 0;
    Time gc_elapsed = 0;
    Time init_cpu = 0;
    Time init_elapsed = 0;
    Time mut_cpu = 0;
    Time mut_elapsed = 0;
    Time exit_cpu = 0;
    Time exit_elapsed = 0;
    W_ tot_alloc;
    W_ alloc;

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {

        char temp[BIG_STRING_LEN];
        Time tot_cpu;
        Time tot_elapsed;
        nat i, g, total_collections = 0;

        getProcessTimes( &tot_cpu, &tot_elapsed );
        /* Measure elapsed time from the start of RTS initialisation. */
        tot_elapsed -= start_init_elapsed;

        tot_alloc = calcTotalAllocated();

        // allocated since the last GC
        alloc = tot_alloc - GC_tot_alloc;
        GC_tot_alloc = tot_alloc;

        /* Count total garbage collections */
        for (g = 0; g < RtsFlags.GcFlags.generations; g++)
            total_collections += generations[g].collections;

        /* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */
        if (tot_cpu == 0.0) tot_cpu = 1;
        if (tot_elapsed == 0.0) tot_elapsed = 1;

        /* Final verbose-mode line: allocation since the last GC, no times. */
        if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
            statsPrintf("%9" FMT_SizeT " %9.9s %9.9s", (W_)alloc*sizeof(W_), "", "");
            statsPrintf(" %6.3f %6.3f\n\n", 0.0, 0.0);
        }

        /* Total GC time over all generations. */
        for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
            gc_cpu += GC_coll_cpu[i];
            gc_elapsed += GC_coll_elapsed[i];
        }

        // heapCensus() is called by the GC, so RP and HC time are
        // included in the GC stats.  We therefore subtract them to
        // obtain the actual GC cpu time.
        gc_cpu -= PROF_VAL(RP_tot_time + HC_tot_time);
        gc_elapsed -= PROF_VAL(RPe_tot_time + HCe_tot_time);

        init_cpu = get_init_cpu();
        init_elapsed = get_init_elapsed();

        exit_cpu = end_exit_cpu - start_exit_cpu;
        exit_elapsed = end_exit_elapsed - start_exit_elapsed;

        /* Mutator time is what remains after init, GC and (when profiling)
           RP/HC time are taken out; clamped at zero below. */
        mut_elapsed = start_exit_elapsed - end_init_elapsed - gc_elapsed;

        mut_cpu = start_exit_cpu - end_init_cpu - gc_cpu
            - PROF_VAL(RP_tot_time + HC_tot_time);
        if (mut_cpu < 0) { mut_cpu = 0; }

        if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
            showStgWord64(GC_tot_alloc*sizeof(W_), temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes allocated in the heap\n", temp);

            showStgWord64(GC_tot_copied*sizeof(W_), temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes copied during GC\n", temp);

            if ( residency_samples > 0 ) {
                showStgWord64(max_residency*sizeof(W_), temp, rtsTrue/*commas*/);
                statsPrintf("%16s bytes maximum residency (%" FMT_Word " sample(s))\n",
                            temp, residency_samples);
            }

            showStgWord64(max_slop*sizeof(W_), temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes maximum slop\n", temp);

            statsPrintf("%16" FMT_SizeT " MB total memory in use (%" FMT_SizeT " MB lost due to fragmentation)\n\n",
                        (size_t)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
                        (size_t)(peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));

            /* Print garbage collections in each gen */
            statsPrintf("                                     Tot time (elapsed)  Avg pause  Max pause\n");
            for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                gen = &generations[g];
                statsPrintf("  Gen %2d     %5d colls, %5d par   %6.3fs  %6.3fs     %3.4fs    %3.4fs\n",
                            gen->no,
                            gen->collections,
                            gen->par_collections,
                            TimeToSecondsDbl(GC_coll_cpu[g]),
                            TimeToSecondsDbl(GC_coll_elapsed[g]),
                            gen->collections == 0 ? 0 : TimeToSecondsDbl(GC_coll_elapsed[g] / gen->collections),
                            TimeToSecondsDbl(GC_coll_max_pause[g]));
            }

#if defined(THREADED_RTS)
            /* Work balance: 100% means all capabilities copied equal amounts. */
            if (RtsFlags.ParFlags.parGcEnabled && n_capabilities > 1) {
                statsPrintf("\n  Parallel GC work balance: %.2f%% (serial 0%%, perfect 100%%)\n",
                            100 * (((double)GC_par_tot_copied / (double)GC_par_max_copied) - 1)
                                / (n_capabilities - 1)
                    );
            }
#endif
            statsPrintf("\n");

#if defined(THREADED_RTS)
            statsPrintf("  TASKS: %d (%d bound, %d peak workers (%d total), using -N%d)\n",
                        taskCount, taskCount - workerCount,
                        peakWorkerCount, workerCount,
                        n_capabilities);

            statsPrintf("\n");

            {
                /* Sum spark statistics over all capabilities. */
                nat i;
                SparkCounters sparks = { 0, 0, 0, 0, 0, 0};
                for (i = 0; i < n_capabilities; i++) {
                    sparks.created   += capabilities[i]->spark_stats.created;
                    sparks.dud       += capabilities[i]->spark_stats.dud;
                    sparks.overflowed+= capabilities[i]->spark_stats.overflowed;
                    sparks.converted += capabilities[i]->spark_stats.converted;
                    sparks.gcd       += capabilities[i]->spark_stats.gcd;
                    sparks.fizzled   += capabilities[i]->spark_stats.fizzled;
                }

                statsPrintf("  SPARKS: %" FMT_Word " (%" FMT_Word " converted, %" FMT_Word " overflowed, %" FMT_Word " dud, %" FMT_Word " GC'd, %" FMT_Word " fizzled)\n\n",
                            sparks.created + sparks.dud + sparks.overflowed,
                            sparks.converted, sparks.overflowed, sparks.dud,
                            sparks.gcd, sparks.fizzled);
            }
#endif

            statsPrintf("  INIT    time  %7.3fs  (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed));

            statsPrintf("  MUT     time  %7.3fs  (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed));
            statsPrintf("  GC      time  %7.3fs  (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));

#ifdef PROFILING
            statsPrintf("  RP      time  %7.3fs  (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(RP_tot_time), TimeToSecondsDbl(RPe_tot_time));
            statsPrintf("  PROF    time  %7.3fs  (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(HC_tot_time), TimeToSecondsDbl(HCe_tot_time));
#endif
            statsPrintf("  EXIT    time  %7.3fs  (%7.3fs elapsed)\n",
                        TimeToSecondsDbl(exit_cpu), TimeToSecondsDbl(exit_elapsed));
            statsPrintf("  Total   time  %7.3fs  (%7.3fs elapsed)\n\n",
                        TimeToSecondsDbl(tot_cpu), TimeToSecondsDbl(tot_elapsed));
#ifndef THREADED_RTS
            statsPrintf("  %%GC     time  %5.1f%%  (%.1f%% elapsed)\n\n",
                        TimeToSecondsDbl(gc_cpu)*100/TimeToSecondsDbl(tot_cpu),
                        TimeToSecondsDbl(gc_elapsed)*100/TimeToSecondsDbl(tot_elapsed));
#endif

            /* Alloc rate: bytes allocated per second of mutator CPU time
               (zero mutator time reports a rate of 0, avoiding div-by-zero). */
            if (mut_cpu == 0) {
                showStgWord64(0, temp, rtsTrue/*commas*/);
            } else {
                showStgWord64(
                    (StgWord64)((GC_tot_alloc*sizeof(W_)) / TimeToSecondsDbl(mut_cpu)),
                    temp, rtsTrue/*commas*/);
            }

            statsPrintf("  Alloc rate    %s bytes per MUT second\n\n", temp);

            /* Productivity: non-GC, non-profiling, non-init CPU time as a
               fraction of total CPU and total elapsed time. */
            statsPrintf("  Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n",
                        TimeToSecondsDbl(tot_cpu - gc_cpu - PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
                            / TimeToSecondsDbl(tot_cpu),
                        TimeToSecondsDbl(tot_cpu - gc_cpu - PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
                            / TimeToSecondsDbl(tot_elapsed));

            /*
            TICK_PRINT(1);
            TICK_PRINT(2);
            REPORT(TOTAL_CALLS);
            TICK_PRINT_TOT(1);
            TICK_PRINT_TOT(2);
            */

#if USE_PAPI
            papi_stats_report();
#endif
#if defined(THREADED_RTS) && defined(PROF_SPIN)
            {
                nat g;

                statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
                statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
                for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                    statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin);
                }
            }
#endif
        }

        if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
            char *fmt1, *fmt2;
            /* Machine-readable form is a Haskell-style association list;
               the human form is the classic "<<ghc: ... :ghc>>" line. */
            if (RtsFlags.MiscFlags.machineReadable) {
                fmt1 = " [(\"bytes allocated\", \"%llu\")\n";
                fmt2 = " ,(\"num_GCs\", \"%d\")\n"
                       " ,(\"average_bytes_used\", \"%ld\")\n"
                       " ,(\"max_bytes_used\", \"%ld\")\n"
                       " ,(\"num_byte_usage_samples\", \"%ld\")\n"
                       " ,(\"peak_megabytes_allocated\", \"%lu\")\n"
                       " ,(\"init_cpu_seconds\", \"%.3f\")\n"
                       " ,(\"init_wall_seconds\", \"%.3f\")\n"
                       " ,(\"mutator_cpu_seconds\", \"%.3f\")\n"
                       " ,(\"mutator_wall_seconds\", \"%.3f\")\n"
                       " ,(\"GC_cpu_seconds\", \"%.3f\")\n"
                       " ,(\"GC_wall_seconds\", \"%.3f\")\n"
                       " ]\n";
            }
            else {
                fmt1 = "<<ghc: %llu bytes, ";
                fmt2 = "%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.3f INIT (%.3f elapsed), %.3f MUT (%.3f elapsed), %.3f GC (%.3f elapsed) :ghc>>\n";
            }
            /* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
            statsPrintf(fmt1, GC_tot_alloc*(StgWord64)sizeof(W_));
            statsPrintf(fmt2,
                        total_collections,
                        residency_samples == 0 ? 0 :
                            cumulative_residency*sizeof(W_)/residency_samples,
                        max_residency*sizeof(W_),
                        residency_samples,
                        (unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
                        TimeToSecondsDbl(init_cpu), TimeToSecondsDbl(init_elapsed),
                        TimeToSecondsDbl(mut_cpu), TimeToSecondsDbl(mut_elapsed),
                        TimeToSecondsDbl(gc_cpu), TimeToSecondsDbl(gc_elapsed));
        }

        statsFlush();
        statsClose();
    }

    /* Release the per-generation timing arrays allocated during init. */
    if (GC_coll_cpu) {
        stgFree(GC_coll_cpu);
        GC_coll_cpu = NULL;
    }
    if (GC_coll_elapsed) {
        stgFree(GC_coll_elapsed);
        GC_coll_elapsed = NULL;
    }
    if (GC_coll_max_pause) {
        stgFree(GC_coll_max_pause);
        GC_coll_max_pause = NULL;
    }
}
/*
 * stat_exit (older variant): end-of-run statistics report, Ticks-based.
 * `alloc` is the number of words allocated since the last GC, which is
 * folded into GC_tot_alloc before reporting. Prints nothing, a one-line
 * summary, or the full report per RtsFlags.GcFlags.giveStats, then frees
 * the per-generation GC timing arrays.
 */
void stat_exit(int alloc)
{
    generation *gen;
    Ticks gc_cpu = 0;
    Ticks gc_elapsed = 0;
    Ticks init_cpu = 0;
    Ticks init_elapsed = 0;
    Ticks mut_cpu = 0;
    Ticks mut_elapsed = 0;
    Ticks exit_cpu = 0;
    Ticks exit_elapsed = 0;

    if (RtsFlags.GcFlags.giveStats != NO_GC_STATS) {

        char temp[BIG_STRING_LEN];
        Ticks tot_cpu;
        Ticks tot_elapsed;
        nat i, g, total_collections = 0;

        getProcessTimes( &tot_cpu, &tot_elapsed );
        /* Measure elapsed time from the start of RTS initialisation. */
        tot_elapsed -= start_init_elapsed;

        GC_tot_alloc += alloc;

        /* Count total garbage collections */
        for (g = 0; g < RtsFlags.GcFlags.generations; g++)
            total_collections += generations[g].collections;

        /* avoid divide by zero if tot_cpu is measured as 0.00 seconds -- SDM */
        if (tot_cpu == 0.0) tot_cpu = 1;
        if (tot_elapsed == 0.0) tot_elapsed = 1;

        /* Final verbose-mode line: allocation since the last GC, no times. */
        if (RtsFlags.GcFlags.giveStats >= VERBOSE_GC_STATS) {
            statsPrintf("%9ld %9.9s %9.9s", (lnat)alloc*sizeof(W_), "", "");
            statsPrintf(" %5.2f %5.2f\n\n", 0.0, 0.0);
        }

        /* Total GC time over all generations. */
        for (i = 0; i < RtsFlags.GcFlags.generations; i++) {
            gc_cpu += GC_coll_cpu[i];
            gc_elapsed += GC_coll_elapsed[i];
        }

        init_cpu = get_init_cpu();
        init_elapsed = get_init_elapsed();

        exit_cpu = end_exit_cpu - start_exit_cpu;
        exit_elapsed = end_exit_elapsed - start_exit_elapsed;

        /* Mutator time is what remains after init, GC and (when profiling)
           RP/HC time are taken out; clamped at zero below. */
        mut_elapsed = start_exit_elapsed - end_init_elapsed - gc_elapsed;

        mut_cpu = start_exit_cpu - end_init_cpu - gc_cpu
            - PROF_VAL(RP_tot_time + HC_tot_time);
        if (mut_cpu < 0) { mut_cpu = 0; }

        if (RtsFlags.GcFlags.giveStats >= SUMMARY_GC_STATS) {
            showStgWord64(GC_tot_alloc*sizeof(W_), temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes allocated in the heap\n", temp);

            showStgWord64(GC_tot_copied*sizeof(W_), temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes copied during GC\n", temp);

            if ( residency_samples > 0 ) {
                showStgWord64(max_residency*sizeof(W_), temp, rtsTrue/*commas*/);
                statsPrintf("%16s bytes maximum residency (%ld sample(s))\n",
                            temp, residency_samples);
            }

            showStgWord64(max_slop*sizeof(W_), temp, rtsTrue/*commas*/);
            statsPrintf("%16s bytes maximum slop\n", temp);

            statsPrintf("%16ld MB total memory in use (%ld MB lost due to fragmentation)\n\n",
                        peak_mblocks_allocated * MBLOCK_SIZE_W / (1024 * 1024 / sizeof(W_)),
                        (peak_mblocks_allocated * BLOCKS_PER_MBLOCK * BLOCK_SIZE_W - hw_alloc_blocks * BLOCK_SIZE_W) / (1024 * 1024 / sizeof(W_)));

            /* Print garbage collections in each gen */
            statsPrintf("                                    Tot time (elapsed)  Avg pause  Max pause\n");
            for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                gen = &generations[g];
                statsPrintf("  Gen %2d     %5d colls, %5d par   %5.2fs   %5.2fs     %3.4fs    %3.4fs\n",
                            gen->no,
                            gen->collections,
                            gen->par_collections,
                            TICK_TO_DBL(GC_coll_cpu[g]),
                            TICK_TO_DBL(GC_coll_elapsed[g]),
                            gen->collections == 0 ? 0 : TICK_TO_DBL(GC_coll_elapsed[g] / gen->collections),
                            TICK_TO_DBL(GC_coll_max_pause[g]));
            }

#if defined(THREADED_RTS)
            /* Ratio of average to maximum copied per capability; "ideal"
               is the configured number of capabilities. */
            if (RtsFlags.ParFlags.parGcEnabled) {
                statsPrintf("\n  Parallel GC work balance: %.2f (%ld / %ld, ideal %d)\n",
                            (double)GC_par_avg_copied / (double)GC_par_max_copied,
                            (lnat)GC_par_avg_copied, (lnat)GC_par_max_copied,
                            RtsFlags.ParFlags.nNodes
                    );
            }
#endif
            statsPrintf("\n");

#if defined(THREADED_RTS)
            {
                /* Per-task mutator/GC time breakdown (worker vs. bound). */
                nat i;
                Task *task;
                statsPrintf("                        MUT time (elapsed)       GC time  (elapsed)\n");
                for (i = 0, task = all_tasks;
                     task != NULL;
                     i++, task = task->all_link) {
                    statsPrintf("  Task %2d %-8s :  %6.2fs    (%6.2fs)     %6.2fs    (%6.2fs)\n",
                                i,
                                (task->worker) ? "(worker)" : "(bound)",
                                TICK_TO_DBL(task->mut_time),
                                TICK_TO_DBL(task->mut_etime),
                                TICK_TO_DBL(task->gc_time),
                                TICK_TO_DBL(task->gc_etime));
                }
            }

            statsPrintf("\n");

            {
                /* Sum spark statistics over all capabilities. */
                nat i;
                SparkCounters sparks = { 0, 0, 0, 0, 0, 0};
                for (i = 0; i < n_capabilities; i++) {
                    sparks.created   += capabilities[i].spark_stats.created;
                    sparks.dud       += capabilities[i].spark_stats.dud;
                    sparks.overflowed+= capabilities[i].spark_stats.overflowed;
                    sparks.converted += capabilities[i].spark_stats.converted;
                    sparks.gcd       += capabilities[i].spark_stats.gcd;
                    sparks.fizzled   += capabilities[i].spark_stats.fizzled;
                }

                statsPrintf("  SPARKS: %ld (%ld converted, %ld overflowed, %ld dud, %ld GC'd, %ld fizzled)\n\n",
                            sparks.created + sparks.dud + sparks.overflowed,
                            sparks.converted, sparks.overflowed, sparks.dud,
                            sparks.gcd, sparks.fizzled);
            }
#endif

            statsPrintf("  INIT    time  %6.2fs  (%6.2fs elapsed)\n",
                        TICK_TO_DBL(init_cpu), TICK_TO_DBL(init_elapsed));

            statsPrintf("  MUT     time  %6.2fs  (%6.2fs elapsed)\n",
                        TICK_TO_DBL(mut_cpu), TICK_TO_DBL(mut_elapsed));
            statsPrintf("  GC      time  %6.2fs  (%6.2fs elapsed)\n",
                        TICK_TO_DBL(gc_cpu), TICK_TO_DBL(gc_elapsed));

#ifdef PROFILING
            statsPrintf("  RP      time  %6.2fs  (%6.2fs elapsed)\n",
                        TICK_TO_DBL(RP_tot_time), TICK_TO_DBL(RPe_tot_time));
            statsPrintf("  PROF    time  %6.2fs  (%6.2fs elapsed)\n",
                        TICK_TO_DBL(HC_tot_time), TICK_TO_DBL(HCe_tot_time));
#endif
            statsPrintf("  EXIT    time  %6.2fs  (%6.2fs elapsed)\n",
                        TICK_TO_DBL(exit_cpu), TICK_TO_DBL(exit_elapsed));
            statsPrintf("  Total   time  %6.2fs  (%6.2fs elapsed)\n\n",
                        TICK_TO_DBL(tot_cpu), TICK_TO_DBL(tot_elapsed));
#ifndef THREADED_RTS
            statsPrintf("  %%GC     time  %5.1f%%  (%.1f%% elapsed)\n\n",
                        TICK_TO_DBL(gc_cpu)*100/TICK_TO_DBL(tot_cpu),
                        TICK_TO_DBL(gc_elapsed)*100/TICK_TO_DBL(tot_elapsed));
#endif

            /* Alloc rate: bytes per second of mutator CPU (here derived
               from tot_cpu minus GC and profiling time, guarded against
               division by zero). */
            if (tot_cpu - GC_tot_cpu - PROF_VAL(RP_tot_time + HC_tot_time) == 0)
                showStgWord64(0, temp, rtsTrue/*commas*/);
            else
                showStgWord64(
                    (StgWord64)((GC_tot_alloc*sizeof(W_))/
                                TICK_TO_DBL(tot_cpu - GC_tot_cpu -
                                            PROF_VAL(RP_tot_time + HC_tot_time))),
                    temp, rtsTrue/*commas*/);

            statsPrintf("  Alloc rate    %s bytes per MUT second\n\n", temp);

            /* Productivity: non-GC, non-profiling, non-init CPU time as a
               fraction of total CPU and total elapsed time. */
            statsPrintf("  Productivity %5.1f%% of total user, %.1f%% of total elapsed\n\n",
                        TICK_TO_DBL(tot_cpu - GC_tot_cpu -
                                    PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
                            / TICK_TO_DBL(tot_cpu),
                        TICK_TO_DBL(tot_cpu - GC_tot_cpu -
                                    PROF_VAL(RP_tot_time + HC_tot_time) - init_cpu) * 100
                            / TICK_TO_DBL(tot_elapsed));

            /*
            TICK_PRINT(1);
            TICK_PRINT(2);
            REPORT(TOTAL_CALLS);
            TICK_PRINT_TOT(1);
            TICK_PRINT_TOT(2);
            */

#if USE_PAPI
            papi_stats_report();
#endif
#if defined(THREADED_RTS) && defined(PROF_SPIN)
            {
                nat g;

                statsPrintf("gc_alloc_block_sync: %"FMT_Word64"\n", gc_alloc_block_sync.spin);
                statsPrintf("whitehole_spin: %"FMT_Word64"\n", whitehole_spin);
                for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
                    statsPrintf("gen[%d].sync: %"FMT_Word64"\n", g, generations[g].sync.spin);
                }
            }
#endif
        }

        if (RtsFlags.GcFlags.giveStats == ONELINE_GC_STATS) {
            char *fmt1, *fmt2;
            /* Machine-readable form is a Haskell-style association list;
               the human form is the classic "<<ghc: ... :ghc>>" line. */
            if (RtsFlags.MiscFlags.machineReadable) {
                fmt1 = " [(\"bytes allocated\", \"%llu\")\n";
                fmt2 = " ,(\"num_GCs\", \"%d\")\n"
                       " ,(\"average_bytes_used\", \"%ld\")\n"
                       " ,(\"max_bytes_used\", \"%ld\")\n"
                       " ,(\"num_byte_usage_samples\", \"%ld\")\n"
                       " ,(\"peak_megabytes_allocated\", \"%lu\")\n"
                       " ,(\"init_cpu_seconds\", \"%.2f\")\n"
                       " ,(\"init_wall_seconds\", \"%.2f\")\n"
                       " ,(\"mutator_cpu_seconds\", \"%.2f\")\n"
                       " ,(\"mutator_wall_seconds\", \"%.2f\")\n"
                       " ,(\"GC_cpu_seconds\", \"%.2f\")\n"
                       " ,(\"GC_wall_seconds\", \"%.2f\")\n"
                       " ]\n";
            }
            else {
                fmt1 = "<<ghc: %llu bytes, ";
                fmt2 = "%d GCs, %ld/%ld avg/max bytes residency (%ld samples), %luM in use, %.2f INIT (%.2f elapsed), %.2f MUT (%.2f elapsed), %.2f GC (%.2f elapsed) :ghc>>\n";
            }
            /* print the long long separately to avoid bugginess on mingwin (2001-07-02, mingw-0.5) */
            statsPrintf(fmt1, GC_tot_alloc*(StgWord64)sizeof(W_));
            statsPrintf(fmt2,
                        total_collections,
                        residency_samples == 0 ? 0 :
                            cumulative_residency*sizeof(W_)/residency_samples,
                        max_residency*sizeof(W_),
                        residency_samples,
                        (unsigned long)(peak_mblocks_allocated * MBLOCK_SIZE / (1024L * 1024L)),
                        TICK_TO_DBL(init_cpu), TICK_TO_DBL(init_elapsed),
                        TICK_TO_DBL(mut_cpu), TICK_TO_DBL(mut_elapsed),
                        TICK_TO_DBL(gc_cpu), TICK_TO_DBL(gc_elapsed));
        }

        statsFlush();
        statsClose();
    }

    /* Release the per-generation timing arrays allocated during init. */
    if (GC_coll_cpu) {
        stgFree(GC_coll_cpu);
        GC_coll_cpu = NULL;
    }
    if (GC_coll_elapsed) {
        stgFree(GC_coll_elapsed);
        GC_coll_elapsed = NULL;
    }
    if (GC_coll_max_pause) {
        stgFree(GC_coll_max_pause);
        GC_coll_max_pause = NULL;
    }
}