/*@C
   PetscMemoryShowUsage - Shows the amount of memory currently being used in a communicator.

   Collective on PetscViewer

   Input Parameters:
+  viewer - the viewer that defines the communicator
-  message - string printed before values

   Level: intermediate

   Concepts: memory usage

.seealso: PetscMallocDump(), PetscMemoryGetCurrentUsage()
@*/
PetscErrorCode PetscMemoryShowUsage(PetscViewer viewer,const char message[])
{
  PetscLogDouble allocated,maximum,resident,residentmax;
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (!viewer) viewer = PETSC_VIEWER_STDOUT_WORLD;
  ierr = PetscMallocGetCurrentUsage(&allocated);CHKERRQ(ierr);
  ierr = PetscMallocGetMaximumUsage(&maximum);CHKERRQ(ierr);
  ierr = PetscMemoryGetCurrentUsage(&resident);CHKERRQ(ierr);
  ierr = PetscMemoryGetMaximumUsage(&residentmax);CHKERRQ(ierr);
  if (residentmax > 0) residentmax = PetscMax(resident,residentmax);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* print with an explicit "%s" so a user-supplied message is never treated as a format string */
  ierr = PetscViewerASCIIPrintf(viewer,"%s",message);CHKERRQ(ierr);
  ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
  if (resident && residentmax && allocated) {
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Current space PetscMalloc()ed %g, max space PetscMalloc()ed %g\n[%d]Current process memory %g max process memory %g\n",rank,allocated,maximum,rank,resident,residentmax);CHKERRQ(ierr);
  } else if (resident && residentmax) {
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Run with -malloc to get statistics on PetscMalloc() calls\n[%d]Current process memory %g max process memory %g\n",rank,rank,resident,residentmax);CHKERRQ(ierr);
  } else if (resident && allocated) {
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Current space PetscMalloc()ed %g, max space PetscMalloc()ed %g\n[%d]Current process memory %g, run with -memory_info to get max memory usage\n",rank,allocated,maximum,rank,resident);CHKERRQ(ierr);
  } else if (allocated) {
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Current space PetscMalloc()ed %g, max space PetscMalloc()ed %g\n[%d]OS cannot compute process memory\n",rank,allocated,maximum,rank);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerASCIIPrintf(viewer,"Run with -malloc to get statistics on PetscMalloc() calls\nOS cannot compute process memory\n");CHKERRQ(ierr);
  }
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
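/*
   Example usage (a minimal sketch; the program skeleton and the message
   string are illustrative, not part of this routine). Run with -malloc so
   the PetscMalloc() statistics are collected; every rank in the viewer's
   communicator must make the call, since the output is synchronized.

     #include <petscsys.h>

     int main(int argc,char **argv)
     {
       PetscErrorCode ierr;

       ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
       ... allocate application data here ...
       ierr = PetscMemoryShowUsage(PETSC_VIEWER_STDOUT_WORLD,"Memory after setup:\n");CHKERRQ(ierr);
       ierr = PetscFinalize();
       return ierr;
     }
*/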
PetscErrorCode PetscLogEventEndComplete(PetscLogEvent event, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
{
  PetscStageLog     stageLog;
  PetscEventRegLog  eventRegLog;
  PetscEventPerfLog eventPerfLog = NULL;
  Action            *tmpAction;
  PetscLogDouble    start, end;
  PetscLogDouble    curTime;
  int               stage;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  /* Dynamically enlarge logging structures */
  if (petsc_numActions >= petsc_maxActions) {
    PetscTime(&start);
    ierr = PetscMalloc(petsc_maxActions*2 * sizeof(Action), &tmpAction);CHKERRQ(ierr);
    ierr = PetscMemcpy(tmpAction, petsc_actions, petsc_maxActions * sizeof(Action));CHKERRQ(ierr);
    ierr = PetscFree(petsc_actions);CHKERRQ(ierr);
    petsc_actions     = tmpAction;
    petsc_maxActions *= 2;
    PetscTime(&end);
    petsc_BaseTime += (end - start);
  }
  /* Record the event */
  ierr = PetscLogGetStageLog(&stageLog);CHKERRQ(ierr);
  ierr = PetscStageLogGetCurrent(stageLog, &stage);CHKERRQ(ierr);
  ierr = PetscStageLogGetEventRegLog(stageLog, &eventRegLog);CHKERRQ(ierr);
  ierr = PetscStageLogGetEventPerfLog(stageLog, stage, &eventPerfLog);CHKERRQ(ierr);
  PetscTime(&curTime);
  if (petsc_logActions) {
    petsc_actions[petsc_numActions].time    = curTime - petsc_BaseTime;
    petsc_actions[petsc_numActions].action  = ACTIONEND;
    petsc_actions[petsc_numActions].event   = event;
    petsc_actions[petsc_numActions].classid = eventRegLog->eventInfo[event].classid;
    if (o1) petsc_actions[petsc_numActions].id1 = o1->id;
    else    petsc_actions[petsc_numActions].id1 = -1;
    if (o2) petsc_actions[petsc_numActions].id2 = o2->id;
    else    petsc_actions[petsc_numActions].id2 = -1;
    if (o3) petsc_actions[petsc_numActions].id3 = o3->id;
    else    petsc_actions[petsc_numActions].id3 = -1;
    petsc_actions[petsc_numActions].flops = petsc_TotalFlops;
    ierr = PetscMallocGetCurrentUsage(&petsc_actions[petsc_numActions].mem);CHKERRQ(ierr);
    ierr = PetscMallocGetMaximumUsage(&petsc_actions[petsc_numActions].maxmem);CHKERRQ(ierr);
    petsc_numActions++;
  }
  /* Check for double counting */
  eventPerfLog->eventInfo[event].depth--;
  if (eventPerfLog->eventInfo[event].depth > 0) PetscFunctionReturn(0);
  else if (eventPerfLog->eventInfo[event].depth < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Logging event had unbalanced begin/end pairs");
  /* Log the performance info */
  eventPerfLog->eventInfo[event].count++;
  eventPerfLog->eventInfo[event].time          += curTime;
  eventPerfLog->eventInfo[event].flops         += petsc_TotalFlops;
  eventPerfLog->eventInfo[event].numMessages   += petsc_irecv_ct + petsc_isend_ct + petsc_recv_ct + petsc_send_ct;
  eventPerfLog->eventInfo[event].messageLength += petsc_irecv_len + petsc_isend_len + petsc_recv_len + petsc_send_len;
  eventPerfLog->eventInfo[event].numReductions += petsc_allreduce_ct + petsc_gather_ct + petsc_scatter_ct;
  PetscFunctionReturn(0);
}
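/*
   Example usage (a minimal sketch of the begin/end pairing that feeds this
   logger; the class and event names are illustrative). Calls must be
   strictly nested per event, or the depth check above fails with
   "unbalanced begin/end pairs".

     PetscClassId  classid;
     PetscLogEvent USER_EVENT;

     ierr = PetscClassIdRegister("MyApp",&classid);CHKERRQ(ierr);
     ierr = PetscLogEventRegister("MyKernel",classid,&USER_EVENT);CHKERRQ(ierr);
     ierr = PetscLogEventBegin(USER_EVENT,0,0,0,0);CHKERRQ(ierr);
     ... timed work; PetscLogFlops() contributions accumulate here ...
     ierr = PetscLogEventEnd(USER_EVENT,0,0,0,0);CHKERRQ(ierr);
*/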
/* Legacy variant of PetscLogEventEndComplete() using the older logging API
   (StageLog/EventRegLog types, cookies instead of classids, unprefixed
   global counters, and the two-argument SETERRQ()). */
PetscErrorCode PetscLogEventEndComplete(PetscLogEvent event, int t, PetscObject o1, PetscObject o2, PetscObject o3, PetscObject o4)
{
  StageLog       stageLog;
  EventRegLog    eventRegLog;
  EventPerfLog   eventPerfLog;
  Action         *tmpAction;
  PetscLogDouble start, end;
  PetscLogDouble curTime;
  int            stage;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dynamically enlarge logging structures */
  if (numActions >= maxActions) {
    PetscTime(start);
    ierr = PetscMalloc(maxActions*2 * sizeof(Action), &tmpAction);CHKERRQ(ierr);
    ierr = PetscMemcpy(tmpAction, actions, maxActions * sizeof(Action));CHKERRQ(ierr);
    ierr = PetscFree(actions);CHKERRQ(ierr);
    actions     = tmpAction;
    maxActions *= 2;
    PetscTime(end);
    BaseTime += (end - start);
  }
  /* Record the event */
  ierr = PetscLogGetStageLog(&stageLog);CHKERRQ(ierr);
  ierr = StageLogGetCurrent(stageLog, &stage);CHKERRQ(ierr);
  ierr = StageLogGetEventRegLog(stageLog, &eventRegLog);CHKERRQ(ierr);
  ierr = StageLogGetEventPerfLog(stageLog, stage, &eventPerfLog);CHKERRQ(ierr);
  PetscTime(curTime);
  if (logActions) {
    actions[numActions].time   = curTime - BaseTime;
    actions[numActions].action = ACTIONEND;
    actions[numActions].event  = event;
    actions[numActions].cookie = eventRegLog->eventInfo[event].cookie;
    if (o1) actions[numActions].id1 = o1->id;
    else    actions[numActions].id1 = -1;
    if (o2) actions[numActions].id2 = o2->id;
    else    actions[numActions].id2 = -1;
    if (o3) actions[numActions].id3 = o3->id;
    else    actions[numActions].id3 = -1;
    actions[numActions].flops = _TotalFlops;
    ierr = PetscMallocGetCurrentUsage(&actions[numActions].mem);CHKERRQ(ierr);
    ierr = PetscMallocGetMaximumUsage(&actions[numActions].maxmem);CHKERRQ(ierr);
    numActions++;
  }
  /* Check for double counting */
  eventPerfLog->eventInfo[event].depth--;
  if (eventPerfLog->eventInfo[event].depth > 0) {
    PetscFunctionReturn(0);
  } else if (eventPerfLog->eventInfo[event].depth < 0) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Logging event had unbalanced begin/end pairs");
  }
  /* Log the performance info */
  eventPerfLog->eventInfo[event].count++;
  eventPerfLog->eventInfo[event].time          += curTime;
  eventPerfLog->eventInfo[event].flops         += _TotalFlops;
  eventPerfLog->eventInfo[event].numMessages   += irecv_ct + isend_ct + recv_ct + send_ct;
  eventPerfLog->eventInfo[event].messageLength += irecv_len + isend_len + recv_len + send_len;
  eventPerfLog->eventInfo[event].numReductions += allreduce_ct;
  PetscFunctionReturn(0);
}
PetscErrorCode PetscLogPrintSummaryToPy(MPI_Comm comm, PetscViewer viewer)
{
  PetscViewer_ASCII *ascii = (PetscViewer_ASCII*)viewer->data;
  FILE              *fd    = ascii->fd;
  PetscLogDouble    zero   = 0.0;
  StageLog          stageLog;
  StageInfo         *stageInfo = PETSC_NULL;
  EventPerfInfo     *eventInfo = PETSC_NULL;
  ClassPerfInfo     *classInfo;
  const char        *name;
  PetscLogDouble    locTotalTime, TotalTime, TotalFlops;
  PetscLogDouble    numMessages, messageLength, avgMessLen, numReductions;
  PetscLogDouble    stageTime, flops, mem, mess, messLen, red;
  PetscLogDouble    fracTime, fracFlops, fracMessages, fracLength;
  PetscLogDouble    fracReductions;
  PetscLogDouble    tot, avg, x, y, *mydata;
  PetscMPIInt       minCt, maxCt;
  PetscMPIInt       size, rank, *mycount;
  PetscTruth        *localStageUsed, *stageUsed;
  PetscTruth        *localStageVisible, *stageVisible;
  int               numStages, localNumEvents, numEvents;
  int               stage, lastStage;
  PetscLogEvent     event;
  PetscErrorCode    ierr;
  PetscInt          i;
  /* remove these two lines! */
  PetscLogDouble PETSC_DLLEXPORT BaseTime = 0.0;
  int numObjects = 0;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscLogDouble), &mydata);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt), &mycount);CHKERRQ(ierr);

  /* Pop off any stages the user forgot to remove */
  lastStage = 0;
  ierr = PetscLogGetStageLog(&stageLog);CHKERRQ(ierr);
  ierr = StageLogGetCurrent(stageLog, &stage);CHKERRQ(ierr);
  while (stage >= 0) {
    lastStage = stage;
    ierr = StageLogPop(stageLog);CHKERRQ(ierr);
    ierr = StageLogGetCurrent(stageLog, &stage);CHKERRQ(ierr);
  }
  /* Get the total elapsed time */
  PetscTime(locTotalTime);
  locTotalTime -= BaseTime;

  ierr = PetscFPrintf(comm, fd, "\n#------ PETSc Performance Summary ----------\n\n");CHKERRQ(ierr);
  ierr = PetscFPrintf(comm, fd, "Nproc = %d\n",size);CHKERRQ(ierr);

  /* Must preserve reduction count before we go on */
  red = (allreduce_ct + gather_ct + scatter_ct)/((PetscLogDouble) size);

  /* Calculate summary information */

  /* Time */
  ierr = MPI_Gather(&locTotalTime,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscFPrintf(comm, fd, "Time = [ ");CHKERRQ(ierr);
    tot  = 0.0;
    for (i=0; i<size; i++) {
      tot += mydata[i];
      ierr = PetscFPrintf(comm, fd, " %5.3e,",mydata[i]);CHKERRQ(ierr);
    }
    ierr = PetscFPrintf(comm, fd, "]\n");CHKERRQ(ierr);
    avg       = tot/((PetscLogDouble) size);
    TotalTime = tot;
  }
  /* Objects */
  avg  = (PetscLogDouble) numObjects;
  ierr = MPI_Gather(&avg,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscFPrintf(comm, fd, "Objects = [ ");CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      ierr = PetscFPrintf(comm, fd, " %5.3e,",mydata[i]);CHKERRQ(ierr);
    }
    ierr = PetscFPrintf(comm, fd, "]\n");CHKERRQ(ierr);
  }
  /* Flops */
  ierr = MPI_Gather(&_TotalFlops,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscFPrintf(comm, fd, "Flops = [ ");CHKERRQ(ierr);
    tot  = 0.0;
    for (i=0; i<size; i++) {
      tot += mydata[i];
      ierr = PetscFPrintf(comm, fd, " %5.3e,",mydata[i]);CHKERRQ(ierr);
    }
    ierr = PetscFPrintf(comm, fd, "]\n");CHKERRQ(ierr);
    TotalFlops = tot;
  }
  /* Memory */
  ierr = PetscMallocGetMaximumUsage(&mem);CHKERRQ(ierr);
  ierr = MPI_Gather(&mem,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscFPrintf(comm, fd, "Memory = [ ");CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      ierr = PetscFPrintf(comm, fd, " %5.3e,",mydata[i]);CHKERRQ(ierr);
    }
    ierr = PetscFPrintf(comm, fd, "]\n");CHKERRQ(ierr);
  }
  /* Messages */
  mess = 0.5*(irecv_ct + isend_ct + recv_ct + send_ct);
  ierr = MPI_Gather(&mess,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscFPrintf(comm, fd, "MPIMessages = [ ");CHKERRQ(ierr);
    tot  = 0.0;
    for (i=0; i<size; i++) {
      tot += mydata[i];
      ierr = PetscFPrintf(comm, fd, " %5.3e,",mydata[i]);CHKERRQ(ierr);
    }
    ierr = PetscFPrintf(comm, fd, "]\n");CHKERRQ(ierr);
    numMessages = tot;
  }
  /* Message Lengths */
  mess = 0.5*(irecv_len + isend_len + recv_len + send_len);
  ierr = MPI_Gather(&mess,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscFPrintf(comm, fd, "MPIMessageLengths = [ ");CHKERRQ(ierr);
    tot  = 0.0;
    for (i=0; i<size; i++) {
      tot += mydata[i];
      ierr = PetscFPrintf(comm, fd, " %5.3e,",mydata[i]);CHKERRQ(ierr);
    }
    ierr = PetscFPrintf(comm, fd, "]\n");CHKERRQ(ierr);
    messageLength = tot;
  }
  /* Reductions */
  ierr = MPI_Gather(&red,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscFPrintf(comm, fd, "MPIReductions = [ ");CHKERRQ(ierr);
    tot  = 0.0;
    for (i=0; i<size; i++) {
      tot += mydata[i];
      ierr = PetscFPrintf(comm, fd, " %5.3e,",mydata[i]);CHKERRQ(ierr);
    }
    ierr = PetscFPrintf(comm, fd, "]\n");CHKERRQ(ierr);
    numReductions = tot;
  }
  /* Get total number of stages --
     Currently, a single processor can register more stages than another, but stages must all be
     registered in order. We can remove this requirement if necessary by having a global stage
     numbering and indirection on the stage ID. This seems best accomplished by associating a
     communicator with each stage. */
  ierr = MPI_Allreduce(&stageLog->numStages, &numStages, 1, MPI_INT, MPI_MAX, comm);CHKERRQ(ierr);
  ierr = PetscMalloc(numStages * sizeof(PetscTruth), &localStageUsed);CHKERRQ(ierr);
  ierr = PetscMalloc(numStages * sizeof(PetscTruth), &stageUsed);CHKERRQ(ierr);
  ierr = PetscMalloc(numStages * sizeof(PetscTruth), &localStageVisible);CHKERRQ(ierr);
  ierr = PetscMalloc(numStages * sizeof(PetscTruth), &stageVisible);CHKERRQ(ierr);
  if (numStages > 0) {
    stageInfo = stageLog->stageInfo;
    for (stage = 0; stage < numStages; stage++) {
      if (stage < stageLog->numStages) {
        localStageUsed[stage]    = stageInfo[stage].used;
        localStageVisible[stage] = stageInfo[stage].perfInfo.visible;
      } else {
        localStageUsed[stage]    = PETSC_FALSE;
        localStageVisible[stage] = PETSC_TRUE;
      }
    }
    ierr = MPI_Allreduce(localStageUsed, stageUsed, numStages, MPI_INT, MPI_LOR, comm);CHKERRQ(ierr);
    ierr = MPI_Allreduce(localStageVisible, stageVisible, numStages, MPI_INT, MPI_LAND, comm);CHKERRQ(ierr);
    for (stage = 0; stage < numStages; stage++) {
      if (stageUsed[stage]) {
        ierr = PetscFPrintf(comm, fd, "\n#Summary of Stages: ----- Time ------ ----- Flops ----- --- Messages --- -- Message Lengths -- -- Reductions --\n");CHKERRQ(ierr);
        ierr = PetscFPrintf(comm, fd, "#                     Avg    %%Total     Avg    %%Total   counts   %%Total     Avg    %%Total   counts   %%Total\n");CHKERRQ(ierr);
        break;
      }
    }
    for (stage = 0; stage < numStages; stage++) {
      if (!stageUsed[stage]) continue;
      if (localStageUsed[stage]) {
        ierr = MPI_Allreduce(&stageInfo[stage].perfInfo.time,          &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);CHKERRQ(ierr);
        ierr = MPI_Allreduce(&stageInfo[stage].perfInfo.flops,         &flops,     1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);CHKERRQ(ierr);
        ierr = MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages,   &mess,      1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);CHKERRQ(ierr);
        ierr = MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);CHKERRQ(ierr);
        ierr = MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red,       1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);CHKERRQ(ierr);
        name = stageInfo[stage].name;
      } else {
        ierr = MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);CHKERRQ(ierr);
        ierr = MPI_Allreduce(&zero, &flops,     1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);CHKERRQ(ierr);
        ierr = MPI_Allreduce(&zero, &mess,      1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);CHKERRQ(ierr);
        ierr = MPI_Allreduce(&zero, &messLen,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);CHKERRQ(ierr);
        ierr = MPI_Allreduce(&zero, &red,       1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);CHKERRQ(ierr);
        name = "";
      }
      mess    *= 0.5;
      messLen *= 0.5;
      red     /= size;
      if (TotalTime  != 0.0) fracTime  = stageTime/TotalTime; else fracTime  = 0.0;
      if (TotalFlops != 0.0) fracFlops = flops/TotalFlops;    else fracFlops = 0.0;
      /* Talk to Barry
      if (stageTime != 0.0) flops = (size*flops)/stageTime; else flops = 0.0;
      */
      if (numMessages   != 0.0) fracMessages   = mess/numMessages;      else fracMessages   = 0.0;
      if (numMessages   != 0.0) avgMessLen     = messLen/numMessages;   else avgMessLen     = 0.0;
      if (messageLength != 0.0) fracLength     = messLen/messageLength; else fracLength     = 0.0;
      if (numReductions != 0.0) fracReductions = red/numReductions;     else fracReductions = 0.0;
      ierr = PetscFPrintf(comm, fd, "# ");CHKERRQ(ierr);
      ierr = PetscFPrintf(comm, fd, "%2d: %15s: %6.4e %5.1f%% %6.4e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% %5.3e %5.1f%% \n",
                          stage, name, stageTime/size, 100.0*fracTime, flops, 100.0*fracFlops,
                          mess, 100.0*fracMessages, avgMessLen, 100.0*fracLength, red, 100.0*fracReductions);CHKERRQ(ierr);
    }
  }

  /* Report events */
  ierr = PetscFPrintf(comm, fd, "\n# Event\n");CHKERRQ(ierr);
  ierr = PetscFPrintf(comm, fd, "# ------------------------------------------------------\n");CHKERRQ(ierr);
  /* Problem: The stage name will not show up unless the stage executed on proc 1 */
  for (stage = 0; stage < numStages; stage++) {
    if (!stageVisible[stage]) continue;
    if (localStageUsed[stage]) {
      ierr = MPI_Allreduce(&stageInfo[stage].perfInfo.time,          &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&stageInfo[stage].perfInfo.flops,         &flops,     1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&stageInfo[stage].perfInfo.numMessages,   &mess,      1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&stageInfo[stage].perfInfo.messageLength, &messLen,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&stageInfo[stage].perfInfo.numReductions, &red,       1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);CHKERRQ(ierr);
    } else {
      ierr = PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&zero, &stageTime, 1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&zero, &flops,     1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&zero, &mess,      1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&zero, &messLen,   1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&zero, &red,       1, MPIU_PETSCLOGDOUBLE, MPI_SUM, comm);CHKERRQ(ierr);
    }
    mess    *= 0.5;
    messLen *= 0.5;
    red     /= size;
    /* Get total number of events in this stage --
       Currently, a single processor can register more events than another, but events must all be
       registered in order, just like stages. We can remove this requirement if necessary by having
       a global event numbering and indirection on the event ID. This seems best accomplished by
       associating a communicator with each stage.

       Problem: If the event did not happen on proc 1, its name will not be available.
       Problem: Event visibility is not implemented */
    if (!rank) {
      ierr = PetscFPrintf(comm, fd, "class Dummy(object):\n");CHKERRQ(ierr);
      ierr = PetscFPrintf(comm, fd, "    def foo(x):\n");CHKERRQ(ierr);
      ierr = PetscFPrintf(comm, fd, "        print x\n");CHKERRQ(ierr);
      ierr = PetscFPrintf(comm, fd, "Event = {}\n");CHKERRQ(ierr);
    }
    if (localStageUsed[stage]) {
      eventInfo      = stageLog->stageInfo[stage].eventLog->eventInfo;
      localNumEvents = stageLog->stageInfo[stage].eventLog->numEvents;
    } else {
      localNumEvents = 0;
    }
    ierr = MPI_Allreduce(&localNumEvents, &numEvents, 1, MPI_INT, MPI_MAX, comm);CHKERRQ(ierr);
    for (event = 0; event < numEvents; event++) {
      if (localStageUsed[stage] && (event < stageLog->stageInfo[stage].eventLog->numEvents) && (eventInfo[event].depth == 0)) {
        ierr = MPI_Allreduce(&eventInfo[event].count, &maxCt, 1, MPI_INT, MPI_MAX, comm);CHKERRQ(ierr);
        name = stageLog->eventLog->eventInfo[event].name;
      } else {
        /* this rank has no such event; contribute a zero count to the max-reduction */
        PetscMPIInt zeroCt = 0;
        ierr = MPI_Allreduce(&zeroCt, &maxCt, 1, MPI_INT, MPI_MAX, comm);CHKERRQ(ierr);
        name = "";
      }
      if (maxCt != 0) {
        ierr = PetscFPrintf(comm, fd, "#\n");CHKERRQ(ierr);
        if (!rank) {
          ierr = PetscFPrintf(comm, fd, "%s = Dummy()\n",name);CHKERRQ(ierr);
          ierr = PetscFPrintf(comm, fd, "Event['%s'] = %s\n",name,name);CHKERRQ(ierr);
        }
        /* Count */
        ierr = MPI_Gather(&eventInfo[event].count,1,MPI_INT,mycount,1,MPI_INT,0,comm);CHKERRQ(ierr);
        ierr = PetscFPrintf(comm, fd, "%s.Count = [ ", name);CHKERRQ(ierr);
        for (i=0; i<size; i++) {
          ierr = PetscFPrintf(comm, fd, " %7d,",mycount[i]);CHKERRQ(ierr);
        }
        ierr = PetscFPrintf(comm, fd, "]\n");CHKERRQ(ierr);
        /* Time */
        ierr = MPI_Gather(&eventInfo[event].time,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);CHKERRQ(ierr);
        if (!rank) {
          ierr = PetscFPrintf(comm, fd, "%s.Time = [ ", name);CHKERRQ(ierr);
          for (i=0; i<size; i++) {
            ierr = PetscFPrintf(comm, fd, " %5.3e,",mydata[i]);CHKERRQ(ierr);
          }
          ierr = PetscFPrintf(comm, fd, "]\n");CHKERRQ(ierr);
        }
        /* Flops */
        ierr = MPI_Gather(&eventInfo[event].flops,1,MPIU_PETSCLOGDOUBLE,mydata,1,MPIU_PETSCLOGDOUBLE,0,comm);CHKERRQ(ierr);
        if (!rank) {
          ierr = PetscFPrintf(comm, fd, "%s.Flops = [ ", name);CHKERRQ(ierr);
          for (i=0; i<size; i++) {
            ierr = PetscFPrintf(comm, fd, " %5.3e,",mydata[i]);CHKERRQ(ierr);
          }
          ierr = PetscFPrintf(comm, fd, "]\n");CHKERRQ(ierr);
        }
      }
    }
  }
  /* Right now, only stages on the first processor are reported here, meaning only objects
     associated with the global communicator, or MPI_COMM_SELF for proc 1. We really should report
     global stats and then stats for stages local to processor sets. */
  for (stage = 0; stage < numStages; stage++) {
    if (localStageUsed[stage]) {
      classInfo = stageLog->stageInfo[stage].classLog->classInfo;
    } else {
      ierr = PetscFPrintf(comm, fd, "\n--- Event Stage %d: Unknown\n\n", stage);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(localStageUsed);CHKERRQ(ierr);
  ierr = PetscFree(stageUsed);CHKERRQ(ierr);
  ierr = PetscFree(localStageVisible);CHKERRQ(ierr);
  ierr = PetscFree(stageVisible);CHKERRQ(ierr);
  ierr = PetscFree(mydata);CHKERRQ(ierr);
  ierr = PetscFree(mycount);CHKERRQ(ierr);

  /* Information unrelated to this particular run */
  ierr = PetscFPrintf(comm, fd, "# ========================================================================================================================\n");CHKERRQ(ierr);
  PetscTime(y);
  PetscTime(x);
  PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y);
  PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y); PetscTime(y);
  ierr = PetscFPrintf(comm, fd, "AveragetimetogetPetscTime = %g\n", (y-x)/10.0);CHKERRQ(ierr);
  /* MPI information */
  if (size > 1) {
    MPI_Status  status;
    PetscMPIInt tag;
    MPI_Comm    newcomm;

    ierr = MPI_Barrier(comm);CHKERRQ(ierr);
    PetscTime(x);
    ierr = MPI_Barrier(comm);CHKERRQ(ierr);
    ierr = MPI_Barrier(comm);CHKERRQ(ierr);
    ierr = MPI_Barrier(comm);CHKERRQ(ierr);
    ierr = MPI_Barrier(comm);CHKERRQ(ierr);
    ierr = MPI_Barrier(comm);CHKERRQ(ierr);
    PetscTime(y);
    ierr = PetscFPrintf(comm, fd, "AveragetimeforMPI_Barrier = %g\n", (y-x)/5.0);CHKERRQ(ierr);
    ierr = PetscCommDuplicate(comm,&newcomm,&tag);CHKERRQ(ierr);
    ierr = MPI_Barrier(comm);CHKERRQ(ierr);
    if (rank) {
      ierr = MPI_Recv(0, 0, MPI_INT, rank-1, tag, newcomm, &status);CHKERRQ(ierr);
      ierr = MPI_Send(0, 0, MPI_INT, (rank+1)%size, tag, newcomm);CHKERRQ(ierr);
    } else {
      PetscTime(x);
      ierr = MPI_Send(0, 0, MPI_INT, 1, tag, newcomm);CHKERRQ(ierr);
      ierr = MPI_Recv(0, 0, MPI_INT, size-1, tag, newcomm, &status);CHKERRQ(ierr);
      PetscTime(y);
      ierr = PetscFPrintf(comm, fd, "AveragetimeforzerosizeMPI_Send = %g\n", (y-x)/size);CHKERRQ(ierr);
    }
    ierr = PetscCommDestroy(&newcomm);CHKERRQ(ierr);
  }
  if (!rank) { /* print options table */
    ierr = PetscFPrintf(comm, fd, "# ");CHKERRQ(ierr);
    /* ierr = PetscOptionsPrint(fd);CHKERRQ(ierr); */
  }
  /* Cleanup */
  ierr = PetscFPrintf(comm, fd, "\n");CHKERRQ(ierr);
  ierr = StageLogPush(stageLog, lastStage);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
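/*
   Example usage (a minimal sketch; "log.py" and the pointer-taking
   PetscViewerDestroy() signature are assumptions that may vary by PETSc
   version). The routine expects an ASCII viewer, since it reaches into
   viewer->data for the FILE pointer, and the file it emits is importable
   Python: lists such as Time/Flops/Memory plus one Dummy object per event.

     PetscViewer viewer;

     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,"log.py",&viewer);CHKERRQ(ierr);
     ierr = PetscLogPrintSummaryToPy(PETSC_COMM_WORLD,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/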
/*@C
   PetscMemoryView - Shows the amount of memory currently being used in a communicator.

   Collective on PetscViewer

   Input Parameters:
+  viewer - the viewer that defines the communicator
-  message - string printed before values

   Options Database:
+  -malloc - have PETSc track how much memory it has allocated
-  -memory_view - during PetscFinalize() have this routine called

   Level: intermediate

   Concepts: memory usage

.seealso: PetscMallocDump(), PetscMemoryGetCurrentUsage(), PetscMemorySetGetMaximumUsage()
@*/
PetscErrorCode PetscMemoryView(PetscViewer viewer,const char message[])
{
  PetscLogDouble allocated,allocatedmax,resident,residentmax,gallocated,gallocatedmax,gresident,gresidentmax,maxgallocated,maxgallocatedmax,maxgresident,maxgresidentmax;
  PetscLogDouble mingallocated,mingallocatedmax,mingresident,mingresidentmax;
  PetscErrorCode ierr;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (!viewer) viewer = PETSC_VIEWER_STDOUT_WORLD;
  ierr = PetscMallocGetCurrentUsage(&allocated);CHKERRQ(ierr);
  ierr = PetscMallocGetMaximumUsage(&allocatedmax);CHKERRQ(ierr);
  ierr = PetscMemoryGetCurrentUsage(&resident);CHKERRQ(ierr);
  ierr = PetscMemoryGetMaximumUsage(&residentmax);CHKERRQ(ierr);
  if (residentmax > 0) residentmax = PetscMax(resident,residentmax);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  /* print with an explicit "%s" so a user-supplied message is never treated as a format string */
  ierr = PetscViewerASCIIPrintf(viewer,"%s",message);CHKERRQ(ierr);
  if (resident && residentmax && allocated) {
    ierr = MPI_Reduce(&residentmax,&gresidentmax,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&residentmax,&maxgresidentmax,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&residentmax,&mingresidentmax,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Maximum (over computational time) process memory: total %5.4e max %5.4e min %5.4e\n",gresidentmax,maxgresidentmax,mingresidentmax);CHKERRQ(ierr);
    ierr = MPI_Reduce(&resident,&gresident,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&resident,&maxgresident,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&resident,&mingresident,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Current process memory: total %5.4e max %5.4e min %5.4e\n",gresident,maxgresident,mingresident);CHKERRQ(ierr);
    ierr = MPI_Reduce(&allocatedmax,&gallocatedmax,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&allocatedmax,&maxgallocatedmax,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&allocatedmax,&mingallocatedmax,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Maximum (over computational time) space PetscMalloc()ed: total %5.4e max %5.4e min %5.4e\n",gallocatedmax,maxgallocatedmax,mingallocatedmax);CHKERRQ(ierr);
    ierr = MPI_Reduce(&allocated,&gallocated,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&allocated,&maxgallocated,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&allocated,&mingallocated,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Current space PetscMalloc()ed: total %5.4e max %5.4e min %5.4e\n",gallocated,maxgallocated,mingallocated);CHKERRQ(ierr);
  } else if (resident && residentmax) {
    ierr = MPI_Reduce(&residentmax,&gresidentmax,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&residentmax,&maxgresidentmax,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&residentmax,&mingresidentmax,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Maximum (over computational time) process memory: total %5.4e max %5.4e min %5.4e\n",gresidentmax,maxgresidentmax,mingresidentmax);CHKERRQ(ierr);
    ierr = MPI_Reduce(&resident,&gresident,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&resident,&maxgresident,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&resident,&mingresident,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Current process memory: total %5.4e max %5.4e min %5.4e\n",gresident,maxgresident,mingresident);CHKERRQ(ierr);
  } else if (resident && allocated) {
    ierr = MPI_Reduce(&resident,&gresident,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&resident,&maxgresident,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&resident,&mingresident,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Current process memory: total %5.4e max %5.4e min %5.4e\n",gresident,maxgresident,mingresident);CHKERRQ(ierr);
    ierr = MPI_Reduce(&allocated,&gallocated,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&allocated,&maxgallocated,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&allocated,&mingallocated,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Current space PetscMalloc()ed: total %5.4e max %5.4e min %5.4e\n",gallocated,maxgallocated,mingallocated);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Run with -memory_view to get maximum memory usage\n");CHKERRQ(ierr);
  } else if (allocated) {
    ierr = MPI_Reduce(&allocated,&gallocated,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&allocated,&maxgallocated,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&allocated,&mingallocated,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Current space PetscMalloc()ed: total %5.4e max %5.4e min %5.4e\n",gallocated,maxgallocated,mingallocated);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Run with -memory_view to get maximum memory usage\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"OS cannot compute process memory\n");CHKERRQ(ierr);
  } else {
    ierr = PetscViewerASCIIPrintf(viewer,"Run with -malloc to get statistics on PetscMalloc() calls\nOS cannot compute process memory\n");CHKERRQ(ierr);
  }
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
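/*
   Example usage (a minimal sketch; the message string is illustrative).
   Run with -malloc so the PetscMalloc() statistics are collected, and
   enable maximum-memory tracking (see PetscMemorySetGetMaximumUsage())
   so the "Maximum (over computational time)" lines are populated.

     ierr = PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage after assembly:\n");CHKERRQ(ierr);
*/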