// called on PE 0 void PVT::resumeAfterLB(eventMsg *m) { static int count = 0; count ++; if (count != CkNumPes()) { CkFreeMsg(m); return; } count = 0; #ifndef CMK_OPTIMIZE if(pose_config.stats) localStats->TimerStart(GVT_TIMER); #endif if (parLBInProgress) { CkPrintf("POSE: load balancing complete on processor %d at GVT=%lld sim time=%.1f sec\n", CkMyPe(), estGVT, CmiWallTimer() + parStartTime); parLBInProgress = 0; parLastLBGVT = estGVT; } CkFreeMsg(m); CProxy_PVT p(ThePVT); startPhaseActive = 0; prioBcMsg *startMsg = new (8*sizeof(int)) prioBcMsg; startMsg->bc = 1; *((int *)CkPriorityPtr(startMsg)) = 0; CkSetQueueing(startMsg, CK_QUEUEING_IFIFO); p[CkMyPe()].startPhase(startMsg); #ifndef CMK_OPTIMIZE if(pose_config.stats) localStats->TimerStop(); #endif }
/// Basic Constructor GVT::GVT() { #ifdef VERBOSE_DEBUG CkPrintf("[%d] constructing GVT\n",CkMyPe()); #endif optGVT = POSE_UnsetTS, conGVT = POSE_UnsetTS; done=0; SRs = NULL; startOffset = 0; gvtIterationCount = 0; #ifndef CMK_OPTIMIZE localStats = (localStat *)CkLocalBranch(theLocalStats); #endif #ifndef SEQUENTIAL_POSE if(pose_config.lb_on) nextLBstart = pose_config.lb_skip - 1; #endif estGVT = lastEarliest = inactiveTime = POSE_UnsetTS; lastSends = lastRecvs = inactive = 0; reportsExpected = 1; if (CkNumPes() >= 2) reportsExpected = 2; // CkPrintf("GVT expects %d reports!\n", reportsExpected); if (CkMyPe() == 0) { // start the PVT phase of the GVT algorithm CProxy_PVT p(ThePVT); prioBcMsg *startMsg = new (8*sizeof(int)) prioBcMsg; startMsg->bc = 1; *((int *)CkPriorityPtr(startMsg)) = 0; CkSetQueueing(startMsg, CK_QUEUEING_IFIFO); p.startPhase(startMsg); // broadcast PVT calculation to all PVT branches } }
// called on a parent node void HybridBaseLB::NotifyObjectMigrationDone(int fromlevel) { int atlevel = fromlevel + 1; LevelData *lData = levelData[atlevel]; lData->mig_reported ++; if (lData->mig_reported == lData->nChildren) { lData->mig_reported = 0; // start load balancing at this level if (atlevel > 1) { // I am done at the level, propagate load balancing to next level thisProxy.Loadbalancing(atlevel-1, lData->nChildren, lData->children); } else { // atlevel = 1 if (_lb_args.debug() > 1) CkPrintf("[%d] NotifyObjectMigrationDone at level %d started at %f\n", CkMyPe(), atlevel, CkWallTimer()); DummyMsg *m = new (8*sizeof(int)) DummyMsg; *((int *)CkPriorityPtr(m)) = -100-atlevel; CkSetQueueing(m, CK_QUEUEING_IFIFO); thisProxy.StartCollectInfo(m, lData->nChildren, lData->children); } } }
/// Make a prioritized LV3D_RenderMsg: LV3D_RenderMsg *LV3D_RenderMsg::new_( int client,int frame,int viewable,double prioAdj) { int prioBits=8*sizeof(prioAdj); LV3D_RenderMsg *m=new (prioBits) LV3D_RenderMsg; m->clientID=client; m->frameID=frame; m->viewableID=viewable; unsigned int *p=(unsigned int *)CkPriorityPtr(m); p[0]=LV3D_build_priority(frame,prioAdj); if (LV3D_Disable_Render_Prio) p[0]=0; CkSetQueueing(m,CK_QUEUEING_BFIFO); return m; }
/// ENTRY: runs the PVT calculation and reports to GVT void PVT::startPhase(prioBcMsg *m) { CProxy_GVT g(TheGVT); CProxy_PVT p(ThePVT); register int i; if (startPhaseActive) return; #ifndef CMK_OPTIMIZE if(pose_config.stats) localStats->TimerStart(GVT_TIMER); #endif startPhaseActive = 1; if (m->bc) { prioBcMsg *startMsg = new (8*sizeof(POSE_TimeType)) prioBcMsg; startMsg->bc = 0; *((POSE_TimeType *)CkPriorityPtr(startMsg)) = 1-POSE_TimeMax; CkSetQueueing(startMsg, CK_QUEUEING_IFIFO); p.startPhaseExp(startMsg); } objs.Wake(); // wake objects to make sure all have reported // compute PVT optPVT = conPVT = POSE_UnsetTS; int end = objs.getNumSpaces(); for (i=0; i<end; i++) if (objs.objs[i].isPresent()) { if (objs.objs[i].isOptimistic()) { // check optPVT if ((optPVT < 0) || ((objs.objs[i].getOVT() < optPVT) && (objs.objs[i].getOVT() > POSE_UnsetTS))) { optPVT = objs.objs[i].getOVT(); CkAssert(simdone>0 || ((objs.objs[i].getOVT() >= estGVT) || (objs.objs[i].getOVT() == POSE_UnsetTS))); } } else if (objs.objs[i].isConservative()) { // check conPVT if ((conPVT < 0) || ((objs.objs[i].getOVT() < conPVT) && (objs.objs[i].getOVT() > POSE_UnsetTS))) conPVT = objs.objs[i].getOVT(); } CkAssert(simdone>0 || (optPVT >= estGVT)||(optPVT == POSE_UnsetTS)||(estGVT == POSE_UnsetTS)); CkAssert(simdone>0 || (conPVT >= estGVT)||(conPVT == POSE_UnsetTS)||(estGVT == POSE_UnsetTS)); } // (1) Find out the local PVT from optPVT and conPVT POSE_TimeType pvt = optPVT; if ((conPVT < pvt) && (conPVT > POSE_UnsetTS)) pvt = conPVT; if ((iterMin < pvt) && (iterMin > POSE_UnsetTS)) pvt = iterMin; if (waitForFirst) { waitForFirst = 0; if (pvt == POSE_UnsetTS) SendsAndRecvs->Restructure(estGVT, estGVT, POSE_UnsetTS); else SendsAndRecvs->Restructure(estGVT, pvt, POSE_UnsetTS); } // CkPrintf("[%d] pvt=%d gvt=%d optPVT=%d iterMin=%d\n", CkMyPe(), pvt, estGVT, optPVT, iterMin); // ovt2 in each pvtobj is used to store the ovt from the rep if the // poser is idle (i.e., it hasn't received any events since the last // time it reported to its PVT) POSE_TimeType xt; if (pvt == POSE_UnsetTS) { // all are idle; find max ovt POSE_TimeType maxOVT = POSE_UnsetTS; for (i=0; i<end; i++) if (objs.objs[i].isPresent()) { xt = objs.objs[i].getOVT2(); if (xt > maxOVT) maxOVT = xt; } if (maxOVT > estGVT) pvt = maxOVT; } // (2) Pack the SRtable data into the message POSE_TimeType maxSR; UpdateMsg *um = SendsAndRecvs->PackTable(pvt, &maxSR); // (3) Add the PVT info to the message um->optPVT = pvt; um->conPVT = conPVT; um->maxSR = maxSR; um->runGVTflag = 0; if (um->numEntries > 0) { //CkPrintf("PE %d has %d SRs reported to GVT; earliest=%d pvt=%d\n", CkMyPe(), um->numEntries, um->SRs[0].timestamp, pvt); } // send data to GVT estimation p[reportTo].reportReduce(um); /* if (simdone) // transmit final info to GVT on PE 0 g[0].computeGVT(um); else { g[gvtTurn].computeGVT(um); // transmit info to GVT gvtTurn = (gvtTurn + 1) % CkNumPes(); // calculate next GVT location } */ objs.SetIdle(); // Set objects to idle iterMin = POSE_UnsetTS; #ifndef CMK_OPTIMIZE if(pose_config.stats) localStats->TimerStop(); #endif }
void NormalLineArray::doFirstFFT(int fftid, int direction) { LineFFTinfo &fftinfo = (infoVec[fftid]->info); int ptype = fftinfo.ptype; int pblock = fftinfo.pblock; complex *line = fftinfo.dataPtr; int sizeX = fftinfo.sizeX; int sizeZ = fftinfo.sizeZ; int *xsquare = fftinfo.xsquare; int *ysquare = fftinfo.ysquare; int *zsquare = fftinfo.zsquare; #ifdef HEAVYVERBOSE { char fname[80]; if(direction) snprintf(fname,80,"xline_%d.y%d.z%d.out", fftid,thisIndex.x, thisIndex.y); else snprintf(fname,80,"zline_%d.x%d.y%d.out", fftid,thisIndex.x, thisIndex.y); FILE *fp=fopen(fname,"w"); for(int x = 0; x < sizeX*xsquare[0]*xsquare[1]; x++) fprintf(fp, "%d %g %g\n", x, line[x].re, line[x].im); fclose(fp); } #endif if(direction && ptype==PencilType::XLINE) fftw(fwdplan, xsquare[0]*xsquare[1], (fftw_complex*)line, 1, sizeX, NULL, 0, 0); // xPencilsPerSlab many 1-D fft's else if(!direction && ptype==PencilType::ZLINE) fftw(bwdplan, zsquare[0]*zsquare[1], (fftw_complex*)line, 1, sizeZ, NULL, 0, 0); else CkAbort("Can't do this FFT\n"); int x, y, z=0; #ifdef VERBOSE CkPrintf("First FFT done at [%d %d] [%d %d]\n", thisIndex.x, thisIndex.y,sizeX,sizeZ); #endif int baseX, ix, iy, iz; if(true) {//else if(pblock == PencilBlock::SQUAREBLOCK){ if(direction) { int sendSquarethick = ysquare[1] <= xsquare[1] ? ysquare[1]:xsquare[1]; int sendDataSize = ysquare[0]*xsquare[0] * sendSquarethick; int zpos = thisIndex.y; int index=0; complex *sendData = NULL; for(z = 0; z < xsquare[1]; z+=sendSquarethick){ for(x = 0; x < sizeX; x+=ysquare[0]) { SendFFTMsg *msg = new(sendDataSize, sizeof(int)*8) SendFFTMsg; sendData = msg->data; msg->ypos = thisIndex.x; msg->size = sendDataSize; msg->id = fftid; msg->direction = direction; msg->data = sendData; CkSetQueueing(msg, CK_QUEUEING_IFIFO); #ifdef _PRIOMSG_ int prioNum = (zpos+z) + x*sizeX; *(int*)CkPriorityPtr(msg) = prioNum; #endif index = 0; for(iz = z; iz < z+sendSquarethick; iz++) for(ix = x; ix < x+ysquare[0]; ix++) for(y = 0; y < xsquare[0]; y++) sendData[index++] = line[iz*sizeX*xsquare[0]+y*sizeX+ix]; #ifdef VERBOSE CkPrintf(" [%d %d] sending to YLINES [ %d %d] \n", thisIndex.x, thisIndex.y, thisIndex.y, x); #endif yProxy(zpos+z, x).doSecondFFT(msg); } //memset(sendData, 0, sizeof(complex)*yPencilsPerSlab*xPencilsPerSlab); } } else { int sendSquarewidth = ysquare[0]<=zsquare[0] ? ysquare[0]:zsquare[0]; int sendDataSize = ysquare[1] * sendSquarewidth * zsquare[1]; int xpos = thisIndex.x; int ypos = thisIndex.y; int index=0; complex *sendData = NULL; for(x = 0; x < zsquare[0]; x+=sendSquarewidth) for(z = 0; z < sizeZ; z+=ysquare[1]){ SendFFTMsg *msg = new(sendDataSize, sizeof(int)*8) SendFFTMsg; sendData = msg->data; msg->ypos = thisIndex.y; msg->size = sendDataSize; msg->id = fftid; msg->direction = direction; msg->data = sendData; CkSetQueueing(msg, CK_QUEUEING_IFIFO); #ifdef _PRIOMSG_ int prioNum = (z) + (x+xpos)*sizeX; *(int*)CkPriorityPtr(msg) = prioNum; #endif index = 0; for(iz = z; iz < z+ysquare[1]; iz++) for (ix = x; ix < x+sendSquarewidth; ix++) for(iy = 0; iy < zsquare[1]; iy++) sendData[index++] = line[iz+ix*sizeZ+iy*sizeZ*zsquare[0]]; #ifdef VERBOSE CkPrintf(" [%d %d] sending to YLINES [%d %d] \n", thisIndex.x, thisIndex.y, z, thisIndex.x); #endif yProxy(z, xpos+x).doSecondFFT(msg); } } } }
void NormalLineArray::doSecondFFT(int ypos, complex *val, int datasize, int fftid, int direction) { LineFFTinfo &fftinfo = (infoVec[fftid]->info); int ptype = fftinfo.ptype; complex *line = fftinfo.dataPtr; int sizeY = fftinfo.sizeY; int *xsquare = fftinfo.xsquare; int *ysquare = fftinfo.ysquare; int *zsquare = fftinfo.zsquare; int expectSize = 0, expectMsg = 0; int x,y,z,baseY; int ix,iy,iz; if(direction){ int sendSquarethick = ysquare[1]<=xsquare[1] ? ysquare[1]:xsquare[1]; expectSize = ysquare[0]*xsquare[0] * sendSquarethick; expectMsg = sizeY/xsquare[0] * (ysquare[1]/sendSquarethick); CkAssert(datasize == expectSize); int idx = 0; for(z=0; z<sendSquarethick; z++) for(x=0; x<ysquare[0]; x++) for(y=0; y<xsquare[0]; y++) line[z*sizeY*ysquare[0]+x*sizeY+ypos+y] = val[idx++]; } else { int sendSquarewidth = ysquare[0]<=zsquare[0] ? ysquare[0]:zsquare[0]; expectSize = ysquare[1] * sendSquarewidth * zsquare[1]; expectMsg = sizeY/zsquare[1] * (ysquare[0]/sendSquarewidth); CkAssert(datasize == expectSize); int idx=0; for(z=0; z<ysquare[1]; z++) for(x=0; x<sendSquarewidth; x++) for(y=0; y<zsquare[1]; y++) line[z*sizeY*ysquare[0]+x*sizeY+ypos+y] = val[idx++]; } infoVec[fftid]->count++; if (infoVec[fftid]->count == expectMsg) { infoVec[fftid]->count = 0; int y; if(direction && ptype==PencilType::YLINE) fftw(fwdplan, ysquare[0]*ysquare[1], (fftw_complex*)line, 1, sizeY, NULL, 0, 0); else if(!direction && ptype==PencilType::YLINE) fftw(bwdplan, ysquare[0]*ysquare[1], (fftw_complex*)line, 1, sizeY, NULL, 0, 0); else CkAbort("Can't do this FFT\n"); #ifdef HEAVYVERBOSE { char fname[80]; snprintf(fname,80,"yline_%d.x%d.z%d.out", fftid, thisIndex.y, thisIndex.x); FILE *fp=fopen(fname,"w"); for(int x = 0; x < sizeY*ysquare[0]*ysquare[1]; x++) fprintf(fp, "%d %g %g\n", x, line[x].re, line[x].im); fclose(fp); } #endif #ifdef VERBOSE CkPrintf("Second FFT done at [%d %d]\n", thisIndex.x, thisIndex.y); #endif // thisIndex.y is x-coord if(direction){ int sendSquarewidth = ysquare[0]<=zsquare[0] ? ysquare[0]:zsquare[0]; int sendDataSize = sendSquarewidth * ysquare[1] * zsquare[1]; int xpos = thisIndex.y; int index=0; complex *sendData = NULL; for(x = 0; x < ysquare[0]; x+=sendSquarewidth) for(y = 0; y < sizeY; y+=zsquare[1]) { SendFFTMsg *msg = new(sendDataSize, sizeof(int)*8) SendFFTMsg; sendData = msg->data; msg->zpos = thisIndex.x; msg->ypos = 0; msg->size = sendDataSize; msg->id = fftid; msg->direction = direction; msg->data = sendData; CkSetQueueing(msg, CK_QUEUEING_IFIFO); #ifdef _PRIOMSG_ int prioNum = (xpos+x) + y*sizeY; *(int*)CkPriorityPtr(msg) = prioNum; #endif index = 0; for(iy = y; iy < y+zsquare[1]; iy++) for(ix = x; ix < x+sendSquarewidth; ix++) for(iz = 0; iz < ysquare[1]; iz++) sendData[index++] = line[iz*sizeY*ysquare[0]+ix*sizeY+iy]; #ifdef VERBOSE CkPrintf(" [%d %d] sending to ZLINES [ %d %d] \n", thisIndex.x, thisIndex.y, thisIndex.y, y); #endif (zProxy)(x+xpos, y).doThirdFFT(msg); } } else { int sendSquarethick = ysquare[1]<=xsquare[1] ? ysquare[1]:xsquare[1]; int sendDataSize = ysquare[0]*xsquare[0] * sendSquarethick; int zpos = thisIndex.x; int index, ix, iy, iz; complex *sendData = NULL; for(z = 0; z < ysquare[1]; z+=sendSquarethick) for (y = 0; y < sizeY; y+=xsquare[0]) { SendFFTMsg *msg = new(sendDataSize, sizeof(int)*8) SendFFTMsg; sendData = msg->data; msg->zpos = 0; msg->ypos = thisIndex.y; msg->size = sendDataSize; msg->id = fftid; msg->direction = direction; msg->data = sendData; CkSetQueueing(msg, CK_QUEUEING_IFIFO); #ifdef _PRIOMSG_ int prioNum = (y)*sizeY + (zpos+z); *(int*)CkPriorityPtr(msg) = prioNum; #endif index = 0; for(iz = z; iz < z+sendSquarethick; iz++) for(iy = y; iy < y+xsquare[0]; iy++) for(x = 0; x < ysquare[0]; x++) sendData[index++] = line[iz*sizeY*ysquare[0]+x*sizeY+iy]; #ifdef VERBOSE CkPrintf(" [%d %d] sending to XLINES [%d %d] \n", thisIndex.x, thisIndex.y, y, zpos+z); #endif (xProxy)(y, zpos+z).doThirdFFT(msg); } } } }