void check_and_compute() { compute_kernel(); // calculate error // not being done right now since we are doing a fixed no. of iterations double *tmp; tmp = temperature; temperature = new_temperature; new_temperature = tmp; constrainBC(); if (iterations % CKP_FREQ == 0 || iterations > MAX_ITER) { #ifdef CMK_MEM_CHECKPOINT contribute(0, 0, CkReduction::concat, CkCallback(CkIndex_Main::report(), mainProxy)); #elif CMK_MESSAGE_LOGGING if(iterations > MAX_ITER) contribute(0, 0, CkReduction::concat, CkCallback(CkIndex_Main::report(), mainProxy)); else AtSync(); #else contribute(0, 0, CkReduction::concat, CkCallback(CkIndex_Main::report(), mainProxy)); #endif } else { doStep(); } }
void CP_Rho_GSpacePlane::divRhoVksGspace() { double tpi,*hmati; CPXCFNCTS::CP_fetch_hmati(&hmati,&tpi); memset(divRhoY, 0, sizeof(complex) * myGrid_size); memset(divRhoZ, 0, sizeof(complex) * myGrid_size); double gx,gy,gz; std::vector< gridPoint > & points = (*myPoints); double sumX = 0, sumY = 0, sumZ = 0; for(int p = 0; p < numPoints; p++) { int offset = points[p].offset; gx = tpi * (points[p].d3 * hmati[1] + points[p].d2 * hmati[2] + points[p].d1 * hmati[3]); gy = tpi * (points[p].d3 * hmati[4] + points[p].d2 * hmati[5] + points[p].d1 * hmati[6]); gz = tpi * (points[p].d3 * hmati[7] + points[p].d2 * hmati[8] + points[p].d1 * hmati[9]); complex tmp = (divRhoX[offset].multiplyByi())*(-1.0); divRhoX[offset] = tmp * gx; divRhoY[offset] = tmp * gy; divRhoZ[offset] = tmp * gz; #if _CP_DEBUG_RHOG_VERBOSE_ sumX += divRhoX[offset].re + divRhoX[offset].im; sumY += divRhoY[offset].re + divRhoY[offset].im; sumZ += divRhoZ[offset].re + divRhoZ[offset].im; #endif }//endfor #if _CP_DEBUG_RHOG_VERBOSE_ CkPrintf("{%d} Rho GS [%d] divSums %lf %lf %lf\n", thisInstance.proxyOffset, thisIndex, sumX, sumY, sumZ); #endif Charm_doBackwardFFT(CkCallback(CkIndex_CP_Rho_RealSpacePlane::acceptGradRhoVks(), UrhoRealProxy[thisInstance.proxyOffset]), Urho_fft_xProxy[thisInstance.proxyOffset], fft_xoffset, 1 / simReadOnly.vol); Charm_doBackwardFFT(CkCallback(CkIndex_CP_Rho_RealSpacePlane::acceptGradRhoVks(), UrhoRealProxy[thisInstance.proxyOffset]), Urho_fft_yProxy[thisInstance.proxyOffset], fft_yoffset, 1 / simReadOnly.vol); Charm_doBackwardFFT(CkCallback(CkIndex_CP_Rho_RealSpacePlane::acceptGradRhoVks(), UrhoRealProxy[thisInstance.proxyOffset]), Urho_fft_zProxy[thisInstance.proxyOffset], fft_zoffset, 1 / simReadOnly.vol); //--------------------------------------------------------------------------- }//end routine
void Compute::resetArrays() { int indexX = thisIndex.x; int indexY = thisIndex.y; int indexZ = thisIndex.z; float tmp; for(int i=indexZ*subBlockDimXz; i<(indexZ+1)*subBlockDimXz; i++) for(int j=0; j<blockDimY; j++) { tmp = (float)drand48(); while(tmp > MAX_LIMIT || tmp < (-1)*MAX_LIMIT) tmp = (float)drand48(); A[i*blockDimY + j] = tmp; } for(int j=indexX*subBlockDimYx; j<(indexX+1)*subBlockDimYx; j++) for(int k=0; k<blockDimZ; k++) { tmp = (float)drand48(); while(tmp > MAX_LIMIT || tmp < (-1)*MAX_LIMIT) tmp = (float)drand48(); B[j*blockDimZ + k] = tmp; } for(int i=0; i<blockDimX; i++) for(int k=0; k<blockDimZ; k++) { C[i*blockDimZ + k] = 0.0; #if USE_CKDIRECT tmpC[i*blockDimZ + k] = 0.0; #endif } contribute(0, 0, CkReduction::concat, CkCallback(CkIndex_Main::resetDone(), mainProxy)); }
void Compute::receiveC(float *data, int size, int who) { int indexY = thisIndex.y; if(who) { for(int i=0; i<subBlockDimXy; i++) for(int k=0; k<blockDimZ; k++) C[indexY*subBlockDimXy*blockDimZ + i*blockDimZ + k] += data[i*blockDimZ + k]; } countC++; if(countC == num_chare_y) { /*char name[30]; sprintf(name, "%s_%d_%d_%d", "C", thisIndex.x, thisIndex.y, thisIndex.z); FILE *fp = fopen(name, "w"); for(int i=0; i<subBlockDimXy; i++) { for(int k=0; k<blockDimZ; k++) fprintf(fp, "%f ", C[indexY*subBlockDimXy*blockDimZ + i*blockDimZ + k]); fprintf(fp, "\n"); } fclose(fp);*/ // counters to keep track of how many messages have been received countA = 0; countB = 0; countC = 0; contribute(0, 0, CkReduction::concat, CkCallback(CkIndex_Main::done(), mainProxy)); // mainProxy.done(); } }
void check_and_compute () { // if (--messages_due == 0) // messages_due = 4; compute (); // mainProxy.report(); if (thisIndex < majElements - 1) { // printf("DONE WITH index=%d and calling for ind=%d\n",thisIndex,thisIndex+1); #ifdef PRIOR opts = new CkEntryOptions (); opts1 = new CkEntryOptions (); opts->setPriority (-100); opts1->setPriority (100); //printf("-------- Jacobi[%d] sending message to next one at time=%f\n",thisIndex,CkWallTimer()); thisProxy[thisIndex + 1].begin_iteration (1, opts); for(int i=(thisIndex+1)*7;i<(thisIndex+1)*7+7;i++) minorProxy[i].begin_iteration(1,opts1); #else thisProxy[thisIndex + 1].begin_iteration (1); for (int i = (thisIndex + 1) * 7; i < (thisIndex + 1) * 7 + 7; i++) minorProxy[i].begin_iteration (1); #endif } else { // printf("CAlling report Jacobi[%d] time=%f!!!!!!!!!!1\n",thisIndex,CkWallTimer()); // else // mainProxy.report (); } if (iterations % ldbTime == 4) AtSync(); else contribute(CkCallback(CkIndex_Main::report(NULL),mainProxy)); }
/**
 * Stores a CkDirect handle received from another chare and associates it
 * with the local buffer it refers to.
 *
 * sHandles is laid out as: B handles at [0, num_chare_x), C handles at
 * [num_chare_x, num_chare_x + num_chare_y), A handles after that.
 * Once num_chare_z-1 A handles, num_chare_x-1 B handles and num_chare_y-1
 * C handles have arrived, the counters are reset and the chare joins the
 * reduction that triggers Main::setupDone.
 *
 * @param shdl   handle to store
 * @param index  position of the handle within its group
 * @param arr    one of SENDA / SENDB / SENDC, selecting the group
 */
void Compute::recvHandle(infiDirectUserHandle shdl, int index, int arr) {
  if (arr == SENDA) {
    const int slot = num_chare_x + num_chare_y + index;
    sHandles[slot] = shdl;
    CkDirect_assocLocalBuffer(&sHandles[slot],
                              &A[thisIndex.z*subBlockDimXz*blockDimY],
                              sizeof(float)*subBlockDimXz*blockDimY);
    countA++;
  }

  if (arr == SENDB) {
    const int slot = index;
    sHandles[slot] = shdl;
    CkDirect_assocLocalBuffer(&sHandles[slot],
                              &B[thisIndex.x*subBlockDimYx*blockDimZ],
                              sizeof(float)*subBlockDimYx*blockDimZ);
    countB++;
  }

  if (arr == SENDC) {
    const int slot = num_chare_x + index;
    sHandles[slot] = shdl;
    CkDirect_assocLocalBuffer(&sHandles[slot],
                              &C[index*subBlockDimXy*blockDimZ],
                              sizeof(float)*subBlockDimXy*blockDimZ);
    countC++;
  }

  const bool allHandlesIn = countA == num_chare_z-1 &&
                            countB == num_chare_x-1 &&
                            countC == num_chare_y-1;
  if (allHandlesIn) {
    countA = 0;
    countB = 0;
    countC = 0;
    contribute(0, 0, CkReduction::concat,
               CkCallback(CkIndex_Main::setupDone(), mainProxy));
  }
}
void communicate(int iters, bool useTram) { GroupMeshStreamer<DataItem, Participant, SimpleMeshRouter> *localStreamer; if (useTram) { localStreamer = aggregator.ckLocalBranch(); } int ctr = 0; for (int i = 0; i < iters; i++) { for (int j=0; j<CkNumPes(); j++) { if (useTram) { localStreamer->insertData(myItem, neighbors[j]); } else { allToAllGroup[neighbors[j]].receive(myItem); ctr++; } } if (!useTram) { if (ctr == 1024) { ctr = 0; CthYield(); } } } if (useTram) { localStreamer->done(); } else { contribute(CkCallback(CkReductionTarget(Main, allDone), mainProxy)); } }
//============================================================================ void CP_Rho_GSpacePlane::acceptWhiteByrd() { //============================================================================ #ifdef _CP_DEBUG_RHOG_VERBOSE_ CkPrintf("{%d} Rho GS [%d] acceptWhiteByrd_%d\n", thisInstance.proxyOffset, thisIndex, doneWhiteByrd); #endif doneWhiteByrd++; // When all 3 gradients are in g-space, then you will ready for the next step. if(doneWhiteByrd == 3){ doneWhiteByrd = 0; /** The partially FFT'ed white byrd correction to VKS arrives to RhoG and ffts invoked. Only happens if gradient corrections are on. */ #if CMK_TRACE_ENABLED double StartTime=CmiWallTimer(); #endif //============================================================================ // Compute my whiteByrd : store it in divrhox double tpi,*hmati; CPXCFNCTS::CP_fetch_hmati(&hmati, &tpi); double gx, gy, gz; complex *whitebyrd = divRhoX; // zeroing done carefully inside loop complex zero; zero.re = 0.0; zero.im = 0.0; std::vector< gridPoint > & points = (*myPoints); int last_offset = -1; for(int p = 0; p < numPoints; p++) { int offset = points[p].offset; if(offset != (last_offset + 1)) { for(int cur_off = last_offset + 1; cur_off < offset; cur_off++) { whitebyrd[cur_off] = zero; } } gx = tpi * (points[p].d3 * hmati[1] + points[p].d2 * hmati[2] + points[p].d1 * hmati[3]); gy = tpi * (points[p].d3 * hmati[4] + points[p].d2 * hmati[5] + points[p].d1 * hmati[6]); gz = tpi * (points[p].d3 * hmati[7] + points[p].d2 * hmati[8] + points[p].d1 * hmati[9]); complex tmp = divRhoX[offset]*gx + divRhoY[offset]*gy + divRhoZ[offset]*gz; whitebyrd[offset] = tmp.multiplyByi()*(-1.0); last_offset = offset; } for(int cur_off = last_offset + 1; cur_off < myGrid_size; cur_off++) { whitebyrd[cur_off] = zero; } Charm_doBackwardFFT(CkCallback(CkIndex_CP_Rho_RealSpacePlane::acceptWhiteByrd(), UrhoRealProxy[thisInstance.proxyOffset]), Urho_fft_xProxy[thisInstance.proxyOffset], fft_xoffset); myTime++; } 
//--------------------------------------------------------------------------- }//end routine
void ResumeFromSync () { // printf("Jacobi[%d] calling resumeSync\n",thisIndex); // if (thisIndex == 0) // mainProxy.report (); //CkPrintf("Coming in MAJ MAJ MAJ RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR ++++++++\n"); contribute(CkCallback(CkIndex_Main::report(NULL),mainProxy)); }
void getScannedVertexNum() { CmiUInt8 numScannedVertices = 0; typedef std::vector<BFSVertex>::iterator Iterator; for (Iterator it = vertices.begin(); it != vertices.end(); it++) numScannedVertices += it->getScannedVertexNum(); contribute(sizeof(CmiUInt8), &numScannedVertices, CkReduction::sum_long, CkCallback(CkReductionTarget(TestDriver, done), driverProxy)); }
/**
 * Puts the controller into its initial state (no buffers yet, first_time
 * set), allocates the GSPACE object and joins the reduction that triggers
 * Controller::fftControllerReady.
 */
FFTController::FFTController() {
  first_time = true;
  in_pointer = NULL;
  out_pointer = NULL;

  geps = new GSPACE();

  // TODO: A group dependency could probably solve this better
  contribute(CkCallback(CkReductionTarget(Controller, fftControllerReady),
                        controller_proxy));
}
void PsiCache::reportFTime() { CkReduction::statisticsElement stats(total_time); int tuple_size = 2; CkReduction::tupleElement tuple_reduction[] = { CkReduction::tupleElement(sizeof(double), &total_time, CkReduction::max_double), CkReduction::tupleElement(sizeof(CkReduction::statisticsElement), &stats, CkReduction::statistics) }; CkReductionMsg* msg = CkReductionMsg::buildFromTuple(tuple_reduction, tuple_size); msg->setCallback(CkCallback(CkIndex_Controller::reportFTime(NULL), controller_proxy)); contribute(msg); }
allToAll() { iter = 0; recvCnt = 0; msgs = new allToAllMsg*[numChares*msgCount]; for(int i = 0; i < msgCount*numChares; i++) { msgs[i] = new (msgSize) allToAllMsg; } // reduction to the mainchare to signal that initialization is complete contribute(CkCallback(CkReductionTarget(Main,allToAllReady), mainProxy)); }
/**
 * Entry method: records the start time, passes Main::done to the all-to-all
 * set's run() as a callback, and then starts the set.
 */
/*entry*/ void start() {
  CkPrintf("Main: run calculations...\n");
  CkPrintf("Main: start...\n");

  startt = CkWallTimer();

  // run() is given the callback first; start() then kicks the set off.
  alltoall_proxy->run(CkCallback(CkIndex_Main::done(), thisProxy));
  alltoall_proxy->start();
}
/**
 * Creates NUM_ELEMS Test elements, broadcasts wrapper(100, 200), sends each
 * element method2 / method3 / methodA, and registers Main::finished to be
 * called on quiescence.
 *
 * @param m  command-line message; not read, released here
 */
Main(CkArgMsg *m) {
  CkPrintf("running SDAG migration test\n");

  // The arguments are not used, so the message is released straight away
  // instead of being leaked.
  delete m;

  CProxy_Test testProxy = CProxy_Test::ckNew(NUM_ELEMS);
  testProxy.wrapper(100, 200);

  for (int i = 0; i < NUM_ELEMS; i++) {
    char str[100];
    sprintf(str, "test %d", i);

    // Named differently from the constructor parameter so that it no longer
    // shadows it.
    Msg* payload = new (strlen(str) + 1) Msg(i, str);

    testProxy[i].method2(i * 2, i * 2 + 1);
    testProxy[i].method3(payload);
    testProxy[i].methodA();
  }

  CkStartQD(CkCallback(CkIndex_Main::finished(), thisProxy));
}
/**
 * Reads the element count n from the first command-line argument, creates
 * the B array with n elements and the A array with 2 elements, broadcasts
 * A::F, and registers Main::done to be called on quiescence.
 *
 * @param msg  command-line message; argv[1] must hold n. Released here.
 */
Main(CkArgMsg* msg) {
  // argv[1] used to be read unconditionally, which hands a null pointer to
  // atoi() when the argument is missing.
  if (msg->argc < 2) {
    CkAbort("Main: expected the element count n as the first command-line argument\n");
  }

  const int n = atoi(msg->argv[1]);
  delete msg;  // nothing else is read from the argument message

  mainProxy = thisProxy;
  CkPrintf("n = %d\n",n);

  BProxy = CProxy_B::ckNew(n);
  AProxy = CProxy_A::ckNew(2);
  AProxy.F();

  CkCallback cb = CkCallback(CkReductionTarget(Main, done), thisProxy);
  CkStartQD(cb);
}
/**
 * Constructs one Pingping uChare: logs its ids, clears the done flags, sets
 * all N_uChares ping counters to -1 and pong counters to 999, and joins the
 * reduction that triggers Main::start.
 *
 * @param index      index of this uChare, forwarded to the base class
 * @param uchareset  owning uChareSet, forwarded to the base class
 */
Pingping(std::size_t index,
         uChareSet<Pingping, CProxy_Pingping, CBase_Pingping> *uchareset)
    : uChare<Pingping, CProxy_Pingping, CBase_Pingping>(index, uchareset) {
  CkPrintf("[uchare=%d, chare=%d,pe=%d]: created \n", getId(),
           getuChareSet()->getId(), getuChareSet()->getPe());

  pingDone = false;
  pongDone = false;

  // assign() sets both the size and the value of every element.
  pingCounters.assign(N_uChares, -1);
  pongCounters.assign(N_uChares, 999);

  contribute(CkCallback(CkReductionTarget(Main, start), mainProxy));
}
void Compute::receiveC() { int indexX = thisIndex.x; int indexY = thisIndex.y; int indexZ = thisIndex.z; // copy C from tmpC to the correct location for(int j=0; j<num_chare_y; j++) { if( j != indexY) { for(int i=0; i<subBlockDimXy; i++) for(int k=0; k<blockDimZ; k++) C[indexY*subBlockDimXy*blockDimZ + i*blockDimZ + k] += tmpC[j*subBlockDimXy*blockDimZ + i*blockDimZ + k]; } } /*char name[30]; sprintf(name, "%s_%d_%d_%d", "C", thisIndex.x, thisIndex.y, thisIndex.z); FILE *fp = fopen(name, "w"); for(int i=0; i<subBlockDimXy; i++) { for(int k=0; k<blockDimZ; k++) fprintf(fp, "%f ", C[indexY*subBlockDimXy*blockDimZ + i*blockDimZ + k]); fprintf(fp, "\n"); } fclose(fp); CkPrintf("%d_%d_%d\n", thisIndex.x, thisIndex.y, thisIndex.z); for(int i=0; i<subBlockDimXy; i++) { for(int k=0; k<blockDimZ; k++) CkPrintf("%f ", C[indexY*subBlockDimXy*blockDimZ + i*blockDimZ + k]); CkPrintf("\n"); }*/ // call ready for the buffers for(int i=0; i<num_chare_x; i++) if(i != indexX) CkDirect_ready(&rHandles[i]); for(int j=0; j<num_chare_y; j++) if(j != indexY) CkDirect_ready(&rHandles[num_chare_x + j]); for(int k=0; k<num_chare_z; k++) if(k != indexZ) CkDirect_ready(&rHandles[num_chare_x + num_chare_y + k]); // counters to keep track of how many messages have been received countA = 0; countB = 0; countC = 0; contribute(0, 0, CkReduction::concat, CkCallback(CkIndex_Main::done(), mainProxy)); // mainProxy.done(); }
void Workers::complete() { int size = matrixSize * matrixSize * sizeof(ElementType); memcpy(C, h_C, size); #ifdef DEBUG CkPrintf("[%d] A\n", thisIndex); for (int i=0; i<matrixSize; i++) { CkPrintf("[%d] ", thisIndex); for (int j=0; j<matrixSize; j++) { CkPrintf("%.2f ", A[i*matrixSize+j]); } CkPrintf("\n"); } CkPrintf("[%d] B\n", thisIndex); for (int i=0; i<matrixSize; i++) { CkPrintf("[%d] ", thisIndex); for (int j=0; j<matrixSize; j++) { CkPrintf("%.2f ", B[i*matrixSize+j]); } CkPrintf("\n"); } CkPrintf("[%d] C\n", thisIndex); for (int i=0; i<matrixSize; i++) { CkPrintf("[%d] ", thisIndex); for (int j=0; j<matrixSize; j++) { if(useCublas) CkPrintf("%.2f ", C[j*matrixSize+i]); else CkPrintf("%.2f ", C[i*matrixSize+j]); } CkPrintf("\n"); } CkPrintf("[%d] C-gold\n", thisIndex); for (int i=0; i<matrixSize; i++) { CkPrintf("[%d] ", thisIndex); for (int j=0; j<matrixSize; j++) { C[i*matrixSize + j] = 0; for (int k=0; k<matrixSize; k++) { C[i*matrixSize + j] += A[i*matrixSize +k] * B[k * matrixSize + j]; } CkPrintf("%.2f ", C[i*matrixSize+j]); } CkPrintf("\n"); } #endif contribute(CkCallback(CkIndex_Main::finishWork(NULL), mainProxy)); }
// Function that checks whether it must start the following step or wait until other messages are received void Patch::checkNextStep(){ int i; double timer; if (updateFlag && incomingFlag) { // resetting flags updateFlag = false; incomingFlag = false; stepCount++; // adding new elements for (i = 0; i < incomingParticles.length(); i++) particles.push_back(incomingParticles[i]); incomingParticles.removeAll(); if (thisIndex.x == 0 && thisIndex.y == 0 && thisIndex.z == 0 && stepCount%NUM_STEPS == 0) { timer = CmiWallTimer(); CkPrintf("Step %d Benchmark Time %f ms/step, Total Time Elapsed %f s\n", stepCount, ((timer - stepTime)/NUM_STEPS)*1000, timer); stepTime = timer; // if (stepCount == 300) // traceBegin(); // if (stepCount == 400) // traceEnd(); } // if (stepCount == 300 && thisIndex.x*patchArrayDimY*patchArrayDimZ + thisIndex.y*patchArrayDimZ + thisIndex.z < 8) // traceBegin(); // if (stepCount == 301 && thisIndex.x*patchArrayDimY*patchArrayDimZ + thisIndex.y*patchArrayDimZ + thisIndex.z < 8) // traceEnd(); // checking for next step if (stepCount >= finalStepCount) { // CkPrintf("Final number of particles is %d on Patch [%d][%d][%d]\n", particles.length(), thisIndex.x, thisIndex.y, thisIndex.z); print(); contribute(CkCallback(CkIndex_Main::allDone(), mainProxy)); } else { if (perform_lb){ AtSync(); LBTurnInstrumentOff(); perform_lb=false; } else{ thisProxy(thisIndex.x, thisIndex.y, thisIndex.z).start(); //contribute(CkCallback(CkIndex_Main::lbBarrier(),mainProxy)); } } } }
Participant() { int numPes = CkNumPes(); neighbors = new int[numPes]; for (int i = 0; i < numPes; i++) { neighbors[i] = i; } // shuffle to prevent bottlenecks for (int i = numPes-1; i >= 0; i--) { int shuffleIndex = rand() % (i+1); int temp = neighbors[i]; neighbors[i] = neighbors[shuffleIndex]; neighbors[shuffleIndex] = temp; } contribute(CkCallback(CkReductionTarget(Main, prepare), mainProxy)); }
void check_and_compute () { // if (--messages_due == 0) // messages_due = 4; compute (); if (iterations % ldbTime == 4/* || iterations == 100*/) { // printf("MINOR[%d] itr=%d ----------------------------- ssssssssssssss\n",thisIndex,iterations); AtSync (); } else // mainProxy.report (); contribute(CkCallback(CkIndex_Main::report(NULL),mainProxy)); }
/*
 * Constructs a patch from the particle data in fdmsg: resets all counters
 * and flags, creates one particle per message entry (charge, mass, position
 * and vdw type from the message; velocity and force zeroed), deletes the
 * message and joins the reduction that triggers Main::startUpDone.
 */
Patch::Patch(FileDataMsg* fdmsg) {
  LBTurnInstrumentOff();

  inbrs = numNbrs;
  usesAtSync = CmiTrue;

  updateCount = 0;
  forceCount = 0;
  stepCount = 0;
  resumeCount = 0;

  updateFlag = false;
  incomingFlag = false;
  perform_lb = false;

  incomingParticles.resize(0);

  // Particle initialization
  myNumParts = 0;
  for (int i = 0; i < fdmsg->length; i++) {
    particles.push_back(Particle());
    Particle &p = particles[myNumParts];

    p.charge = fdmsg->charge[i];
    p.mass = fdmsg->mass[i];

    p.x = fdmsg->coords[i].x;
    p.y = fdmsg->coords[i].y;
    p.z = fdmsg->coords[i].z;

    p.vx = 0;
    p.vy = 0;
    p.vz = 0;

    p.fx = 0;
    p.fy = 0;
    p.fz = 0;

    // The id is derived from the patch's x/y index only.
    p.id = (thisIndex.x*patchArrayDimX + thisIndex.y) * numParts /
               (patchArrayDimX*patchArrayDimY) + i;
    p.vdw_type = fdmsg->vdw_type[i];

    myNumParts++;
  }

  delete fdmsg;
  contribute(CkCallback(CkIndex_Main::startUpDone(), mainProxy));
}
void run() { for (int i = 0 ; i < numelements; i++) { if(thisIndex % 2 == 0 && thisIndex != numelements -1 ) { myMsg* m = workerarray[thisIndex + 1].sendSmaller(val); val = m->val; delete m; } barrier(); if (thisIndex % 2 == 1 && thisIndex != numelements -1 ) { myMsg* m = workerarray[thisIndex + 1].sendSmaller(val); val = m->val; delete m; } barrier(); } contribute(CkCallback(CkIndex_Main::done(NULL), mainproxy)); }
/**
 * Stores one incoming psi in the cache (in psis, or in psis_shifted when
 * the message is flagged as shifted), deletes the message, and joins the
 * Controller::cachesFilled reduction once all expected psis have arrived.
 *
 * @param msg  psi data with its k index and state index; aborts if
 *             spin_index is non-zero
 */
void PsiCache::receivePsi(PsiMessage* msg) {
  if (msg->spin_index != 0) {
    CkAbort("Error: We don't support multiple spins yet!\n");
  }
  CkAssert(msg->k_index < K);
  CkAssert(msg->state_index < L);
  CkAssert(msg->size == psi_size);

  if (msg->shifted == false) {
    std::copy(msg->psi, msg->psi + psi_size,
              psis[msg->k_index][msg->state_index]);
  }
  if (msg->shifted == true) {
    std::copy(msg->psi, msg->psi + psi_size,
              psis_shifted[msg->k_index][msg->state_index]);
  }
  delete msg;

  // Once the cache has received all of its data, start the sliding pipeline
  // sending of psis to P to start the accumulation of fxf'.
  // For qindex 0 a second set of K*L psis is expected as well.
  const int expected_psis = (qindex == 0) ? (K*L + K*L) : (K*L);

  ++received_psis;
  if (received_psis == expected_psis) {
    contribute(CkCallback(CkReductionTarget(Controller,cachesFilled),
                          controller_proxy));
  }
}
/**
 * Reads the parallel decomposition parameters from the GWBSE singleton,
 * allocates the K x L x psi_size storage for the regular and the shifted
 * psis along with the f and umklapp buffers, resets the bookkeeping, and
 * joins the reduction that triggers Controller::psiCacheReady.
 */
PsiCache::PsiCache() {
  GWBSE *gwbse = GWBSE::get();
  K = gwbse->gw_parallel.K;
  L = gwbse->gw_parallel.L;
  qindex = Q_IDX;
  psi_size = gwbse->gw_parallel.n_elems;
  pipeline_stages = gwbse->gw_parallel.pipeline_stages;

  received_psis = 0;
  received_chunks = 0;

  // Both caches have the same shape; the shifted k grid psis are needed for
  // qindex=0.
  psis = new complex**[K];
  psis_shifted = new complex**[K];
  for (int k = 0; k < K; k++) {
    psis[k] = new complex*[L];
    psis_shifted[k] = new complex*[L];
    for (int l = 0; l < L; l++) {
      psis[k][l] = new complex[psi_size];
      psis_shifted[k][l] = new complex[psi_size];
    }
  }

  fs = new complex[L*psi_size*pipeline_stages];
  umklapp_factor = new complex[psi_size];

  // Variables for chare region registration: start with an empty range so
  // that any real row/column narrows it.
  min_row = INT_MAX;
  min_col = INT_MAX;
  max_row = INT_MIN;
  max_col = INT_MIN;

  tile_lock = CmiCreateLock();
  total_time = 0.0;

  contribute(CkCallback(CkReductionTarget(Controller,psiCacheReady),
                        controller_proxy));
}
void next(void) { state++; expectParam=rand(); expectCount=1; switch(state) { case 0: //Send to chare expectType=typeChare; send(CkCallback(CkIndex_callbackChare::idx_accept(&callbackChare::accept), cp)); break; case 1: //Send to array element expectType=typeArray; send(CkCallback(CkIndex_callbackArray::accept(NULL), CkArrayIndex1D(nArr-1),ap)); break; case 2: //Send to group member 0 expectType=typeGroup; send(CkCallback(CkIndex_callbackGroup::accept(NULL), CkNumPes()-1,gp)); break; case 3: //Send to C function expectType=typeCfn; send(CkCallback(acceptCFnCall,&thisProxy)); break; case 4: //Broadcast to array expectCount=nArr; expectType=typeArray; send(CkCallback(CkIndex_callbackArray::accept(NULL),ap)); break; case 5: //Broadcast to group expectCount=CkNumPes(); expectType=typeGroup; send(CkCallback(CkIndex_callbackGroup::accept(NULL),gp)); break; case 6: //That's it expectType=-1; expectParam=-1; thisProxy.threadedTest(); break; }; }
void Patch::createSection() { localCreateSection(); contribute(CkCallback(CkIndex_Main::startUpDone(), mainProxy)); }
void Patch::createComputes() { //double d1 = CmiWallTimer(); int num; int x = thisIndex.x; int y = thisIndex.y; int z = thisIndex.z; int px1, py1, pz1, dx, dy, dz, px2, py2, pz2; // For Round Robin insertion int numPes = CkNumPes(); int currPe = CkMyPe(); computesList = new int*[numNbrs]; for (int i =0; i < numNbrs; i++){ computesList[i] = new int[6]; } /* The computes X are inserted by a given patch: * * ^ X X X * | 0 X X * y 0 0 0 * x ----> */ // these computes will be created by other patches for (num=0; num<numNbrs; num++) { dx = num / (nbrsY * nbrsZ) - nbrsX/2; dy = (num % (nbrsY * nbrsZ)) / nbrsZ - nbrsY/2; dz = num % nbrsZ - nbrsZ/2; if (num >= numNbrs/2){ px1 = x + 2; px2 = x+dx+2; py1 = y + 2; py2 = y+dy+2; pz1 = z + 2; pz2 = z+dz+2; computeArray(px1, py1, pz1, px2, py2, pz2).insert((++currPe)%numPes); computesList[num][0] = px1; computesList[num][1] = py1; computesList[num][2] = pz1; computesList[num][3] = px2; computesList[num][4] = py2; computesList[num][5] = pz2; } else { px2 = WRAP_X(x+dx); py2 = WRAP_Y(y+dy); pz2 = WRAP_Z(z+dz); px1 = x; py1 = y; pz1 = z; px1 = px2 - dx + 2; px2 = px2+2; py1 = py2 - dy + 2; py2 = py2+2; pz1 = pz2 - dz + 2; pz2 = pz2+2; computesList[num][0] = px2; computesList[num][1] = py2; computesList[num][2] = pz2; computesList[num][3] = px1; computesList[num][4] = py1; computesList[num][5] = pz1; } //insert only the upper right half computes } // end of for loop contribute(CkCallback(CkIndex_Main::startUpDone(), mainProxy)); //loadTime += CmiWallTimer()-d1; }
void States::fftGtoR() { // Set up the FFT data structures in the FFTController FFTController* fft_controller = fft_controller_proxy.ckLocalBranch(); int backward = 1; fft_controller->setup_fftw_3d(nfft,backward); fftw_complex* in_pointer = fft_controller->get_in_pointer(); fftw_complex* out_pointer = fft_controller->get_out_pointer(); // we need to setup fftidx int *g[3]; // put_into_fftbox routine takes 2D g array, so we need to do this g[0] = ga; g[1] = gb; g[2] = gc; int **fftidx; fftidx = new int *[numCoeff]; for(int i=0; i<numCoeff;i++){ fftidx[i] = new int [3]; } // this routine changes negative g index to be a positive numbers // since it is origianlly written with Fortran, fftidx has fortran counting, // i.e., if gidx is (0,0,0), then (1,1,1) in fftidx gidx_to_fftidx(numCoeff, g, nfft, fftidx); // state coefficients are copied to in_pointer // put_into_fftbox was originally written for doublePack = 0 (false) // for gamma point calculation, put_into_fftbox has been modified from the original version put_into_fftbox(numCoeff, stateCoeff, fftidx, nfft, in_pointer, doublePack); // tell the FFTController to do the fft fft_controller->do_fftw(); // transfer data from out_pointer to stateCoeffR // malloc stateRspace first int ndata = nfft[0]*nfft[1]*nfft[2]; stateCoeffR = new complex [ndata]; double scale = sqrt(1.0/double(ndata)); // IFFT requires normalization fftbox_to_array(ndata, out_pointer, stateCoeffR, scale); // delete stateCoeff delete [] stateCoeff; // fft for shifted states (only occupied states) int qindex = Q_IDX; if( istate < nocc && qindex == 0){ stateCoeffR_shifted = new complex [ndata]; put_into_fftbox(numCoeff, stateCoeff_shifted, fftidx, nfft, in_pointer, doublePack); fft_controller->do_fftw(); fftbox_to_array(ndata, out_pointer, stateCoeffR_shifted, scale); delete [] stateCoeff_shifted; } // delete space used for fftidx for (int i = 0; i < numCoeff; i++) { delete [] fftidx[i]; } delete [] fftidx; // tell the controller that the states are 
ready contribute(CkCallback(CkReductionTarget(Controller, fftComplete), controller_proxy)); }