void communicate(int iters, bool useTram) { GroupMeshStreamer<DataItem, Participant, SimpleMeshRouter> *localStreamer; if (useTram) { localStreamer = aggregator.ckLocalBranch(); } int ctr = 0; for (int i = 0; i < iters; i++) { for (int j=0; j<CkNumPes(); j++) { if (useTram) { localStreamer->insertData(myItem, neighbors[j]); } else { allToAllGroup[neighbors[j]].receive(myItem); ctr++; } } if (!useTram) { if (ctr == 1024) { ctr = 0; CthYield(); } } } if (useTram) { localStreamer->done(); } else { contribute(CkCallback(CkReductionTarget(Main, allDone), mainProxy)); } }
void getScannedVertexNum() { CmiUInt8 numScannedVertices = 0; typedef std::vector<BFSVertex>::iterator Iterator; for (Iterator it = vertices.begin(); it != vertices.end(); it++) numScannedVertices += it->getScannedVertexNum(); contribute(sizeof(CmiUInt8), &numScannedVertices, CkReduction::sum_long, CkCallback(CkReductionTarget(TestDriver, done), driverProxy)); }
/**
 * Resets the controller state, allocates the GSPACE object and then joins a
 * reduction that targets Controller::fftControllerReady.
 */
FFTController::FFTController() {
  first_time = true;
  out_pointer = NULL;
  in_pointer = out_pointer;
  geps = new GSPACE();

  // TODO: A group dependency could probably solve this better
  CkCallback ready(CkReductionTarget(Controller, fftControllerReady), controller_proxy);
  contribute(ready);
}
allToAll() { iter = 0; recvCnt = 0; msgs = new allToAllMsg*[numChares*msgCount]; for(int i = 0; i < msgCount*numChares; i++) { msgs[i] = new (msgSize) allToAllMsg; } // reduction to the mainchare to signal that initialization is complete contribute(CkCallback(CkReductionTarget(Main,allToAllReady), mainProxy)); }
// Coordination for starting simulation, network partition setup // void Main::StartSim() { CkPrintf("Starting simulation\n"); // Start simulation CkCallback *cb = new CkCallback(CkReductionTarget(Main, StopSim), mainProxy); network.ckSetReductionClient(cb); network.Cycle(); // Start timer tstart = std::chrono::system_clock::now(); }
/**
 * Main chare constructor.
 *
 * Reads the integer `n` from the first command-line argument, creates the B
 * and A proxies, invokes F() on A and starts quiescence detection whose
 * callback targets Main::done.
 *
 * @param msg  Command-line arguments; deleted by this constructor.
 */
Main(CkArgMsg* msg) {
  // argv[1] is mandatory: exit with a usage message rather than handing
  // atoi() a missing argument.
  if (msg->argc < 2) {
    CkPrintf("Usage: %s <n>\n", msg->argv[0]);
    delete msg;
    CkExit();
    return;  // CkExit() may return to the caller on some runtime versions
  }

  const int n = atoi(msg->argv[1]);
  delete msg;  // arguments are fully consumed at this point

  mainProxy = thisProxy;
  CkPrintf("n = %d\n",n);

  BProxy = CProxy_B::ckNew(n);
  AProxy = CProxy_A::ckNew(2);
  AProxy.F();

  CkCallback cb = CkCallback(CkReductionTarget(Main, done), thisProxy);
  CkStartQD(cb);
}
/**
 * Constructs one Pingping uChare.
 *
 * Prints a creation message, clears the ping/pong completion flags, fills the
 * ping counters with -1 and the pong counters with 999 (N_uChares entries
 * each), then joins the reduction that targets Main::start.
 *
 * @param index      Index forwarded to the uChare base class.
 * @param uchareset  Owning uChareSet, forwarded to the uChare base class.
 */
Pingping(std::size_t index,
         uChareSet<Pingping, CProxy_Pingping, CBase_Pingping> *uchareset)
    : uChare<Pingping, CProxy_Pingping, CBase_Pingping>(index, uchareset) {
  CkPrintf("[uchare=%d, chare=%d,pe=%d]: created \n",
           getId(),
           getuChareSet()->getId(),
           getuChareSet()->getPe());

  pongDone = false;
  pingDone = pongDone;

  // Ping side: N_uChares slots, all starting at -1
  pingCounters.resize(N_uChares);
  pingCounters.assign(N_uChares, -1);

  // Pong side: N_uChares slots, all starting at 999
  pongCounters.resize(N_uChares);
  pongCounters.assign(N_uChares, 999);

  CkCallback ready(CkReductionTarget(Main, start), mainProxy);
  contribute(ready);
}
// Coordination for file input, initialized chare arrays // void Main::InitSim() { // Wait on initialization if (++cinit == ninit) { CkPrintf("Setting up network parts\n"); // Load data from input files to network parts CkCallback *cb = new CkCallback(CkReductionTarget(Main, StartSim), mainProxy); network.ckSetReductionClient(cb); network.LoadNetwork(netdata); #ifdef STACS_WITH_YARP // Open RPC port streamrpc.Open(network); #endif } }
Participant() { int numPes = CkNumPes(); neighbors = new int[numPes]; for (int i = 0; i < numPes; i++) { neighbors[i] = i; } // shuffle to prevent bottlenecks for (int i = numPes-1; i >= 0; i--) { int shuffleIndex = rand() % (i+1); int temp = neighbors[i]; neighbors[i] = neighbors[shuffleIndex]; neighbors[shuffleIndex] = temp; } contribute(CkCallback(CkReductionTarget(Main, prepare), mainProxy)); }
// Coordination for stopping simulation // void Main::StopSim() { CkPrintf("Stopping simulation\n"); // Stop timer tfinish = std::chrono::system_clock::now(); // Print timing std::chrono::duration<real_t> tduration = std::chrono::duration_cast<std::chrono::milliseconds>(tfinish - tstart); CkPrintf("Elapsed time (wall clock): %" PRIrealsec " seconds\n", tduration.count()); // Save data from network parts to output files chalt = nhalt = 0; network.SaveNetwork(); ++nhalt; network.SaveRecord(); ++nhalt; // Set callback for halting CkCallback *cb = new CkCallback(CkReductionTarget(Main, FiniSim), mainProxy); netdata.ckSetReductionClient(cb); }
/**
 * Stores one incoming psi in the cache and deletes the message.
 *
 * The psi is copied into `psis` or `psis_shifted` at
 * [k_index][state_index] depending on the message's `shifted` flag. When the
 * number of received psis reaches the expected total (K*L, doubled when
 * qindex is 0) the cache joins a reduction targeting Controller::cachesFilled.
 *
 * @param msg  Message carrying the psi data; aborts if its spin_index is
 *             non-zero.
 */
void PsiCache::receivePsi(PsiMessage* msg) {
  if (msg->spin_index != 0) {
    CkAbort("Error: We don't support multiple spins yet!\n");
  }
  CkAssert(msg->k_index < K);
  CkAssert(msg->state_index < L);
  CkAssert(msg->size == psi_size);

  // Pick the destination table from the shifted flag
  if (!msg->shifted) {
    std::copy(msg->psi, msg->psi + psi_size,
              psis[msg->k_index][msg->state_index]);
  } else if (msg->shifted == true) {
    std::copy(msg->psi, msg->psi + psi_size,
              psis_shifted[msg->k_index][msg->state_index]);
  }
  delete msg;

  // Once the cache has received all of its data start the sliding pipeline
  // sending of psis to P to start the accumulation of fxf'.
  int expected_psis = K*L;
  if (qindex == 0) {
    expected_psis += K*L;
  }

  ++received_psis;
  if (received_psis == expected_psis) {
    //CkPrintf("[%d]: Cache filled\n", CkMyPe());
    CkCallback filled(CkReductionTarget(Controller,cachesFilled), controller_proxy);
    contribute(filled);
  }
}
/**
 * Reads the cache dimensions from the GWBSE singleton, allocates the psi
 * tables (regular and shifted) and the f-vector / umklapp buffers, resets the
 * region-registration bounds, creates the tile lock, and joins the reduction
 * that targets Controller::psiCacheReady.
 */
PsiCache::PsiCache() {
  GWBSE *gwbse = GWBSE::get();

  // Dimensions and pipeline configuration
  K = gwbse->gw_parallel.K;
  L = gwbse->gw_parallel.L;
  psi_size = gwbse->gw_parallel.n_elems;
  pipeline_stages = gwbse->gw_parallel.pipeline_stages;
  qindex = Q_IDX;

  received_psis = 0;
  received_chunks = 0;

  // K x L tables of psi_size-long vectors; the shifted k grid psis are
  // needed for qindex=0
  psis = new complex**[K];
  psis_shifted = new complex**[K];
  for (int k = 0; k < K; k++) {
    psis[k] = new complex*[L];
    psis_shifted[k] = new complex*[L];
    for (int l = 0; l < L; l++) {
      psis[k][l] = new complex[psi_size];
      psis_shifted[k][l] = new complex[psi_size];
    }
  }

  fs = new complex[L*psi_size*pipeline_stages];
  umklapp_factor = new complex[psi_size];

  // Variables for chare region registration
  min_row = INT_MAX;
  max_row = INT_MIN;
  min_col = INT_MAX;
  max_col = INT_MIN;
  tile_lock = CmiCreateLock();

  total_time = 0.0;

  CkCallback ready(CkReductionTarget(Controller,psiCacheReady), controller_proxy);
  contribute(ready);
}
// Main entry point // Main::Main(CkArgMsg *msg) { // Display title CkPrintf("Simulation Tool for Asynchrnous Cortical Streams (stacs)\n"); // Command line arguments std::string configfile; if (msg->argc < 2) { configfile = "config.yml"; // default } else { configfile = msg->argv[1]; } delete msg; // Parsing config if (ParseConfig(configfile)) { CkPrintf("Error loading config...\n"); CkExit(); } // Charm information real_t netpe = (real_t)npnet/CkNumPes(); if (netpe < 1) { netpe = 1; } // Display configuration information CkPrintf("Loaded config from %s\n" " Data Files (npdat): %" PRIidx "\n" " Network Parts (npnet): %" PRIidx "\n" " Processing Elements: %d\n" " Network Parts per PE: %.2g\n" " Total Simulation Time (tmax): %" PRItick "\n" " Simulation Time Step (tstep): %" PRItick "\n" " Checkpoint Interval (tcheck): %" PRItick "\n", configfile.c_str(), npdat, npnet, CkNumPes(), netpe, tmax, tstep, tcheck); // Read vertex distribution CkPrintf("Initializing simulation\n"); if (ReadDist()) { CkPrintf("Error loading distribution...\n"); CkExit(); } // Read model information if (ReadModel()) { CkPrintf("Error loading models...\n"); CkExit(); } // Setup Charm++ variables mainProxy = thisProxy; mCastGrpId = CProxy_CkMulticastMgr::ckNew(); // Initialize coordination cinit = 0; ninit = 0; #ifdef STACS_WITH_YARP // Initialize YARP yarp.init(); #endif // Setup chare arrays CkCallback *cb = new CkCallback(CkReductionTarget(Main, InitSim), mainProxy); // netdata ++ninit; mDist *mdist = BuildDist(); netdata = CProxy_NetData::ckNew(mdist, npdat); netdata.ckSetReductionClient(cb); // network ++ninit; mModel *mmodel = BuildModel(); network = CProxy_Network::ckNew(mmodel, npnet); network.ckSetReductionClient(cb); #ifdef STACS_WITH_YARP // streamrpc ++ninit; mVtxDist *mvtxdist = BuildVtxDist(); streamrpc = CProxy_StreamRPC::ckNew(mvtxdist); #endif }
void call_contribute(/*CkReduction::reducerType op,*/ const CmiUInt8 & v) { contribute(sizeof(CmiUInt8), &v, CkReduction::sum_long, CkCallback(CkReductionTarget(Main, verify_contribute), mainProxy)); }
Hello(const uChareAttr_Hello &attr) : CBase_uChare_Hello(attr) { //CkPrintf("[uchare=%d, chare=%d,pe=%d]: created \n", // getId(), getuChareSet()->getId(), getuChareSet()->getPe()); contribute(CkCallback(CkReductionTarget(Main, init), mainProxy)); }
void finish(){ recvCnt = 0; contribute(CkCallback(CkReductionTarget(Main,nextallToAll), mainProxy)); }
Hello_charm_ref() { //CkPrintf("[uchare=%d, chare=%d,pe=%d]: created \n", // getId(), getuChareSet()->getId(), getuChareSet()->getPe()); contribute(CkCallback(CkReductionTarget(Main, start), mainProxy)); }
void getScannedVertexNum() { CmiUInt8 c = (parent == -1 ? 0 : 1); contribute(sizeof(CmiUInt8), &c, CkReduction::sum_long, CkCallback(CkReductionTarget(TestDriver, done), driverProxy)); }
/**
 * Sets up this node's share of a 7x7 grid of epsilon chares.
 *
 * The grid is divided evenly across CkNumNodes() nodes, with node 0 also
 * taking the remainder. After findIndices() runs, the (x, y) chare indices
 * in the range given by the eps_start/eps_end members are recorded, the per-chare data
 * sizes and offsets are computed, the `fs` buffer is allocated, and the
 * cache joins the reduction that targets Controller::fCacheReady.
 */
FVectorCache::FVectorCache() {
  eps_chares_x = 7;
  eps_chares_y = 7;
  totalSize = 0;

  GWBSE *gwbse = GWBSE::get();
  L = gwbse->gw_parallel.L;

  // Split the chare grid across nodes; node 0 absorbs the leftover chares
  // and every later node starts after them.
  const int total_eps_chares = eps_chares_x*eps_chares_y;
  const int leftover = total_eps_chares%CkNumNodes();
  my_chare_count = total_eps_chares/CkNumNodes();
  my_chare_start = CkMyNode()*my_chare_count;
  if (CkMyNode() > 0) {
    my_chare_start += leftover;
  }
  if (CkMyNode() == 0) {
    my_chare_count += leftover;
  }

  my_eps_chare_indices_x = new int[my_chare_count];
  my_eps_chare_indices_y = new int[my_chare_count];

  findIndices();

  // Record the (x, y) index of each chare in the start..end range. The first
  // row starts at eps_start_chare_y and the last row stops at
  // eps_end_chare_y; rows in between span the full y extent.
  int filled = 0;
  for (int ix = eps_start_chare_x; ix <= eps_end_chare_x; ix++) {
    int jy = 0;
    if (ix == eps_start_chare_x) {
      jy = eps_start_chare_y;
    }
    int jy_last = eps_chares_y-1;
    if (ix == eps_end_chare_x) {
      jy_last = eps_end_chare_y;
    }
    for (; jy <= jy_last; jy++) {
      my_eps_chare_indices_x[filled] = ix;
      my_eps_chare_indices_y[filled] = jy;
      filled++;
    }
  }

  // Per-chare data extent in each direction; 2 is added when ndata does not
  // divide evenly.
  ndata = gwbse->gw_parallel.n_elems;
  data_size_x = ndata/eps_chares_x;
  if (ndata%eps_chares_x > 0) {
    data_size_x += 2;
  }
  data_size_y = ndata/eps_chares_y;
  if (ndata%eps_chares_y > 0) {
    data_size_y += 2;
  }

  data_offset_x = new int[my_chare_count];
  data_offset_y = new int[my_chare_count];
  local_offset = new int[my_chare_count*2];
  global_offset = new int[my_chare_count*2];

  // For each chare: the global offsets come from its grid position, the
  // local offsets are running positions inside this cache's storage.
  const int size_x = data_size_x;
  const int size_y = data_size_y;
  for (int c = 0; c < my_chare_count; c++) {
    data_offset_x[c] = my_eps_chare_indices_x[c]*data_size_x;
    data_offset_y[c] = my_eps_chare_indices_y[c]*data_size_y;

    global_offset[2*c] = data_offset_x[c];//totalSize;
    local_offset[2*c] = totalSize;
    totalSize += size_x;

    global_offset[2*c+1] = data_offset_y[c];//totalSize;
    local_offset[2*c+1] = totalSize;
    totalSize += size_y;
  }

  fs = new complex[NSIZE*L*totalSize];

  CkCallback ready(CkReductionTarget(Controller,fCacheReady), controller_proxy);
  contribute(ready);
}
void PsiCache::setVCoulb(std::vector<double> vcoulb_in){ vcoulb = vcoulb_in; contribute(CkCallback(CkReductionTarget(Controller,prepare_epsilon), controller_proxy)); }
// Receive an unoccupied psi, and split off the computation of all associated f // vectors across the node using CkLoop. void PsiCache::computeFs(PsiMessage* msg) { double start = CmiWallTimer(); if (msg->spin_index != 0) { CkAbort("Error: We don't support multiple spins yet!\n"); } CkAssert(msg->size == psi_size); // Compute ikq index and the associated umklapp factor // TODO: This should just be a table lookup unsigned ikq; int umklapp[3]; kqIndex(msg->k_index, ikq, umklapp); bool uproc = false; if (umklapp[0] != 0 || umklapp[1] != 0 || umklapp[2] != 0) { uproc = true; computeUmklappFactor(umklapp); } GWBSE* gwbse = GWBSE::get(); double*** e_occ = gwbse->gw_epsilon.Eocc; double*** e_occ_shifted = gwbse->gw_epsilon.Eocc_shifted; double*** e_unocc = gwbse->gw_epsilon.Eunocc; // Create the FComputePacket for this set of f vectors and start CkLoop f_packet.size = psi_size; f_packet.unocc_psi = msg->psi; if ( qindex == 0 ) { f_packet.occ_psis = psis_shifted[ikq]; f_packet.e_occ = e_occ_shifted[msg->spin_index][ikq]; } else { f_packet.occ_psis = psis[ikq]; f_packet.e_occ = e_occ[msg->spin_index][ikq]; } f_packet.e_unocc = e_unocc[msg->spin_index][msg->k_index][msg->state_index-L]; f_packet.fs = fs + (L*psi_size*(received_chunks%pipeline_stages)); if (uproc) { f_packet.umklapp_factor = umklapp_factor; } else { f_packet.umklapp_factor = NULL; } #ifdef USE_CKLOOP CkLoop_Parallelize(computeF, 1, &f_packet, L, 0, L - 1); #else for (int l = 0; l < L; l++) { computeF(l,l,NULL,1,&f_packet); } #endif received_chunks++; #ifdef TESTING { FVectorCache *fvec_cache = fvector_cache_proxy.ckLocalBranch(); fvec_cache->computeFTilde(fs); // fvec_cache->applyCutoff(msg->accept_size, msg->accept); // fvec_cache->init(140); //compute ftilde first - similar to ckloop above for all L's fvec_cache->putFVec(msg->state_index-L, fs); } #endif // Let the matrix chares know that the f vectors are ready CkCallback cb(CkReductionTarget(PMatrix, applyFs), pmatrix2D_proxy); contribute(cb); // Cleanup 
delete msg; total_time += CmiWallTimer() - start; }
void barrier() { contribute (CkCallback(CkReductionTarget(Worker, barrierH), workerarray)); t = CthSelf(); CthSuspend(); }
void States::fftGtoR() { // Set up the FFT data structures in the FFTController FFTController* fft_controller = fft_controller_proxy.ckLocalBranch(); int backward = 1; fft_controller->setup_fftw_3d(nfft,backward); fftw_complex* in_pointer = fft_controller->get_in_pointer(); fftw_complex* out_pointer = fft_controller->get_out_pointer(); // we need to setup fftidx int *g[3]; // put_into_fftbox routine takes 2D g array, so we need to do this g[0] = ga; g[1] = gb; g[2] = gc; int **fftidx; fftidx = new int *[numCoeff]; for(int i=0; i<numCoeff;i++){ fftidx[i] = new int [3]; } // this routine changes negative g index to be a positive numbers // since it is origianlly written with Fortran, fftidx has fortran counting, // i.e., if gidx is (0,0,0), then (1,1,1) in fftidx gidx_to_fftidx(numCoeff, g, nfft, fftidx); // state coefficients are copied to in_pointer // put_into_fftbox was originally written for doublePack = 0 (false) // for gamma point calculation, put_into_fftbox has been modified from the original version put_into_fftbox(numCoeff, stateCoeff, fftidx, nfft, in_pointer, doublePack); // tell the FFTController to do the fft fft_controller->do_fftw(); // transfer data from out_pointer to stateCoeffR // malloc stateRspace first int ndata = nfft[0]*nfft[1]*nfft[2]; stateCoeffR = new complex [ndata]; double scale = sqrt(1.0/double(ndata)); // IFFT requires normalization fftbox_to_array(ndata, out_pointer, stateCoeffR, scale); // delete stateCoeff delete [] stateCoeff; // fft for shifted states (only occupied states) int qindex = Q_IDX; if( istate < nocc && qindex == 0){ stateCoeffR_shifted = new complex [ndata]; put_into_fftbox(numCoeff, stateCoeff_shifted, fftidx, nfft, in_pointer, doublePack); fft_controller->do_fftw(); fftbox_to_array(ndata, out_pointer, stateCoeffR_shifted, scale); delete [] stateCoeff_shifted; } // delete space used for fftidx for (int i = 0; i < numCoeff; i++) { delete [] fftidx[i]; } delete [] fftidx; // tell the controller that the states are 
ready contribute(CkCallback(CkReductionTarget(Controller, fftComplete), controller_proxy)); }
void getScannedEdgesNum() { contribute(sizeof(CmiUInt8), &numScannedEdges, CkReduction::sum_long, CkCallback(CkReductionTarget(TestDriver, done), driverProxy)); }