Beispiel #1
0
  void communicate(int iters, bool useTram) {
    GroupMeshStreamer<DataItem, Participant, SimpleMeshRouter> *localStreamer;
    if (useTram) {
      localStreamer = aggregator.ckLocalBranch();
    }

    int ctr = 0;
    for (int i = 0; i < iters; i++) {
      for (int j=0; j<CkNumPes(); j++) {
        if (useTram) {
          localStreamer->insertData(myItem, neighbors[j]);
        }
        else {
          allToAllGroup[neighbors[j]].receive(myItem);
          ctr++;
        }
      }
      if (!useTram) {
        if (ctr == 1024) {
          ctr = 0;
          CthYield();
        }
      }
    }
    if (useTram) {
      localStreamer->done();
    }
    else {
      contribute(CkCallback(CkReductionTarget(Main, allDone), mainProxy));
    }
  }
Beispiel #2
0
		void getScannedVertexNum() {
			CmiUInt8 numScannedVertices = 0;
			typedef std::vector<BFSVertex>::iterator Iterator;
			for (Iterator it = vertices.begin(); it != vertices.end(); it++) 
				numScannedVertices += it->getScannedVertexNum();
			contribute(sizeof(CmiUInt8), &numScannedVertices, CkReduction::sum_long,
								 CkCallback(CkReductionTarget(TestDriver, done),
														driverProxy));
		}
Beispiel #3
0
/**
 * Resets the controller's bookkeeping (no FFT buffers yet, first_time set),
 * allocates the GSPACE object and contributes to the reduction that targets
 * Controller::fftControllerReady.
 */
FFTController::FFTController() {
  // No input/output buffers have been handed out yet
  out_pointer = NULL;
  in_pointer = NULL;
  first_time = true;

  geps = new GSPACE();

  // TODO: A group dependency could probably solve this better
  CkCallback readyCb(CkReductionTarget(Controller, fftControllerReady), controller_proxy);
  contribute(readyCb);
}
Beispiel #4
0
	allToAll() {
        iter = 0;
        recvCnt = 0;
		msgs = new allToAllMsg*[numChares*msgCount];
		for(int i = 0; i < msgCount*numChares; i++) {
			msgs[i] = new (msgSize) allToAllMsg;
		}

		// reduction to the mainchare to signal that initialization is complete
		contribute(CkCallback(CkReductionTarget(Main,allToAllReady), mainProxy));
	}
Beispiel #5
0
// Coordination for starting simulation, network partition setup
//
void Main::StartSim() {
  CkPrintf("Starting simulation\n");

  // Start simulation
  CkCallback *cb = new CkCallback(CkReductionTarget(Main, StopSim), mainProxy);
  network.ckSetReductionClient(cb);
  network.Cycle();

  // Start timer
  tstart = std::chrono::system_clock::now();
}
Beispiel #6
0
  /**
   * Main chare constructor.
   *
   * Reads the required integer argument n from argv[1], creates B via
   * CProxy_B::ckNew(n) and A via CProxy_A::ckNew(2), invokes F() on A and
   * registers Main::done as the quiescence-detection callback.
   *
   * @param msg command-line arguments; deleted here once parsed
   */
  Main(CkArgMsg* msg) {
    // argv[1] is mandatory: fail with a clear message instead of handing a
    // null pointer to atoi() when it is missing.
    if (msg->argc < 2) {
      CkAbort("Missing required argument: n\n");
    }
    int n = atoi(msg->argv[1]);
    // Nothing below reads the argument message, so release it now.
    delete msg;

    mainProxy = thisProxy;

    CkPrintf("n = %d\n",n);
    BProxy = CProxy_B::ckNew(n);
    AProxy = CProxy_A::ckNew(2);
    AProxy.F();

    // Invoke Main::done once quiescence is detected
    CkCallback cb(CkReductionTarget(Main, done), thisProxy);
    CkStartQD(cb);

  }
Beispiel #7
0
		/**
		 * Constructs the uChare, logs its identity, resets the ping/pong state
		 * and contributes to the reduction that targets Main::start.
		 *
		 * @param index     index forwarded to the uChare base class
		 * @param uchareset owning uChareSet forwarded to the uChare base class
		 */
		Pingping(std::size_t index, uChareSet<Pingping, CProxy_Pingping, CBase_Pingping> *uchareset) : 
			uChare<Pingping, CProxy_Pingping, CBase_Pingping>(index, uchareset)  {
			CkPrintf("[uchare=%d, chare=%d,pe=%d]: created \n", 
					getId(), getuChareSet()->getId(), getuChareSet()->getPe());

			pongDone = false;
			pingDone = false;

			// One counter per uChare: pings start at -1, pongs at 999
			pingCounters.resize(N_uChares);
			pingCounters.assign(N_uChares, -1);
			pongCounters.resize(N_uChares);
			pongCounters.assign(N_uChares, 999);

			CkCallback startCb(CkReductionTarget(Main, start), mainProxy);
			contribute(startCb);
		}
Beispiel #8
0
// Coordination for file input, initialized chare arrays
//
void Main::InitSim() {
  // Wait on initialization
  if (++cinit == ninit) {
    CkPrintf("Setting up network parts\n");

    // Load data from input files to network parts
    CkCallback *cb = new CkCallback(CkReductionTarget(Main, StartSim), mainProxy);
    network.ckSetReductionClient(cb);
    network.LoadNetwork(netdata);
    
#ifdef STACS_WITH_YARP
    // Open RPC port
    streamrpc.Open(network);
#endif
  }
}
Beispiel #9
0
  Participant() {

    int numPes = CkNumPes();
    neighbors = new int[numPes];
    for (int i = 0; i < numPes; i++) {
      neighbors[i] = i;
    }

    // shuffle to prevent bottlenecks
    for (int i = numPes-1; i >= 0; i--) {
      int shuffleIndex = rand() % (i+1);
      int temp = neighbors[i];
      neighbors[i] = neighbors[shuffleIndex];
      neighbors[shuffleIndex] = temp;
    }

    contribute(CkCallback(CkReductionTarget(Main, prepare), mainProxy));
  }
Beispiel #10
0
// Coordination for stopping simulation
//
void Main::StopSim() {
  CkPrintf("Stopping simulation\n");
  
  // Stop timer
  tfinish = std::chrono::system_clock::now();
  // Print timing
  std::chrono::duration<real_t> tduration = std::chrono::duration_cast<std::chrono::milliseconds>(tfinish - tstart);
  CkPrintf("Elapsed time (wall clock): %" PRIrealsec " seconds\n", tduration.count());

  // Save data from network parts to output files
  chalt = nhalt = 0;
  network.SaveNetwork();
  ++nhalt;
  network.SaveRecord();
  ++nhalt;
  
  // Set callback for halting
  CkCallback *cb = new CkCallback(CkReductionTarget(Main, FiniSim), mainProxy);
  netdata.ckSetReductionClient(cb);
}
Beispiel #11
0
/**
 * Stores one incoming psi in the cache and, once every expected psi has
 * arrived, contributes to the reduction that targets Controller::cachesFilled.
 *
 * The message is copied into psis or psis_shifted (selected by msg->shifted)
 * at [k_index][state_index] and then deleted. K*L psis are expected, plus
 * another K*L when qindex is 0.
 *
 * @param msg psi message; ownership is taken and the message is deleted here
 */
void PsiCache::receivePsi(PsiMessage* msg) {
  if (msg->spin_index != 0) {
    CkAbort("Error: We don't support multiple spins yet!\n");
  }
  CkAssert(msg->k_index < K);
  CkAssert(msg->state_index < L);
  CkAssert(msg->size == psi_size);

  // Copy the payload into the matching cache slot
  if (msg->shifted == false) {
    std::copy(msg->psi, msg->psi+psi_size, psis[msg->k_index][msg->state_index]);
  }
  else if (msg->shifted == true) {
    std::copy(msg->psi, msg->psi+psi_size, psis_shifted[msg->k_index][msg->state_index]);
  }
  delete msg;

  // Once the cache has received all of its data start the sliding pipeline
  // sending of psis to P to start the accumulation of fxf'.
  int expected_psis = (qindex == 0) ? (K*L + K*L) : (K*L);
  if (++received_psis != expected_psis) {
    return;
  }

  //CkPrintf("[%d]: Cache filled\n", CkMyPe());
  CkCallback filledCb(CkReductionTarget(Controller,cachesFilled), controller_proxy);
  contribute(filledCb);
}
Beispiel #12
0
/**
 * Reads the parallel decomposition parameters from the GWBSE singleton,
 * allocates the psi caches (regular and shifted), the f-vector buffer and the
 * umklapp factor buffer, resets the region bookkeeping, and contributes to
 * the reduction that targets Controller::psiCacheReady.
 */
PsiCache::PsiCache() {
  GWBSE *gwbse = GWBSE::get();

  // Sizes taken from the parallel configuration
  K = gwbse->gw_parallel.K;
  L = gwbse->gw_parallel.L;
  psi_size = gwbse->gw_parallel.n_elems;
  pipeline_stages = gwbse->gw_parallel.pipeline_stages;
  qindex = Q_IDX;

  // Nothing received yet
  received_chunks = 0;
  received_psis = 0;

  // K x L table of psi_size-long vectors
  psis = new complex**[K];
  for (int ik = 0; ik < K; ++ik) {
    psis[ik] = new complex*[L];
    for (int is = 0; is < L; ++is) {
      psis[ik][is] = new complex[psi_size];
    }
  }

  // shifted k grid psis. Need this for qindex=0
  psis_shifted = new complex**[K];
  for (int ik = 0; ik < K; ++ik) {
    psis_shifted[ik] = new complex*[L];
    for (int is = 0; is < L; ++is) {
      psis_shifted[ik][is] = new complex[psi_size];
    }
  }

  // Room for L f vectors per pipeline stage
  fs = new complex[L*psi_size*pipeline_stages];

  umklapp_factor = new complex[psi_size];

  // Variables for chare region registration: start with an empty bounding
  // box so the first registered region replaces every bound
  max_col = INT_MIN;
  max_row = INT_MIN;
  min_col = INT_MAX;
  min_row = INT_MAX;
  tile_lock = CmiCreateLock();

  total_time = 0.0;

  CkCallback readyCb(CkReductionTarget(Controller,psiCacheReady), controller_proxy);
  contribute(readyCb);
}
Beispiel #13
0
// Main entry point
//
// Parses the configuration file named by argv[1] (default "config.yml"),
// prints the loaded settings, reads the vertex distribution and the model
// information, and creates the netdata and network chare arrays. Each array
// reports to Main::InitSim through its reduction client; ninit counts how
// many such reports InitSim has to wait for.
//
// msg: command-line arguments; deleted here once the config name is copied.
//
Main::Main(CkArgMsg *msg) {
  // Display title
  CkPrintf("Simulation Tool for Asynchronous Cortical Streams (stacs)\n");

  // Command line arguments
  std::string configfile;
  if (msg->argc < 2) {
    configfile = "config.yml"; // default
  }
  else {
    configfile = msg->argv[1];
  }
  delete msg;

  // Parsing config
  if (ParseConfig(configfile)) {
    CkPrintf("Error loading config...\n");
    CkExit();
  }

  // Charm information
  real_t netpe = (real_t)npnet/CkNumPes();
  if (netpe < 1) { netpe = 1; }

  // Display configuration information
  CkPrintf("Loaded config from %s\n"
           "  Data Files (npdat):     %" PRIidx "\n"
           "  Network Parts (npnet):  %" PRIidx "\n"
           "  Processing Elements:    %d\n"
           "  Network Parts per PE:   %.2g\n"
           "  Total Simulation Time (tmax): %" PRItick "\n"
           "  Simulation Time Step (tstep): %" PRItick "\n"
           "  Checkpoint Interval (tcheck): %" PRItick "\n",
           configfile.c_str(), npdat, npnet,
           CkNumPes(), netpe, tmax, tstep, tcheck);

  // Read vertex distribution
  CkPrintf("Initializing simulation\n");
  if (ReadDist()) {
    CkPrintf("Error loading distribution...\n");
    CkExit();
  }
  // Read model information
  if (ReadModel()) {
    CkPrintf("Error loading models...\n");
    CkExit();
  }
  
  // Setup Charm++ variables
  mainProxy = thisProxy;
  mCastGrpId = CProxy_CkMulticastMgr::ckNew();

  // Initialize coordination
  cinit = 0;
  ninit = 0;

#ifdef STACS_WITH_YARP
  // Initialize YARP
  yarp.init();
#endif

  // Setup chare arrays.
  // Each array is handed its own heap-allocated callback: passing the same
  // pointer to two ckSetReductionClient calls would leave two arrays
  // referring to a single object that either of them may later release.
  // netdata
  ++ninit;
  mDist *mdist = BuildDist();
  netdata = CProxy_NetData::ckNew(mdist, npdat);
  netdata.ckSetReductionClient(
      new CkCallback(CkReductionTarget(Main, InitSim), mainProxy));
  // network
  ++ninit;
  mModel *mmodel = BuildModel();
  network = CProxy_Network::ckNew(mmodel, npnet);
  network.ckSetReductionClient(
      new CkCallback(CkReductionTarget(Main, InitSim), mainProxy));
#ifdef STACS_WITH_YARP
  // streamrpc
  ++ninit;
  mVtxDist *mvtxdist = BuildVtxDist();
  streamrpc = CProxy_StreamRPC::ckNew(mvtxdist);
#endif
}
Beispiel #14
0
		void call_contribute(/*CkReduction::reducerType op,*/ const CmiUInt8 & v) { 
			contribute(sizeof(CmiUInt8), &v, CkReduction::sum_long,
					        CkCallback(CkReductionTarget(Main, verify_contribute), mainProxy));
		}
Beispiel #15
0
		Hello(const uChareAttr_Hello &attr) : CBase_uChare_Hello(attr) {
			//CkPrintf("[uchare=%d, chare=%d,pe=%d]: created \n", 
			//		getId(), getuChareSet()->getId(), getuChareSet()->getPe());

			contribute(CkCallback(CkReductionTarget(Main, init), mainProxy));
		}
Beispiel #16
0
	void finish(){
	    recvCnt = 0;	
        contribute(CkCallback(CkReductionTarget(Main,nextallToAll), mainProxy));
	}
Beispiel #17
0
		Hello_charm_ref() {
			//CkPrintf("[uchare=%d, chare=%d,pe=%d]: created \n", 
			//		getId(), getuChareSet()->getId(), getuChareSet()->getPe());

			contribute(CkCallback(CkReductionTarget(Main, start), mainProxy));
		}
Beispiel #18
0
	void getScannedVertexNum() {
		CmiUInt8 c = (parent == -1 ? 0 : 1);
		contribute(sizeof(CmiUInt8), &c, CkReduction::sum_long,
				CkCallback(CkReductionTarget(TestDriver, done), driverProxy));
	}
Beispiel #19
0
/**
 * Divides the eps_chares_x * eps_chares_y chare grid among the nodes, records
 * which chare indices fall to this node, derives the per-chare data sizes and
 * offsets, allocates the f-vector storage, and contributes to the reduction
 * that targets Controller::fCacheReady.
 */
FVectorCache::FVectorCache() {
  totalSize = 0;
  eps_chares_x = 7;
  eps_chares_y = 7;
  GWBSE *gwbse = GWBSE::get();
  L = gwbse->gw_parallel.L;

  // Even split across nodes; node 0 also takes the remainder, so every other
  // node's starting chare is pushed back by that remainder.
  int total_eps_chares = eps_chares_x*eps_chares_y;
  my_chare_count = total_eps_chares/CkNumNodes();
  my_chare_start = CkMyNode()*my_chare_count;
  int leftover = total_eps_chares%CkNumNodes();

  if (CkMyNode() == 0) {
    my_chare_count += leftover;
  }
  else if (CkMyNode() > 0) {
    my_chare_start += leftover;
  }

  my_eps_chare_indices_x = new int[my_chare_count];
  my_eps_chare_indices_y = new int[my_chare_count];

  // Enumerate the (x, y) chare indices between the start and end chares in
  // row-major order; the first and last rows may be partial.
  findIndices();
  int filled = 0;
  for (int x = eps_start_chare_x; x <= eps_end_chare_x; x++) {
    int y_first = 0;
    if (x == eps_start_chare_x) {
      y_first = eps_start_chare_y;
    }
    int y_last = eps_chares_y-1;
    if (x == eps_end_chare_x) {
      y_last = eps_end_chare_y;
    }
    for (int y = y_first; y <= y_last; y++) {
      my_eps_chare_indices_x[filled] = x;
      my_eps_chare_indices_y[filled] = y;
      filled++;
    }
  }

  // Per-chare data extent in each dimension; grown by 2 when ndata does not
  // divide evenly
  ndata = gwbse->gw_parallel.n_elems;
  data_size_x = ndata/eps_chares_x;
  if (ndata%eps_chares_x > 0) {
    data_size_x += 2;
  }
  data_size_y = ndata/eps_chares_y;
  if (ndata%eps_chares_y > 0) {
    data_size_y += 2;
  }

  data_offset_x = new int[my_chare_count];
  data_offset_y = new int[my_chare_count];
  for (int c = 0; c < my_chare_count; c++) {
    data_offset_x[c] = my_eps_chare_indices_x[c]*data_size_x;
    data_offset_y[c] = my_eps_chare_indices_y[c]*data_size_y;
  }

  // Each chare owns two consecutive local segments: its x part (even slot)
  // followed by its y part (odd slot). totalSize accumulates their lengths.
  int size_x = data_size_x;
  int size_y = data_size_y;
  local_offset =  new int[my_chare_count*2];
  global_offset = new int[my_chare_count*2];
  for (int c = 0; c < my_chare_count; c++) {
    const int x_slot = 2*c;
    const int y_slot = 2*c+1;

    global_offset[x_slot] = data_offset_x[c];
    local_offset[x_slot] = totalSize;
    totalSize += size_x;

    global_offset[y_slot] = data_offset_y[c];
    local_offset[y_slot] = totalSize;
    totalSize += size_y;
  }

  fs = new complex[NSIZE*L*totalSize];

  CkCallback readyCb(CkReductionTarget(Controller,fCacheReady), controller_proxy);
  contribute(readyCb);
}
Beispiel #20
0
void PsiCache::setVCoulb(std::vector<double> vcoulb_in){
  vcoulb = vcoulb_in;
  contribute(CkCallback(CkReductionTarget(Controller,prepare_epsilon), controller_proxy));
}
Beispiel #21
0
// Receive an unoccupied psi, and split off the computation of all associated f
// vectors across the node using CkLoop.
//
// Fills f_packet from the message and the cached occupied psis, runs computeF
// for l = 0..L-1 (through CkLoop when USE_CKLOOP is defined, otherwise in a
// plain loop), then contributes to the reduction targeting PMatrix::applyFs.
// The message is deleted at the end and the elapsed wall time is added to
// total_time.
void PsiCache::computeFs(PsiMessage* msg) {
  double start = CmiWallTimer();

  if (msg->spin_index != 0) {
    CkAbort("Error: We don't support multiple spins yet!\n");
  }
  CkAssert(msg->size == psi_size);

  // Compute ikq index and the associated umklapp factor
  // TODO: This should just be a table lookup
  unsigned ikq;
  int umklapp[3];
  kqIndex(msg->k_index, ikq, umklapp);

  // The umklapp factor is only computed when at least one component is nonzero
  bool uproc = false;
  if (umklapp[0] != 0 || umklapp[1] != 0 || umklapp[2] != 0) {
    uproc = true;
    computeUmklappFactor(umklapp);
  }

  GWBSE* gwbse = GWBSE::get();
  double*** e_occ = gwbse->gw_epsilon.Eocc;
  double*** e_occ_shifted = gwbse->gw_epsilon.Eocc_shifted;
  double*** e_unocc = gwbse->gw_epsilon.Eunocc;

  // Create the FComputePacket for this set of f vectors and start CkLoop
  f_packet.size = psi_size;
  // Aliases the message payload, so msg has to stay alive until the packet
  // has been consumed (it is deleted at the end of this method).
  f_packet.unocc_psi = msg->psi;

  // qindex 0 uses the shifted occupied psis and energies, every other qindex
  // the regular ones
  if ( qindex == 0 ) { 
    f_packet.occ_psis = psis_shifted[ikq]; 
    f_packet.e_occ = e_occ_shifted[msg->spin_index][ikq];
  }
  else { 
    f_packet.occ_psis = psis[ikq];
    f_packet.e_occ = e_occ[msg->spin_index][ikq]; 
  }
  // NOTE(review): state_index-L presumably maps an unoccupied state number to
  // its position in Eunocc -- confirm against the sender.
  f_packet.e_unocc = e_unocc[msg->spin_index][msg->k_index][msg->state_index-L];
  // Output goes to the L*psi_size slice of fs selected by the current
  // pipeline stage (received_chunks modulo pipeline_stages)
  f_packet.fs = fs + (L*psi_size*(received_chunks%pipeline_stages));

  // A NULL factor marks the packet as having no umklapp correction
  if (uproc) { f_packet.umklapp_factor = umklapp_factor; }
  else { f_packet.umklapp_factor = NULL; }

#ifdef USE_CKLOOP
  CkLoop_Parallelize(computeF, 1, &f_packet, L, 0, L - 1);
#else
  // Serial fallback: one computeF call per l
  for (int l = 0; l < L; l++) {
    computeF(l,l,NULL,1,&f_packet);
  }
#endif
  received_chunks++;


#ifdef TESTING
{
  // Test-only path: hand the freshly computed f vectors to the local
  // FVectorCache branch
  FVectorCache *fvec_cache = fvector_cache_proxy.ckLocalBranch();
  fvec_cache->computeFTilde(fs);
//  fvec_cache->applyCutoff(msg->accept_size, msg->accept);
//  fvec_cache->init(140);
//compute ftilde first - similar to ckloop above for all L's
  fvec_cache->putFVec(msg->state_index-L, fs);
}
#endif

  // Let the matrix chares know that the f vectors are ready
  CkCallback cb(CkReductionTarget(PMatrix, applyFs), pmatrix2D_proxy);
  contribute(cb);

  // Cleanup
  delete msg;
  total_time += CmiWallTimer() - start;
}
Beispiel #22
0
 void barrier() {
   contribute (CkCallback(CkReductionTarget(Worker,  barrierH), workerarray));
   t = CthSelf();
   CthSuspend();
 } 
Beispiel #23
0
void States::fftGtoR() {

  // Set up the FFT data structures in the FFTController
  FFTController* fft_controller = fft_controller_proxy.ckLocalBranch();
  int backward = 1;

  fft_controller->setup_fftw_3d(nfft,backward);

  fftw_complex* in_pointer = fft_controller->get_in_pointer();
  fftw_complex* out_pointer = fft_controller->get_out_pointer();
  
   // we need to setup fftidx
  int *g[3]; // put_into_fftbox routine takes 2D g array, so we need to do this
  g[0] = ga;
  g[1] = gb;
  g[2] = gc;

  int **fftidx;
  fftidx = new int *[numCoeff];
  for(int i=0; i<numCoeff;i++){ fftidx[i] = new int [3]; }

  // this routine changes negative g index to be a positive numbers
  // since it is origianlly written with Fortran, fftidx has fortran counting,
  // i.e., if gidx is (0,0,0), then (1,1,1) in fftidx
  gidx_to_fftidx(numCoeff, g, nfft, fftidx);

  // state coefficients are copied to in_pointer
  // put_into_fftbox was originally written for doublePack = 0 (false)
  // for gamma point calculation, put_into_fftbox has been modified from the original version
  put_into_fftbox(numCoeff, stateCoeff, fftidx, nfft, in_pointer, doublePack);
  
  // tell the FFTController to do the fft
  fft_controller->do_fftw();

  // transfer data from out_pointer to stateCoeffR
  // malloc stateRspace first
  int ndata = nfft[0]*nfft[1]*nfft[2];
  stateCoeffR = new complex [ndata];
  double scale = sqrt(1.0/double(ndata)); // IFFT requires normalization
  fftbox_to_array(ndata, out_pointer, stateCoeffR, scale);

  // delete stateCoeff
  delete [] stateCoeff;

  // fft for shifted states (only occupied states)
  int qindex = Q_IDX;
  if( istate < nocc && qindex == 0){
    stateCoeffR_shifted = new complex [ndata];
    put_into_fftbox(numCoeff, stateCoeff_shifted, fftidx, nfft, in_pointer, doublePack);
    fft_controller->do_fftw();
    fftbox_to_array(ndata, out_pointer, stateCoeffR_shifted, scale);
    delete [] stateCoeff_shifted;
  }

  // delete space used for fftidx
  for (int i = 0; i < numCoeff; i++) { delete [] fftidx[i]; }
  delete [] fftidx;

  // tell the controller that the states are ready
  contribute(CkCallback(CkReductionTarget(Controller, fftComplete), controller_proxy));
  
}
Beispiel #24
0
	void getScannedEdgesNum() {
    contribute(sizeof(CmiUInt8), &numScannedEdges, CkReduction::sum_long,
               CkCallback(CkReductionTarget(TestDriver, done),
                          driverProxy));
	}