Beispiel #1
0
    /**
     * Runs one compute step, swaps the two temperature buffers, re-applies
     * the boundary condition and then decides what happens next:
     * either the next step is started, or (every CKP_FREQ iterations and once
     * iterations exceeds MAX_ITER) the element reports to Main::report or,
     * in the message-logging build, calls AtSync().
     */
    void check_and_compute() {
        compute_kernel();

        // No error is calculated here: the run uses a fixed number of
        // iterations.

        // Exchange the roles of the two temperature buffers.
        double *previous = temperature;
        temperature = new_temperature;
        new_temperature = previous;

        constrainBC();

        const bool pastLastIteration = iterations > MAX_ITER;
        if (!(iterations % CKP_FREQ == 0 || pastLastIteration)) {
            doStep();
            return;
        }

        // Only the message-logging build (without in-memory checkpointing)
        // synchronizes instead of reporting while the run is still going.
#ifndef CMK_MEM_CHECKPOINT
#if CMK_MESSAGE_LOGGING
        if (!pastLastIteration) {
            AtSync();
            return;
        }
#endif
#endif
        contribute(0, 0, CkReduction::concat, CkCallback(CkIndex_Main::report(), mainProxy));
    }
Beispiel #2
0
void CP_Rho_GSpacePlane::divRhoVksGspace() {

  double tpi,*hmati;

  CPXCFNCTS::CP_fetch_hmati(&hmati,&tpi);

  memset(divRhoY, 0, sizeof(complex) * myGrid_size);
  memset(divRhoZ, 0, sizeof(complex) * myGrid_size);
  double gx,gy,gz;

  std::vector< gridPoint > & points = (*myPoints);
  double sumX = 0, sumY = 0, sumZ = 0;
  for(int p = 0; p < numPoints; p++) {
    int offset = points[p].offset;
    gx = tpi * (points[p].d3 * hmati[1] + points[p].d2 * hmati[2] +
        points[p].d1 * hmati[3]);
    gy = tpi * (points[p].d3 * hmati[4] + points[p].d2 * hmati[5] +
        points[p].d1 * hmati[6]);
    gz = tpi * (points[p].d3 * hmati[7] + points[p].d2 * hmati[8] +
        points[p].d1 * hmati[9]);
    complex tmp = (divRhoX[offset].multiplyByi())*(-1.0);
    divRhoX[offset] = tmp * gx;
    divRhoY[offset] = tmp * gy;
    divRhoZ[offset] = tmp * gz;
#if _CP_DEBUG_RHOG_VERBOSE_
    sumX += divRhoX[offset].re + divRhoX[offset].im;
    sumY += divRhoY[offset].re + divRhoY[offset].im;
    sumZ += divRhoZ[offset].re + divRhoZ[offset].im;
#endif
  }//endfor

#if _CP_DEBUG_RHOG_VERBOSE_
  CkPrintf("{%d} Rho GS [%d] divSums %lf %lf %lf\n", thisInstance.proxyOffset, thisIndex,
    sumX, sumY, sumZ);
#endif

  Charm_doBackwardFFT(CkCallback(CkIndex_CP_Rho_RealSpacePlane::acceptGradRhoVks(),
        UrhoRealProxy[thisInstance.proxyOffset]),
        Urho_fft_xProxy[thisInstance.proxyOffset], fft_xoffset,
        1 / simReadOnly.vol);
  Charm_doBackwardFFT(CkCallback(CkIndex_CP_Rho_RealSpacePlane::acceptGradRhoVks(),
        UrhoRealProxy[thisInstance.proxyOffset]),
        Urho_fft_yProxy[thisInstance.proxyOffset], fft_yoffset,
        1 / simReadOnly.vol);
  Charm_doBackwardFFT(CkCallback(CkIndex_CP_Rho_RealSpacePlane::acceptGradRhoVks(),
        UrhoRealProxy[thisInstance.proxyOffset]),
        Urho_fft_zProxy[thisInstance.proxyOffset], fft_zoffset,
        1 / simReadOnly.vol);

  //---------------------------------------------------------------------------
}//end routine
Beispiel #3
0
/**
 * Re-initialises this chare's matrix storage and then joins the reduction
 * that targets Main::resetDone.
 *
 * The slice of A selected by the z index and the slice of B selected by the
 * x index are filled with drand48() samples that lie within
 * [-MAX_LIMIT, MAX_LIMIT]; all of C (and tmpC when USE_CKDIRECT is set) is
 * cleared to zero.
 */
void Compute::resetArrays() {
  const int indexX = thisIndex.x;
  const int indexZ = thisIndex.z;

  // Slice of A belonging to this chare.
  for(int row=indexZ*subBlockDimXz; row<(indexZ+1)*subBlockDimXz; row++) {
    for(int col=0; col<blockDimY; col++) {
      float sample;
      do {
        sample = (float)drand48();
      } while(sample > MAX_LIMIT || sample < (-1)*MAX_LIMIT);

      A[row*blockDimY + col] = sample;
    }
  }

  // Slice of B belonging to this chare.
  for(int row=indexX*subBlockDimYx; row<(indexX+1)*subBlockDimYx; row++) {
    for(int col=0; col<blockDimZ; col++) {
      float sample;
      do {
        sample = (float)drand48();
      } while(sample > MAX_LIMIT || sample < (-1)*MAX_LIMIT);

      B[row*blockDimZ + col] = sample;
    }
  }

  // The whole of C starts from zero.
  for(int row=0; row<blockDimX; row++) {
    for(int col=0; col<blockDimZ; col++) {
      C[row*blockDimZ + col] = 0.0;
#if USE_CKDIRECT
      tmpC[row*blockDimZ + col] = 0.0;
#endif
    }
  }

  contribute(0, 0, CkReduction::concat, CkCallback(CkIndex_Main::resetDone(), mainProxy));
}
Beispiel #4
0
/**
 * Accepts one partial block of C and, once num_chare_y such calls have
 * arrived, resets the message counters and contributes to the reduction
 * targeting Main::done.
 *
 * @param data  partial sums, laid out as subBlockDimXy rows of blockDimZ
 * @param size  not used by this routine
 * @param who   non-zero to accumulate `data` into C; zero to only count the
 *              call
 */
void Compute::receiveC(float *data, int size, int who) {
  if(who) {
    // Start of this chare's own sub-block inside C.
    const int base = thisIndex.y*subBlockDimXy*blockDimZ;
    for(int row=0; row<subBlockDimXy; row++) {
      for(int col=0; col<blockDimZ; col++) {
        C[base + row*blockDimZ + col] += data[row*blockDimZ + col];
      }
    }
  }

  countC++;
  if(countC != num_chare_y)
    return;

  /* Debugging aid: dump this chare's block of C to a file.
  char name[30];
  sprintf(name, "%s_%d_%d_%d", "C", thisIndex.x, thisIndex.y, thisIndex.z);
  FILE *fp = fopen(name, "w");
  for(int i=0; i<subBlockDimXy; i++) {
    for(int k=0; k<blockDimZ; k++)
      fprintf(fp, "%f ", C[indexY*subBlockDimXy*blockDimZ + i*blockDimZ + k]);
    fprintf(fp, "\n");
  }
  fclose(fp);*/

  // All pieces are in: clear the received-message counters.
  countA = 0;
  countB = 0;
  countC = 0;

  contribute(0, 0, CkReduction::concat, CkCallback(CkIndex_Main::done(), mainProxy));
  // mainProxy.done();
}
Beispiel #5
0
  /**
   * Runs compute() and, unless this is the last major element, starts the
   * next major element (thisIndex + 1) together with its seven minor
   * elements. Afterwards the element either calls AtSync() (when
   * iterations % ldbTime == 4) or contributes to Main::report.
   *
   * With PRIOR defined the two begin_iteration sends carry entry options
   * with priorities -100 (major) and 100 (minor).
   */
  void check_and_compute ()
  {
    compute ();

    if (thisIndex < majElements - 1)
      {
	const int nextMajor = thisIndex + 1;
	const int firstMinor = nextMajor * 7;
#ifdef PRIOR
	// NOTE(review): fresh CkEntryOptions objects are allocated on every
	// call and nothing in this routine frees them -- confirm who owns
	// opts / opts1.
	opts = new CkEntryOptions ();
	opts1 = new CkEntryOptions ();
	opts->setPriority (-100);
	opts1->setPriority (100);

	thisProxy[nextMajor].begin_iteration (1, opts);
	for (int minor = firstMinor; minor < firstMinor + 7; minor++)
	  minorProxy[minor].begin_iteration (1, opts1);
#else
	thisProxy[nextMajor].begin_iteration (1);
	for (int minor = firstMinor; minor < firstMinor + 7; minor++)
	  minorProxy[minor].begin_iteration (1);
#endif
      }

    if (iterations % ldbTime == 4)
      {
	AtSync ();
      }
    else
      {
	contribute (CkCallback (CkIndex_Main::report (NULL), mainProxy));
      }
  }
Beispiel #6
0
/**
 * Stores a CkDirect handle received from another chare and associates it
 * with the matching local buffer.
 *
 * sHandles is laid out as  B (num_chare_x slots) | C (num_chare_y slots) | A.
 *
 * @param shdl   handle to store
 * @param index  position of the handle within its section of sHandles
 * @param arr    SENDA, SENDB or SENDC: which matrix the handle belongs to
 *
 * Once num_chare_z-1 A handles, num_chare_x-1 B handles and num_chare_y-1
 * C handles have been counted, the counters are cleared and the chare
 * contributes to the reduction targeting Main::setupDone.
 */
void Compute::recvHandle(infiDirectUserHandle shdl, int index, int arr) {
  if(arr == SENDA) {
    const int slot = num_chare_x + num_chare_y + index;
    sHandles[slot] = shdl;
    CkDirect_assocLocalBuffer(&sHandles[slot], &A[thisIndex.z*subBlockDimXz*blockDimY], sizeof(float)*subBlockDimXz*blockDimY);
    countA++;
  }

  if(arr == SENDB) {
    const int slot = index;
    sHandles[slot] = shdl;
    CkDirect_assocLocalBuffer(&sHandles[slot], &B[thisIndex.x*subBlockDimYx*blockDimZ], sizeof(float)*subBlockDimYx*blockDimZ);
    countB++;
  }

  if(arr == SENDC) {
    const int slot = num_chare_x + index;
    sHandles[slot] = shdl;
    CkDirect_assocLocalBuffer(&sHandles[slot], &C[index*subBlockDimXy*blockDimZ], sizeof(float)*subBlockDimXy*blockDimZ);
    countC++;
  }

  const bool allHandlesIn = countA == num_chare_z-1
                         && countB == num_chare_x-1
                         && countC == num_chare_y-1;
  if(allHandlesIn) {
    countA = 0;
    countB = 0;
    countC = 0;
    contribute(0, 0, CkReduction::concat, CkCallback(CkIndex_Main::setupDone(), mainProxy));
    // mainProxy.setupDone();
  }
}
Beispiel #7
0
  void communicate(int iters, bool useTram) {
    GroupMeshStreamer<DataItem, Participant, SimpleMeshRouter> *localStreamer;
    if (useTram) {
      localStreamer = aggregator.ckLocalBranch();
    }

    int ctr = 0;
    for (int i = 0; i < iters; i++) {
      for (int j=0; j<CkNumPes(); j++) {
        if (useTram) {
          localStreamer->insertData(myItem, neighbors[j]);
        }
        else {
          allToAllGroup[neighbors[j]].receive(myItem);
          ctr++;
        }
      }
      if (!useTram) {
        if (ctr == 1024) {
          ctr = 0;
          CthYield();
        }
      }
    }
    if (useTram) {
      localStreamer->done();
    }
    else {
      contribute(CkCallback(CkReductionTarget(Main, allDone), mainProxy));
    }
  }
Beispiel #8
0
//============================================================================
// Counts arriving gradient components; on the third call it combines
// divRhoX/Y/Z into the white-Byrd term (stored in place in divRhoX) and
// starts a backward FFT of that term.
//============================================================================
void CP_Rho_GSpacePlane::acceptWhiteByrd() {
  //============================================================================

#ifdef _CP_DEBUG_RHOG_VERBOSE_
  CkPrintf("{%d} Rho GS [%d] acceptWhiteByrd_%d\n", thisInstance.proxyOffset, thisIndex,
      doneWhiteByrd);
#endif

  doneWhiteByrd++;

  // When all 3 gradients are in g-space, we are ready for the next step.
  if(doneWhiteByrd == 3){
    // Reset the counter for the next round.
    doneWhiteByrd = 0;
    /** The partially FFT'ed white byrd correction to VKS arrives to RhoG
      and ffts invoked. Only happens if gradient corrections are on.  */
#if CMK_TRACE_ENABLED
    // NOTE(review): StartTime is recorded but not used anywhere in this
    // routine.
    double StartTime=CmiWallTimer();
#endif

    //============================================================================
    // Compute my whiteByrd : store it in divrhox
    double tpi,*hmati;
    CPXCFNCTS::CP_fetch_hmati(&hmati, &tpi);

    double gx, gy, gz;
    // whitebyrd is an alias of divRhoX: the result overwrites the x component.
    complex *whitebyrd = divRhoX; // zeroing done carefully inside loop

    complex zero;
    zero.re = 0.0; zero.im = 0.0;
    std::vector< gridPoint > & points = (*myPoints);
    // Offset of the previously processed point; -1 before the first one.
    int last_offset = -1;
    for(int p = 0; p < numPoints; p++) {
      int offset = points[p].offset;
      // Zero every entry skipped between the previous point and this one.
      // Presumably the points are ordered by increasing offset -- TODO confirm.
      if(offset != (last_offset + 1)) {
        for(int cur_off = last_offset + 1; cur_off < offset; cur_off++) {
          whitebyrd[cur_off] = zero;
        }
      }
      // g-vector of this point, built from its d1/d2/d3 indices and hmati.
      gx = tpi * (points[p].d3 * hmati[1] + points[p].d2 * hmati[2] +
          points[p].d1 * hmati[3]);
      gy = tpi * (points[p].d3 * hmati[4] + points[p].d2 * hmati[5] +
          points[p].d1 * hmati[6]);
      gz = tpi * (points[p].d3 * hmati[7] + points[p].d2 * hmati[8] +
          points[p].d1 * hmati[9]);
      // divRhoX[offset] must be read here, before the aliased store below.
      complex tmp = divRhoX[offset]*gx + divRhoY[offset]*gy + divRhoZ[offset]*gz;
      whitebyrd[offset] = tmp.multiplyByi()*(-1.0);
      last_offset = offset;
    }

    // Zero the tail of the grid after the last listed point.
    for(int cur_off = last_offset + 1; cur_off < myGrid_size; cur_off++) {
      whitebyrd[cur_off] = zero;
    }

    // Backward FFT through the x FFT proxy; completion is reported to
    // acceptWhiteByrd on the real-space plane.
    Charm_doBackwardFFT(CkCallback(CkIndex_CP_Rho_RealSpacePlane::acceptWhiteByrd(),
          UrhoRealProxy[thisInstance.proxyOffset]),
          Urho_fft_xProxy[thisInstance.proxyOffset], fft_xoffset);
    myTime++;
  }
    //---------------------------------------------------------------------------
}//end routine
Beispiel #9
0
  void ResumeFromSync ()
  {
//      printf("Jacobi[%d] calling resumeSync\n",thisIndex);
//    if (thisIndex == 0)
//      mainProxy.report ();
//CkPrintf("Coming in MAJ MAJ MAJ RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR ++++++++\n");
	contribute(CkCallback(CkIndex_Main::report(NULL),mainProxy));
  }
Beispiel #10
0
		void getScannedVertexNum() {
			CmiUInt8 numScannedVertices = 0;
			typedef std::vector<BFSVertex>::iterator Iterator;
			for (Iterator it = vertices.begin(); it != vertices.end(); it++) 
				numScannedVertices += it->getScannedVertexNum();
			contribute(sizeof(CmiUInt8), &numScannedVertices, CkReduction::sum_long,
								 CkCallback(CkReductionTarget(TestDriver, done),
														driverProxy));
		}
Beispiel #11
0
/**
 * Sets the controller's initial state (first_time flag raised, no in/out
 * buffers yet), allocates the GSPACE object and signals readiness through a
 * reduction to Controller::fftControllerReady.
 */
FFTController::FFTController() {
  first_time = true;

  // No FFT buffers exist yet.
  out_pointer = NULL;
  in_pointer = NULL;

  geps = new GSPACE();

  // TODO: A group dependency could probably solve this better
  CkCallback readyCb = CkCallback(CkReductionTarget(Controller, fftControllerReady), controller_proxy);
  contribute(readyCb);
}
Beispiel #12
0
void PsiCache::reportFTime() {
  CkReduction::statisticsElement stats(total_time);
  int tuple_size = 2;
  CkReduction::tupleElement tuple_reduction[] = {
    CkReduction::tupleElement(sizeof(double), &total_time, CkReduction::max_double),
    CkReduction::tupleElement(sizeof(CkReduction::statisticsElement), &stats, CkReduction::statistics) };

  CkReductionMsg* msg = CkReductionMsg::buildFromTuple(tuple_reduction, tuple_size);
  msg->setCallback(CkCallback(CkIndex_Controller::reportFTime(NULL), controller_proxy));
  contribute(msg);
}
Beispiel #13
0
	allToAll() {
        iter = 0;
        recvCnt = 0;
		msgs = new allToAllMsg*[numChares*msgCount];
		for(int i = 0; i < msgCount*numChares; i++) {
			msgs[i] = new (msgSize) allToAllMsg;
		}

		// reduction to the mainchare to signal that initialization is complete
		contribute(CkCallback(CkReductionTarget(Main,allToAllReady), mainProxy));
	}
Beispiel #14
0
		/*entry*/ void start() {
			CkPrintf("Main: run calculations...\n");
			CkPrintf("Main: start...\n");
			startt = CkWallTimer();

			alltoall_proxy->run(CkCallback(CkIndex_Main::done(), thisProxy));

			//for (int i = 0; i < N_uChares; i++)
			//	(*hello_proxy)[i]->ping(0);
			alltoall_proxy->start();
			//alltoall_proxy->flush();
		}
Beispiel #15
0
 /**
  * Mainchare constructor of the SDAG migration test.
  *
  * Creates NUM_ELEMS Test elements, broadcasts wrapper(100, 200), sends
  * method2 / method3 / methodA to each element individually and finally
  * registers Main::finished to run at quiescence.
  *
  * @param m  command-line argument message; not used, freed here
  */
 Main(CkArgMsg *m) {
   // The arguments are not needed and the constructor owns the message.
   delete m;

   CkPrintf("running SDAG migration test\n");
   CProxy_Test testProxy = CProxy_Test::ckNew(NUM_ELEMS);
   testProxy.wrapper(100, 200);
   for (int i = 0; i < NUM_ELEMS; i++) {
     char str[100];
     sprintf(str, "test %d", i);
     // Named differently from the constructor parameter so that it no longer
     // shadows it. The message is handed over to method3 below.
     Msg* payload = new (strlen(str) + 1) Msg(i, str);
     testProxy[i].method2(i * 2, i * 2 + 1);
     testProxy[i].method3(payload);
     testProxy[i].methodA();
   }
   CkStartQD(CkCallback(CkIndex_Main::finished(), thisProxy));
 }
Beispiel #16
0
  /**
   * Mainchare constructor: reads n from the command line, creates array B
   * with n elements and array A with 2 elements, broadcasts A::F and
   * registers Main::done to run at quiescence.
   *
   * @param msg  command-line argument message; argv[1] must hold n.
   *             The message is freed here.
   */
  Main(CkArgMsg* msg) {

    // argv[1] used to be read unconditionally, so running without an
    // argument dereferenced a missing entry.
    if (msg->argc < 2) {
      CkAbort("Usage: <program> <n>  (n = number of elements in array B)\n");
    }
    const int n = atoi(msg->argv[1]);
    delete msg;

    mainProxy = thisProxy;

    CkPrintf("n = %d\n",n);
    BProxy = CProxy_B::ckNew(n);
    AProxy = CProxy_A::ckNew(2);
    AProxy.F();

    CkCallback cb = CkCallback(CkReductionTarget(Main, done), thisProxy);
    CkStartQD(cb);

  }
Beispiel #17
0
		/**
		 * Creates one Pingping uChare: logs its ids, clears the
		 * ping/pong completion flags, sizes both counter vectors to
		 * N_uChares (ping counters start at -1, pong counters at 999)
		 * and contributes to the reduction targeting Main::start.
		 *
		 * @param index      forwarded to the uChare base class
		 * @param uchareset  forwarded to the uChare base class
		 */
		Pingping(std::size_t index, uChareSet<Pingping, CProxy_Pingping, CBase_Pingping> *uchareset) : 
			uChare<Pingping, CProxy_Pingping, CBase_Pingping>(index, uchareset)  {
			CkPrintf("[uchare=%d, chare=%d,pe=%d]: created \n", 
					getId(), getuChareSet()->getId(), getuChareSet()->getPe());

			pongDone = false;
			pingDone = false;

			pingCounters.resize(N_uChares);
			pongCounters.resize(N_uChares);
			pingCounters.assign(N_uChares, -1);
			pongCounters.assign(N_uChares, 999);

			CkCallback startCb = CkCallback(CkReductionTarget(Main, start), mainProxy);
			contribute(startCb);
		}
Beispiel #18
0
/**
 * Folds the partial results held in tmpC into this chare's own block of C,
 * marks every remote CkDirect receive handle as ready again, clears the
 * message counters and contributes to the reduction targeting Main::done.
 */
void Compute::receiveC() {
  const int indexX = thisIndex.x;
  const int indexY = thisIndex.y;
  const int indexZ = thisIndex.z;

  // Add every other chare's slice of tmpC onto our own sub-block of C.
  const int ownBase = indexY*subBlockDimXy*blockDimZ;
  for(int src=0; src<num_chare_y; src++) {
    if(src == indexY)
      continue;
    const int srcBase = src*subBlockDimXy*blockDimZ;
    for(int row=0; row<subBlockDimXy; row++) {
      for(int col=0; col<blockDimZ; col++) {
        C[ownBase + row*blockDimZ + col] += tmpC[srcBase + row*blockDimZ + col];
      }
    }
  }

  /* Debugging aid: dump / print this chare's block of C.
  char name[30];
  sprintf(name, "%s_%d_%d_%d", "C", thisIndex.x, thisIndex.y, thisIndex.z);
  FILE *fp = fopen(name, "w");
  for(int i=0; i<subBlockDimXy; i++) {
    for(int k=0; k<blockDimZ; k++)
      fprintf(fp, "%f ", C[indexY*subBlockDimXy*blockDimZ + i*blockDimZ + k]);
    fprintf(fp, "\n");
  }
  fclose(fp);
  CkPrintf("%d_%d_%d\n", thisIndex.x, thisIndex.y, thisIndex.z);
  for(int i=0; i<subBlockDimXy; i++) {
    for(int k=0; k<blockDimZ; k++)
      CkPrintf("%f ", C[indexY*subBlockDimXy*blockDimZ + i*blockDimZ + k]);
    CkPrintf("\n");
  }*/

  // Re-arm the receive handles of all three sections, skipping our own slot
  // in each.
  for(int i=0; i<num_chare_x; i++) {
    if(i == indexX)
      continue;
    CkDirect_ready(&rHandles[i]);
  }

  for(int j=0; j<num_chare_y; j++) {
    if(j == indexY)
      continue;
    CkDirect_ready(&rHandles[num_chare_x + j]);
  }

  for(int k=0; k<num_chare_z; k++) {
    if(k == indexZ)
      continue;
    CkDirect_ready(&rHandles[num_chare_x + num_chare_y + k]);
  }

  // counters to keep track of how many messages have been received
  countA = 0;
  countB = 0;
  countC = 0;

  contribute(0, 0, CkReduction::concat, CkCallback(CkIndex_Main::done(), mainProxy));
  // mainProxy.done();
}
Beispiel #19
0
/**
 * Copies the finished result from h_C into C and contributes to the
 * reduction whose callback is Main::finishWork.
 *
 * With DEBUG defined it additionally prints A, B and C and then recomputes
 * the product A*B on the host, printing it as "C-gold".
 */
void Workers::complete() {
  // Computed in size_t: matrixSize * matrixSize * sizeof(ElementType) does
  // not fit in an int once the matrix reaches 2 GiB.
  const size_t size = (size_t)matrixSize * matrixSize * sizeof(ElementType);
  memcpy(C, h_C, size); 
#ifdef DEBUG
  CkPrintf("[%d] A\n", thisIndex); 
  for (int i=0; i<matrixSize; i++) {
    CkPrintf("[%d] ", thisIndex);
    for (int j=0; j<matrixSize; j++) {
      CkPrintf("%.2f ", A[i*matrixSize+j]); 
    }
    CkPrintf("\n");
  }
  CkPrintf("[%d] B\n", thisIndex); 
  for (int i=0; i<matrixSize; i++) {
    CkPrintf("[%d] ", thisIndex);
    for (int j=0; j<matrixSize; j++) {
      CkPrintf("%.2f ", B[i*matrixSize+j]); 
    }
    CkPrintf("\n");
  }
  CkPrintf("[%d] C\n", thisIndex);
  for (int i=0; i<matrixSize; i++) {
    CkPrintf("[%d] ", thisIndex);
    for (int j=0; j<matrixSize; j++) {
      // With useCublas the result is printed transposed.
      if(useCublas)
        CkPrintf("%.2f ", C[j*matrixSize+i]);
      else
        CkPrintf("%.2f ", C[i*matrixSize+j]);
    }
    CkPrintf("\n");
  }
  // NOTE(review): the reference product below is written into C itself, so
  // in a DEBUG build the copied result is overwritten -- confirm this is
  // intended.
  CkPrintf("[%d] C-gold\n", thisIndex);
  for (int i=0; i<matrixSize; i++) {
    CkPrintf("[%d] ", thisIndex);
    for (int j=0; j<matrixSize; j++) {
      C[i*matrixSize + j] = 0; 
      for (int k=0; k<matrixSize; k++) {
	C[i*matrixSize + j] += A[i*matrixSize +k] * B[k * matrixSize + j];
      }
      CkPrintf("%.2f ", C[i*matrixSize+j]); 
    }
    CkPrintf("\n");
  }

#endif

  contribute(CkCallback(CkIndex_Main::finishWork(NULL), mainProxy));
}
Beispiel #20
0
// Decides whether the patch may move on to the next step. Nothing happens
// until both updateFlag and incomingFlag are set. Then the flags are cleared,
// the step counter advances, the incoming particles are merged, and the patch
// either finishes (Main::allDone), enters load balancing (AtSync) or starts
// its next step.
void Patch::checkNextStep(){
  if (!(updateFlag && incomingFlag))
    return;

  // resetting flags
  updateFlag = false;
  incomingFlag = false;
  stepCount++;

  // adding new elements
  for (int n = 0; n < incomingParticles.length(); n++)
    particles.push_back(incomingParticles[n]);
  incomingParticles.removeAll();

  // Patch (0,0,0) prints timing every NUM_STEPS steps.
  const bool isOriginPatch = thisIndex.x == 0 && thisIndex.y == 0 && thisIndex.z == 0;
  if (isOriginPatch && stepCount%NUM_STEPS == 0) {
    const double now = CmiWallTimer();
    CkPrintf("Step %d Benchmark Time %f ms/step, Total Time Elapsed %f s\n", stepCount, ((now - stepTime)/NUM_STEPS)*1000, now);
    stepTime = now;
  }

  // checking for next step
  if (stepCount >= finalStepCount) {
    print();
    contribute(CkCallback(CkIndex_Main::allDone(), mainProxy)); 
    return;
  }

  if (perform_lb) {
    AtSync();
    LBTurnInstrumentOff();
    perform_lb = false;
  } else {
    thisProxy(thisIndex.x, thisIndex.y, thisIndex.z).start();
    //contribute(CkCallback(CkIndex_Main::lbBarrier(),mainProxy));
  }
}
Beispiel #21
0
  Participant() {

    int numPes = CkNumPes();
    neighbors = new int[numPes];
    for (int i = 0; i < numPes; i++) {
      neighbors[i] = i;
    }

    // shuffle to prevent bottlenecks
    for (int i = numPes-1; i >= 0; i--) {
      int shuffleIndex = rand() % (i+1);
      int temp = neighbors[i];
      neighbors[i] = neighbors[shuffleIndex];
      neighbors[shuffleIndex] = temp;
    }

    contribute(CkCallback(CkReductionTarget(Main, prepare), mainProxy));
  }
Beispiel #22
0
  /**
   * Runs compute() and then either enters load balancing with AtSync()
   * (when iterations % ldbTime == 4) or contributes to the reduction whose
   * callback is Main::report.
   */
  void check_and_compute ()
  {
    compute ();

    const bool timeToSync = (iterations % ldbTime == 4);
    if (!timeToSync)
      {
	contribute (CkCallback (CkIndex_Main::report (NULL), mainProxy));
	return;
      }

    AtSync ();
  }
Beispiel #23
0
/* Constructs a patch from a FileDataMsg: resets the counters and flags,
 * creates one particle per entry of the message (charge, mass, coordinates
 * and vdw_type are copied; velocities and forces start at zero), frees the
 * message and contributes to the reduction targeting Main::startUpDone. */
Patch::Patch(FileDataMsg* fdmsg) {
  LBTurnInstrumentOff();
  inbrs = numNbrs;
  usesAtSync = CmiTrue;
  // setMigratable(CmiFalse);

  // Counters
  updateCount = 0;
  forceCount = 0;
  stepCount = 0;
  resumeCount = 0;

  // Flags
  updateFlag = false;
  incomingFlag = false;
  perform_lb = false;

  incomingParticles.resize(0);

  // Particle initialization
  myNumParts = 0;
  for (int src = 0; src < fdmsg->length; src++) {
    particles.push_back(Particle());
    Particle &p = particles[myNumParts];

    p.charge = fdmsg->charge[src];
    p.mass = fdmsg->mass[src];

    p.x = fdmsg->coords[src].x;
    p.y = fdmsg->coords[src].y;
    p.z = fdmsg->coords[src].z;

    p.vx = 0;
    p.vy = 0;
    p.vz = 0;
    p.fx = 0;
    p.fy = 0;
    p.fz = 0;

    // NOTE(review): the id uses only the x and y patch indices and
    // multiplies x by patchArrayDimX -- confirm this gives the intended
    // numbering.
    p.id = (thisIndex.x*patchArrayDimX + thisIndex.y) * numParts / (patchArrayDimX*patchArrayDimY)  + src;

    p.vdw_type = fdmsg->vdw_type[src];
    myNumParts++;
  }

  delete fdmsg;
  contribute(CkCallback(CkIndex_Main::startUpDone(), mainProxy));
}
Beispiel #24
0
    void run() {
      for (int i = 0 ; i < numelements; i++) {
        
        if(thisIndex % 2 == 0 && thisIndex != numelements -1 ) {
          myMsg* m = workerarray[thisIndex + 1].sendSmaller(val);
          val = m->val;
          delete m;
        } 
        barrier();
        if (thisIndex % 2 == 1 && thisIndex != numelements -1 ) {
          myMsg* m  = workerarray[thisIndex + 1].sendSmaller(val);
          val = m->val;
          delete m;
        }
        barrier();
      }

      contribute(CkCallback(CkIndex_Main::done(NULL), mainproxy)); 

    }
Beispiel #25
0
/**
 * Stores one incoming psi in the cache and frees the message.
 *
 * The data goes to psis or psis_shifted depending on msg->shifted, at
 * [k_index][state_index]. Only spin_index 0 is accepted. When the expected
 * number of psis has arrived (K*L, doubled when qindex is 0) the cache
 * contributes to the reduction targeting Controller::cachesFilled.
 *
 * @param msg  the psi message; deleted by this routine
 */
void PsiCache::receivePsi(PsiMessage* msg) {
  if (msg->spin_index != 0) {
    CkAbort("Error: We don't support multiple spins yet!\n");
  }
  CkAssert(msg->k_index < K);
  CkAssert(msg->state_index < L);
  CkAssert(msg->size == psi_size);

  if (msg->shifted == false) {
    std::copy(msg->psi, msg->psi+psi_size, psis[msg->k_index][msg->state_index]);
  } else if (msg->shifted == true) {
    std::copy(msg->psi, msg->psi+psi_size, psis_shifted[msg->k_index][msg->state_index]);
  }
  delete msg;

  // Once the cache has received all of its data start the sliding pipeline
  // sending of psis to P to start the accumulation of fxf'.
  int expected_total = K*L;
  if (qindex == 0) {
    expected_total *= 2;
  }

  received_psis++;
  if (received_psis == expected_total) {
    //CkPrintf("[%d]: Cache filled\n", CkMyPe());
    contribute(CkCallback(CkReductionTarget(Controller,cachesFilled), controller_proxy));
  }
}
Beispiel #26
0
/**
 * Reads the cache dimensions from the GWBSE configuration, allocates the
 * K x L x psi_size storage for the regular and the shifted psis plus the
 * fs and umklapp_factor buffers, initialises the region bounds, lock and
 * timer, and signals readiness via a reduction to
 * Controller::psiCacheReady.
 */
PsiCache::PsiCache() {
  GWBSE *gwbse = GWBSE::get();

  // Sizes taken from the parallel configuration.
  K = gwbse->gw_parallel.K;
  L = gwbse->gw_parallel.L;
  qindex = Q_IDX;
  psi_size = gwbse->gw_parallel.n_elems;
  pipeline_stages = gwbse->gw_parallel.pipeline_stages;

  received_psis = 0;
  received_chunks = 0;

  // Regular psis: one psi_size buffer per (k, state).
  psis = new complex**[K];
  for (int kIdx = 0; kIdx < K; kIdx++) {
    psis[kIdx] = new complex*[L];
    for (int state = 0; state < L; state++) {
      psis[kIdx][state] = new complex[psi_size];
    }
  }

  // shifted k grid psis. Need this for qindex=0
  psis_shifted = new complex**[K];
  for (int kIdx = 0; kIdx < K; kIdx++) {
    psis_shifted[kIdx] = new complex*[L];
    for (int state = 0; state < L; state++) {
      psis_shifted[kIdx][state] = new complex[psi_size];
    }
  }

  fs = new complex[L*psi_size*pipeline_stages];

  umklapp_factor = new complex[psi_size];

  // Variables for chare region registration: start with an empty range.
  min_row = INT_MAX;
  max_row = INT_MIN;
  min_col = INT_MAX;
  max_col = INT_MIN;
  tile_lock = CmiCreateLock();

  total_time = 0.0;
  contribute(CkCallback(CkReductionTarget(Controller,psiCacheReady), controller_proxy));
}
Beispiel #27
0
	/**
	 * Advances the test to its next stage. Each stage draws a fresh
	 * random expectParam, records which kind of receiver (expectType)
	 * and how many deliveries (expectCount) are expected, and sends a
	 * callback of the matching kind. Stage 6 clears the expectations
	 * and starts threadedTest(); other state values do nothing further.
	 */
	void next(void) {
		state++;
		expectParam=rand();
		expectCount=1;

		if (state == 0) {
			// Single chare
			expectType=typeChare;
			send(CkCallback(CkIndex_callbackChare::idx_accept(&callbackChare::accept),
					cp));
		} else if (state == 1) {
			// Last array element (index nArr-1)
			expectType=typeArray;
			send(CkCallback(CkIndex_callbackArray::accept(NULL),
					CkArrayIndex1D(nArr-1),ap));
		} else if (state == 2) {
			// Group member on the last PE (CkNumPes()-1)
			expectType=typeGroup;
			send(CkCallback(CkIndex_callbackGroup::accept(NULL),
					CkNumPes()-1,gp));
		} else if (state == 3) {
			// Plain C function
			expectType=typeCfn;
			send(CkCallback(acceptCFnCall,&thisProxy));
		} else if (state == 4) {
			// Broadcast to the whole array: one delivery per element
			expectCount=nArr;
			expectType=typeArray;
			send(CkCallback(CkIndex_callbackArray::accept(NULL),ap));
		} else if (state == 5) {
			// Broadcast to the whole group: one delivery per PE
			expectCount=CkNumPes();
			expectType=typeGroup;
			send(CkCallback(CkIndex_callbackGroup::accept(NULL),gp));
		} else if (state == 6) {
			// That's it
			expectType=-1;
			expectParam=-1;
			thisProxy.threadedTest();
		}
	}
Beispiel #28
0
void Patch::createSection() {
  localCreateSection();
  contribute(CkCallback(CkIndex_Main::startUpDone(), mainProxy));
}
Beispiel #29
0
/**
 * Fills computesList with the six-integer index of the compute shared with
 * each of the numNbrs neighbours, and inserts into computeArray the
 * computes this patch is responsible for.
 *
 *  The computes X are inserted by a given patch:
 *
 *	^  X  X  X
 *	|  0  X  X
 *	y  0  0  0
 *	   x ---->
 *
 * Neighbours numbered numNbrs/2 and above get their compute inserted here,
 * on PEs chosen round-robin starting after CkMyPe(). For lower-numbered
 * neighbours only the index is recorded. Finally the patch contributes to
 * the reduction targeting Main::startUpDone.
 */
void Patch::createComputes() {
  const int x = thisIndex.x;
  const int y = thisIndex.y;
  const int z = thisIndex.z;

  // For Round Robin insertion
  const int numPes = CkNumPes();
  int currPe = CkMyPe();

  computesList = new int*[numNbrs];
  for (int n = 0; n < numNbrs; n++) {
    computesList[n] = new int[6];
  }

  for (int num = 0; num < numNbrs; num++) {
    // Offset of neighbour `num` relative to this patch.
    const int dx = num / (nbrsY * nbrsZ)           - nbrsX/2;
    const int dy = (num % (nbrsY * nbrsZ)) / nbrsZ - nbrsY/2;
    const int dz = num % nbrsZ                     - nbrsZ/2;

    int *entry = computesList[num];

    if (num < numNbrs/2) {
      // Not inserted here: only record the index, built from the wrapped
      // neighbour position and listed neighbour-first.
      const int wx = WRAP_X(x+dx);
      const int wy = WRAP_Y(y+dy);
      const int wz = WRAP_Z(z+dz);

      entry[0] = wx + 2;      entry[1] = wy + 2;      entry[2] = wz + 2;
      entry[3] = wx - dx + 2; entry[4] = wy - dy + 2; entry[5] = wz - dz + 2;
      continue;
    }

    //insert only the upper right half computes
    const int px1 = x + 2,      py1 = y + 2,      pz1 = z + 2;
    const int px2 = x + dx + 2, py2 = y + dy + 2, pz2 = z + dz + 2;
    computeArray(px1, py1, pz1, px2, py2, pz2).insert((++currPe)%numPes);

    entry[0] = px1; entry[1] = py1; entry[2] = pz1;
    entry[3] = px2; entry[4] = py2; entry[5] = pz2;
  } // end of for loop

  contribute(CkCallback(CkIndex_Main::startUpDone(), mainProxy));
}
Beispiel #30
0
/**
 * Transforms this state's coefficients with the local FFTController and
 * stores the scaled result in stateCoeffR.
 *
 * When istate < nocc and Q_IDX is 0, the shifted coefficients are transformed
 * the same way into stateCoeffR_shifted. The input arrays stateCoeff (and
 * stateCoeff_shifted, if used) are freed afterwards, and the chare then
 * contributes to the reduction targeting Controller::fftComplete.
 */
void States::fftGtoR() {

  // Set up the FFT data structures in the FFTController
  FFTController* fft_controller = fft_controller_proxy.ckLocalBranch();
  int backward = 1;

  fft_controller->setup_fftw_3d(nfft,backward);

  // Input and output buffers belong to the controller; both transforms below
  // go through the same pair.
  fftw_complex* in_pointer = fft_controller->get_in_pointer();
  fftw_complex* out_pointer = fft_controller->get_out_pointer();
  
   // we need to setup fftidx
  int *g[3]; // put_into_fftbox routine takes 2D g array, so we need to do this
  g[0] = ga;
  g[1] = gb;
  g[2] = gc;

  // One row of 3 indices per coefficient; freed at the end of this routine.
  int **fftidx;
  fftidx = new int *[numCoeff];
  for(int i=0; i<numCoeff;i++){ fftidx[i] = new int [3]; }

  // this routine changes negative g indices into positive numbers
  // since it was originally written in Fortran, fftidx has Fortran counting,
  // i.e., if gidx is (0,0,0), then (1,1,1) in fftidx
  gidx_to_fftidx(numCoeff, g, nfft, fftidx);

  // state coefficients are copied to in_pointer
  // put_into_fftbox was originally written for doublePack = 0 (false)
  // for gamma point calculation, put_into_fftbox has been modified from the original version
  put_into_fftbox(numCoeff, stateCoeff, fftidx, nfft, in_pointer, doublePack);
  
  // tell the FFTController to do the fft
  fft_controller->do_fftw();

  // transfer data from out_pointer to stateCoeffR
  // allocate stateCoeffR first
  int ndata = nfft[0]*nfft[1]*nfft[2];
  stateCoeffR = new complex [ndata];
  double scale = sqrt(1.0/double(ndata)); // IFFT requires normalization
  fftbox_to_array(ndata, out_pointer, stateCoeffR, scale);

  // delete stateCoeff
  // NOTE(review): the pointer is not reset after the delete, so it must not
  // be used again.
  delete [] stateCoeff;

  // fft for shifted states (only occupied states)
  int qindex = Q_IDX;
  if( istate < nocc && qindex == 0){
    stateCoeffR_shifted = new complex [ndata];
    // Reuses the fftidx table and the controller buffers from above.
    put_into_fftbox(numCoeff, stateCoeff_shifted, fftidx, nfft, in_pointer, doublePack);
    fft_controller->do_fftw();
    fftbox_to_array(ndata, out_pointer, stateCoeffR_shifted, scale);
    delete [] stateCoeff_shifted;
  }

  // delete space used for fftidx
  for (int i = 0; i < numCoeff; i++) { delete [] fftidx[i]; }
  delete [] fftidx;

  // tell the controller that the states are ready
  contribute(CkCallback(CkReductionTarget(Controller, fftComplete), controller_proxy));
  
}