Exemplo n.º 1
0
static void compute_initial_conditions(dcomplex u0[NZ][NY][NX], int d[3]) {

/*--------------------------------------------------------------------
c Fill in array u0 with initial conditions from 
c random number generator 
c
c u0 : output; u0[k][j][i].real / .imag are taken from consecutive
c      entries of the generated sequence, for k < dims[0][2],
c      j < dims[0][1] and i < NX.
c d  : not referenced here; the loop bounds come from the file-scope
c      dims[0][] array instead.
c-------------------------------------------------------------------*/

    int k;
    double x0, start, an;
    /* Scratch row buffer. It is read starting at index 1 below, so
       vranlc presumably fills it 1-based (hence the +1) -- TODO confirm. */
    static double tmp[NX*2*MAXDIM+1];
    int i, j, t;

    start = SEED;
/*--------------------------------------------------------------------
c Jump to the starting element for our first plane.
c ipow46 stores its result in an; randlc then advances start by it.
c-------------------------------------------------------------------*/
    ipow46(A, (zstart[0]-1)*2*NX*NY + (ystart[0]-1)*2*NX, &an);
    (void)randlc(&start, an);
    /* Multiplier used to step start from one z plane to the next. */
    ipow46(A, 2*NX*NY, &an);

/*--------------------------------------------------------------------
c Go through by z planes filling in one square at a time.
c-------------------------------------------------------------------*/
    for (k = 0; k < dims[0][2]; k++) {
        /* Work on a copy so that start keeps marking the plane origin. */
        x0 = start;
        vranlc(2*NX*dims[0][1], &x0, A, tmp);

        t = 1;
        for (j = 0; j < dims[0][1]; j++) {
            for (i = 0; i < NX; i++) {
                u0[k][j][i].real = tmp[t++];
                u0[k][j][i].imag = tmp[t++];
            }
        }

        /* Advance to the next plane, except after the last one.  The
           previous test (k != dims[0][2]) could never be false inside
           this loop, so start was stepped one extra, unused time. */
        if (k != dims[0][2] - 1) {
            (void)randlc(&start, an);
        }
    }
}
Exemplo n.º 2
0
//---------------------------------------------------------------------
// Fill in array u0 with initial conditions from the random number
// generator.
//
// The host derives one starting seed per plane (dims[2] of them), puts
// them in a device buffer and runs the k_compute_ics kernel over a 1-D
// range sized from d2.
//
//   u0     : pointer to the cl_mem passed to the kernel as argument 0
//   d2     : requested work size, rounded up by clu_RoundWorkSize
//   d1, d3 : not referenced in this routine
//---------------------------------------------------------------------
static void compute_initial_conditions(cl_mem *u0, int d1, int d2, int d3)
{
  double seed = SEED;
  double mult;
  double plane_seeds[NZ];
  size_t group_size, range_size, per_unit;
  cl_mem seeds_buf;
  cl_int err;
  int plane;

  // Jump to the starting element for our first plane.
  mult = ipow46(A, 0);
  (void)randlc(&seed, mult);

  // From here on each randlc call steps the seed with the multiplier
  // obtained from ipow46(A, 2*NX*NY); record the seed for every plane.
  mult = ipow46(A, 2*NX*NY);
  plane_seeds[0] = seed;
  plane = 1;
  while (plane < dims[2]) {
    (void)randlc(&seed, mult);
    plane_seeds[plane] = seed;
    plane++;
  }

  if (device_type != CL_DEVICE_TYPE_CPU) {
    // Non-CPU (GPU) path: the buffer is created with a copy of the
    // host seed table.
    seeds_buf = clCreateBuffer(context,
                               CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                               sizeof(plane_seeds),
                               plane_seeds,
                               &err);
    clu_CheckError(err, "clCreateBuffer() for m_starts");

    // Local size: d2 / max_compute_units, clamped to the range
    // [1, work_item_sizes[0]].
    per_unit = d2 / max_compute_units;
    if (per_unit == 0) {
      group_size = 1;
    } else if (per_unit > work_item_sizes[0]) {
      group_size = work_item_sizes[0];
    } else {
      group_size = per_unit;
    }
  } else {
    // CPU path: the buffer is backed directly by the host seed table
    // and each work-group holds a single work-item.
    seeds_buf = clCreateBuffer(context,
                               CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                               sizeof(plane_seeds),
                               plane_seeds,
                               &err);
    clu_CheckError(err, "clCreateBuffer() for m_starts");

    group_size = 1;
  }
  range_size = clu_RoundWorkSize((size_t)d2, group_size);

  err  = clSetKernelArg(k_compute_ics, 0, sizeof(cl_mem), u0);
  err |= clSetKernelArg(k_compute_ics, 1, sizeof(cl_mem), &seeds_buf);
  clu_CheckError(err, "clSetKernelArg() for compute_initial_conditions");

  err = clEnqueueNDRangeKernel(cmd_queue, k_compute_ics,
                               1, NULL, &range_size, &group_size,
                               0, NULL, NULL);
  clu_CheckError(err, "clEnqueueNDRangeKernel()");

  // Wait for the queue to drain before the seed buffer is released and
  // the stack-resident seed table goes out of scope.
  err = clFinish(cmd_queue);
  clu_CheckError(err, "clFinish()");

  DTIMER_START(T_RELEASE);
  clReleaseMemObject(seeds_buf);
  DTIMER_STOP(T_RELEASE);
}