/*--------------------------------------------------------------------
c  Fill in array u0 with initial conditions from the
c  random number generator.
c
c  u0 : output array; elements u0[k][j][i] are written for
c       k < dims[0][2], j < dims[0][1], i < NX.
c  d  : not used by this routine; the extents are taken from the
c       file-level dims[0] instead.
c
c  Note: the scratch buffer is a static local, so this routine is
c  not reentrant.
c-------------------------------------------------------------------*/
static void compute_initial_conditions(dcomplex u0[NZ][NY][NX], int d[3]) {
    double x0, start, an;
    static double tmp[NX*2*MAXDIM+1];
    int i, j, k, t;

    (void)d;

    start = SEED;

/*--------------------------------------------------------------------
c  Jump to the starting element for our first plane.
c-------------------------------------------------------------------*/
    ipow46(A, (zstart[0]-1)*2*NX*NY + (ystart[0]-1)*2*NX, &an);
    (void)randlc(&start, an);

    /* Multiplier used below to step the seed from one z plane to the
       next (exponent 2*NX*NY: one real and one imaginary value per
       point of a plane). */
    ipow46(A, 2*NX*NY, &an);

/*--------------------------------------------------------------------
c  Go through by z planes filling in one square at a time.
c-------------------------------------------------------------------*/
    for (k = 0; k < dims[0][2]; k++) {
        /* Work on a copy so that 'start' keeps marking the beginning
           of the current plane. */
        x0 = start;
        vranlc(2*NX*dims[0][1], &x0, A, tmp);

        /* Values are consumed starting at tmp[1]; tmp[0] is never read
           (presumably vranlc fills its output 1-based -- confirm
           against its definition). */
        t = 1;
        for (j = 0; j < dims[0][1]; j++) {
            for (i = 0; i < NX; i++) {
                u0[k][j][i].real = tmp[t++];
                u0[k][j][i].imag = tmp[t++];
            }
        }

        /* Advance the seed to the next plane, except after the last
           one.  The previous test (k != dims[0][2]) was always true
           inside this loop and performed one needless randlc call. */
        if (k + 1 < dims[0][2]) {
            (void)randlc(&start, an);
        }
    }
}
//--------------------------------------------------------------------- // Fill in array u0 with initial conditions from // random number generator //--------------------------------------------------------------------- static void compute_initial_conditions(cl_mem *u0, int d1, int d2, int d3) { int k; double start, an, dummy, starts[NZ]; size_t local_ws, global_ws, temp; cl_mem m_starts; cl_int ecode; start = SEED; //--------------------------------------------------------------------- // Jump to the starting element for our first plane. //--------------------------------------------------------------------- an = ipow46(A, 0); dummy = randlc(&start, an); an = ipow46(A, 2*NX*NY); starts[0] = start; for (k = 1; k < dims[2]; k++) { dummy = randlc(&start, an); starts[k] = start; } if (device_type == CL_DEVICE_TYPE_CPU) { m_starts = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(double) * NZ, starts, &ecode); clu_CheckError(ecode, "clCreateBuffer() for m_starts"); local_ws = 1; global_ws = clu_RoundWorkSize((size_t)d2, local_ws); } else { //GPU m_starts = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(double) * NZ, starts, &ecode); clu_CheckError(ecode, "clCreateBuffer() for m_starts"); temp = d2 / max_compute_units; local_ws = temp == 0 ? 1 : ((temp > work_item_sizes[0]) ? work_item_sizes[0] : temp); global_ws = clu_RoundWorkSize((size_t)d2, local_ws); } ecode = clSetKernelArg(k_compute_ics, 0, sizeof(cl_mem), u0); ecode |= clSetKernelArg(k_compute_ics, 1, sizeof(cl_mem), &m_starts); clu_CheckError(ecode, "clSetKernelArg() for compute_initial_conditions"); ecode = clEnqueueNDRangeKernel(cmd_queue, k_compute_ics, 1, NULL, &global_ws, &local_ws, 0, NULL, NULL); clu_CheckError(ecode, "clEnqueueNDRangeKernel()"); ecode = clFinish(cmd_queue); clu_CheckError(ecode, "clFinish()"); DTIMER_START(T_RELEASE); clReleaseMemObject(m_starts); DTIMER_STOP(T_RELEASE); }