/* main */ int main(int argc, char **argv) { /*OpenCL variables */ cl_device_id device; cl_device_type device_type; /*to test if we are on cpu or gpu*/ cl_context context; cl_command_queue cmdQueue; /* The event variables are created only when needed */ #ifdef _UNBLOCK cl_uint num_events = 3; cl_event event[num_events]; #endif FPTYPE * buffers[3]; cl_mdsys_t cl_sys; cl_int status; int nprint, i, nthreads = 0; char restfile[BLEN], trajfile[BLEN], ergfile[BLEN], line[BLEN]; FILE *fp,*traj,*erg; mdsys_t sys; /* Start profiling */ #ifdef __PROFILING double t1, t2; t1 = second(); #endif /* handling the command line arguments */ switch (argc) { case 2: /* only the cpu/gpu argument was passed, setting default nthreads */ if( !strcmp( argv[1], "cpu" ) ) nthreads = 16; else nthreads = 1024; break; case 3: /* both the device type (cpu/gpu) and the number of threads were passed */ nthreads = strtol(argv[2],NULL,10); if( nthreads<0 ) { fprintf( stderr, "\n. The number of threads must be more than 1.\n"); PrintUsageAndExit(); } break; default: PrintUsageAndExit(); break; } /* Initialize the OpenCL environment */ if( InitOpenCLEnvironment( argv[1], &device, &context, &cmdQueue ) != CL_SUCCESS ){ fprintf( stderr, "Program Error! OpenCL Environment was not initialized correctly.\n" ); return 4; } /* The event initialization is performed only when needed */ #ifdef _UNBLOCK /* initialize the cl_event handler variables */ for( i = 0; i < num_events; ++i) { event[i] = clCreateUserEvent( context, NULL ); clSetUserEventStatus( event[i], CL_COMPLETE ); } #endif /* read input file */ if(get_me_a_line(stdin,line)) return 1; sys.natoms=atoi(line); if(get_me_a_line(stdin,line)) return 1; sys.mass=atof(line); if(get_me_a_line(stdin,line)) return 1; sys.epsilon=atof(line); if(get_me_a_line(stdin,line)) return 1; sys.sigma=atof(line); if(get_me_a_line(stdin,line)) return 1; sys.rcut=atof(line); if(get_me_a_line(stdin,line)) return 1; sys.box=atof(line); if(get_me_a_line(stdin,restfile)) return 1; if(get_me_a_line(stdin,trajfile)) return 1; if(get_me_a_line(stdin,ergfile)) return 1; if(get_me_a_line(stdin,line)) return 1; sys.nsteps=atoi(line); if(get_me_a_line(stdin,line)) return 1; sys.dt=atof(line); if(get_me_a_line(stdin,line)) return 1; nprint=atoi(line); /* allocate memory */ cl_sys.natoms = sys.natoms; cl_sys.rx = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.ry = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.rz = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.vx = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.vy = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.vz = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.fx = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.fy = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); cl_sys.fz = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status ); buffers[0] = (FPTYPE *) malloc( 2 * cl_sys.natoms * sizeof(FPTYPE) ); buffers[1] = (FPTYPE *) malloc( 2 * cl_sys.natoms * sizeof(FPTYPE) ); buffers[2] = (FPTYPE *) malloc( 2 * cl_sys.natoms * sizeof(FPTYPE) ); /* read restart */ fp = fopen( restfile, "r" ); if( fp ) { for( i = 0; i < 2 * cl_sys.natoms; ++i ){ #ifdef _USE_FLOAT fscanf( fp, "%f%f%f", buffers[0] + i, buffers[1] + i, buffers[2] + i); #else fscanf( fp, "%lf%lf%lf", buffers[0] + i, buffers[1] + i, buffers[2] + i); #endif } status = clEnqueueWriteBuffer( cmdQueue, cl_sys.rx, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[0], 0, NULL, NULL ); status |= clEnqueueWriteBuffer( cmdQueue, cl_sys.ry, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[1], 0, NULL, NULL ); status |= clEnqueueWriteBuffer( cmdQueue, cl_sys.rz, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[2], 0, NULL, NULL ); status |= clEnqueueWriteBuffer( cmdQueue, cl_sys.vx, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[0] + cl_sys.natoms, 0, NULL, NULL ); status |= clEnqueueWriteBuffer( cmdQueue, cl_sys.vy, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[1] + cl_sys.natoms, 0, NULL, NULL ); status |= clEnqueueWriteBuffer( cmdQueue, cl_sys.vz, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[2] + cl_sys.natoms, 0, NULL, NULL ); fclose(fp); } else { perror("cannot read restart file"); return 3; } /* initialize forces and energies.*/ sys.nfi=0; size_t globalWorkSize[1]; globalWorkSize[0] = nthreads; const char * sourcecode = #include <opencl_kernels_as_string.h> ; cl_program program = clCreateProgramWithSource( context, 1, (const char **) &sourcecode, NULL, &status ); status |= clBuildProgram( program, 0, NULL, kernelflags, NULL, NULL ); #ifdef __DEBUG size_t log_size; char log [200000]; clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_LOG, sizeof(log), log, &log_size ); fprintf( stderr, "\nLog: \n\n %s", log ); #endif cl_kernel kernel_force = clCreateKernel( program, "opencl_force", &status ); cl_kernel kernel_ekin = clCreateKernel( program, "opencl_ekin", &status ); cl_kernel kernel_verlet_first = clCreateKernel( program, "opencl_verlet_first", &status ); cl_kernel kernel_verlet_second = clCreateKernel( program, "opencl_verlet_second", &status ); cl_kernel kernel_azzero = clCreateKernel( program, "opencl_azzero", &status ); FPTYPE * tmp_epot; cl_mem epot_buffer; tmp_epot = (FPTYPE *) malloc( nthreads * sizeof(FPTYPE) ); epot_buffer = clCreateBuffer( context, CL_MEM_READ_WRITE, nthreads * sizeof(FPTYPE), NULL, &status ); /* precompute some constants */ FPTYPE c12 = 4.0 * sys.epsilon * pow( sys.sigma, 12.0); FPTYPE c6 = 4.0 * sys.epsilon * pow( sys.sigma, 6.0); FPTYPE rcsq = sys.rcut * sys.rcut; FPTYPE boxby2 = HALF * sys.box; FPTYPE dtmf = HALF * sys.dt / mvsq2e / sys.mass; sys.epot = ZERO; sys.ekin = ZERO; /* Azzero force buffer */ status = clSetMultKernelArgs( kernel_azzero, 0, 4, KArg(cl_sys.fx), KArg(cl_sys.fy), KArg(cl_sys.fz), KArg(cl_sys.natoms)); status = clEnqueueNDRangeKernel( cmdQueue, kernel_azzero, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL ); status |= clSetMultKernelArgs( kernel_force, 0, 13, KArg(cl_sys.fx), KArg(cl_sys.fy), KArg(cl_sys.fz), KArg(cl_sys.rx), KArg(cl_sys.ry), KArg(cl_sys.rz), KArg(cl_sys.natoms), KArg(epot_buffer), KArg(c12), KArg(c6), KArg(rcsq), KArg(boxby2), KArg(sys.box)); status = clEnqueueNDRangeKernel( cmdQueue, kernel_force, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL ); status |= clEnqueueReadBuffer( cmdQueue, epot_buffer, CL_TRUE, 0, nthreads * sizeof(FPTYPE), tmp_epot, 0, NULL, NULL ); for( i = 0; i < nthreads; i++) sys.epot += tmp_epot[i]; FPTYPE * tmp_ekin; cl_mem ekin_buffer; tmp_ekin = (FPTYPE *) malloc( nthreads * sizeof(FPTYPE) ); ekin_buffer = clCreateBuffer( context, CL_MEM_READ_WRITE, nthreads * sizeof(FPTYPE), NULL, &status ); status |= clSetMultKernelArgs( kernel_ekin, 0, 5, KArg(cl_sys.vx), KArg(cl_sys.vy), KArg(cl_sys.vz), KArg(cl_sys.natoms), KArg(ekin_buffer)); status = clEnqueueNDRangeKernel( cmdQueue, kernel_ekin, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL ); status |= clEnqueueReadBuffer( cmdQueue, ekin_buffer, CL_TRUE, 0, nthreads * sizeof(FPTYPE), tmp_ekin, 0, NULL, NULL ); for( i = 0; i < nthreads; i++) sys.ekin += tmp_ekin[i]; sys.ekin *= HALF * mvsq2e * sys.mass; sys.temp = TWO * sys.ekin / ( THREE * sys.natoms - THREE ) / kboltz; erg=fopen(ergfile,"w"); traj=fopen(trajfile,"w"); printf("Starting simulation with %d atoms for %d steps.\n",sys.natoms, sys.nsteps); printf(" NFI TEMP EKIN EPOT ETOT\n"); /* download data on host */ status = clEnqueueReadBuffer( cmdQueue, cl_sys.rx, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[0], 0, NULL, NULL ); status |= clEnqueueReadBuffer( cmdQueue, cl_sys.ry, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[1], 0, NULL, NULL ); status |= clEnqueueReadBuffer( cmdQueue, cl_sys.rz, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[2], 0, NULL, NULL ); sys.rx = buffers[0]; sys.ry = buffers[1]; sys.rz = buffers[2]; output(&sys, erg, traj); /**************************************************/ /* main MD loop */ for(sys.nfi=1; sys.nfi <= sys.nsteps; ++sys.nfi) { /* propagate system and recompute energies */ /* 2) verlet_first */ status |= clSetMultKernelArgs( kernel_verlet_first, 0, 12, KArg(cl_sys.fx), KArg(cl_sys.fy), KArg(cl_sys.fz), KArg(cl_sys.rx), KArg(cl_sys.ry), KArg(cl_sys.rz), KArg(cl_sys.vx), KArg(cl_sys.vy), KArg(cl_sys.vz), KArg(cl_sys.natoms), KArg(sys.dt), KArg(dtmf)); CheckSuccess(status, 2); /* When the data transfer is non blocking, this kernel has to wait the completion of part 8 (event[2]) */ #ifdef _UNBLOCK status = clEnqueueNDRangeKernel( cmdQueue, kernel_verlet_first, 1, NULL, globalWorkSize, NULL, 1, &event[2], NULL ); #else status = clEnqueueNDRangeKernel( cmdQueue, kernel_verlet_first, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL ); #endif /* 6) download position@device to position@host */ if ((sys.nfi % nprint) == nprint-1) { /* In non blocking mode (CL_FALSE) this data transfer raises events[i] */ #ifdef _UNBLOCK status = clEnqueueReadBuffer( cmdQueue, cl_sys.rx, CL_FALSE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[0], 0, NULL, &event[2] ); status |= clEnqueueReadBuffer( cmdQueue, cl_sys.ry, CL_FALSE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[1], 0, NULL, &event[1] ); status |= clEnqueueReadBuffer( cmdQueue, cl_sys.rz, CL_FALSE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[2], 0, NULL, &event[0] ); #else status = clEnqueueReadBuffer( cmdQueue, cl_sys.rx, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[0], 0, NULL, NULL ); status |= clEnqueueReadBuffer( cmdQueue, cl_sys.ry, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[1], 0, NULL, NULL ); status |= clEnqueueReadBuffer( cmdQueue, cl_sys.rz, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[2], 0, NULL, NULL ); #endif CheckSuccess(status, 6); } /* 3) force */ status |= clSetMultKernelArgs( kernel_force, 0, 13, KArg(cl_sys.fx), KArg(cl_sys.fy), KArg(cl_sys.fz), KArg(cl_sys.rx), KArg(cl_sys.ry), KArg(cl_sys.rz), KArg(cl_sys.natoms), KArg(epot_buffer), KArg(c12), KArg(c6), KArg(rcsq), KArg(boxby2), KArg(sys.box)); CheckSuccess(status, 3); status = clEnqueueNDRangeKernel( cmdQueue, kernel_force, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL ); /* 7) download E_pot[i]@device and perform reduction to E_pot@host */ if ((sys.nfi % nprint) == nprint-1) { /* In non blocking mode (CL_FALSE) this data transfer kernel raises an event[1] */ #ifdef _UNBLOCK status |= clEnqueueReadBuffer( cmdQueue, epot_buffer, CL_FALSE, 0, nthreads * sizeof(FPTYPE), tmp_epot, 0, NULL, &event[1] ); #else status |= clEnqueueReadBuffer( cmdQueue, epot_buffer, CL_TRUE, 0, nthreads * sizeof(FPTYPE), tmp_epot, 0, NULL, NULL ); #endif CheckSuccess(status, 7); } /* 4) verlet_second */ status |= clSetMultKernelArgs( kernel_verlet_second, 0, 9, KArg(cl_sys.fx), KArg(cl_sys.fy), KArg(cl_sys.fz), KArg(cl_sys.vx), KArg(cl_sys.vy), KArg(cl_sys.vz), KArg(cl_sys.natoms), KArg(sys.dt), KArg(dtmf)); CheckSuccess(status, 4); status = clEnqueueNDRangeKernel( cmdQueue, kernel_verlet_second, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL ); if ((sys.nfi % nprint) == nprint-1) { /* 5) ekin */ status |= clSetMultKernelArgs( kernel_ekin, 0, 5, KArg(cl_sys.vx), KArg(cl_sys.vy), KArg(cl_sys.vz), KArg(cl_sys.natoms), KArg(ekin_buffer)); CheckSuccess(status, 5); status = clEnqueueNDRangeKernel( cmdQueue, kernel_ekin, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL ); /* 8) download E_kin[i]@device and perform reduction to E_kin@host */ /* In non blocking mode (CL_FALSE) this data transfer kernel raises an event[2] */ #ifdef _UNBLOCK status |= clEnqueueReadBuffer( cmdQueue, ekin_buffer, CL_FALSE, 0, nthreads * sizeof(FPTYPE), tmp_ekin, 0, NULL, &event[2] ); #else status |= clEnqueueReadBuffer( cmdQueue, ekin_buffer, CL_TRUE, 0, nthreads * sizeof(FPTYPE), tmp_ekin, 0, NULL, NULL ); #endif CheckSuccess(status, 8); } /* 1) write output every nprint steps */ if ((sys.nfi % nprint) == 0) { /* Calling a synchronization function (only when in non blocking mode) that will wait until all the * events[i], related to the data transfers, to be completed */ #ifdef _UNBLOCK clWaitForEvents(3, event); #endif sys.rx = buffers[0]; sys.ry = buffers[1]; sys.rz = buffers[2]; /* initialize the sys.epot@host and sys.ekin@host variables to ZERO */ sys.epot = ZERO; sys.ekin = ZERO; /* reduction on the tmp_Exxx[i] buffers downloaded from the device * during parts 7 and 8 of the previous MD loop iteration */ for( i = 0; i < nthreads; i++) { sys.epot += tmp_epot[i]; sys.ekin += tmp_ekin[i]; } /* multiplying the kinetic energy by prefactors */ sys.ekin *= HALF * mvsq2e * sys.mass; sys.temp = TWO * sys.ekin / ( THREE * sys.natoms - THREE ) / kboltz; /* writing output files (positions, energies and temperature) */ output(&sys, erg, traj); } } /**************************************************/ /* End profiling */ #ifdef __PROFILING t2 = second(); fprintf( stdout, "\n\nTime of execution = %.3g (seconds)\n", (t2 - t1) ); #endif /* clean up: close files, free memory */ printf("Simulation Done.\n"); fclose(erg); fclose(traj); free(buffers[0]); free(buffers[1]); free(buffers[2]); return 0; }
/* main */ int main(int argc, char **argv) { int nprint, i; char restfile[BLEN], trajfile[BLEN], ergfile[BLEN], line[BLEN]; FILE *fp,*traj,*erg; mdsys_t sys; /* read input file */ if(get_me_a_line(stdin,line)) return 1; sys.natoms=atoi(line); if(get_me_a_line(stdin,line)) return 1; sys.mass=atof(line); if(get_me_a_line(stdin,line)) return 1; sys.epsilon=atof(line); if(get_me_a_line(stdin,line)) return 1; sys.sigma=atof(line); if(get_me_a_line(stdin,line)) return 1; sys.rcut=atof(line); if(get_me_a_line(stdin,line)) return 1; sys.box=atof(line); if(get_me_a_line(stdin,restfile)) return 1; if(get_me_a_line(stdin,trajfile)) return 1; if(get_me_a_line(stdin,ergfile)) return 1; if(get_me_a_line(stdin,line)) return 1; sys.nsteps=atoi(line); if(get_me_a_line(stdin,line)) return 1; sys.dt=atof(line); if(get_me_a_line(stdin,line)) return 1; nprint=atoi(line); /* allocate memory */ sys.rx=(double *)malloc(sys.natoms*sizeof(double)); sys.ry=(double *)malloc(sys.natoms*sizeof(double)); sys.rz=(double *)malloc(sys.natoms*sizeof(double)); sys.vx=(double *)malloc(sys.natoms*sizeof(double)); sys.vy=(double *)malloc(sys.natoms*sizeof(double)); sys.vz=(double *)malloc(sys.natoms*sizeof(double)); sys.fx=(double *)malloc(sys.natoms*sizeof(double)); sys.fy=(double *)malloc(sys.natoms*sizeof(double)); sys.fz=(double *)malloc(sys.natoms*sizeof(double)); /* read restart */ fp=fopen(restfile,"r"); if(fp) { for (i=0; i<sys.natoms; ++i) { fscanf(fp,"%lf%lf%lf",sys.rx+i, sys.ry+i, sys.rz+i); } for (i=0; i<sys.natoms; ++i) { fscanf(fp,"%lf%lf%lf",sys.vx+i, sys.vy+i, sys.vz+i); } fclose(fp); azzero(sys.fx, sys.natoms); azzero(sys.fy, sys.natoms); azzero(sys.fz, sys.natoms); } else { perror("cannot read restart file"); return 3; } /* initialize forces and energies.*/ sys.nfi=0; force(&sys); ekin(&sys); erg=fopen(ergfile,"w"); traj=fopen(trajfile,"w"); printf("Starting simulation with %d atoms for %d steps.\n",sys.natoms, sys.nsteps); printf(" NFI TEMP EKIN EPOT ETOT\n"); output(&sys, erg, traj); /**************************************************/ /* main MD loop */ for(sys.nfi=1; sys.nfi <= sys.nsteps; ++sys.nfi) { /* write output, if requested */ if ((sys.nfi % nprint) == 0) output(&sys, erg, traj); /* propagate system and recompute energies */ velverlet(&sys); ekin(&sys); } /**************************************************/ /* clean up: close files, free memory */ printf("Simulation Done.\n"); fclose(erg); fclose(traj); free(sys.rx); free(sys.ry); free(sys.rz); free(sys.vx); free(sys.vy); free(sys.vz); free(sys.fx); free(sys.fy); free(sys.fz); return 0; }