Пример #1
0
/* main */
int main(int argc, char **argv) 
{
  /*OpenCL variables */
  cl_device_id device;
  cl_device_type device_type; /*to test if we are on cpu or gpu*/
  cl_context context;
  cl_command_queue cmdQueue;

  /* The event variables are created only when needed */
#ifdef _UNBLOCK
  cl_uint  num_events = 3;
  cl_event event[num_events];
#endif

  FPTYPE * buffers[3];
  cl_mdsys_t cl_sys;
  cl_int status;

  int nprint, i, nthreads = 0;
  char restfile[BLEN], trajfile[BLEN], ergfile[BLEN], line[BLEN];
  FILE *fp,*traj,*erg;
  mdsys_t sys;


/* Start profiling */

#ifdef __PROFILING
  
  double t1, t2;

  t1 = second();

#endif

  /* handling the command line arguments */
  switch (argc) {
      case 2: /* only the cpu/gpu argument was passed, setting default nthreads */
	      if( !strcmp( argv[1], "cpu" ) ) nthreads = 16;
	      else nthreads = 1024;
	      break;
      case 3: /* both the device type (cpu/gpu) and the number of threads were passed */
	      nthreads = strtol(argv[2],NULL,10);
	      if( nthreads<0 ) {
		      fprintf( stderr, "\n. The number of threads must be more than 1.\n");
		      PrintUsageAndExit();
	      }
	      break;
      default:
	      PrintUsageAndExit();
	      break;
  }
  
  /* Initialize the OpenCL environment */
  if( InitOpenCLEnvironment( argv[1], &device, &context, &cmdQueue ) != CL_SUCCESS ){
    fprintf( stderr, "Program Error! OpenCL Environment was not initialized correctly.\n" );
    return 4;
  }

  /* The event initialization is performed only when needed */
#ifdef _UNBLOCK
  /* initialize the cl_event handler variables */
  for( i = 0; i < num_events; ++i) {
	  event[i] = clCreateUserEvent( context, NULL );
	  clSetUserEventStatus( event[i], CL_COMPLETE );
  }
#endif

  /* read input file */
  if(get_me_a_line(stdin,line)) return 1;
  sys.natoms=atoi(line);
  if(get_me_a_line(stdin,line)) return 1;
  sys.mass=atof(line);
  if(get_me_a_line(stdin,line)) return 1;
  sys.epsilon=atof(line);
  if(get_me_a_line(stdin,line)) return 1;
  sys.sigma=atof(line);
  if(get_me_a_line(stdin,line)) return 1;
  sys.rcut=atof(line);
  if(get_me_a_line(stdin,line)) return 1;
  sys.box=atof(line);
  if(get_me_a_line(stdin,restfile)) return 1;
  if(get_me_a_line(stdin,trajfile)) return 1;
  if(get_me_a_line(stdin,ergfile)) return 1;
  if(get_me_a_line(stdin,line)) return 1;
  sys.nsteps=atoi(line);
  if(get_me_a_line(stdin,line)) return 1;
  sys.dt=atof(line);
  if(get_me_a_line(stdin,line)) return 1;
  nprint=atoi(line);
  

  
  /* allocate memory */
  cl_sys.natoms = sys.natoms;
  cl_sys.rx = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status );
  cl_sys.ry = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status );
  cl_sys.rz = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status );
  cl_sys.vx = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status );
  cl_sys.vy = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status );
  cl_sys.vz = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status );
  cl_sys.fx = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status );
  cl_sys.fy = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status );
  cl_sys.fz = clCreateBuffer( context, CL_MEM_READ_WRITE, cl_sys.natoms * sizeof(FPTYPE), NULL, &status );
  
  buffers[0] = (FPTYPE *) malloc( 2 * cl_sys.natoms * sizeof(FPTYPE) );
  buffers[1] = (FPTYPE *) malloc( 2 * cl_sys.natoms * sizeof(FPTYPE) );
  buffers[2] = (FPTYPE *) malloc( 2 * cl_sys.natoms * sizeof(FPTYPE) );
  
  /* read restart */
  fp = fopen( restfile, "r" );
  if( fp ) {
    for( i = 0; i < 2 * cl_sys.natoms; ++i ){
#ifdef _USE_FLOAT
      fscanf( fp, "%f%f%f", buffers[0] + i, buffers[1] + i, buffers[2] + i);
#else
      fscanf( fp, "%lf%lf%lf", buffers[0] + i, buffers[1] + i, buffers[2] + i);
#endif
    }
    
    status = clEnqueueWriteBuffer( cmdQueue, cl_sys.rx, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[0], 0, NULL, NULL ); 
    status |= clEnqueueWriteBuffer( cmdQueue, cl_sys.ry, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[1], 0, NULL, NULL ); 
    status |= clEnqueueWriteBuffer( cmdQueue, cl_sys.rz, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[2], 0, NULL, NULL ); 
    
    status |= clEnqueueWriteBuffer( cmdQueue, cl_sys.vx, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[0] + cl_sys.natoms, 0, NULL, NULL ); 
    status |= clEnqueueWriteBuffer( cmdQueue, cl_sys.vy, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[1] + cl_sys.natoms, 0, NULL, NULL ); 
    status |= clEnqueueWriteBuffer( cmdQueue, cl_sys.vz, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[2] + cl_sys.natoms, 0, NULL, NULL ); 
    
    fclose(fp);

  } else {
    perror("cannot read restart file");
    return 3;
  }
  
  /* initialize forces and energies.*/
  sys.nfi=0;
  
  size_t globalWorkSize[1];
  globalWorkSize[0] = nthreads;
  
  const char * sourcecode =
  #include <opencl_kernels_as_string.h>
  ;

  cl_program program = clCreateProgramWithSource( context, 1, (const char **) &sourcecode, NULL, &status );
  
  status |= clBuildProgram( program, 0, NULL, kernelflags, NULL, NULL );
  
#ifdef __DEBUG
  size_t log_size;
  char log [200000]; 
  clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_LOG, sizeof(log), log, &log_size );
  fprintf( stderr, "\nLog: \n\n %s", log ); 
#endif
  
  cl_kernel kernel_force = clCreateKernel( program, "opencl_force", &status );
  cl_kernel kernel_ekin = clCreateKernel( program, "opencl_ekin", &status );
  cl_kernel kernel_verlet_first = clCreateKernel( program, "opencl_verlet_first", &status );
  cl_kernel kernel_verlet_second = clCreateKernel( program, "opencl_verlet_second", &status );
  cl_kernel kernel_azzero = clCreateKernel( program, "opencl_azzero", &status );
  
  FPTYPE * tmp_epot;
  cl_mem epot_buffer;
  tmp_epot = (FPTYPE *) malloc( nthreads * sizeof(FPTYPE) );
  epot_buffer = clCreateBuffer( context, CL_MEM_READ_WRITE, nthreads * sizeof(FPTYPE), NULL, &status );
  
  /* precompute some constants */
  FPTYPE c12 = 4.0 * sys.epsilon * pow( sys.sigma, 12.0);
  FPTYPE c6  = 4.0 * sys.epsilon * pow( sys.sigma, 6.0);
  FPTYPE rcsq = sys.rcut * sys.rcut;
  FPTYPE boxby2 = HALF * sys.box;  
  FPTYPE dtmf = HALF * sys.dt / mvsq2e / sys.mass;
  sys.epot = ZERO;
  sys.ekin = ZERO;

  /* Azzero force buffer */
  status = clSetMultKernelArgs( kernel_azzero, 0, 4, KArg(cl_sys.fx), KArg(cl_sys.fy), KArg(cl_sys.fz), KArg(cl_sys.natoms));

  status = clEnqueueNDRangeKernel( cmdQueue, kernel_azzero, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL );

  status |= clSetMultKernelArgs( kernel_force, 0, 13,
	KArg(cl_sys.fx),
	KArg(cl_sys.fy),
	KArg(cl_sys.fz),
	KArg(cl_sys.rx),
	KArg(cl_sys.ry),
	KArg(cl_sys.rz),
	KArg(cl_sys.natoms),
	KArg(epot_buffer),
	KArg(c12),
	KArg(c6),
	KArg(rcsq),
	KArg(boxby2),
	KArg(sys.box));
  
  status = clEnqueueNDRangeKernel( cmdQueue, kernel_force, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL );
  
  status |= clEnqueueReadBuffer( cmdQueue, epot_buffer, CL_TRUE, 0, nthreads * sizeof(FPTYPE), tmp_epot, 0, NULL, NULL );     
  
  for( i = 0; i < nthreads; i++) sys.epot += tmp_epot[i];
  
  FPTYPE * tmp_ekin;
  cl_mem ekin_buffer;
  tmp_ekin = (FPTYPE *) malloc( nthreads * sizeof(FPTYPE) );
  ekin_buffer = clCreateBuffer( context, CL_MEM_READ_WRITE, nthreads * sizeof(FPTYPE), NULL, &status );
  
  status |= clSetMultKernelArgs( kernel_ekin, 0, 5, KArg(cl_sys.vx), KArg(cl_sys.vy), KArg(cl_sys.vz),
    KArg(cl_sys.natoms), KArg(ekin_buffer));
  
  status = clEnqueueNDRangeKernel( cmdQueue, kernel_ekin, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL );
    
  status |= clEnqueueReadBuffer( cmdQueue, ekin_buffer, CL_TRUE, 0, nthreads * sizeof(FPTYPE), tmp_ekin, 0, NULL, NULL );     

  for( i = 0; i < nthreads; i++) sys.ekin += tmp_ekin[i];
  sys.ekin *= HALF * mvsq2e * sys.mass;
  sys.temp  = TWO * sys.ekin / ( THREE * sys.natoms - THREE ) / kboltz;

  erg=fopen(ergfile,"w");
  traj=fopen(trajfile,"w");

  printf("Starting simulation with %d atoms for %d steps.\n",sys.natoms, sys.nsteps);
  printf("     NFI            TEMP            EKIN                 EPOT              ETOT\n");
  
  /* download data on host */
  status = clEnqueueReadBuffer( cmdQueue, cl_sys.rx, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[0], 0, NULL, NULL ); 
  status |= clEnqueueReadBuffer( cmdQueue, cl_sys.ry, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[1], 0, NULL, NULL ); 
  status |= clEnqueueReadBuffer( cmdQueue, cl_sys.rz, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[2], 0, NULL, NULL ); 
  
  sys.rx = buffers[0];
  sys.ry = buffers[1];
  sys.rz = buffers[2];
  
  output(&sys, erg, traj);

  /**************************************************/
  /* main MD loop */
  for(sys.nfi=1; sys.nfi <= sys.nsteps; ++sys.nfi) {

    /* propagate system and recompute energies */
    /* 2) verlet_first   */
    status |= clSetMultKernelArgs( kernel_verlet_first, 0, 12,
      KArg(cl_sys.fx),
      KArg(cl_sys.fy),
      KArg(cl_sys.fz),
      KArg(cl_sys.rx),
      KArg(cl_sys.ry),
      KArg(cl_sys.rz),
      KArg(cl_sys.vx),
      KArg(cl_sys.vy),
      KArg(cl_sys.vz),
      KArg(cl_sys.natoms),
      KArg(sys.dt),
      KArg(dtmf));
    CheckSuccess(status, 2);

    /* When the data transfer is non blocking, this kernel has to wait the completion of part 8 (event[2]) */
#ifdef _UNBLOCK
    status = clEnqueueNDRangeKernel( cmdQueue, kernel_verlet_first, 1, NULL, globalWorkSize, NULL, 1, &event[2], NULL );
#else
    status = clEnqueueNDRangeKernel( cmdQueue, kernel_verlet_first, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL );
#endif

    /* 6) download position@device to position@host */
    if ((sys.nfi % nprint) == nprint-1) {

    /* In non blocking mode (CL_FALSE) this data transfer raises events[i] */
#ifdef _UNBLOCK
	status  = clEnqueueReadBuffer( cmdQueue, cl_sys.rx, CL_FALSE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[0], 0, NULL, &event[2] );
	status |= clEnqueueReadBuffer( cmdQueue, cl_sys.ry, CL_FALSE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[1], 0, NULL, &event[1] );
	status |= clEnqueueReadBuffer( cmdQueue, cl_sys.rz, CL_FALSE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[2], 0, NULL, &event[0] );
#else
	status  = clEnqueueReadBuffer( cmdQueue, cl_sys.rx, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[0], 0, NULL, NULL );
	status |= clEnqueueReadBuffer( cmdQueue, cl_sys.ry, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[1], 0, NULL, NULL );
	status |= clEnqueueReadBuffer( cmdQueue, cl_sys.rz, CL_TRUE, 0, cl_sys.natoms * sizeof(FPTYPE), buffers[2], 0, NULL, NULL );
#endif
	CheckSuccess(status, 6);
    }

    /* 3) force */
    status |= clSetMultKernelArgs( kernel_force, 0, 13,
      KArg(cl_sys.fx),
      KArg(cl_sys.fy),
      KArg(cl_sys.fz),
      KArg(cl_sys.rx),
      KArg(cl_sys.ry),
      KArg(cl_sys.rz),
      KArg(cl_sys.natoms),
      KArg(epot_buffer),
      KArg(c12),
      KArg(c6),
      KArg(rcsq),
      KArg(boxby2),
      KArg(sys.box));

    CheckSuccess(status, 3);
    status = clEnqueueNDRangeKernel( cmdQueue, kernel_force, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL );

    /* 7) download E_pot[i]@device and perform reduction to E_pot@host */
    if ((sys.nfi % nprint) == nprint-1) {

    /* In non blocking mode (CL_FALSE) this data transfer kernel raises an event[1] */
#ifdef _UNBLOCK
	status |= clEnqueueReadBuffer( cmdQueue, epot_buffer, CL_FALSE, 0, nthreads * sizeof(FPTYPE), tmp_epot, 0, NULL, &event[1] );
#else
	status |= clEnqueueReadBuffer( cmdQueue, epot_buffer, CL_TRUE, 0, nthreads * sizeof(FPTYPE), tmp_epot, 0, NULL, NULL );
#endif
	CheckSuccess(status, 7);
    }

    /* 4) verlet_second */
    status |= clSetMultKernelArgs( kernel_verlet_second, 0, 9,
      KArg(cl_sys.fx),
      KArg(cl_sys.fy),
      KArg(cl_sys.fz),
      KArg(cl_sys.vx),
      KArg(cl_sys.vy),
      KArg(cl_sys.vz),
      KArg(cl_sys.natoms),
      KArg(sys.dt),
      KArg(dtmf));

    CheckSuccess(status, 4);
    status = clEnqueueNDRangeKernel( cmdQueue, kernel_verlet_second, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL );

    if ((sys.nfi % nprint) == nprint-1) {

	/* 5) ekin */
	status |= clSetMultKernelArgs( kernel_ekin, 0, 5, KArg(cl_sys.vx), KArg(cl_sys.vy), KArg(cl_sys.vz),
			KArg(cl_sys.natoms), KArg(ekin_buffer));
	CheckSuccess(status, 5);
	status = clEnqueueNDRangeKernel( cmdQueue, kernel_ekin, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL );


	/* 8) download E_kin[i]@device and perform reduction to E_kin@host */
	/* In non blocking mode (CL_FALSE) this data transfer kernel raises an event[2] */
#ifdef _UNBLOCK
	status |= clEnqueueReadBuffer( cmdQueue, ekin_buffer, CL_FALSE, 0, nthreads * sizeof(FPTYPE), tmp_ekin, 0, NULL, &event[2] );
#else
	status |= clEnqueueReadBuffer( cmdQueue, ekin_buffer, CL_TRUE, 0, nthreads * sizeof(FPTYPE), tmp_ekin, 0, NULL, NULL );
#endif
	CheckSuccess(status, 8);
    }

    /* 1) write output every nprint steps */
    if ((sys.nfi % nprint) == 0) {

    /* Calling a synchronization function (only when in non blocking mode) that will wait until all the
     * events[i], related to the data transfers, to be completed */
#ifdef _UNBLOCK
        clWaitForEvents(3, event);
#endif
	sys.rx = buffers[0];
	sys.ry = buffers[1];
	sys.rz = buffers[2];

	/* initialize the sys.epot@host and sys.ekin@host variables to ZERO */
	sys.epot = ZERO;
	sys.ekin = ZERO;

	/* reduction on the tmp_Exxx[i] buffers downloaded from the device
	 * during parts 7 and 8 of the previous MD loop iteration */
	for( i = 0; i < nthreads; i++) {
		sys.epot += tmp_epot[i];
		sys.ekin += tmp_ekin[i];
	}

	/* multiplying the kinetic energy by prefactors */
	sys.ekin *= HALF * mvsq2e * sys.mass;
	sys.temp  = TWO * sys.ekin / ( THREE * sys.natoms - THREE ) / kboltz;

	/* writing output files (positions, energies and temperature) */
	output(&sys, erg, traj);
    }

  }
  /**************************************************/

/* End profiling */

#ifdef __PROFILING

t2 = second();

fprintf( stdout, "\n\nTime of execution = %.3g (seconds)\n", (t2 - t1) );

#endif






  /* clean up: close files, free memory */
  printf("Simulation Done.\n");
  fclose(erg);
  fclose(traj);

  free(buffers[0]);
  free(buffers[1]);
  free(buffers[2]);

  return 0;
}
Пример #2
0
/* main */
int main(int argc, char **argv) 
{
    int nprint, i;
    char restfile[BLEN], trajfile[BLEN], ergfile[BLEN], line[BLEN];
    FILE *fp,*traj,*erg;
    mdsys_t sys;

    /* read input file */
    if(get_me_a_line(stdin,line)) return 1;
    sys.natoms=atoi(line);
    if(get_me_a_line(stdin,line)) return 1;
    sys.mass=atof(line);
    if(get_me_a_line(stdin,line)) return 1;
    sys.epsilon=atof(line);
    if(get_me_a_line(stdin,line)) return 1;
    sys.sigma=atof(line);
    if(get_me_a_line(stdin,line)) return 1;
    sys.rcut=atof(line);
    if(get_me_a_line(stdin,line)) return 1;
    sys.box=atof(line);
    if(get_me_a_line(stdin,restfile)) return 1;
    if(get_me_a_line(stdin,trajfile)) return 1;
    if(get_me_a_line(stdin,ergfile)) return 1;
    if(get_me_a_line(stdin,line)) return 1;
    sys.nsteps=atoi(line);
    if(get_me_a_line(stdin,line)) return 1;
    sys.dt=atof(line);
    if(get_me_a_line(stdin,line)) return 1;
    nprint=atoi(line);

    /* allocate memory */
    sys.rx=(double *)malloc(sys.natoms*sizeof(double));
    sys.ry=(double *)malloc(sys.natoms*sizeof(double));
    sys.rz=(double *)malloc(sys.natoms*sizeof(double));
    sys.vx=(double *)malloc(sys.natoms*sizeof(double));
    sys.vy=(double *)malloc(sys.natoms*sizeof(double));
    sys.vz=(double *)malloc(sys.natoms*sizeof(double));
    sys.fx=(double *)malloc(sys.natoms*sizeof(double));
    sys.fy=(double *)malloc(sys.natoms*sizeof(double));
    sys.fz=(double *)malloc(sys.natoms*sizeof(double));

    /* read restart */
    fp=fopen(restfile,"r");
    if(fp) {
        for (i=0; i<sys.natoms; ++i) {
            fscanf(fp,"%lf%lf%lf",sys.rx+i, sys.ry+i, sys.rz+i);
        }
        for (i=0; i<sys.natoms; ++i) {
            fscanf(fp,"%lf%lf%lf",sys.vx+i, sys.vy+i, sys.vz+i);
        }
        fclose(fp);
        azzero(sys.fx, sys.natoms);
        azzero(sys.fy, sys.natoms);
        azzero(sys.fz, sys.natoms);
    } else {
        perror("cannot read restart file");
        return 3;
    }

    /* initialize forces and energies.*/
    sys.nfi=0;
    force(&sys);
    ekin(&sys);
    
    erg=fopen(ergfile,"w");
    traj=fopen(trajfile,"w");

    printf("Starting simulation with %d atoms for %d steps.\n",sys.natoms, sys.nsteps);
    printf("     NFI            TEMP            EKIN                 EPOT              ETOT\n");
    output(&sys, erg, traj);

    /**************************************************/
    /* main MD loop */
    for(sys.nfi=1; sys.nfi <= sys.nsteps; ++sys.nfi) {

        /* write output, if requested */
        if ((sys.nfi % nprint) == 0)
            output(&sys, erg, traj);

        /* propagate system and recompute energies */
        velverlet(&sys);
        ekin(&sys);
    }
    /**************************************************/

    /* clean up: close files, free memory */
    printf("Simulation Done.\n");
    fclose(erg);
    fclose(traj);

    free(sys.rx);
    free(sys.ry);
    free(sys.rz);
    free(sys.vx);
    free(sys.vy);
    free(sys.vz);
    free(sys.fx);
    free(sys.fy);
    free(sys.fz);

    return 0;
}