コード例 #1
0
ファイル: Gym_Emu.cpp プロジェクト: Kinglions/modizer
	// Feeds the GYM header and every byte from data_offset to the end of the
	// file into `out` via hash_gym_file(). Always reports success.
	blargg_err_t hash_( Hash_Function& out ) const
	{
		byte const* const begin = file_begin();
		byte const* const body  = begin + data_offset;

		// The header is read in place from the start of the loaded file.
		Gym_Emu::header_t const& hdr =
			*reinterpret_cast<Gym_Emu::header_t const*>( begin );

		hash_gym_file( hdr, body, file_end() - body, out );
		return (blargg_err_t)blargg_ok;
	}
コード例 #2
0
ファイル: Vgm_Emu.cpp プロジェクト: Kinglions/modizer
// Hashes the VGM header together with the command stream.
//
// The stream starts right after the header, or at the position given by the
// header's data_offset field when that field is non-zero (the field holds an
// offset relative to its own position in the header). The stream ends at the
// end of the file, or at the GD3 block when the header's gd3_offset points
// past the start of the data.
//
// Both offsets are read from the file, so they are validated against the
// file length before any pointer is formed; out-of-range values are clamped
// so that hash_vgm_file() never receives a range outside the loaded file or
// a negative length.
blargg_err_t Vgm_Emu::hash_( Hash_Function& out ) const
{
	int const file_len   = (int) ( file_end() - file_begin() );
	int const data_field = (int) offsetof( header_t, data_offset );
	int const gd3_field  = (int) offsetof( header_t, gd3_offset );

	// Start of the hashed range. data_pos is the position the GD3 offset is
	// compared against; with a zero data_offset it is the field position.
	int start    = header().size();
	int data_pos = data_field;
	int const data_offset = get_le32( header().data_offset );
	if ( data_offset )
	{
		if ( data_offset < 0 || data_offset > file_len )
			data_pos = file_len; // corrupt offset: nothing left to hash
		else
			data_pos = data_offset + data_field;
		start = data_pos;
	}
	if ( start < 0 || start > file_len )
		start = file_len;

	// End of the hashed range: stop at the GD3 block if it follows the data.
	int stop = file_len;
	int const gd3_offset = get_le32( header().gd3_offset );
	if ( gd3_offset > 0 && gd3_offset <= file_len )
	{
		int const gd3_pos = gd3_offset + gd3_field;
		if ( gd3_pos > data_pos && gd3_pos < file_len )
			stop = gd3_pos;
	}
	if ( stop < start )
		stop = start;

	hash_vgm_file( header(), file_begin() + start, stop - start, out );
	return (blargg_err_t)blargg_ok;
}
コード例 #3
0
ファイル: Gbs_Emu.cpp プロジェクト: iOSAppList/modizer
	// Hashes the GBS header `*h` and all file bytes that follow the first
	// h->size bytes, using hash_gbs_file().
	blargg_err_t hash_( Hash_Function& out ) const
	{
		byte const* const body = file_begin() + h->size;

		hash_gbs_file( *h, body, file_end() - body, out );
		return blargg_ok;
	}
コード例 #4
0
int main(int argc, char** argv)
{
      if(argc < 3) {
          std::cerr << "Usage: " << argv[0] << " < #particles > < #turns > [deviceIdx]" << std::endl;
          exit(1);
        }
  		int NUM_REPETITIONS = 10;
    	double num_of_turns_drift = 0.0; // for timing
    	double num_of_turns_drift_exact = 0.0; // for timing
    	double num_of_turns_cavity = 0.0; // for timing
    	double num_of_turns_align = 0.0; // for timing
    	double average_execution_time_drift = 0.0;
    	double average_execution_time_drift_exact = 0.0;
    	double average_execution_time_cavity = 0.0;
    	double average_execution_time_align = 0.0;

      std::vector<double> exec_time_drift;
      std::vector<double> exec_time_drift_exact;
      std::vector<double> exec_time_cavity;
      std::vector<double> exec_time_align;

      int choice  = 1;

			for(int ll = 0; ll < NUM_REPETITIONS; ++ll) {
    /* We will use 9+ beam element blocks in this example and do not
     * care to be memory efficient yet; thus we make the blocks for
     * beam elements and particles big enough to avoid running into problems */

    constexpr st_block_size_t const MAX_NUM_BEAM_ELEMENTS       = 1000u; // 20u;
    constexpr st_block_size_t const NUM_OF_BEAM_ELEMENTS        = 1000u; //9u;

    /* 1MByte is plenty of space */
    constexpr st_block_size_t const BEAM_ELEMENTS_DATA_CAPACITY = 1048576u;

    /* Prepare and init the beam elements buffer */

    st_Blocks beam_elements;
    st_Blocks_preset( &beam_elements );

    int ret = st_Blocks_init( &beam_elements, MAX_NUM_BEAM_ELEMENTS,
                              BEAM_ELEMENTS_DATA_CAPACITY );

    assert( ret == 0 ); /* if there was an error, ret would be != 0 */

    /* Add NUM_OF_BEAM_ELEMENTS drifts to the buffer. For this example, let's
     * just have one simple constant length for all of them: */

   // One-fourth of the beam-elements are drift-elements
    for( st_block_size_t ii = 0 ; ii < NUM_OF_BEAM_ELEMENTS/4 ; ++ii )
    {
        double const drift_length = double{ 0.2L };
        st_Drift* drift = st_Blocks_add_drift( &beam_elements, drift_length );

        (void)drift; // using the variable with a no-op

        assert( drift != nullptr ); /* Otherwise, there was a problem! */
    }

    /* Check if we *really* have the correct number of beam elements and
     * if they really are all drifts */

    assert( st_Blocks_get_num_of_blocks( &beam_elements ) ==
            NUM_OF_BEAM_ELEMENTS/4 );

    /* The beam_elements container is currently not serialized yet ->
     * we could still add blocks to the buffer. Let's jus do this and
     * add a different kind of beam element to keep it easier apart! */

    for( st_block_size_t ii = NUM_OF_BEAM_ELEMENTS/4 ; ii < NUM_OF_BEAM_ELEMENTS/2 ; ++ii )
    {
        double const drift_length = double{ 0.1L };
    st_DriftExact* drift_exact = st_Blocks_add_drift_exact(
        &beam_elements, drift_length );
        (void) drift_exact;
    assert( drift_exact != nullptr );
   }

    assert( st_Blocks_get_num_of_blocks( &beam_elements ) ==
            ( NUM_OF_BEAM_ELEMENTS*0.5) );

    /* Adding the beam element 'cavity' */

    for( st_block_size_t ii = NUM_OF_BEAM_ELEMENTS*0.5 ; ii < NUM_OF_BEAM_ELEMENTS*0.75 ; ++ii )
    {
      double const voltage = double{ 1e4};
      double const frequency = double{ 40};
      double const lag = double{ 0.01L};
      st_Cavity* cavity = st_Blocks_add_cavity(
          &beam_elements, voltage, frequency, lag);
      (void) cavity; // a no-op
      assert( cavity != nullptr ); /* Otherwise, there was a problem! */
    }
    assert( st_Blocks_get_num_of_blocks( &beam_elements ) ==
            ( NUM_OF_BEAM_ELEMENTS * 0.75) );

    /* Adding the beam element 'align' */
    double const M__PI   = // note the two underscores between M and PI
      ( double )3.1415926535897932384626433832795028841971693993751L;
    for( st_block_size_t ii = NUM_OF_BEAM_ELEMENTS*0.75 ; ii < NUM_OF_BEAM_ELEMENTS ; ++ii )
    {
      double const tilt = double{ 0.5};
      double const z = double{ M__PI / 45};
      double const dx = double{ 0.2L};
      double const dy = double{ 0.2L};
      st_Align* align = st_Blocks_add_align(
          &beam_elements, tilt, cos( z ), sin( z ), dx, dy);
      (void) align; // a no-op
      assert( align != nullptr ); /* Otherwise, there was a problem! */
    }
    assert( st_Blocks_get_num_of_blocks( &beam_elements ) ==
        ( NUM_OF_BEAM_ELEMENTS) );
    /* Always safely terminate pointer variables pointing to resources they
     * do not own which we no longer need -> just a good practice */

//    drift_exact = nullptr;

    /* After serialization, the "structure" of the beam_elements buffer is
     * frozen, but the data in the elements - i.e. the length of the
     * individual drifts in our example - can still be modified. We will
     * just not be able to add further blocks to the container */

    assert( !st_Blocks_are_serialized( &beam_elements ) );

    ret = st_Blocks_serialize( &beam_elements );

    assert( ret == 0 );
    assert( st_Blocks_are_serialized( &beam_elements ) ); // serialization on CPU done.

    /* Next, let's iterate over all the beam_elements in the buffer and
     * print out the properties -> we expect that NUM_OF_BEAM_ELEMENTS
     * st_Drift with the same length appear and one st_DriftExact with a
     * different length should appear in the end */
    std::cout.flush();

/************************** Preparing grounds for OpenCL *******/
    std::vector<cl::Platform> platform;
    cl::Platform::get(&platform);

    if( platform.empty() )
    {
        std::cerr << "OpenCL platforms not found." << std::endl;
        return 1;
    }

    std::vector< cl::Device > devices;

    for( auto const& p : platform )
    {
        std::vector< cl::Device > temp_devices;

        p.getDevices( CL_DEVICE_TYPE_ALL, &temp_devices );

        for( auto const& d : temp_devices )
        {
            if( !d.getInfo< CL_DEVICE_AVAILABLE >() ) continue;
            devices.push_back( d );
        }
    }

    cl::Device* ptr_selected_device = nullptr;

    if( !devices.empty() )
    {
        if( argc >= 4 )
        {
            std::size_t const device_idx = std::atoi( argv[ 3 ] );

            if( device_idx < devices.size() )
            {
                ptr_selected_device = &devices[ device_idx ];
            }
        }

        if( ptr_selected_device == nullptr )
        {
            std::cout << "default selecting device #0" << std::endl;
            ptr_selected_device = &devices[ 0 ];
        }
    }

    if( ptr_selected_device != nullptr )
    {
        std::cout << "device: "
                  << ptr_selected_device->getInfo< CL_DEVICE_NAME >()
                  << std::endl;
    }
    else return 0;

    cl::Context context( *ptr_selected_device );

//    std::cout << "Device list" << std::endl;
//    for(unsigned int jj=0; jj<devices.size(); jj++){
//      std::cout << "Name of devicei " << jj<<" : "<<devices[jj].getInfo<CL_DEVICE_NAME>() << std::endl;
//      std::cout << "resolution of device timer for device " << jj <<" : "<<devices[jj].getInfo<CL_DEVICE_PROFILING_TIMER_RESOLUTION>() << std::endl;
//    };
/**********************************************/


/////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // getting the kernel file
   std::string PATH_TO_KERNEL_FILE( st_PATH_TO_BASE_DIR );
       PATH_TO_KERNEL_FILE += "tests/benchmark/sixtracklib/opencl/";
       PATH_TO_KERNEL_FILE += "kernels_beam_elements_oneatatime.cl";

       std::string kernel_source( "" );
       std::ifstream kernel_file( PATH_TO_KERNEL_FILE.c_str(),
                                  std::ios::in | std::ios::binary );

       if( kernel_file.is_open() )
       {
           std::istreambuf_iterator< char > file_begin( kernel_file.rdbuf() );
           std::istreambuf_iterator< char > end_of_file;

           kernel_source.assign( file_begin, end_of_file );
           kernel_file.close();
       }
////////////////////////////////////////////////////////////////////////////////////////////////////////////
    assert( ptr_selected_device != nullptr );

  //  int ndev = 0; // specifying the id of the device to be used
    cl::CommandQueue queue(context, *ptr_selected_device,CL_QUEUE_PROFILING_ENABLE);
    // Compile OpenCL program for found devices.
			cl:: Program program(context, kernel_source); //string  kernel_source contains the kernel(s) read from the file

#if 0
/////////////////////// Alternative 1 for including the kernels written in a separate file -- works perfectly fine /////////////////////////////////
			cl:: Program program(context, "#include \"../kernels.cl\" ", false); // the path inside the #include should be relative to an include directory specified using -Ipath/to/dir specified via build options.. otherwise give the absolute path.
#endif

#if 0
/////////////////////// The way to go if the string source[] contains the source in the same file as this.

//    cl::Program program(context, cl::Program::Sources(
//        1, std::make_pair(source, strlen(source))
//        ));
#endif


    try {
    std::string incls = "-D_GPUCODE=1 -D__NAMESPACE=st_ -I" + std::string(NS(PATH_TO_BASE_DIR)) ;
  //  std::cout << "Path = " << incls << std::endl;
    //program.build(devices, "-D_GPUCODE=1 -D__NAMESPACE=st_ -I/home/sosingh/sixtracklib_gsoc18/initial_test/sixtrack-v0/external/include");
    program.build( incls.c_str() );
    } catch (const cl::Error&) {
    std::cerr
      << "OpenCL compilation error" << std::endl
      << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(*ptr_selected_device)
      << std::endl;
    throw;
    }




    cl::Buffer B(context, CL_MEM_READ_WRITE, st_Blocks_get_total_num_bytes( &beam_elements  )); // input vector
 queue.enqueueWriteBuffer( B, CL_TRUE, 0, st_Blocks_get_total_num_bytes( &beam_elements ), st_Blocks_get_const_data_begin( &beam_elements ) );




   ////////////////////////// Particles ////////////////////////////////
    st_block_size_t const NUM_PARTICLE_BLOCKS     = 1u;
    st_block_size_t const PARTICLES_DATA_CAPACITY = 1048576u*1000*4; //  ~(4 GB)
    st_block_size_t const NUM_PARTICLES           = atoi(argv[1]); // 100u;

    st_Blocks particles_buffer;
    st_Blocks_preset( &particles_buffer );

    ret = st_Blocks_init(
        &particles_buffer, NUM_PARTICLE_BLOCKS, PARTICLES_DATA_CAPACITY );

    assert( ret == 0 );

    st_Particles* particles = st_Blocks_add_particles(
        &particles_buffer, NUM_PARTICLES );

    if( particles != nullptr )
    {
        /* Just some random values assigned to the individual attributes
         * of the acutal particles -> these values do not make any
         * sense physically, but should be safe for calculating maps ->
         * please check with the map for drift whether they do not produce
         * some NaN's at the sqrt or divisions by 0 though!*/

        std::mt19937_64  prng( 20180622 );

        std::uniform_real_distribution<> x_distribution(  0.05, 1.0 );
        std::uniform_real_distribution<> y_distribution(  0.05, 1.0 );
        std::uniform_real_distribution<> px_distribution( 0.05, 0.2 );
        std::uniform_real_distribution<> py_distribution( 0.05, 0.2 );
        std::uniform_real_distribution<> sigma_distribution( 0.01, 0.5 );

        assert( particles->s     != nullptr );
        assert( particles->x     != nullptr );
        assert( particles->y     != nullptr );
        assert( particles->px    != nullptr );
        assert( particles->py    != nullptr );
        assert( particles->sigma != nullptr );
        assert( particles->rpp   != nullptr );
        assert( particles->rvv   != nullptr );

        assert( particles->num_of_particles == (int)NUM_PARTICLES );

        for( st_block_size_t ii = 0 ; ii < NUM_PARTICLES ; ++ii )
        {
            particles->s[ ii ]     = 0.0;
            particles->x[ ii ]     = x_distribution( prng );
            particles->y[ ii ]     = y_distribution( prng );
            particles->px[ ii ]    = px_distribution( prng );
            particles->py[ ii ]    = py_distribution( prng );
            particles->sigma[ ii ] = sigma_distribution( prng );
            particles->rpp[ ii ]   = 1.0;
            particles->rvv[ ii ]   = 1.0;
        }
    }

    ret = st_Blocks_serialize( &particles_buffer );
    assert( ret == 0 );

    /* ===================================================================== */
    /* Copy to other buffer to simulate working on the GPU */
    //std::cout << "On the GPU:\n";

  // Allocate device buffers and transfer input data to device.

    cl::Buffer C(context, CL_MEM_READ_WRITE, st_Blocks_get_total_num_bytes( &particles_buffer )); // input vector
		queue.enqueueWriteBuffer( C, CL_TRUE, 0, st_Blocks_get_total_num_bytes( &particles_buffer ), st_Blocks_get_const_data_begin( &particles_buffer ) );

    int numThreads = 1;
    int blockSize = 1;
    cl::Kernel unserialize(program, "unserialize");
    unserialize.setArg(0,B);
    unserialize.setArg(1,C);
    unserialize.setArg(2,NUM_PARTICLES);
    queue.enqueueNDRangeKernel(
    unserialize, cl::NullRange, cl::NDRange( numThreads ),
    cl::NDRange(blockSize ));
    queue.flush();
    queue.finish();



      // creating a buffer to transfer the data from GPU to CPU

      std::vector< uint8_t > copy_particles_buffer_host(st_Blocks_get_total_num_bytes( &particles_buffer )/sizeof(uint8_t));  // output vector

      queue.enqueueReadBuffer(C, CL_TRUE, 0, copy_particles_buffer_host.size() * sizeof(uint8_t), copy_particles_buffer_host.data());
      queue.flush();

    st_Blocks copy_particles_buffer;
    st_Blocks_preset( &copy_particles_buffer );

    ret = st_Blocks_unserialize( &copy_particles_buffer, copy_particles_buffer_host.data() );
    assert( ret == 0 );



    SIXTRL_UINT64_T const NUM_TURNS = atoi(argv[2]);//100;
    SIXTRL_UINT64_T offset = 0;
    cl::Event event;
  
  switch (choice)
  {
    case 1 :
    {
    cl::Kernel track_drift_particle(program, "track_drift_particle");
    blockSize = track_drift_particle.getWorkGroupInfo< CL_KERNEL_WORK_GROUP_SIZE >( *ptr_selected_device);// determine the work-group size
    numThreads = ((NUM_PARTICLES+blockSize-1)/blockSize) * blockSize; // rounding off NUM_PARTICLES to the next nearest multiple of blockSize. This is to ensure that there are integer number of work-groups launched
    std::cout << blockSize << " " << numThreads<< std::endl;
    track_drift_particle.setArg(0,B);
    track_drift_particle.setArg(1,C);
    track_drift_particle.setArg(2,NUM_PARTICLES);
    track_drift_particle.setArg(3,NUM_TURNS);
    track_drift_particle.setArg(4,offset);



    queue.enqueueNDRangeKernel(
    track_drift_particle, cl::NullRange, cl::NDRange( numThreads ),
    cl::NDRange(blockSize ), nullptr, &event);
    queue.flush();
    event.wait();
    queue.finish();

        cl_ulong when_kernel_queued    = 0;
        cl_ulong when_kernel_submitted = 0;
        cl_ulong when_kernel_started   = 0;
        cl_ulong when_kernel_ended     = 0;

        ret  = event.getProfilingInfo< cl_ulong >(
          CL_PROFILING_COMMAND_QUEUED, &when_kernel_queued );

        ret |= event.getProfilingInfo< cl_ulong >(
          CL_PROFILING_COMMAND_SUBMIT, &when_kernel_submitted );

        ret |= event.getProfilingInfo< cl_ulong >(
          CL_PROFILING_COMMAND_START, &when_kernel_started );

        ret |= event.getProfilingInfo< cl_ulong >(
          CL_PROFILING_COMMAND_END, &when_kernel_ended );

        assert( ret == CL_SUCCESS ); // all ret's should be 1

         double const kernel_time_elapsed = when_kernel_ended - when_kernel_started;
        exec_time_drift.push_back(kernel_time_elapsed);
        if( ll > 5 ) {
          num_of_turns_drift += 1.0;
          average_execution_time_drift += (kernel_time_elapsed - average_execution_time_drift)/num_of_turns_drift;
      }
    //  break;
    }
    case 2:
    {

    offset = 250;
  //  cl::Event event;
    cl::Kernel track_drift_exact_particle(program, "track_drift_exact_particle");
    blockSize = track_drift_exact_particle.getWorkGroupInfo< CL_KERNEL_WORK_GROUP_SIZE >( *ptr_selected_device);// determine the work-group size
    numThreads = ((NUM_PARTICLES+blockSize-1)/blockSize) * blockSize; // rounding off NUM_PARTICLES to the next nearest multiple of blockSize. This is to ensure that there are integer number of work-groups launched
    std::cout << blockSize << " " << numThreads<< std::endl;
    track_drift_exact_particle.setArg(0,B);
    track_drift_exact_particle.setArg(1,C);
    track_drift_exact_particle.setArg(2,NUM_PARTICLES);
    track_drift_exact_particle.setArg(3,NUM_TURNS);
    track_drift_exact_particle.setArg(4,offset);
    queue.enqueueNDRangeKernel(
    track_drift_exact_particle, cl::NullRange, cl::NDRange( numThreads ),
    cl::NDRange(blockSize ), nullptr, &event);
    queue.flush();
    event.wait();
    queue.finish();
        cl_ulong when_kernel_queued    = 0;
        cl_ulong when_kernel_submitted = 0;
        cl_ulong when_kernel_started   = 0;
        cl_ulong when_kernel_ended     = 0;

        ret  = event.getProfilingInfo< cl_ulong >(
          CL_PROFILING_COMMAND_QUEUED, &when_kernel_queued );

        ret |= event.getProfilingInfo< cl_ulong >(
          CL_PROFILING_COMMAND_SUBMIT, &when_kernel_submitted );

        ret |= event.getProfilingInfo< cl_ulong >(
          CL_PROFILING_COMMAND_START, &when_kernel_started );

        ret |= event.getProfilingInfo< cl_ulong >(
          CL_PROFILING_COMMAND_END, &when_kernel_ended );

        assert( ret == CL_SUCCESS ); // all ret's should be 1

        double const kernel_time_elapsed = when_kernel_ended - when_kernel_started;
        exec_time_drift_exact.push_back(kernel_time_elapsed);
        if( ll > 5 ) {
          num_of_turns_drift_exact += 1.0;
          average_execution_time_drift_exact += (kernel_time_elapsed - average_execution_time_drift_exact)/num_of_turns_drift_exact;
      }
    //break;
    }
    case 3:
    {
    offset = 500;
   // cl::Event event;
    cl::Kernel track_cavity_particle(program, "track_cavity_particle");
    blockSize = track_cavity_particle.getWorkGroupInfo< CL_KERNEL_WORK_GROUP_SIZE >( *ptr_selected_device);// determine the work-group size
    numThreads = ((NUM_PARTICLES+blockSize-1)/blockSize) * blockSize; // rounding off NUM_PARTICLES to the next nearest multiple of blockSize. This is to ensure that there are integer number of work-groups launched
    std::cout << blockSize << " " << numThreads<< std::endl;
    track_cavity_particle.setArg(0,B);
    track_cavity_particle.setArg(1,C);
    track_cavity_particle.setArg(2,NUM_PARTICLES);
    track_cavity_particle.setArg(3,NUM_TURNS);
    track_cavity_particle.setArg(4,offset);
    queue.enqueueNDRangeKernel(
    track_cavity_particle, cl::NullRange, cl::NDRange( numThreads ),
    cl::NDRange(blockSize ), nullptr, &event);
    queue.flush();
    event.wait();
    queue.finish();
        cl_ulong when_kernel_queued    = 0;
        cl_ulong when_kernel_submitted = 0;
        cl_ulong when_kernel_started   = 0;
        cl_ulong when_kernel_ended     = 0;

        ret  = event.getProfilingInfo< cl_ulong >(
          CL_PROFILING_COMMAND_QUEUED, &when_kernel_queued );

        ret |= event.getProfilingInfo< cl_ulong >(
          CL_PROFILING_COMMAND_SUBMIT, &when_kernel_submitted );

        ret |= event.getProfilingInfo< cl_ulong >(
          CL_PROFILING_COMMAND_START, &when_kernel_started );

        ret |= event.getProfilingInfo< cl_ulong >(
          CL_PROFILING_COMMAND_END, &when_kernel_ended );

        assert( ret == CL_SUCCESS ); // all ret's should be 1

        double const kernel_time_elapsed = when_kernel_ended - when_kernel_started;
        exec_time_cavity.push_back(kernel_time_elapsed);
        if( ll > 5 ) {
          num_of_turns_cavity += 1.0;
          average_execution_time_cavity += (kernel_time_elapsed - average_execution_time_cavity)/num_of_turns_cavity;
      }
  //  break;
    }
    case 4:
    {
    //cl::Event event;
    offset = 750;
    cl::Kernel track_align_particle(program, "track_align_particle");
    blockSize = track_align_particle.getWorkGroupInfo< CL_KERNEL_WORK_GROUP_SIZE >( *ptr_selected_device);// determine the work-group size
    numThreads = ((NUM_PARTICLES+blockSize-1)/blockSize) * blockSize; // rounding off NUM_PARTICLES to the next nearest multiple of blockSize. This is to ensure that there are integer number of work-groups launched
    std::cout << blockSize << " " << numThreads<< std::endl;
    track_align_particle.setArg(0,B);
    track_align_particle.setArg(1,C);
    track_align_particle.setArg(2,NUM_PARTICLES);
    track_align_particle.setArg(3,NUM_TURNS);
    track_align_particle.setArg(4,offset);
    queue.enqueueNDRangeKernel(
    track_align_particle, cl::NullRange, cl::NDRange( numThreads ),
    cl::NDRange(blockSize ), nullptr, &event);
    queue.flush();
    event.wait();
    queue.finish();
        cl_ulong when_kernel_queued    = 0;
        cl_ulong when_kernel_submitted = 0;
        cl_ulong when_kernel_started   = 0;
        cl_ulong when_kernel_ended     = 0;

        ret  = event.getProfilingInfo< cl_ulong >(
          CL_PROFILING_COMMAND_QUEUED, &when_kernel_queued );

        ret |= event.getProfilingInfo< cl_ulong >(
          CL_PROFILING_COMMAND_SUBMIT, &when_kernel_submitted );

        ret |= event.getProfilingInfo< cl_ulong >(
          CL_PROFILING_COMMAND_START, &when_kernel_started );

        ret |= event.getProfilingInfo< cl_ulong >(
          CL_PROFILING_COMMAND_END, &when_kernel_ended );

        assert( ret == CL_SUCCESS ); // all ret's should be 1

         double const kernel_time_elapsed = when_kernel_ended - when_kernel_started;
        exec_time_align.push_back(kernel_time_elapsed);
        if( ll > 5 ) {
          num_of_turns_align += 1.0;
          average_execution_time_align += (kernel_time_elapsed - average_execution_time_align)/num_of_turns_align;
      }
   // break;
    }
 }; // end of switch case
      queue.enqueueReadBuffer(C, CL_TRUE, 0, copy_particles_buffer_host.size() * sizeof(uint8_t), copy_particles_buffer_host.data());
      queue.flush();

    //st_Blocks copy_particles_buffer;
    st_Blocks_preset( &copy_particles_buffer );

    ret = st_Blocks_unserialize( &copy_particles_buffer, copy_particles_buffer_host.data() );
    assert( ret == 0 );

    /* on the GPU, these pointers will have __global as a decorator */

#if 0
    // On the CPU after copying the data back from the GPU
    std::cout << "\n On the Host, after applying the drift_track_particles mapping and copying from the GPU\n";

    SIXTRL_GLOBAL_DEC st_BlockInfo const* itr  =
        st_Blocks_get_const_block_infos_begin( &copy_particles_buffer );

    SIXTRL_GLOBAL_DEC st_BlockInfo const* endr =
        st_Blocks_get_const_block_infos_end( &copy_particles_buffer );

    for( ; itr != endr ; ++itr )
    {
        SIXTRL_GLOBAL_DEC st_Particles const* particles =
            ( SIXTRL_GLOBAL_DEC st_Particles const* )itr->begin;

        std::cout.precision( 4 );

        for( st_block_size_t ii = 0 ; ii < NUM_PARTICLES ; ++ii )
        {
            std::cout << " ii    = " << std::setw( 6 ) << ii
                      << std::fixed
                      << " | s     = " << std::setw( 6 ) << particles->s[ ii ]
                      << " | x     = " << std::setw( 6 ) << particles->x[ ii ]
                      << " | y     = " << std::setw( 6 ) << particles->y[ ii ]
                      << " | px    = " << std::setw( 6 ) << particles->px[ ii ]
                      << " | py    = " << std::setw( 6 ) << particles->py[ ii ]
                      << " | sigma = " << std::setw( 6 ) << particles->sigma[ ii ]
                      << " | rpp   = " << std::setw( 6 ) << particles->rpp[ ii ]
                      << " | rvv   = " << std::setw( 6 ) << particles->rvv[ ii ]
                      << "\r\n";
        }
    }

#endif
    std::cout.flush();
    st_Blocks_free( &particles_buffer );
    st_Blocks_free( &copy_particles_buffer );
  } // end of the NUM_REPETITIONS 'for' loop
  switch(choice)
  {
    case 1:
    {
      // printing the contents of the exec_time vector
    std::cout << "track_drift_particle" << std::endl;
    for(std::vector<double>::iterator it = exec_time_drift.begin(); it != exec_time_drift.end(); ++it)
      printf("%.3f s%c",(*it)*1.0e-9, ",\n"[it+1 == exec_time_drift.end()]);
		printf("Reference Version : Time = %.3f s; \n",average_execution_time_drift*1.0e-9);
    //break;
    }
    case 2:
    {
    std::cout << "track_drift_exact_particle" << std::endl;
    for(std::vector<double>::iterator it = exec_time_drift_exact.begin(); it != exec_time_drift_exact.end(); ++it)
      printf("%.3f s%c",(*it)*1.0e-9, ",\n"[it+1 == exec_time_drift_exact.end()]);
		printf("Reference Version: Time = %.3f s; \n",average_execution_time_drift_exact*1.0e-9);
    //break;
    }
    case 3:
    {
    std::cout << "track_cavity_particle" << std::endl;
    for(std::vector<double>::iterator it = exec_time_cavity.begin(); it != exec_time_cavity.end(); ++it)
      printf("%.3f s%c",(*it)*1.0e-9, ",\n"[it+1 == exec_time_cavity.end()]);
		printf("Reference Version: Time = %.3f s; \n",average_execution_time_cavity*1.0e-9);
   // break;
    }
    case 4:
    {
    std::cout << "track_align_particle" << std::endl;
    for(std::vector<double>::iterator it = exec_time_align.begin(); it != exec_time_align.end(); ++it)
      printf("%.3f s%c",(*it)*1.0e-9, ",\n"[it+1 == exec_time_align.end()]);
		printf("Reference Version: Time = %.3f s; \n",average_execution_time_align*1.0e-9);
    break;
    }
  };
    return 0;

  }
コード例 #5
0
ファイル: Gym_Emu.cpp プロジェクト: IamusNavarathna/lv3proj
	// Fills `out` from the GYM header at the start of the file and the track
	// length computed over the data between data_offset and the end of the
	// file. The track index argument is ignored.
	blargg_err_t track_info_( track_info_t* out, int ) const
	{
		byte const* const begin = file_begin();
		int const length = gym_track_length( begin + data_offset, file_end() );

		Gym_Emu::header_t const& hdr = *(Gym_Emu::header_t const*) begin;
		get_gym_info( hdr, length, out );

		return blargg_ok;
	}
コード例 #6
0
ファイル: Hes_Emu.cpp プロジェクト: Kinglions/modizer
	// Hashes the HES header h->header and all file bytes that follow the
	// first h->header.size bytes, using hash_hes_file().
	blargg_err_t hash_( Hash_Function& out ) const
	{
		byte const* const body = file_begin() + h->header.size;

		hash_hes_file( h->header, body, file_end() - body, out );
		return blargg_ok;
	}
コード例 #7
0
ファイル: Spc_Emu.cpp プロジェクト: AriaAsuka/deadbeef
// Returns a pointer to the data at trailer_offset, or to the end of the file
// when the file is shorter than trailer_offset.
inline byte const* Spc_Emu::trailer_() const
{
	return file_begin() + min( file_size(), trailer_offset );
}
コード例 #8
0
ファイル: Kss_Emu.cpp プロジェクト: wbb1977/modo_android
	// Hashes the KSS header *header_ and all file bytes that follow the
	// first Kss_Core::header_t::base_size bytes, using hash_kss_file().
	blargg_err_t hash_( Hash_Function& out ) const
	{
		byte const* const body = file_begin() + Kss_Core::header_t::base_size;

		hash_kss_file( *header_, body, file_end() - body, out );
		return blargg_ok;
	}
コード例 #9
0
ファイル: Spc_Sfm.cpp プロジェクト: iOSAppList/modizer
// Hashes the complete loaded SFM file with hash_sfm_file().
blargg_err_t Sfm_Emu::hash_( Hash_Function& out ) const
{
    byte const* const data = file_begin();

    hash_sfm_file( data, file_size(), out );
    return blargg_ok;
}
コード例 #10
0
ファイル: Spc_Sfm.cpp プロジェクト: iOSAppList/modizer
// Starts playback of an SFM file by restoring the emulator state stored in it.
//
// As read by this function, the file consists of 8 leading bytes (the
// metadata size is the little-endian 32-bit value at offset 4), the metadata
// text, 65536 bytes copied into the APU RAM, 128 bytes copied into the DSP
// registers, and the remaining bytes, which are handed to the APU through
// set_sfm_queue(). The metadata text is parsed into `metadata`, and the SMP
// registers, timers, DSP state and per-voice state are loaded from its
// values.
//
// Returns blargg_ok on success, the error of Music_Emu::start_track_() if
// that fails, or "SFM file too small" when the metadata size stored in the
// file is negative or does not fit into the file.
//
// NOTE(review): META_ENUM_INT is defined outside this function; it appears
// to rely on the locals `value` and `end` declared below, so those names
// must not be changed -- confirm against the macro definition.
blargg_err_t Sfm_Emu::start_track_( int track )
{
    RETURN_ERR( Music_Emu::start_track_( track ) );
    resampler.clear();
    filter.clear();
    const byte * ptr = file_begin();
    int metadata_size = get_le32(ptr + 4);
    // metadata_size comes straight from the file. Reject negative values and
    // compare by subtraction so a huge size cannot overflow the check and
    // slip through to the allocation and copies below.
    if ( metadata_size < 0 ||
         file_size() - (int) Sfm_Emu::sfm_min_file_size < metadata_size )
        return "SFM file too small";

    // Parse a NUL-terminated copy of the metadata text
    char * temp = new char[metadata_size + 1];
    temp[metadata_size] = '\0';
    memcpy(temp, ptr + 8, metadata_size);
    metadata.parseDocument(temp);
    delete [] temp;
    
    apu.init_rom( ipl_rom );
    
    apu.reset();
    
    // RAM image, DSP registers and the remaining data follow the metadata
    memcpy( apu.m.ram.ram, ptr + 8 + metadata_size, 65536 );
    
    memcpy( apu.dsp.m.regs, ptr + 8 + metadata_size + 65536, 128 );
    
    apu.set_sfm_queue( ptr + 8 + metadata_size + 65536 + 128, ptr + file_size() );
    
    byte regs[Snes_Spc::reg_count] = {0};
    
    char * end;
    const char * value;
    
    regs[Snes_Spc::r_test] = META_ENUM_INT("smp:test");
    regs[Snes_Spc::r_control] |= META_ENUM_INT("smp:iplrom") ? 0x80 : 0;
    regs[Snes_Spc::r_dspaddr] = META_ENUM_INT("smp:dspaddr");
    
    // "smp:ram" holds up to two numbers separated by a single character
    value = metadata.enumValue("smp:ram");
    if (value)
    {
        regs[Snes_Spc::r_f8] = strtoul(value, &end, 10);
        if (*end)
        {
            value = end + 1;
            regs[Snes_Spc::r_f9] = strtoul(value, &end, 10);
        }
    }
    
    // Timer registers: enable bit, target, and the output counter, which is
    // the fourth comma-separated field of "stage"
    char temp_path[256];
    for (int i = 0; i < 3; ++i)
    {
        sprintf(temp_path, "smp:timer[%u]:", i);
        size_t length = strlen(temp_path);
        strcpy(temp_path + length, "enable");
        value = metadata.enumValue(temp_path);
        if (value)
        {
            regs[Snes_Spc::r_control] |= strtoul(value, &end, 10) ? 1 << i : 0;
        }
        strcpy(temp_path + length, "target");
        value = metadata.enumValue(temp_path);
        if (value)
        {
            regs[Snes_Spc::r_t0target + i] = strtoul(value, &end, 10);
        }
        strcpy(temp_path + length, "stage");
        value = metadata.enumValue(temp_path);
        if (value)
        {
            // skip three commas to reach the fourth field
            for (int j = 0; j < 3; ++j)
            {
                if (value) value = strchr(value, ',');
                if (value) ++value;
            }
            if (value)
            {
                regs[Snes_Spc::r_t0out + i] = strtoul(value, &end, 10);
            }
        }
    }
    
    apu.load_regs( regs );
    apu.m.rom_enabled = 0;
    apu.regs_loaded();
    
    // Internal timer state: the first "stage" field gives next_time (plus
    // one), the third field gives the divider
    for (int i = 0; i < 3; ++i)
    {
        sprintf(temp_path, "smp:timer[%u]:", i);
        size_t length = strlen(temp_path);
        strcpy(temp_path + length, "stage");
        value = metadata.enumValue(temp_path);
        if (value)
        {
            const char * stage = value;
            apu.m.timers[i].next_time = strtoul(stage, &end, 10) + 1;
            // skip two commas to reach the third field
            for (int j = 0; j < 2; ++j)
            {
                if (stage) stage = strchr(stage, ',');
                if (stage) ++stage;
            }
            if (stage)
            {
                // Parse the field just located; parsing `value` here would
                // re-read the first field and make the skip loop pointless.
                apu.m.timers[i].divider = strtoul(stage, &end, 10);
            }
        }
    }
    
    // NOTE(review): the index comes from the file and is not range-checked
    // here -- confirm it cannot exceed the bounds of echo_hist.
    apu.dsp.m.echo_hist_pos = &apu.dsp.m.echo_hist[META_ENUM_INT("dsp:echohistaddr")];
    
    // Echo history: up to 8 comma-separated pairs of values
    value = metadata.enumValue("dsp:echohistdata");
    if (value)
    {
        for (int i = 0; i < 8; ++i)
        {
            apu.dsp.m.echo_hist[i][0] = strtoul(value, &end, 10);
            value = strchr(value, ',');
            if (!value) break;
            ++value;
            apu.dsp.m.echo_hist[i][1] = strtoul(value, &end, 10);
            value = strchr(value, ',');
            if (!value) break;
            ++value;
        }
    }
    
    apu.dsp.m.phase = META_ENUM_INT("dsp:sample");
    apu.dsp.m.kon = META_ENUM_INT("dsp:kon");
    apu.dsp.m.noise = META_ENUM_INT("dsp:noise");
    apu.dsp.m.counter = META_ENUM_INT("dsp:counter");
    apu.dsp.m.echo_offset = META_ENUM_INT("dsp:echooffset");
    apu.dsp.m.echo_length = META_ENUM_INT("dsp:echolength");
    apu.dsp.m.new_kon = META_ENUM_INT("dsp:koncache");
    apu.dsp.m.endx_buf = META_ENUM_INT("dsp:endx");
    apu.dsp.m.envx_buf = META_ENUM_INT("dsp:envx");
    apu.dsp.m.outx_buf = META_ENUM_INT("dsp:outx");
    apu.dsp.m.t_pmon = META_ENUM_INT("dsp:pmon");
    apu.dsp.m.t_non = META_ENUM_INT("dsp:non");
    apu.dsp.m.t_eon = META_ENUM_INT("dsp:eon");
    apu.dsp.m.t_dir = META_ENUM_INT("dsp:dir");
    apu.dsp.m.t_koff = META_ENUM_INT("dsp:koff");
    apu.dsp.m.t_brr_next_addr = META_ENUM_INT("dsp:brrnext");
    apu.dsp.m.t_adsr0 = META_ENUM_INT("dsp:adsr0");
    apu.dsp.m.t_brr_header = META_ENUM_INT("dsp:brrheader");
    apu.dsp.m.t_brr_byte = META_ENUM_INT("dsp:brrdata");
    apu.dsp.m.t_srcn = META_ENUM_INT("dsp:srcn");
    apu.dsp.m.t_esa = META_ENUM_INT("dsp:esa");
    apu.dsp.m.t_echo_enabled = !META_ENUM_INT("dsp:echodisable");
    apu.dsp.m.t_dir_addr = META_ENUM_INT("dsp:diraddr");
    apu.dsp.m.t_pitch = META_ENUM_INT("dsp:pitch");
    apu.dsp.m.t_output = META_ENUM_INT("dsp:output");
    apu.dsp.m.t_looped = META_ENUM_INT("dsp:looped");
    apu.dsp.m.t_echo_ptr = META_ENUM_INT("dsp:echoaddr");
    
    
// Reads up to two numbers, separated by a single character, from metadata
// key `n` into (o)[0] and (o)[1]. Entries without a value are left untouched.
#define META_ENUM_LEVELS(n, o) \
value = metadata.enumValue(n); \
if (value) \
{ \
(o)[0] = strtoul(value, &end, 10); \
if (*end) \
{ \
value = end + 1; \
(o)[1] = strtoul(value, &end, 10); \
} \
}
    
    META_ENUM_LEVELS("dsp:mainout", apu.dsp.m.t_main_out);
    META_ENUM_LEVELS("dsp:echoout", apu.dsp.m.t_echo_out);
    META_ENUM_LEVELS("dsp:echoin", apu.dsp.m.t_echo_in);
    
#undef META_ENUM_LEVELS
    
    // Per-voice DSP state
    for (int i = 0; i < 8; ++i)
    {
        sprintf(temp_path, "dsp:voice[%u]:", i);
        size_t length = strlen(temp_path);
        Spc_Dsp::voice_t & voice = apu.dsp.m.voices[i];
        strcpy(temp_path + length, "brrhistaddr");
        value = metadata.enumValue(temp_path);
        if (value)
        {
            voice.buf_pos = strtoul(value, &end, 10);
        }
        strcpy(temp_path + length, "brrhistdata");
        value = metadata.enumValue(temp_path);
        if (value)
        {
            // Every parsed sample is stored twice, brr_buf_size entries
            // apart; parsing stops at the end of the string.
            for (int j = 0; j < Spc_Dsp::brr_buf_size; ++j)
            {
                voice.buf[j] = voice.buf[j + Spc_Dsp::brr_buf_size] = strtoul(value, &end, 10);
                if (!*end) break;
                value = end + 1;
            }
        }
        strcpy(temp_path + length, "interpaddr");
        voice.interp_pos = META_ENUM_INT(temp_path);
        strcpy(temp_path + length, "brraddr");
        voice.brr_addr = META_ENUM_INT(temp_path);
        strcpy(temp_path + length, "brroffset");
        voice.brr_offset = META_ENUM_INT(temp_path);
        strcpy(temp_path + length, "vbit");
        voice.vbit = META_ENUM_INT(temp_path);
        strcpy(temp_path + length, "vidx");
        // NOTE(review): index taken from the file without a range check
        voice.regs = &apu.dsp.m.regs[META_ENUM_INT(temp_path)];
        strcpy(temp_path + length, "kondelay");
        voice.kon_delay = META_ENUM_INT(temp_path);
        strcpy(temp_path + length, "envmode");
        voice.env_mode = (Spc_Dsp::env_mode_t) META_ENUM_INT(temp_path);
        strcpy(temp_path + length, "env");
        voice.env = META_ENUM_INT(temp_path);
        strcpy(temp_path + length, "envxout");
        voice.t_envx_out = META_ENUM_INT(temp_path);
        strcpy(temp_path + length, "envcache");
        voice.hidden_env = META_ENUM_INT(temp_path);
    }
    
    filter.set_gain( (int) (gain() * Spc_Filter::gain_unit) );
    apu.clear_echo( true );
    return blargg_ok;
}