// Hashes the file: the header found at the very start of the file plus every
// byte from data_offset up to the end of the file are handed to hash_gym_file().
blargg_err_t hash_( Hash_Function& out ) const
{
	byte const* const begin = file_begin();
	Gym_Emu::header_t const& hdr = *reinterpret_cast<Gym_Emu::header_t const*>( begin );

	byte const* const payload = begin + data_offset;
	hash_gym_file( hdr, payload, file_end() - payload, out );

	return (blargg_err_t)blargg_ok;
}
// Hashes the header together with the byte range [first, last) of the file.
//
// first: directly after the header, unless the header's data_offset field is
//        non-zero, in which case it is that offset taken relative to the
//        position of the data_offset field itself.
// last:  end of file, unless the header's gd3_offset field is positive and
//        resolves to a position behind the resolved data offset; then the
//        range stops where the GD3 offset points.
blargg_err_t Vgm_Emu::hash_( Hash_Function& out ) const
{
	header_t const& hdr = header();

	int const data_rel = get_le32( hdr.data_offset );
	int const gd3_rel  = get_le32( hdr.gd3_offset );

	byte const* first = file_begin() + hdr.size();
	if ( data_rel )
		first += data_rel + offsetof( header_t, data_offset ) - hdr.size();

	byte const* last = file_end();
	bool const gd3_behind_data = gd3_rel > 0 &&
			gd3_rel + offsetof( header_t, gd3_offset ) > data_rel + offsetof( header_t, data_offset );
	if ( gd3_behind_data )
		last = file_begin() + gd3_rel + offsetof( header_t, gd3_offset );

	hash_vgm_file( hdr, first, last - first, out );
	return (blargg_err_t)blargg_ok;
}
// Hashes the header *h plus all file bytes that follow the first h->size bytes.
blargg_err_t hash_( Hash_Function& out ) const
{
	byte const* const payload = file_begin() + h->size;
	hash_gbs_file( *h, payload, file_end() - payload, out );
	return blargg_ok;
}
int main(int argc, char** argv) { if(argc < 3) { std::cerr << "Usage: " << argv[0] << " < #particles > < #turns > [deviceIdx]" << std::endl; exit(1); } int NUM_REPETITIONS = 10; double num_of_turns_drift = 0.0; // for timing double num_of_turns_drift_exact = 0.0; // for timing double num_of_turns_cavity = 0.0; // for timing double num_of_turns_align = 0.0; // for timing double average_execution_time_drift = 0.0; double average_execution_time_drift_exact = 0.0; double average_execution_time_cavity = 0.0; double average_execution_time_align = 0.0; std::vector<double> exec_time_drift; std::vector<double> exec_time_drift_exact; std::vector<double> exec_time_cavity; std::vector<double> exec_time_align; int choice = 1; for(int ll = 0; ll < NUM_REPETITIONS; ++ll) { /* We will use 9+ beam element blocks in this example and do not * care to be memory efficient yet; thus we make the blocks for * beam elements and particles big enough to avoid running into problems */ constexpr st_block_size_t const MAX_NUM_BEAM_ELEMENTS = 1000u; // 20u; constexpr st_block_size_t const NUM_OF_BEAM_ELEMENTS = 1000u; //9u; /* 1MByte is plenty of space */ constexpr st_block_size_t const BEAM_ELEMENTS_DATA_CAPACITY = 1048576u; /* Prepare and init the beam elements buffer */ st_Blocks beam_elements; st_Blocks_preset( &beam_elements ); int ret = st_Blocks_init( &beam_elements, MAX_NUM_BEAM_ELEMENTS, BEAM_ELEMENTS_DATA_CAPACITY ); assert( ret == 0 ); /* if there was an error, ret would be != 0 */ /* Add NUM_OF_BEAM_ELEMENTS drifts to the buffer. For this example, let's * just have one simple constant length for all of them: */ // One-fourth of the beam-elements are drift-elements for( st_block_size_t ii = 0 ; ii < NUM_OF_BEAM_ELEMENTS/4 ; ++ii ) { double const drift_length = double{ 0.2L }; st_Drift* drift = st_Blocks_add_drift( &beam_elements, drift_length ); (void)drift; // using the variable with a no-op assert( drift != nullptr ); /* Otherwise, there was a problem! 
*/ } /* Check if we *really* have the correct number of beam elements and * if they really are all drifts */ assert( st_Blocks_get_num_of_blocks( &beam_elements ) == NUM_OF_BEAM_ELEMENTS/4 ); /* The beam_elements container is currently not serialized yet -> * we could still add blocks to the buffer. Let's jus do this and * add a different kind of beam element to keep it easier apart! */ for( st_block_size_t ii = NUM_OF_BEAM_ELEMENTS/4 ; ii < NUM_OF_BEAM_ELEMENTS/2 ; ++ii ) { double const drift_length = double{ 0.1L }; st_DriftExact* drift_exact = st_Blocks_add_drift_exact( &beam_elements, drift_length ); (void) drift_exact; assert( drift_exact != nullptr ); } assert( st_Blocks_get_num_of_blocks( &beam_elements ) == ( NUM_OF_BEAM_ELEMENTS*0.5) ); /* Adding the beam element 'cavity' */ for( st_block_size_t ii = NUM_OF_BEAM_ELEMENTS*0.5 ; ii < NUM_OF_BEAM_ELEMENTS*0.75 ; ++ii ) { double const voltage = double{ 1e4}; double const frequency = double{ 40}; double const lag = double{ 0.01L}; st_Cavity* cavity = st_Blocks_add_cavity( &beam_elements, voltage, frequency, lag); (void) cavity; // a no-op assert( cavity != nullptr ); /* Otherwise, there was a problem! */ } assert( st_Blocks_get_num_of_blocks( &beam_elements ) == ( NUM_OF_BEAM_ELEMENTS * 0.75) ); /* Adding the beam element 'align' */ double const M__PI = // note the two underscores between M and PI ( double )3.1415926535897932384626433832795028841971693993751L; for( st_block_size_t ii = NUM_OF_BEAM_ELEMENTS*0.75 ; ii < NUM_OF_BEAM_ELEMENTS ; ++ii ) { double const tilt = double{ 0.5}; double const z = double{ M__PI / 45}; double const dx = double{ 0.2L}; double const dy = double{ 0.2L}; st_Align* align = st_Blocks_add_align( &beam_elements, tilt, cos( z ), sin( z ), dx, dy); (void) align; // a no-op assert( align != nullptr ); /* Otherwise, there was a problem! 
*/ } assert( st_Blocks_get_num_of_blocks( &beam_elements ) == ( NUM_OF_BEAM_ELEMENTS) ); /* Always safely terminate pointer variables pointing to resources they * do not own which we no longer need -> just a good practice */ // drift_exact = nullptr; /* After serialization, the "structure" of the beam_elements buffer is * frozen, but the data in the elements - i.e. the length of the * individual drifts in our example - can still be modified. We will * just not be able to add further blocks to the container */ assert( !st_Blocks_are_serialized( &beam_elements ) ); ret = st_Blocks_serialize( &beam_elements ); assert( ret == 0 ); assert( st_Blocks_are_serialized( &beam_elements ) ); // serialization on CPU done. /* Next, let's iterate over all the beam_elements in the buffer and * print out the properties -> we expect that NUM_OF_BEAM_ELEMENTS * st_Drift with the same length appear and one st_DriftExact with a * different length should appear in the end */ std::cout.flush(); /************************** Preparing grounds for OpenCL *******/ std::vector<cl::Platform> platform; cl::Platform::get(&platform); if( platform.empty() ) { std::cerr << "OpenCL platforms not found." 
<< std::endl; return 1; } std::vector< cl::Device > devices; for( auto const& p : platform ) { std::vector< cl::Device > temp_devices; p.getDevices( CL_DEVICE_TYPE_ALL, &temp_devices ); for( auto const& d : temp_devices ) { if( !d.getInfo< CL_DEVICE_AVAILABLE >() ) continue; devices.push_back( d ); } } cl::Device* ptr_selected_device = nullptr; if( !devices.empty() ) { if( argc >= 4 ) { std::size_t const device_idx = std::atoi( argv[ 3 ] ); if( device_idx < devices.size() ) { ptr_selected_device = &devices[ device_idx ]; } } if( ptr_selected_device == nullptr ) { std::cout << "default selecting device #0" << std::endl; ptr_selected_device = &devices[ 0 ]; } } if( ptr_selected_device != nullptr ) { std::cout << "device: " << ptr_selected_device->getInfo< CL_DEVICE_NAME >() << std::endl; } else return 0; cl::Context context( *ptr_selected_device ); // std::cout << "Device list" << std::endl; // for(unsigned int jj=0; jj<devices.size(); jj++){ // std::cout << "Name of devicei " << jj<<" : "<<devices[jj].getInfo<CL_DEVICE_NAME>() << std::endl; // std::cout << "resolution of device timer for device " << jj <<" : "<<devices[jj].getInfo<CL_DEVICE_PROFILING_TIMER_RESOLUTION>() << std::endl; // }; /**********************************************/ ///////////////////////////////////////////////////////////////////////////////////////////////////////////// // getting the kernel file std::string PATH_TO_KERNEL_FILE( st_PATH_TO_BASE_DIR ); PATH_TO_KERNEL_FILE += "tests/benchmark/sixtracklib/opencl/"; PATH_TO_KERNEL_FILE += "kernels_beam_elements_oneatatime.cl"; std::string kernel_source( "" ); std::ifstream kernel_file( PATH_TO_KERNEL_FILE.c_str(), std::ios::in | std::ios::binary ); if( kernel_file.is_open() ) { std::istreambuf_iterator< char > file_begin( kernel_file.rdbuf() ); std::istreambuf_iterator< char > end_of_file; kernel_source.assign( file_begin, end_of_file ); kernel_file.close(); } 
//////////////////////////////////////////////////////////////////////////////////////////////////////////// assert( ptr_selected_device != nullptr ); // int ndev = 0; // specifying the id of the device to be used cl::CommandQueue queue(context, *ptr_selected_device,CL_QUEUE_PROFILING_ENABLE); // Compile OpenCL program for found devices. cl:: Program program(context, kernel_source); //string kernel_source contains the kernel(s) read from the file #if 0 /////////////////////// Alternative 1 for including the kernels written in a separate file -- works perfectly fine ///////////////////////////////// cl:: Program program(context, "#include \"../kernels.cl\" ", false); // the path inside the #include should be relative to an include directory specified using -Ipath/to/dir specified via build options.. otherwise give the absolute path. #endif #if 0 /////////////////////// The way to go if the string source[] contains the source in the same file as this. // cl::Program program(context, cl::Program::Sources( // 1, std::make_pair(source, strlen(source)) // )); #endif try { std::string incls = "-D_GPUCODE=1 -D__NAMESPACE=st_ -I" + std::string(NS(PATH_TO_BASE_DIR)) ; // std::cout << "Path = " << incls << std::endl; //program.build(devices, "-D_GPUCODE=1 -D__NAMESPACE=st_ -I/home/sosingh/sixtracklib_gsoc18/initial_test/sixtrack-v0/external/include"); program.build( incls.c_str() ); } catch (const cl::Error&) { std::cerr << "OpenCL compilation error" << std::endl << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(*ptr_selected_device) << std::endl; throw; } cl::Buffer B(context, CL_MEM_READ_WRITE, st_Blocks_get_total_num_bytes( &beam_elements )); // input vector queue.enqueueWriteBuffer( B, CL_TRUE, 0, st_Blocks_get_total_num_bytes( &beam_elements ), st_Blocks_get_const_data_begin( &beam_elements ) ); ////////////////////////// Particles //////////////////////////////// st_block_size_t const NUM_PARTICLE_BLOCKS = 1u; st_block_size_t const PARTICLES_DATA_CAPACITY = 1048576u*1000*4; 
// ~(4 GB) st_block_size_t const NUM_PARTICLES = atoi(argv[1]); // 100u; st_Blocks particles_buffer; st_Blocks_preset( &particles_buffer ); ret = st_Blocks_init( &particles_buffer, NUM_PARTICLE_BLOCKS, PARTICLES_DATA_CAPACITY ); assert( ret == 0 ); st_Particles* particles = st_Blocks_add_particles( &particles_buffer, NUM_PARTICLES ); if( particles != nullptr ) { /* Just some random values assigned to the individual attributes * of the acutal particles -> these values do not make any * sense physically, but should be safe for calculating maps -> * please check with the map for drift whether they do not produce * some NaN's at the sqrt or divisions by 0 though!*/ std::mt19937_64 prng( 20180622 ); std::uniform_real_distribution<> x_distribution( 0.05, 1.0 ); std::uniform_real_distribution<> y_distribution( 0.05, 1.0 ); std::uniform_real_distribution<> px_distribution( 0.05, 0.2 ); std::uniform_real_distribution<> py_distribution( 0.05, 0.2 ); std::uniform_real_distribution<> sigma_distribution( 0.01, 0.5 ); assert( particles->s != nullptr ); assert( particles->x != nullptr ); assert( particles->y != nullptr ); assert( particles->px != nullptr ); assert( particles->py != nullptr ); assert( particles->sigma != nullptr ); assert( particles->rpp != nullptr ); assert( particles->rvv != nullptr ); assert( particles->num_of_particles == (int)NUM_PARTICLES ); for( st_block_size_t ii = 0 ; ii < NUM_PARTICLES ; ++ii ) { particles->s[ ii ] = 0.0; particles->x[ ii ] = x_distribution( prng ); particles->y[ ii ] = y_distribution( prng ); particles->px[ ii ] = px_distribution( prng ); particles->py[ ii ] = py_distribution( prng ); particles->sigma[ ii ] = sigma_distribution( prng ); particles->rpp[ ii ] = 1.0; particles->rvv[ ii ] = 1.0; } } ret = st_Blocks_serialize( &particles_buffer ); assert( ret == 0 ); /* ===================================================================== */ /* Copy to other buffer to simulate working on the GPU */ //std::cout << "On the GPU:\n"; // Allocate 
device buffers and transfer input data to device. cl::Buffer C(context, CL_MEM_READ_WRITE, st_Blocks_get_total_num_bytes( &particles_buffer )); // input vector queue.enqueueWriteBuffer( C, CL_TRUE, 0, st_Blocks_get_total_num_bytes( &particles_buffer ), st_Blocks_get_const_data_begin( &particles_buffer ) ); int numThreads = 1; int blockSize = 1; cl::Kernel unserialize(program, "unserialize"); unserialize.setArg(0,B); unserialize.setArg(1,C); unserialize.setArg(2,NUM_PARTICLES); queue.enqueueNDRangeKernel( unserialize, cl::NullRange, cl::NDRange( numThreads ), cl::NDRange(blockSize )); queue.flush(); queue.finish(); // creating a buffer to transfer the data from GPU to CPU std::vector< uint8_t > copy_particles_buffer_host(st_Blocks_get_total_num_bytes( &particles_buffer )/sizeof(uint8_t)); // output vector queue.enqueueReadBuffer(C, CL_TRUE, 0, copy_particles_buffer_host.size() * sizeof(uint8_t), copy_particles_buffer_host.data()); queue.flush(); st_Blocks copy_particles_buffer; st_Blocks_preset( ©_particles_buffer ); ret = st_Blocks_unserialize( ©_particles_buffer, copy_particles_buffer_host.data() ); assert( ret == 0 ); SIXTRL_UINT64_T const NUM_TURNS = atoi(argv[2]);//100; SIXTRL_UINT64_T offset = 0; cl::Event event; switch (choice) { case 1 : { cl::Kernel track_drift_particle(program, "track_drift_particle"); blockSize = track_drift_particle.getWorkGroupInfo< CL_KERNEL_WORK_GROUP_SIZE >( *ptr_selected_device);// determine the work-group size numThreads = ((NUM_PARTICLES+blockSize-1)/blockSize) * blockSize; // rounding off NUM_PARTICLES to the next nearest multiple of blockSize. 
This is to ensure that there are integer number of work-groups launched std::cout << blockSize << " " << numThreads<< std::endl; track_drift_particle.setArg(0,B); track_drift_particle.setArg(1,C); track_drift_particle.setArg(2,NUM_PARTICLES); track_drift_particle.setArg(3,NUM_TURNS); track_drift_particle.setArg(4,offset); queue.enqueueNDRangeKernel( track_drift_particle, cl::NullRange, cl::NDRange( numThreads ), cl::NDRange(blockSize ), nullptr, &event); queue.flush(); event.wait(); queue.finish(); cl_ulong when_kernel_queued = 0; cl_ulong when_kernel_submitted = 0; cl_ulong when_kernel_started = 0; cl_ulong when_kernel_ended = 0; ret = event.getProfilingInfo< cl_ulong >( CL_PROFILING_COMMAND_QUEUED, &when_kernel_queued ); ret |= event.getProfilingInfo< cl_ulong >( CL_PROFILING_COMMAND_SUBMIT, &when_kernel_submitted ); ret |= event.getProfilingInfo< cl_ulong >( CL_PROFILING_COMMAND_START, &when_kernel_started ); ret |= event.getProfilingInfo< cl_ulong >( CL_PROFILING_COMMAND_END, &when_kernel_ended ); assert( ret == CL_SUCCESS ); // all ret's should be 1 double const kernel_time_elapsed = when_kernel_ended - when_kernel_started; exec_time_drift.push_back(kernel_time_elapsed); if( ll > 5 ) { num_of_turns_drift += 1.0; average_execution_time_drift += (kernel_time_elapsed - average_execution_time_drift)/num_of_turns_drift; } // break; } case 2: { offset = 250; // cl::Event event; cl::Kernel track_drift_exact_particle(program, "track_drift_exact_particle"); blockSize = track_drift_exact_particle.getWorkGroupInfo< CL_KERNEL_WORK_GROUP_SIZE >( *ptr_selected_device);// determine the work-group size numThreads = ((NUM_PARTICLES+blockSize-1)/blockSize) * blockSize; // rounding off NUM_PARTICLES to the next nearest multiple of blockSize. 
This is to ensure that there are integer number of work-groups launched std::cout << blockSize << " " << numThreads<< std::endl; track_drift_exact_particle.setArg(0,B); track_drift_exact_particle.setArg(1,C); track_drift_exact_particle.setArg(2,NUM_PARTICLES); track_drift_exact_particle.setArg(3,NUM_TURNS); track_drift_exact_particle.setArg(4,offset); queue.enqueueNDRangeKernel( track_drift_exact_particle, cl::NullRange, cl::NDRange( numThreads ), cl::NDRange(blockSize ), nullptr, &event); queue.flush(); event.wait(); queue.finish(); cl_ulong when_kernel_queued = 0; cl_ulong when_kernel_submitted = 0; cl_ulong when_kernel_started = 0; cl_ulong when_kernel_ended = 0; ret = event.getProfilingInfo< cl_ulong >( CL_PROFILING_COMMAND_QUEUED, &when_kernel_queued ); ret |= event.getProfilingInfo< cl_ulong >( CL_PROFILING_COMMAND_SUBMIT, &when_kernel_submitted ); ret |= event.getProfilingInfo< cl_ulong >( CL_PROFILING_COMMAND_START, &when_kernel_started ); ret |= event.getProfilingInfo< cl_ulong >( CL_PROFILING_COMMAND_END, &when_kernel_ended ); assert( ret == CL_SUCCESS ); // all ret's should be 1 double const kernel_time_elapsed = when_kernel_ended - when_kernel_started; exec_time_drift_exact.push_back(kernel_time_elapsed); if( ll > 5 ) { num_of_turns_drift_exact += 1.0; average_execution_time_drift_exact += (kernel_time_elapsed - average_execution_time_drift_exact)/num_of_turns_drift_exact; } //break; } case 3: { offset = 500; // cl::Event event; cl::Kernel track_cavity_particle(program, "track_cavity_particle"); blockSize = track_cavity_particle.getWorkGroupInfo< CL_KERNEL_WORK_GROUP_SIZE >( *ptr_selected_device);// determine the work-group size numThreads = ((NUM_PARTICLES+blockSize-1)/blockSize) * blockSize; // rounding off NUM_PARTICLES to the next nearest multiple of blockSize. 
This is to ensure that there are integer number of work-groups launched std::cout << blockSize << " " << numThreads<< std::endl; track_cavity_particle.setArg(0,B); track_cavity_particle.setArg(1,C); track_cavity_particle.setArg(2,NUM_PARTICLES); track_cavity_particle.setArg(3,NUM_TURNS); track_cavity_particle.setArg(4,offset); queue.enqueueNDRangeKernel( track_cavity_particle, cl::NullRange, cl::NDRange( numThreads ), cl::NDRange(blockSize ), nullptr, &event); queue.flush(); event.wait(); queue.finish(); cl_ulong when_kernel_queued = 0; cl_ulong when_kernel_submitted = 0; cl_ulong when_kernel_started = 0; cl_ulong when_kernel_ended = 0; ret = event.getProfilingInfo< cl_ulong >( CL_PROFILING_COMMAND_QUEUED, &when_kernel_queued ); ret |= event.getProfilingInfo< cl_ulong >( CL_PROFILING_COMMAND_SUBMIT, &when_kernel_submitted ); ret |= event.getProfilingInfo< cl_ulong >( CL_PROFILING_COMMAND_START, &when_kernel_started ); ret |= event.getProfilingInfo< cl_ulong >( CL_PROFILING_COMMAND_END, &when_kernel_ended ); assert( ret == CL_SUCCESS ); // all ret's should be 1 double const kernel_time_elapsed = when_kernel_ended - when_kernel_started; exec_time_cavity.push_back(kernel_time_elapsed); if( ll > 5 ) { num_of_turns_cavity += 1.0; average_execution_time_cavity += (kernel_time_elapsed - average_execution_time_cavity)/num_of_turns_cavity; } // break; } case 4: { //cl::Event event; offset = 750; cl::Kernel track_align_particle(program, "track_align_particle"); blockSize = track_align_particle.getWorkGroupInfo< CL_KERNEL_WORK_GROUP_SIZE >( *ptr_selected_device);// determine the work-group size numThreads = ((NUM_PARTICLES+blockSize-1)/blockSize) * blockSize; // rounding off NUM_PARTICLES to the next nearest multiple of blockSize. 
This is to ensure that there are integer number of work-groups launched std::cout << blockSize << " " << numThreads<< std::endl; track_align_particle.setArg(0,B); track_align_particle.setArg(1,C); track_align_particle.setArg(2,NUM_PARTICLES); track_align_particle.setArg(3,NUM_TURNS); track_align_particle.setArg(4,offset); queue.enqueueNDRangeKernel( track_align_particle, cl::NullRange, cl::NDRange( numThreads ), cl::NDRange(blockSize ), nullptr, &event); queue.flush(); event.wait(); queue.finish(); cl_ulong when_kernel_queued = 0; cl_ulong when_kernel_submitted = 0; cl_ulong when_kernel_started = 0; cl_ulong when_kernel_ended = 0; ret = event.getProfilingInfo< cl_ulong >( CL_PROFILING_COMMAND_QUEUED, &when_kernel_queued ); ret |= event.getProfilingInfo< cl_ulong >( CL_PROFILING_COMMAND_SUBMIT, &when_kernel_submitted ); ret |= event.getProfilingInfo< cl_ulong >( CL_PROFILING_COMMAND_START, &when_kernel_started ); ret |= event.getProfilingInfo< cl_ulong >( CL_PROFILING_COMMAND_END, &when_kernel_ended ); assert( ret == CL_SUCCESS ); // all ret's should be 1 double const kernel_time_elapsed = when_kernel_ended - when_kernel_started; exec_time_align.push_back(kernel_time_elapsed); if( ll > 5 ) { num_of_turns_align += 1.0; average_execution_time_align += (kernel_time_elapsed - average_execution_time_align)/num_of_turns_align; } // break; } }; // end of switch case queue.enqueueReadBuffer(C, CL_TRUE, 0, copy_particles_buffer_host.size() * sizeof(uint8_t), copy_particles_buffer_host.data()); queue.flush(); //st_Blocks copy_particles_buffer; st_Blocks_preset( ©_particles_buffer ); ret = st_Blocks_unserialize( ©_particles_buffer, copy_particles_buffer_host.data() ); assert( ret == 0 ); /* on the GPU, these pointers will have __global as a decorator */ #if 0 // On the CPU after copying the data back from the GPU std::cout << "\n On the Host, after applying the drift_track_particles mapping and copying from the GPU\n"; SIXTRL_GLOBAL_DEC st_BlockInfo const* itr = 
st_Blocks_get_const_block_infos_begin( ©_particles_buffer ); SIXTRL_GLOBAL_DEC st_BlockInfo const* endr = st_Blocks_get_const_block_infos_end( ©_particles_buffer ); for( ; itr != endr ; ++itr ) { SIXTRL_GLOBAL_DEC st_Particles const* particles = ( SIXTRL_GLOBAL_DEC st_Particles const* )itr->begin; std::cout.precision( 4 ); for( st_block_size_t ii = 0 ; ii < NUM_PARTICLES ; ++ii ) { std::cout << " ii = " << std::setw( 6 ) << ii << std::fixed << " | s = " << std::setw( 6 ) << particles->s[ ii ] << " | x = " << std::setw( 6 ) << particles->x[ ii ] << " | y = " << std::setw( 6 ) << particles->y[ ii ] << " | px = " << std::setw( 6 ) << particles->px[ ii ] << " | py = " << std::setw( 6 ) << particles->py[ ii ] << " | sigma = " << std::setw( 6 ) << particles->sigma[ ii ] << " | rpp = " << std::setw( 6 ) << particles->rpp[ ii ] << " | rvv = " << std::setw( 6 ) << particles->rvv[ ii ] << "\r\n"; } } #endif std::cout.flush(); st_Blocks_free( &particles_buffer ); st_Blocks_free( ©_particles_buffer ); } // end of the NUM_REPETITIONS 'for' loop switch(choice) { case 1: { // printing the contents of the exec_time vector std::cout << "track_drift_particle" << std::endl; for(std::vector<double>::iterator it = exec_time_drift.begin(); it != exec_time_drift.end(); ++it) printf("%.3f s%c",(*it)*1.0e-9, ",\n"[it+1 == exec_time_drift.end()]); printf("Reference Version : Time = %.3f s; \n",average_execution_time_drift*1.0e-9); //break; } case 2: { std::cout << "track_drift_exact_particle" << std::endl; for(std::vector<double>::iterator it = exec_time_drift_exact.begin(); it != exec_time_drift_exact.end(); ++it) printf("%.3f s%c",(*it)*1.0e-9, ",\n"[it+1 == exec_time_drift_exact.end()]); printf("Reference Version: Time = %.3f s; \n",average_execution_time_drift_exact*1.0e-9); //break; } case 3: { std::cout << "track_cavity_particle" << std::endl; for(std::vector<double>::iterator it = exec_time_cavity.begin(); it != exec_time_cavity.end(); ++it) printf("%.3f s%c",(*it)*1.0e-9, ",\n"[it+1 
== exec_time_cavity.end()]); printf("Reference Version: Time = %.3f s; \n",average_execution_time_cavity*1.0e-9); // break; } case 4: { std::cout << "track_align_particle" << std::endl; for(std::vector<double>::iterator it = exec_time_align.begin(); it != exec_time_align.end(); ++it) printf("%.3f s%c",(*it)*1.0e-9, ",\n"[it+1 == exec_time_align.end()]); printf("Reference Version: Time = %.3f s; \n",average_execution_time_align*1.0e-9); break; } }; return 0; }
// Fills *out from the header at the start of the file and from the track
// length that gym_track_length() reports for the bytes between data_offset
// and the end of the file. The track number argument is ignored.
blargg_err_t track_info_( track_info_t* out, int ) const
{
	byte const* const begin = file_begin();
	int const length = gym_track_length( begin + data_offset, file_end() );

	Gym_Emu::header_t const& hdr = *reinterpret_cast<Gym_Emu::header_t const*>( begin );
	get_gym_info( hdr, length, out );

	return blargg_ok;
}
// Hashes h->header plus all file bytes that follow the first
// h->header.size bytes.
blargg_err_t hash_( Hash_Function& out ) const
{
	byte const* const payload = file_begin() + h->header.size;
	hash_hes_file( h->header, payload, file_end() - payload, out );
	return blargg_ok;
}
// Pointer to the position trailer_offset bytes into the file, clamped to the
// end of the file when the file is shorter than that.
inline byte const* Spc_Emu::trailer_() const
{
	return file_begin() + min( file_size(), trailer_offset );
}
// Hashes *header_ plus all file bytes that follow the first
// Kss_Core::header_t::base_size bytes.
blargg_err_t hash_( Hash_Function& out ) const
{
	byte const* const payload = file_begin() + Kss_Core::header_t::base_size;
	hash_kss_file( *header_, payload, file_end() - payload, out );
	return blargg_ok;
}
// Hashes the complete file contents via hash_sfm_file().
blargg_err_t Sfm_Emu::hash_( Hash_Function& out ) const
{
	byte const* const data = file_begin();
	hash_sfm_file( data, file_size(), out );
	return blargg_ok;
}
blargg_err_t Sfm_Emu::start_track_( int track ) { RETURN_ERR( Music_Emu::start_track_( track ) ); resampler.clear(); filter.clear(); const byte * ptr = file_begin(); int metadata_size = get_le32(ptr + 4); if ( file_size() < metadata_size + Sfm_Emu::sfm_min_file_size ) return "SFM file too small"; char * temp = new char[metadata_size + 1]; temp[metadata_size] = '\0'; memcpy(temp, ptr + 8, metadata_size); metadata.parseDocument(temp); delete [] temp; apu.init_rom( ipl_rom ); apu.reset(); memcpy( apu.m.ram.ram, ptr + 8 + metadata_size, 65536 ); memcpy( apu.dsp.m.regs, ptr + 8 + metadata_size + 65536, 128 ); apu.set_sfm_queue( ptr + 8 + metadata_size + 65536 + 128, ptr + file_size() ); byte regs[Snes_Spc::reg_count] = {0}; char * end; const char * value; regs[Snes_Spc::r_test] = META_ENUM_INT("smp:test"); regs[Snes_Spc::r_control] |= META_ENUM_INT("smp:iplrom") ? 0x80 : 0; regs[Snes_Spc::r_dspaddr] = META_ENUM_INT("smp:dspaddr"); value = metadata.enumValue("smp:ram"); if (value) { regs[Snes_Spc::r_f8] = strtoul(value, &end, 10); if (*end) { value = end + 1; regs[Snes_Spc::r_f9] = strtoul(value, &end, 10); } } char temp_path[256]; for (int i = 0; i < 3; ++i) { sprintf(temp_path, "smp:timer[%u]:", i); size_t length = strlen(temp_path); strcpy(temp_path + length, "enable"); value = metadata.enumValue(temp_path); if (value) { regs[Snes_Spc::r_control] |= strtoul(value, &end, 10) ? 
1 << i : 0; } strcpy(temp_path + length, "target"); value = metadata.enumValue(temp_path); if (value) { regs[Snes_Spc::r_t0target + i] = strtoul(value, &end, 10); } strcpy(temp_path + length, "stage"); value = metadata.enumValue(temp_path); if (value) { for (int j = 0; j < 3; ++j) { if (value) value = strchr(value, ','); if (value) ++value; } if (value) { regs[Snes_Spc::r_t0out + i] = strtoul(value, &end, 10); } } } apu.load_regs( regs ); apu.m.rom_enabled = 0; apu.regs_loaded(); for (int i = 0; i < 3; ++i) { sprintf(temp_path, "smp:timer[%u]:", i); size_t length = strlen(temp_path); strcpy(temp_path + length, "stage"); value = metadata.enumValue(temp_path); if (value) { const char * stage = value; apu.m.timers[i].next_time = strtoul(stage, &end, 10) + 1; for (int j = 0; j < 2; ++j) { if (stage) stage = strchr(stage, ','); if (stage) ++stage; } if (stage) { apu.m.timers[i].divider = strtoul(value, &end, 10); } } } apu.dsp.m.echo_hist_pos = &apu.dsp.m.echo_hist[META_ENUM_INT("dsp:echohistaddr")]; value = metadata.enumValue("dsp:echohistdata"); if (value) { for (int i = 0; i < 8; ++i) { apu.dsp.m.echo_hist[i][0] = strtoul(value, &end, 10); value = strchr(value, ','); if (!value) break; ++value; apu.dsp.m.echo_hist[i][1] = strtoul(value, &end, 10); value = strchr(value, ','); if (!value) break; ++value; } } apu.dsp.m.phase = META_ENUM_INT("dsp:sample"); apu.dsp.m.kon = META_ENUM_INT("dsp:kon"); apu.dsp.m.noise = META_ENUM_INT("dsp:noise"); apu.dsp.m.counter = META_ENUM_INT("dsp:counter"); apu.dsp.m.echo_offset = META_ENUM_INT("dsp:echooffset"); apu.dsp.m.echo_length = META_ENUM_INT("dsp:echolength"); apu.dsp.m.new_kon = META_ENUM_INT("dsp:koncache"); apu.dsp.m.endx_buf = META_ENUM_INT("dsp:endx"); apu.dsp.m.envx_buf = META_ENUM_INT("dsp:envx"); apu.dsp.m.outx_buf = META_ENUM_INT("dsp:outx"); apu.dsp.m.t_pmon = META_ENUM_INT("dsp:pmon"); apu.dsp.m.t_non = META_ENUM_INT("dsp:non"); apu.dsp.m.t_eon = META_ENUM_INT("dsp:eon"); apu.dsp.m.t_dir = META_ENUM_INT("dsp:dir"); 
apu.dsp.m.t_koff = META_ENUM_INT("dsp:koff"); apu.dsp.m.t_brr_next_addr = META_ENUM_INT("dsp:brrnext"); apu.dsp.m.t_adsr0 = META_ENUM_INT("dsp:adsr0"); apu.dsp.m.t_brr_header = META_ENUM_INT("dsp:brrheader"); apu.dsp.m.t_brr_byte = META_ENUM_INT("dsp:brrdata"); apu.dsp.m.t_srcn = META_ENUM_INT("dsp:srcn"); apu.dsp.m.t_esa = META_ENUM_INT("dsp:esa"); apu.dsp.m.t_echo_enabled = !META_ENUM_INT("dsp:echodisable"); apu.dsp.m.t_dir_addr = META_ENUM_INT("dsp:diraddr"); apu.dsp.m.t_pitch = META_ENUM_INT("dsp:pitch"); apu.dsp.m.t_output = META_ENUM_INT("dsp:output"); apu.dsp.m.t_looped = META_ENUM_INT("dsp:looped"); apu.dsp.m.t_echo_ptr = META_ENUM_INT("dsp:echoaddr"); #define META_ENUM_LEVELS(n, o) \ value = metadata.enumValue(n); \ if (value) \ { \ (o)[0] = strtoul(value, &end, 10); \ if (*end) \ { \ value = end + 1; \ (o)[1] = strtoul(value, &end, 10); \ } \ } META_ENUM_LEVELS("dsp:mainout", apu.dsp.m.t_main_out); META_ENUM_LEVELS("dsp:echoout", apu.dsp.m.t_echo_out); META_ENUM_LEVELS("dsp:echoin", apu.dsp.m.t_echo_in); #undef META_ENUM_LEVELS for (int i = 0; i < 8; ++i) { sprintf(temp_path, "dsp:voice[%u]:", i); size_t length = strlen(temp_path); Spc_Dsp::voice_t & voice = apu.dsp.m.voices[i]; strcpy(temp_path + length, "brrhistaddr"); value = metadata.enumValue(temp_path); if (value) { voice.buf_pos = strtoul(value, &end, 10); } strcpy(temp_path + length, "brrhistdata"); value = metadata.enumValue(temp_path); if (value) { for (int j = 0; j < Spc_Dsp::brr_buf_size; ++j) { voice.buf[j] = voice.buf[j + Spc_Dsp::brr_buf_size] = strtoul(value, &end, 10); if (!*end) break; value = end + 1; } } strcpy(temp_path + length, "interpaddr"); voice.interp_pos = META_ENUM_INT(temp_path); strcpy(temp_path + length, "brraddr"); voice.brr_addr = META_ENUM_INT(temp_path); strcpy(temp_path + length, "brroffset"); voice.brr_offset = META_ENUM_INT(temp_path); strcpy(temp_path + length, "vbit"); voice.vbit = META_ENUM_INT(temp_path); strcpy(temp_path + length, "vidx"); voice.regs = 
&apu.dsp.m.regs[META_ENUM_INT(temp_path)]; strcpy(temp_path + length, "kondelay"); voice.kon_delay = META_ENUM_INT(temp_path); strcpy(temp_path + length, "envmode"); voice.env_mode = (Spc_Dsp::env_mode_t) META_ENUM_INT(temp_path); strcpy(temp_path + length, "env"); voice.env = META_ENUM_INT(temp_path); strcpy(temp_path + length, "envxout"); voice.t_envx_out = META_ENUM_INT(temp_path); strcpy(temp_path + length, "envcache"); voice.hidden_env = META_ENUM_INT(temp_path); } filter.set_gain( (int) (gain() * Spc_Filter::gain_unit) ); apu.clear_echo( true ); return blargg_ok; }