/** Execute every kernel derived from the instruction list of `bhir`.
 *
 *  Steps, in order:
 *    1. record statistics for `bhir`;
 *    2. strip instructions via `jitk::remove_non_computed_system_instr` and
 *       free the bases it reports;
 *    3. set (or clear) the `constructor` flag on the remaining instructions;
 *    4. build the kernel list and, for each kernel, either reuse the source
 *       found in `codegen_cache` or generate it, then execute it;
 *    5. free the bases reported by each kernel's `getAllFrees()`.
 *
 *  Time spent is accumulated in `stat.time_codegen` and
 *  `stat.time_total_execution`.
 *
 *  @param bhir  The instruction batch to execute.
 */
void EngineCPU::handleExecution(BhIR *bhir) {
    const auto execution_begin = chrono::steady_clock::now();

    // Book-keeping on the incoming batch
    stat.record(*bhir);

    // Filter the instruction list; `pending_frees` is filled in by the filter
    set<bh_base *> pending_frees;
    vector<bh_instruction *> instructions =
            jitk::remove_non_computed_system_instr(bhir->instr_list, pending_frees);

    // Release the data of every base the filter reported
    for (bh_base *b: pending_frees) {
        bh_data_free(b);
    }

    // Either compute the constructor flags or reset all of them
    if (not array_contraction) {
        for (bh_instruction *ins: instructions) {
            ins->constructor = false;
        }
    } else {
        setConstructorFlag(instructions);
    }

    // Partition the instructions into kernels
    vector<LoopB> kernels = get_kernel_list(instructions, fusion_config, fcache, stat);

    for (const LoopB &kernel: kernels) {
        // Each kernel gets its own symbol table
        const SymbolTable symbols(kernel, use_volatile, strides_as_var, index_as_var, const_as_var);
        stat.record(symbols);

        // Kernels consisting of system instructions only need no code
        if (not kernel.isSystemOnly()) {
            // Collect raw pointers to the constant instructions
            vector<const bh_instruction *> const_instrs;
            const_instrs.reserve(symbols.constIDs().size());
            for (const InstrPtr &cid: symbols.constIDs()) {
                const_instrs.push_back(&*cid);
            }

            const auto cached = codegen_cache.lookup(kernel, symbols);
            if (cached.first.empty()) {
                // Cache miss: generate the source, run it, then store it
                const auto codegen_begin = chrono::steady_clock::now();
                stringstream out;
                writeKernel(kernel, symbols, {}, cached.second, out);
                string generated = out.str();
                stat.time_codegen += chrono::steady_clock::now() - codegen_begin;
                execute(symbols, generated, cached.second, const_instrs);
                codegen_cache.insert(std::move(generated), kernel, symbols);
            } else {
                // Cache hit: debug builds regenerate the source and verify
                // that it matches the cached text
#ifndef NDEBUG
                stringstream out;
                writeKernel(kernel, symbols, {}, cached.second, out);
                const string regenerated = out.str();
                if (regenerated.compare(cached.first) != 0) {
                    cout << "\nCached source code: \n" << cached.first;
                    cout << "\nReal source code: \n" << regenerated;
                    assert(1 == 2);
                }
#endif
                execute(symbols, cached.first, cached.second, const_instrs);
            }
        }

        // Release whatever this kernel marks as freeable
        for (bh_base *b: kernel.getAllFrees()) {
            bh_data_free(b);
        }
    }

    stat.time_total_execution += chrono::steady_clock::now() - execution_begin;
}
// Compile the kernels that this plan uses, and store into the plan clfftStatus FFTAction::compileKernels( const cl_command_queue commQueueFFT, const clfftPlanHandle plHandle, FFTPlan* fftPlan ) { cl_int status = 0; size_t deviceListSize = 0; FFTRepo& fftRepo = FFTRepo::getInstance( ); // create a cl program executable for the device associated with command queue // Get the device cl_device_id &q_device = fftPlan->bakeDevice; cl_program program; if( fftRepo.getclProgram( this->getGenerator(), this->getSignatureData(), program, q_device, fftPlan->context ) == CLFFT_INVALID_PROGRAM ) { FFTBinaryLookup lookup (this->getGenerator(), plHandle, fftPlan->context, q_device); lookup.variantRaw(this->getSignatureData(), this->getSignatureData()->datasize); if (lookup.found()) { #if FFT_CACHE_DEBUG // debug message in debug mode to ensure that the cache is used fprintf(stderr, "Kernel loaded from cache\n"); #endif program = lookup.getProgram(); } else { #if FFT_CACHE_DEBUG fprintf(stderr, "Kernel built from source\n"); #endif // If the user wishes us to write the kernels out to disk, we do so if( fftRepo.setupData.debugFlags & CLFFT_DUMP_PROGRAMS ) { OPENCL_V( writeKernel( plHandle, this->getGenerator(), this->getSignatureData(), fftPlan->context, fftPlan->bakeDevice ), _T( "writeKernel failed." ) ); } std::string programCode; OPENCL_V( fftRepo.getProgramCode( this->getGenerator(), this->getSignatureData(), programCode, q_device, fftPlan->context ), _T( "fftRepo.getProgramCode failed." ) ); const char* source = programCode.c_str(); program = clCreateProgramWithSource( fftPlan->context, 1, &source, NULL, &status ); OPENCL_V( status, _T( "clCreateProgramWithSource failed." 
) ); // create a cl program executable for the device associated with command queue #if defined(DEBUGGING) status = clBuildProgram( program, 1, &q_device, "-g -cl-opt-disable", NULL, NULL); // good for debugging kernels // if you have trouble creating smbols that GDB can pick up to set a breakpoint after kernels are loaded into memory // this can be used to stop execution to allow you to set a breakpoint in a kernel after kernel symbols are in memory. #ifdef DEBUG_BREAK_GDB __debugbreak(); #endif #else status = clBuildProgram( program, 1, &q_device, "", NULL, NULL); #endif if( status != CL_SUCCESS ) { if( status == CL_BUILD_PROGRAM_FAILURE ) { size_t buildLogSize = 0; OPENCL_V( clGetProgramBuildInfo( program, q_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize ), _T( "clGetProgramBuildInfo failed" ) ); std::vector< char > buildLog( buildLogSize ); ::memset( &buildLog[ 0 ], 0x0, buildLogSize ); OPENCL_V( clGetProgramBuildInfo( program, q_device, CL_PROGRAM_BUILD_LOG, buildLogSize, &buildLog[ 0 ], NULL ), _T( "clGetProgramBuildInfo failed" ) ); std::cerr << "\n\t\t\tBUILD LOG\n"; std::cerr << "************************************************\n"; std::cerr << &buildLog[ 0 ] << std::endl; std::cerr << "************************************************\n"; } OPENCL_V( status, _T( "clBuildProgram failed" ) ); } lookup.setProgram(program, source); lookup.populateCache(); } fftRepo.setclProgram( this->getGenerator(), this->getSignatureData(), program, q_device, fftPlan->context ); // For real transforms we compile either forward or backward kernel bool buildFwdKernel = buildForwardKernel(); bool buildBwdKernel = buildBackwardKernel(); // get a kernel object handle for a kernel with the given name cl_kernel kernel; if( buildFwdKernel ) { if( fftRepo.getclKernel( program, CLFFT_FORWARD, kernel ) == CLFFT_INVALID_KERNEL ) { std::string entryPoint; OPENCL_V( fftRepo.getProgramEntryPoint( this->getGenerator(), this->getSignatureData(), CLFFT_FORWARD, entryPoint, q_device, 
fftPlan->context ), _T( "fftRepo.getProgramEntryPoint failed." ) ); kernel = clCreateKernel( program, entryPoint.c_str( ), &status ); OPENCL_V( status, _T( "clCreateKernel failed" ) ); fftRepo.setclKernel( program, CLFFT_FORWARD, kernel ); } } if( buildBwdKernel ) { if( fftRepo.getclKernel( program, CLFFT_BACKWARD, kernel ) == CLFFT_INVALID_KERNEL ) { std::string entryPoint; OPENCL_V( fftRepo.getProgramEntryPoint( this->getGenerator(), this->getSignatureData(), CLFFT_BACKWARD, entryPoint, q_device, fftPlan->context ), _T( "fftRepo.getProgramEntryPoint failed." ) ); kernel = clCreateKernel( program, entryPoint.c_str( ), &status ); OPENCL_V( status, _T( "clCreateKernel failed" ) ); fftRepo.setclKernel( program, CLFFT_BACKWARD, kernel ); } } } return CLFFT_SUCCESS; }