示例#1
0
// Runs the instruction list held by 'bhir': releases the bases reported by the instruction
// clean-up, splits the remaining instructions into kernels, and for every kernel that does
// computation either reuses source text from 'codegen_cache' or generates it with
// writeKernel() before calling execute(). The wall-clock time of the whole call is added
// to 'stat.time_total_execution'.
void EngineCPU::handleExecution(BhIR *bhir) {

    const auto time_begin = chrono::steady_clock::now();

    // Record statistics about the incoming batch
    stat.record(*bhir);

    // Clean up the instruction list of 'bhir'; the bases gathered in 'pending_frees'
    // are released right below
    set<bh_base *> pending_frees;
    vector<bh_instruction *> instructions = jitk::remove_non_computed_system_instr(bhir->instr_list, pending_frees);

    // Free device buffers and array memory
    for (bh_base *released: pending_frees) {
        bh_data_free(released);
    }

    // Set the constructor flag: cleared on every instruction unless array contraction is enabled
    if (!array_contraction) {
        for (bh_instruction *ins: instructions) {
            ins->constructor = false;
        }
    } else {
        setConstructorFlag(instructions);
    }

    // Partition the instructions into kernels
    vector<LoopB> kernels = get_kernel_list(instructions, fusion_config, fcache, stat);

    for (const LoopB &kernel: kernels) {
        // Build the symbol table of this kernel
        const SymbolTable symbols(kernel,
                                  use_volatile,
                                  strides_as_var,
                                  index_as_var,
                                  const_as_var);
        stat.record(symbols);

        // Kernels that consist of system instructions only need no code generation or execution
        if (!kernel.isSystemOnly()) {
            // Collect raw pointers to the instructions listed by the symbol table's constant IDs
            vector<const bh_instruction *> constants;
            constants.reserve(symbols.constIDs().size());
            for (const InstrPtr &const_instr: symbols.constIDs()) {
                constants.push_back(&(*const_instr));
            }

            const auto cache_hit = codegen_cache.lookup(kernel, symbols);
            if (cache_hit.first.empty()) {
                // Nothing cached: generate the source (timed), run it, then store it in the cache
                const auto codegen_begin = chrono::steady_clock::now();
                stringstream generated;
                writeKernel(kernel, symbols, {}, cache_hit.second, generated);
                string source = generated.str();
                stat.time_codegen += chrono::steady_clock::now() - codegen_begin;

                execute(symbols, source, cache_hit.second, constants);
                codegen_cache.insert(std::move(source), kernel, symbols);
            } else {
                // In debug builds, regenerate the source and verify that it matches the cached text
                #ifndef NDEBUG
                    stringstream regenerated;
                    writeKernel(kernel, symbols, {}, cache_hit.second, regenerated);
                    if (regenerated.str() != cache_hit.first) {
                        cout << "\nCached source code: \n" << cache_hit.first;
                        cout << "\nReal source code: \n" << regenerated.str();
                        assert(1 == 2);
                    }
                #endif
                execute(symbols, cache_hit.first, cache_hit.second, constants);
            }
        }

        // Finally, release every base the kernel reports through getAllFrees()
        for (bh_base *released: kernel.getAllFrees()) {
            bh_data_free(released);
        }
    }
    stat.time_total_execution += chrono::steady_clock::now() - time_begin;
}
示例#2
0
文件: action.cpp 项目: TimmyLiu/clFFT
//	Compile the kernels that this plan uses, and register them with the FFTRepo singleton.
//
//	The repo is first asked for a cl_program matching this action's generator and signature
//	data. Only when it answers CLFFT_INVALID_PROGRAM is any work done: the program is taken
//	from FFTBinaryLookup if found there, otherwise it is built from the generated source and
//	handed to the lookup via setProgram()/populateCache(). The program is then registered with
//	the repo, followed by the forward and/or backward kernel objects that are not yet known to it.
//
//	commQueueFFT  - not referenced in this function body
//	plHandle      - plan handle, forwarded to FFTBinaryLookup and writeKernel
//	fftPlan       - supplies the OpenCL context and the device (bakeDevice) to build for
//
//	Returns CLFFT_SUCCESS when the end of the function is reached. Failing calls are routed
//	through OPENCL_V (presumably returns the error status early; confirm against the macro).
clfftStatus FFTAction::compileKernels( const cl_command_queue commQueueFFT, const clfftPlanHandle plHandle, FFTPlan* fftPlan )
{
    cl_int status = 0;

    FFTRepo& fftRepo	= FFTRepo::getInstance( );

    // create a cl program executable for the device associated with command queue
    // Get the device
    cl_device_id &q_device = fftPlan->bakeDevice;

    // Initialized so the handle never holds an indeterminate value if a lookup leaves it untouched
    cl_program program = NULL;
    if( fftRepo.getclProgram( this->getGenerator(), this->getSignatureData(), program, q_device, fftPlan->context ) == CLFFT_INVALID_PROGRAM )
    {
        FFTBinaryLookup lookup (this->getGenerator(), plHandle, fftPlan->context, q_device);

        lookup.variantRaw(this->getSignatureData(), this->getSignatureData()->datasize);

        if (lookup.found())
        {
#if FFT_CACHE_DEBUG
            // debug message in debug mode to ensure that the cache is used
            fprintf(stderr, "Kernel loaded from cache\n");
#endif

            program = lookup.getProgram();
        }
        else
        {
#if FFT_CACHE_DEBUG
            fprintf(stderr, "Kernel built from source\n");
#endif

            //	If the user wishes us to write the kernels out to disk, we do so
            if( fftRepo.setupData.debugFlags & CLFFT_DUMP_PROGRAMS )
            {
                OPENCL_V( writeKernel( plHandle, this->getGenerator(), this->getSignatureData(), fftPlan->context, fftPlan->bakeDevice ), _T( "writeKernel failed." ) );
            }

            std::string programCode;
            OPENCL_V( fftRepo.getProgramCode( this->getGenerator(), this->getSignatureData(), programCode, q_device, fftPlan->context  ), _T( "fftRepo.getProgramCode failed." ) );

            // 'source' points into programCode, which stays alive until the end of this branch
            const char* source = programCode.c_str();
            program = clCreateProgramWithSource( fftPlan->context, 1, &source, NULL, &status );
            OPENCL_V( status, _T( "clCreateProgramWithSource failed." ) );

            // create a cl program executable for the device associated with command queue

#if defined(DEBUGGING)
            status = clBuildProgram( program, 1, &q_device, "-g -cl-opt-disable", NULL, NULL); // good for debugging kernels

// if you have trouble creating symbols that GDB can pick up to set a breakpoint after kernels are loaded into memory
// this can be used to stop execution to allow you to set a breakpoint in a kernel after kernel symbols are in memory.
#ifdef DEBUG_BREAK_GDB
            __debugbreak();
#endif
#else
            status = clBuildProgram( program, 1, &q_device, "", NULL, NULL);
#endif
            if( status != CL_SUCCESS )
            {
                if( status == CL_BUILD_PROGRAM_FAILURE )
                {
                    // First query: ask only for the size of the build log
                    size_t buildLogSize = 0;
                    OPENCL_V( clGetProgramBuildInfo( program, q_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize ),
                              _T( "clGetProgramBuildInfo failed" ) );

                    // One extra zeroed byte keeps the buffer non-empty and NUL-terminated, so taking
                    // &buildLog[ 0 ] and streaming it as a C string is valid even for an empty log
                    std::vector< char > buildLog( buildLogSize + 1, '\0' );

                    // Second query: fetch the log text itself (nothing to fetch when the log is empty)
                    if( buildLogSize > 0 )
                    {
                        OPENCL_V( clGetProgramBuildInfo( program, q_device, CL_PROGRAM_BUILD_LOG, buildLogSize, &buildLog[ 0 ], NULL ),
                                  _T( "clGetProgramBuildInfo failed" ) );
                    }

                    std::cerr << "\n\t\t\tBUILD LOG\n";
                    std::cerr << "************************************************\n";
                    std::cerr << &buildLog[ 0 ] << std::endl;
                    std::cerr << "************************************************\n";
                }

                // NOTE(review): if OPENCL_V returns here, 'program' is not released on this path - confirm
                OPENCL_V( status, _T( "clBuildProgram failed" ) );
            }

            lookup.setProgram(program, source);
            lookup.populateCache();
        }

        fftRepo.setclProgram( this->getGenerator(), this->getSignatureData(), program, q_device, fftPlan->context );


        // For real transforms we compile either forward or backward kernel
        bool buildFwdKernel = buildForwardKernel();
        bool buildBwdKernel = buildBackwardKernel();

        // get a kernel object handle for a kernel with the given name
        cl_kernel kernel = NULL;
        if( buildFwdKernel )
        {
            // Create the forward kernel only if the repo does not already hold one for this program
            if( fftRepo.getclKernel( program, CLFFT_FORWARD, kernel ) == CLFFT_INVALID_KERNEL )
            {
                std::string entryPoint;
                OPENCL_V( fftRepo.getProgramEntryPoint( this->getGenerator(), this->getSignatureData(), CLFFT_FORWARD, entryPoint, q_device, fftPlan->context ), _T( "fftRepo.getProgramEntryPoint failed." ) );

                kernel = clCreateKernel( program, entryPoint.c_str( ), &status );
                OPENCL_V( status, _T( "clCreateKernel failed" ) );

                fftRepo.setclKernel( program, CLFFT_FORWARD, kernel );
            }
        }

        if( buildBwdKernel )
        {
            // Same as above, for the backward direction
            if( fftRepo.getclKernel( program, CLFFT_BACKWARD, kernel ) == CLFFT_INVALID_KERNEL )
            {
                std::string entryPoint;
                OPENCL_V( fftRepo.getProgramEntryPoint( this->getGenerator(), this->getSignatureData(), CLFFT_BACKWARD, entryPoint, q_device, fftPlan->context ), _T( "fftRepo.getProgramEntryPoint failed." ) );

                kernel = clCreateKernel( program, entryPoint.c_str( ), &status );
                OPENCL_V( status, _T( "clCreateKernel failed" ) );

                fftRepo.setclKernel( program, CLFFT_BACKWARD, kernel );
            }
        }
    }

    return	CLFFT_SUCCESS;
}