Exemplo n.º 1
0
void ClFFT::perform( cl::Buffer real, cl::Buffer im, clFFT_Direction& dir )
{
	size_t requiredSize = m_size.getArea()*sizeof(float);
	size_t realBytes = real.getInfo<CL_MEM_SIZE>();
	size_t imBytes = real.getInfo<CL_MEM_SIZE>();
	assert(realBytes >= requiredSize);
	assert(imBytes >= requiredSize);

	try {
		cl_int err = clFFT_ExecutePlannar((*m_queue)(), *m_fftPlan, 1, dir, real(), im(), real(), im(),  0, NULL, NULL);
		if(!*m_fftPlan || err)
			throw cl::Error(ERR_OPENCL, "clFFT_ExecutePlannar");
	} catchCLError;
}
Exemplo n.º 2
0
void CLSimulator::initializeClFFT()
{
    /* x x x x
     * x x x x
     * x x x x
     * x x x x
     */

    for (size_t x_idx = 0, x_val = _nX - 1; x_idx < _nX; ++x_idx, --x_val) {
        for (size_t y_idx = 0, y_val = _nY - 1; y_idx < _nY; ++y_idx, --y_val) {
            float distance = sqrt(pow(float(x_val), 2.0f) + pow(float(y_val), 2.0f));
            _distances_real[x_idx + y_idx * _nFFTx] = _f_w_EE((float(distance)));
        }
    }

    /* v v x x
     * v v x x
     * x x x x
     * x x x x
     */

    for (size_t x_idx = 0, x_val = _nX - 1; x_idx < _nX; ++x_idx, --x_val) {
        for (size_t y_idx = _nY, y_val = 1; y_idx < _nFFTy - 1; ++y_idx, ++y_val) {
            float distance = sqrt(pow(float(x_val), 2.0f) + pow(float(y_val), 2.0f));
            _distances_real[x_idx + y_idx * _nFFTx] = _f_w_EE((float(distance)));
        }
    }

    /* v v v x
     * v v v x
     * x x x x
     * x x x x
     */

    if (_nY > 1)
    {
        for (size_t x_idx = 0; x_idx < _nFFTx; ++x_idx) {
            _distances_real[x_idx + (_nFFTy - 1) * _nFFTx] = 0;
        }
    }

    /* v v v 0
     * v v v 0
     * x x x 0
     * x x x 0
     */

    for (size_t x_idx = _nX, x_val = 1; x_idx < _nFFTx - 1; ++x_idx, ++x_val) {
        for (size_t y_idx = 0, y_val = _nY - 1; y_idx < _nY; ++y_idx, --y_val) {
            float distance = sqrt(pow(float(x_val), 2.0f) + pow(float(y_val), 2.0f));
            _distances_real[x_idx + y_idx * _nFFTx] = _f_w_EE((float(distance)));
        }
    }

    /* v v v 0
     * v v v 0
     * v v x 0
     * x x x 0
     */

    for (size_t y_idx = 0; y_idx < _nFFTy; ++y_idx) {
        _distances_real[(_nFFTx - 1) + y_idx * _nFFTx] = 0;
    }

    /* v v v 0
     * v v v 0
     * v v x 0
     * 0 0 0 0
     */

    for (size_t x_idx = _nX, x_val = 1; x_idx < _nFFTx - 1; ++x_idx, ++x_val) {
        for (size_t y_idx = _nY, y_val = 1; y_idx < _nFFTy - 1; ++y_idx, ++y_val) {
            float distance = sqrt(pow(float(x_val), 2.0f) + pow(float(y_val), 2.0f));
            _distances_real[x_idx + y_idx * _nFFTx] = _f_w_EE((float(distance)));
        }
    }

    /* v v v 0
     * v v v 0
     * v v v 0
     * 0 0 0 0
     */

    assert(isPowerOfTwo(_nFFT));
    assert(_nX >= 1 && _nY >= 1 && _nZ >= 1);
    assert((_nX >= _nY) && (_nY >= _nZ));
    clFFT_Dim3 n = { static_cast<unsigned int>(_nFFTx),
                     static_cast<unsigned int>(_nFFTy),
                     static_cast<unsigned int>(_nFFTz) };
    clFFT_DataFormat dataFormat = clFFT_SplitComplexFormat;
    clFFT_Dimension dim;

    if (_nY == 1)
    {
        dim = clFFT_1D;
    } else if (_nZ == 1)
    {
        dim = clFFT_2D;
    } else
    {
        dim = clFFT_3D;
    }
    _p_cl = clFFT_CreatePlan(_wrapper.getContextC(), n, dim, dataFormat, &_err);
    handleClError(_err);

    _distances_real_cl = cl::Buffer(_wrapper.getContext(),
                                    CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                    _nFFT * sizeof(float),
                                    _distances_real.get(),
                                    &_err);
    _distances_imag_cl = cl::Buffer(_wrapper.getContext(),
                                    CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                    _nFFT * sizeof(float),
                                    _zeros.get(),
                                    &_err);
    _sVals_real_cl = cl::Buffer(_wrapper.getContext(),
                                CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                _nFFT * sizeof(float),
                                _zeros.get(),
                                &_err);
    _sVals_imag_cl = cl::Buffer(_wrapper.getContext(),
                                CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                _nFFT * sizeof(float),
                                _zeros.get(),
                                &_err);
    _convolution_real_cl = cl::Buffer(_wrapper.getContext(),
                                      CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                      _nFFT * sizeof(float),
                                      _zeros.get(),
                                      &_err);
    _convolution_imag_cl = cl::Buffer(_wrapper.getContext(),
                                      CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                      _nFFT * sizeof(float),
                                      _zeros.get(),
                                      &_err);
    _distances_f_real_cl = cl::Buffer(_wrapper.getContext(),
                                      CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                      _nFFT * sizeof(float),
                                      _zeros.get(),
                                      &_err);
    _distances_f_imag_cl = cl::Buffer(_wrapper.getContext(),
                                      CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                      _nFFT * sizeof(float),
                                      _zeros.get(),
                                      &_err);
    _sVals_f_real_cl = cl::Buffer(_wrapper.getContext(),
                                  CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                  _nFFT * sizeof(float),
                                  _zeros.get(),
                                  &_err);
    _sVals_f_imag_cl = cl::Buffer(_wrapper.getContext(),
                                  CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                  _nFFT * sizeof(float),
                                  _zeros.get(),
                                  &_err);
    _convolution_f_real_cl = cl::Buffer(_wrapper.getContext(),
                                        CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                        _nFFT * sizeof(float),
                                        _zeros.get(),
                                        &_err);
    _convolution_f_imag_cl = cl::Buffer(_wrapper.getContext(),
                                        CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                        _nFFT * sizeof(float),
                                        _zeros.get(),
                                        &_err);

    _kernel_convolution = cl::Kernel(_program, "convolution", &_err);
    handleClError(_kernel_convolution.setArg(0, _convolution_f_real_cl));
    handleClError(_kernel_convolution.setArg(1, _convolution_f_imag_cl));
    handleClError(_kernel_convolution.setArg(2, _distances_f_real_cl));
    handleClError(_kernel_convolution.setArg(3, _distances_f_imag_cl));
    handleClError(_kernel_convolution.setArg(4, _sVals_f_real_cl));
    handleClError(_kernel_convolution.setArg(5, _sVals_f_imag_cl));
    handleClError(_kernel_convolution.setArg(6, _scaleFFT));

    handleClError(clFFT_ExecutePlannar(_wrapper.getQueueC(),
                                       _p_cl,
                                       1,
                                       clFFT_Forward,
                                       _distances_real_cl(),
                                       _distances_imag_cl(),
                                       _distances_f_real_cl(),
                                       _distances_f_imag_cl(),
                                       0,
                                       NULL,
                                       NULL));

    _wrapper.getQueue().finish();
}
Exemplo n.º 3
0
void CLSimulator::f_I_FFT_clFFT(const Receptor rec)
{
    // initialize sVals_real for FFT
    switch (rec)
    {
    case AMPA:
        handleClError(_kernel_prepareFFT_AMPA.setArg(0, _states_cl[_ind_old]));
        _err = _wrapper.getQueue().enqueueNDRangeKernel(_kernel_prepareFFT_AMPA, cl::NullRange, cl::NDRange(_numNeurons), cl::NullRange, NULL, NULL);
        break;

    case NMDA:
        handleClError(_kernel_prepareFFT_NMDA.setArg(0, _states_cl[_ind_old]));
        _err = _wrapper.getQueue().enqueueNDRangeKernel(_kernel_prepareFFT_NMDA, cl::NullRange, cl::NDRange(_numNeurons), cl::NullRange, NULL, NULL);
        break;

    case GABAA:
        handleClError(_kernel_prepareFFT_GABAA.setArg(0, _states_cl[_ind_old]));
        _err = _wrapper.getQueue().enqueueNDRangeKernel(_kernel_prepareFFT_GABAA, cl::NullRange, cl::NDRange(_numNeurons), cl::NullRange, NULL, NULL);
        break;
    }

    _wrapper.getQueue().finish();

    // transform sVals into frequency domain using FFT
    handleClError(clFFT_ExecutePlannar(_wrapper.getQueueC(),
                                       _p_cl,
                                       1,
                                       clFFT_Forward,
                                       _sVals_real_cl(),
                                       _sVals_imag_cl(),
                                       _sVals_f_real_cl(),
                                       _sVals_f_imag_cl(),
                                       0,
                                       NULL,
                                       NULL));

    _wrapper.getQueue().finish();

    // execute convolution in frequency domain
    _err = _wrapper.getQueue().enqueueNDRangeKernel(_kernel_convolution, cl::NullRange, cl::NDRange(_nFFT), cl::NullRange, NULL, NULL);

    _wrapper.getQueue().finish();

    // inverse transform convolution_f using FFT
    handleClError(clFFT_ExecutePlannar(_wrapper.getQueueC(),
                                       _p_cl,
                                       1,
                                       clFFT_Inverse,
                                       _convolution_f_real_cl(),
                                       _convolution_f_imag_cl(),
                                       _convolution_real_cl(),
                                       _convolution_imag_cl(),
                                       0,
                                       NULL,
                                       NULL));

    _wrapper.getQueue().finish();

    // update sumFootprint array for current receptor
    switch (rec)
    {
    case AMPA:
        _err = _wrapper.getQueue().enqueueNDRangeKernel(_kernel_postConvolution_AMPA, cl::NullRange, cl::NDRange(_numNeurons), cl::NullRange, NULL, NULL);
        break;

    case NMDA:
        _err = _wrapper.getQueue().enqueueNDRangeKernel(_kernel_postConvolution_NMDA, cl::NullRange, cl::NDRange(_numNeurons), cl::NullRange, NULL, NULL);
        break;

    case GABAA:
        _err = _wrapper.getQueue().enqueueNDRangeKernel(_kernel_postConvolution_GABAA, cl::NullRange, cl::NDRange(_numNeurons), cl::NullRange, NULL, NULL);
        break;
    }
}
Exemplo n.º 4
0
int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension dim, 
			clFFT_DataFormat dataFormat, int numIter, clFFT_TestType testType)
{	
	cl_int err = CL_SUCCESS;
	int iter;
	double t;
	
	uint64_t t0, t1;
	int mx = log2(n.x);
	int my = log2(n.y);
	int mz = log2(n.z);

	int length = n.x * n.y * n.z * batchSize;
		
	double gflops = 5e-9 * ((double)mx + (double)my + (double)mz) * (double)n.x * (double)n.y * (double)n.z * (double)batchSize * (double)numIter;
	
	clFFT_SplitComplex data_i_split = (clFFT_SplitComplex) { NULL, NULL };
	clFFT_SplitComplex data_cl_split = (clFFT_SplitComplex) { NULL, NULL };
	clFFT_Complex *data_i = NULL;
	clFFT_Complex *data_cl = NULL;
	clFFT_SplitComplexDouble data_iref = (clFFT_SplitComplexDouble) { NULL, NULL }; 
	clFFT_SplitComplexDouble data_oref = (clFFT_SplitComplexDouble) { NULL, NULL };
	
	clFFT_Plan plan = NULL;
	cl_mem data_in = NULL;
	cl_mem data_out = NULL;
	cl_mem data_in_real = NULL;
	cl_mem data_in_imag = NULL;
	cl_mem data_out_real = NULL;
	cl_mem data_out_imag = NULL;
	
	if(dataFormat == clFFT_SplitComplexFormat) {
		data_i_split.real     = (float *) malloc(sizeof(float) * length);
		data_i_split.imag     = (float *) malloc(sizeof(float) * length);
		data_cl_split.real    = (float *) malloc(sizeof(float) * length);
		data_cl_split.imag    = (float *) malloc(sizeof(float) * length);
		if(!data_i_split.real || !data_i_split.imag || !data_cl_split.real || !data_cl_split.imag)
		{
			err = -1;
			log_error("Out-of-Resources\n");
			goto cleanup;
		}
	}
	else {
		data_i  = (clFFT_Complex *) malloc(sizeof(clFFT_Complex)*length);
		data_cl = (clFFT_Complex *) malloc(sizeof(clFFT_Complex)*length);
		if(!data_i || !data_cl)
		{
			err = -2;
			log_error("Out-of-Resouces\n");
			goto cleanup;
		}
	}
	
	data_iref.real   = (double *) malloc(sizeof(double) * length);
	data_iref.imag   = (double *) malloc(sizeof(double) * length);
	data_oref.real   = (double *) malloc(sizeof(double) * length);
	data_oref.imag   = (double *) malloc(sizeof(double) * length);	
	if(!data_iref.real || !data_iref.imag || !data_oref.real || !data_oref.imag)
	{
		err = -3;
		log_error("Out-of-Resources\n");
		goto cleanup;
	}

	int i;
	if(dataFormat == clFFT_SplitComplexFormat) {
		for(i = 0; i < length; i++)
		{
			data_i_split.real[i] = 2.0f * (float) rand() / (float) RAND_MAX - 1.0f;
			data_i_split.imag[i] = 2.0f * (float) rand() / (float) RAND_MAX - 1.0f;
			data_cl_split.real[i] = 0.0f;
			data_cl_split.imag[i] = 0.0f;			
			data_iref.real[i] = data_i_split.real[i];
			data_iref.imag[i] = data_i_split.imag[i];
			data_oref.real[i] = data_iref.real[i];
			data_oref.imag[i] = data_iref.imag[i];	
		}
	}
	else {
		for(i = 0; i < length; i++)
		{
			data_i[i].real = 2.0f * (float) rand() / (float) RAND_MAX - 1.0f;
			data_i[i].imag = 2.0f * (float) rand() / (float) RAND_MAX - 1.0f;
			data_cl[i].real = 0.0f;
			data_cl[i].imag = 0.0f;			
			data_iref.real[i] = data_i[i].real;
			data_iref.imag[i] = data_i[i].imag;
			data_oref.real[i] = data_iref.real[i];
			data_oref.imag[i] = data_iref.imag[i];	
		}		
	}
	
	plan = clFFT_CreatePlan( context, n, dim, dataFormat, &err );
	if(!plan || err) 
	{
		log_error("clFFT_CreatePlan failed\n");
		goto cleanup;
	}
	
	//clFFT_DumpPlan(plan, stdout);
	
	if(dataFormat == clFFT_SplitComplexFormat)
	{
		data_in_real = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float), data_i_split.real, &err);
	    if(!data_in_real || err) 
	    {
			log_error("clCreateBuffer failed\n");
			goto cleanup;
	    }
		
		data_in_imag = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float), data_i_split.imag, &err);
	    if(!data_in_imag || err) 
	    {
			log_error("clCreateBuffer failed\n");
			goto cleanup;
	    }
		
		if(testType == clFFT_OUT_OF_PLACE)
		{
			data_out_real = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float), data_cl_split.real, &err);
			if(!data_out_real || err) 
			{
				log_error("clCreateBuffer failed\n");
				goto cleanup;
			}
			
			data_out_imag = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float), data_cl_split.imag, &err);
			if(!data_out_imag || err) 
			{
				log_error("clCreateBuffer failed\n");
				goto cleanup;
			}			
		}
		else
		{
			data_out_real = data_in_real;
			data_out_imag = data_in_imag;
		}
	}
	else
	{
	    data_in = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float)*2, data_i, &err);
	    if(!data_in) 
	    {
			log_error("clCreateBuffer failed\n");
			goto cleanup;
	    }
		if(testType == clFFT_OUT_OF_PLACE)
		{
			data_out = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float)*2, data_cl, &err);
			if(!data_out) 
			{
				log_error("clCreateBuffer failed\n");
				goto cleanup;
			}			
		}
		else
			data_out = data_in;
	}
		
			
	err = CL_SUCCESS;
	
	t0 = mach_absolute_time();
	if(dataFormat == clFFT_SplitComplexFormat)
	{
		for(iter = 0; iter < numIter; iter++)
		    err |= clFFT_ExecutePlannar(queue, plan, batchSize, dir, data_in_real, data_in_imag, data_out_real, data_out_imag, 0, NULL, NULL);
	}
	else
	{
	    for(iter = 0; iter < numIter; iter++) 
			err |= clFFT_ExecuteInterleaved(queue, plan, batchSize, dir, data_in, data_out, 0, NULL, NULL);
	}
	
	err |= clFinish(queue);
	
	if(err) 
	{
		log_error("clFFT_Execute\n");
		goto cleanup;	
	}
	
	t1 = mach_absolute_time(); 
	t = subtractTimes(t1, t0);
	char temp[100];
	sprintf(temp, "GFlops achieved for n = (%d, %d, %d), batchsize = %d", n.x, n.y, n.z, batchSize);
	log_perf(gflops / (float) t, 1, "GFlops/s", "%s", temp);

	if(dataFormat == clFFT_SplitComplexFormat)
	{	
		err |= clEnqueueReadBuffer(queue, data_out_real, CL_TRUE, 0, length*sizeof(float), data_cl_split.real, 0, NULL, NULL);
		err |= clEnqueueReadBuffer(queue, data_out_imag, CL_TRUE, 0, length*sizeof(float), data_cl_split.imag, 0, NULL, NULL);
	}
	else
	{
		err |= clEnqueueReadBuffer(queue, data_out, CL_TRUE, 0, length*sizeof(float)*2, data_cl, 0, NULL, NULL);
	}
	
	if(err) 
	{
		log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
	}	

	computeReferenceD(&data_oref, n, batchSize, dim, dir);
	
	double diff_avg, diff_max, diff_min;
	if(dataFormat == clFFT_SplitComplexFormat) {
		diff_avg = computeL2Error(&data_cl_split, &data_oref, n.x*n.y*n.z, batchSize, &diff_max, &diff_min);
		if(diff_avg > eps_avg)
			log_error("Test failed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)\n", n.x, n.y, n.z, batchSize, (testType == clFFT_OUT_OF_PLACE) ? "out-of-place" : "in-place", diff_avg, diff_max, diff_min);
		else
			log_info("Test passed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)\n", n.x, n.y, n.z, batchSize, (testType == clFFT_OUT_OF_PLACE) ? "out-of-place" : "in-place", diff_avg, diff_max, diff_min);			
	}
	else {
		clFFT_SplitComplex result_split;
		result_split.real = (float *) malloc(length*sizeof(float));
		result_split.imag = (float *) malloc(length*sizeof(float));
		convertInterleavedToSplit(&result_split, data_cl, length);
		diff_avg = computeL2Error(&result_split, &data_oref, n.x*n.y*n.z, batchSize, &diff_max, &diff_min);
		
		if(diff_avg > eps_avg)
			log_error("Test failed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)\n", n.x, n.y, n.z, batchSize, (testType == clFFT_OUT_OF_PLACE) ? "out-of-place" : "in-place", diff_avg, diff_max, diff_min);
		else
			log_info("Test passed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)\n", n.x, n.y, n.z, batchSize, (testType == clFFT_OUT_OF_PLACE) ? "out-of-place" : "in-place", diff_avg, diff_max, diff_min);	
		free(result_split.real);
		free(result_split.imag);
	}
	
cleanup:
	clFFT_DestroyPlan(plan);	
	if(dataFormat == clFFT_SplitComplexFormat) 
	{
		if(data_i_split.real)
			free(data_i_split.real);
		if(data_i_split.imag)
			free(data_i_split.imag);
		if(data_cl_split.real)
			free(data_cl_split.real);
		if(data_cl_split.imag)
			free(data_cl_split.imag);
		
		if(data_in_real)
			clReleaseMemObject(data_in_real);
		if(data_in_imag)
			clReleaseMemObject(data_in_imag);
		if(data_out_real && testType == clFFT_OUT_OF_PLACE)
			clReleaseMemObject(data_out_real);
		if(data_out_imag && clFFT_OUT_OF_PLACE)
			clReleaseMemObject(data_out_imag);
	}
	else 
	{
		if(data_i)
			free(data_i);
		if(data_cl)
			free(data_cl);
		
		if(data_in)
			clReleaseMemObject(data_in);
		if(data_out && testType == clFFT_OUT_OF_PLACE)
			clReleaseMemObject(data_out);
	}
	
	if(data_iref.real)
		free(data_iref.real);
	if(data_iref.imag)
		free(data_iref.imag);		
	if(data_oref.real)
		free(data_oref.real);
	if(data_oref.imag)
		free(data_oref.imag);
	
	return err;
}
void Convolutioner_FrequencyDomain_OpenCL::process(AudioInOutBuffers<float_type>& audio ) {
    
    //
    unsigned int _2B = audio.channelLength_ * 2;
    unsigned int _B  = audio.channelLength_;
    unsigned int _C  = audio.numOfChannels_;                        //numOfChannels
    unsigned int _P  = partitionedIR_.get_numOfPartsPerChannel();   //numOfIRPartsPerChannel
    //.
    
    //_ if >>>latency<<< or >>>number of channels<<< changed:
    //      set partitionedIR
    //      recreate buffers
    //      recreate fft plans
    if ( window_.get_inputBlockSize() != audio.channelLength_ || window_.get_numOfChannels() != audio.numOfChannels_) {
        
        //Setting partitionedIR
        if (window_.get_inputBlockSize() != audio.channelLength_) {
            
            partitionedIR_.setNewIRF( irf_, audio.channelLength_ );
            _P = partitionedIR_.get_numOfPartsPerChannel();
            
            //Recreate, initialize buffers, and set as kernel arguments: PIR
            //recreate
            bufferPIR_R_.recreate(CL_MEM_READ_ONLY, _2B * _C * _P);
            bufferPIR_I_.recreate(CL_MEM_READ_ONLY, _2B * _C * _P);
            //.
            
            //initialize
            bufferPIR_R_.set(partitionedIR_.real_     );
            bufferPIR_I_.set(partitionedIR_.imaginary_);
            //.
            
            //set as kernel argument
            bufferPIR_R_.setAsKernelArgument(0, complexMultiplyAdd_kernel_);
            bufferPIR_I_.setAsKernelArgument(1, complexMultiplyAdd_kernel_);
            //.
            //.(Recreate...)
            
        }
        //.
        
        //Recreate initialize buffers, and set as kernel arguments: transform, FDL, accumulator
        //recreate
        /****/bufferTransform_R_.recreate(CL_MEM_READ_WRITE,    _2B * _C        );
        /****/bufferTransform_I_.recreate(CL_MEM_READ_WRITE,    _2B * _C        );
        /**********/bufferFDL_R_.recreate(CL_MEM_READ_WRITE,    _2B * _C * _P   );
        /**********/bufferFDL_I_.recreate(CL_MEM_READ_WRITE,    _2B * _C * _P   );
        /**/bufferAccumulator_R_.recreate(CL_MEM_READ_WRITE,    _2B * _C        );
        /**/bufferAccumulator_I_.recreate(CL_MEM_READ_WRITE,    _2B * _C        );
        cpu_bufferAccumulator_R_ = new float_type[_2B * _C ];
        cpu_bufferAccumulator_I_ = new float_type[_2B * _C ];
        //.
        
        //initialize FDL with 0
        bufferFDL_R_.fillWithZero();
        bufferFDL_I_.fillWithZero();
        lastInsertedDelayLineIdx = 0;
        //.
        
        //set as kernel argument
        /**********/bufferFDL_R_.setAsKernelArgument(2, complexMultiplyAdd_kernel_);
        /**********/bufferFDL_I_.setAsKernelArgument(3, complexMultiplyAdd_kernel_);
        /**/bufferAccumulator_R_.setAsKernelArgument(4, complexMultiplyAdd_kernel_);
        /**/bufferAccumulator_I_.setAsKernelArgument(5, complexMultiplyAdd_kernel_);
        //.
        //.(Recreate...)
        
        //Recreate plans
        clFFT_Dim3 dim;
        dim.x = _2B;
        dim.y = 1;
        dim.z = 1;
        fftPlan_ = clFFT_CreatePlan(context_, dim, clFFT_1D, clFFT_SplitComplexFormat, &lastCommandStatus_);
        //.
    }
    
    //update each time bufferGlobalParameters because of incrementing of lastInsertedDelayLineIdx
    /*(_2B, _C, _P, pir_C, FDL_LINE)*/
    cpuData_bufferGlobalParameters_[0] = _2B;
    cpuData_bufferGlobalParameters_[1] = _C;
    cpuData_bufferGlobalParameters_[2] = _P;
    cpuData_bufferGlobalParameters_[3] = irf_->numOfChannels_;
    cpuData_bufferGlobalParameters_[4] = lastInsertedDelayLineIdx;
    
    bufferGlobalParameters_.set(cpuData_bufferGlobalParameters_);
    //.
    
    //Update channelsWindow
    window_.update( audio, /*history size*/ _B );
    //.
    
    //Init >>bufferTransform<<
    bufferTransform_R_.set(window_.buffer_.data_);
    for(unsigned int i = 0; i < _2B * _C; ++i)
        cpu_bufferAccumulator_I_[i]=0;
    bufferTransform_I_.set(cpu_bufferAccumulator_I_);
    //.
    
    //Make fft of bufferTransform
    lastCommandStatus_ = clFFT_ExecutePlannar(  cmdQueue_, fftPlan_, _C, clFFT_Forward,
                                              bufferTransform_R_, bufferTransform_I_,
                                              bufferTransform_R_, bufferTransform_I_,
                                              0, NULL, NULL );
    //.
    
    //Copy bufferTransform into bufferFDL (inserting new delay line) (real and imaginary part)
    clEnqueueCopyBuffer(    cmdQueue_, bufferTransform_R_, bufferFDL_R_,
                        0, lastInsertedDelayLineIdx * (_2B * _C ) * sizeof(float_type), (_2B * _C ) * sizeof(float_type),
                        0, NULL, NULL);
    clEnqueueCopyBuffer(    cmdQueue_, bufferTransform_I_, bufferFDL_I_,
                        0, lastInsertedDelayLineIdx * (_2B * _C ) * sizeof(float_type), (_2B * _C ) * sizeof(float_type),
                        0, NULL, NULL);
    //.
    
    //Increment host lastInsertedDelayLine
    lastInsertedDelayLineIdx = (lastInsertedDelayLineIdx + 1 ) % _P;
    //.
    
    //Execute kernel
    size_t globalWorkSize[1];
    globalWorkSize[0] =  _2B * _C /* == window_.get_allLength() */;
    lastCommandStatus_ = clEnqueueNDRangeKernel(cmdQueue_, complexMultiplyAdd_kernel_, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL);
    if(lastCommandStatus_ == -4) {
        std::cout << "Too much amount of memory must be allocated on the GPU due to lenght of impulse response and number of channels.";
        throw int();
    }
    else if(lastCommandStatus_ != 0) {
        std::cout << "Error while sending clEnqueueNDRangeKernel.";
        throw int();
    }
    //.
    
    //ifft of bufferAccumulator
    lastCommandStatus_ = clFFT_ExecutePlannar(  cmdQueue_, fftPlan_, _C, clFFT_Inverse,
                                              bufferAccumulator_R_, bufferAccumulator_I_,
                                              bufferAccumulator_R_, bufferAccumulator_I_,
                                              0, NULL, NULL );
    //.
    
    //Copy from bufferAccumulator to cpu
    bufferAccumulator_R_.get(cpu_bufferAccumulator_R_);
    //.
    
    //Flushing and finishing
    clFlush(cmdQueue_);
    clFinish(cmdQueue_);
    //.
    
    //Write fftw vector form to audio.outputChannel[number of Channel]
    for (unsigned int channNum = 0; channNum < _C; ++channNum)
        for (unsigned sampleNum = 0; sampleNum < _B; ++sampleNum)
            audio.out_[channNum][sampleNum] = (cpu_bufferAccumulator_R_[channNum*_2B + _B + sampleNum])/_2B;
    //.
}