コード例 #1
0
ファイル: op_model_bf533.c プロジェクト: 3sOx/asuswrt-merlin
int pm_overflow_handler(int irq, struct pt_regs *regs)
{
	int is_kernel;
	int i, cpu;
	unsigned int pc, pfctl;
	unsigned int count[2];

	pr_debug("get interrupt in %s\n", __FUNCTION__);
	if (oprofile_running == 0) {
		pr_debug("error: entering interrupt when oprofile is stopped.\n\r");
		return -1;
	}

	is_kernel = get_kernel();
	cpu = smp_processor_id();
	pc = regs->pc;
	pfctl = ctr_read();

	/* read the two event counter regs */
	count_read(count);

	/* if the counter overflows, add sample to oprofile buffer */
	for (i = 0; i < 2; ++i) {
		if (oprofile_running) {
			oprofile_add_sample(regs, i);
		}
	}

	/* reset the perfmon counter */
	ctr_write(curr_pfctl);
	count_write(curr_count);
	return 0;
}
コード例 #2
0
ファイル: tensor.cpp プロジェクト: hccl/cpuxnn
    void Tensor4d::eltProduct(const Tensor4d & rhs) {
        assert(dims_[0] == rhs.get_dim(0));
        assert(dims_[1] == rhs.get_dim(1));
        assert(dims_[2] == rhs.get_dim(2));
        assert(dims_[3] == rhs.get_dim(3));

        for (UINT i = 0; i < dims_[0]; ++i)
            for (UINT j = 0; j < dims_[1]; ++j)
                get_kernel(i, j).eltProduct(rhs.get_kernel(i, j));
    }        
コード例 #3
0
static void power4_handle_interrupt(struct pt_regs *regs,
				    struct op_counter_config *ctr)
{
	unsigned long pc;
	int is_kernel;
	int val;
	int i;
	unsigned int mmcr0;
	unsigned long mmcra;

	mmcra = mfspr(SPRN_MMCRA);

	pc = get_pc(regs);
	is_kernel = get_kernel(pc, mmcra);

	/* set the PMM bit (see comment below) */
	mtmsrd(mfmsr() | MSR_PMM);

	for (i = 0; i < cur_cpu_spec->num_pmcs; ++i) {
		val = ctr_read(i);
		if (val < 0) {
			if (oprofile_running && ctr[i].enabled) {
				oprofile_add_ext_sample(pc, regs, i, is_kernel);
				ctr_write(i, reset_value[i]);
			} else {
				ctr_write(i, 0);
			}
		}
	}

	mmcr0 = mfspr(SPRN_MMCR0);

	/* reset the perfmon trigger */
	mmcr0 |= MMCR0_PMXE;

	/*
	 * We must clear the PMAO bit on some (GQ) chips. Just do it
	 * all the time
	 */
	mmcr0 &= ~MMCR0_PMAO;

	/* Clear the appropriate bits in the MMCRA */
	mmcra &= ~cur_cpu_spec->oprofile_mmcra_clear;
	mtspr(SPRN_MMCRA, mmcra);

	/*
	 * now clear the freeze bit, counting will not start until we
	 * rfid from this exception, because only at that point will
	 * the PMM bit be cleared
	 */
	mmcr0 &= ~MMCR0_FC;
	mtspr(SPRN_MMCR0, mmcr0);
}
コード例 #4
0
ファイル: FretData.cpp プロジェクト: apolitis/imp
void FretData::init_grids
     (const Floats& d_grid_int, Float R0, Float Rmin, Float Rmax, bool do_limit)
{
  // grid on distance between termini
  for(unsigned l=0; l<d_term_.size(); ++l){
   // grid on sigma
   for(unsigned i=0; i<s_grid_.size(); ++i){
    // grid on distance between center of GMM
    for(unsigned j=0; j<d_center_.size(); ++j){
     Float marg=0.;
     Float norm=0.;
     unsigned kmin=0;
     unsigned kmax=d_grid_int.size();
     // find boundaries for marginalization
     if(do_limit){
      kmin=get_closest(d_grid_int,std::max(Rmin,d_term_[l]-Rmax));
      kmax=get_closest(d_grid_int,d_term_[l]+Rmax);
     }
     // do the marginalization
     for(unsigned k=kmin+1; k<kmax; ++k){

      Float dx = d_grid_int[k] - d_grid_int[k-1];

      Float prob   = get_probability(d_grid_int[k],   d_center_[j], s_grid_[i]);
      Float probm1 = get_probability(d_grid_int[k-1], d_center_[j], s_grid_[i]);

      Float kernel   = get_kernel( d_grid_int[k],   R0 );
      Float kernelm1 = get_kernel( d_grid_int[k-1], R0 );

      marg += ( kernel * prob + kernelm1 * probm1 ) / 2.0 * dx;
      norm += ( prob + probm1 ) / 2.0 * dx;
     }
     // store in grid_ and norm_
     grid_.push_back(marg);
     norm_.push_back(norm);
    }
   }
  }
}
コード例 #5
0
ファイル: r1.c プロジェクト: pengkh/proton
int main(int argc, char *argv[])
{

    prepare();

    base_info vm_base_info;

    vm_base_info.issue = get_dis();
    vm_base_info.hostname = get_hostname();
    vm_base_info.kernel = get_kernel();
    vm_base_info.arch = get_arch();

    printf("%d %d\n", vm_base_info.issue, vm_issue);
    printf("%s\n", vm_base_info.hostname);
    printf("%s\n", vm_base_info.kernel);
    printf("%s\n", vm_base_info.arch);

    /* TODO: not forget memory free */

}
コード例 #6
0
ファイル: juniper.cpp プロジェクト: cran/JuniperKernel
// [[Rcpp::export]]
void rebroadcast_input(SEXP kernel, const std::string& execution_input, const int execution_count) {
  get_kernel(kernel)->_request_server->rebroadcast_input(execution_input, execution_count);
}
コード例 #7
0
ファイル: juniper.cpp プロジェクト: cran/JuniperKernel
// [[Rcpp::export]]
void post_handle(SEXP kernel, Rcpp::List res, std::string sockName) {
  JuniperKernel* jk = get_kernel(kernel);
  jk->post_handle(res, sockName);
}
コード例 #8
0
ファイル: juniper.cpp プロジェクト: cran/JuniperKernel
// [[Rcpp::export]]
SEXP sock_recv(SEXP kernel, std::string sockName) {
  JuniperKernel* jk = get_kernel(kernel);
  return jk->recv(sockName);
}
コード例 #9
0
bool smo::step(int i1, int i2){
	
	if (i1==i2 ) return 0;
	
	double a1, a2;
	double alph1 = (*alpha)[i1];
	double alph2 = (*alpha)[i2];
	int y1 = (*ypsilon)[i1];
	int y2 = (*ypsilon)[i2];
	double f1 = (*f_cache)[i1];
	double f2 = (alph2 > 0 && alph2 < c)?(*f_cache)[i2]:function(i2) -y2;
	int s = y1*y2;
	double L,H,Lobj,Hobj;
	if (y1==y2) {
		double gamma = alph1 + alph2;
		if (gamma > c) {
			L = gamma - c;
			H = c;
		}else{
			L = 0;
			H = gamma;
		}
	}else {
		double gamma = alph1 - alph2;
		if (gamma > 0) {
			L = 0;
			H = c - gamma;
		}else{
			L = -gamma;
			H = c;
		}
	}
	if ( L == H ) 
		return false;
	double k12 = get_kernel(i1,i2);//kern->calculate(elem->at(i1),elem->at(i2));
	double k11 = get_kernel(i1,i1);//kern->calculate(elem->at(i1),elem->at(i1));
	double k22 = get_kernel(i2,i2);//kern->calculate(elem->at(i2),elem->at(i2));
	double eta =2*k12 - k11 - k22;
	if (eta < 0) {
		a2 = alph2 + y2*(f2 - f1)/eta;  //mjenjao
		if (a2<L) 
			a2 =L;
		else if (a2>H) 
			a2 =H;
	}else {
		double c1 = eta/2;
		double c2 = y2*(f1-f2) -eta*alph2;
		Lobj = c1*L*L+c2*L;
		Hobj = c1*H*H+c2*H;
		if (Lobj>Hobj) 
			a2 = L;
		else if (Lobj < Hobj) 
			a2 = H;
		else 
			a2 = alph2;
	}
	if (fabs(a2 -alph2) < eps*(a2+alph2+eps))
		return 0;
	a1 = alph1 - s*(a2 - alph2);
	if (a1 < 0) {
		a2 += s*a1;
		a1 =0;
	}else if (a1 > c) {
		double t = a1-c;
		a2+=s*t;
		a1=c;
	}


	double t1 = y1*(a1-alph1);
	double t2 = y2*(a2-alph2);
	(*alpha)[i1] = a1;
	(*alpha)[i2] = a2;
	double tmp_low = (*f_cache)[i1];
	double tmp_up = (*f_cache)[i1];
	int tmp_ilow = i1;
	int tmp_iup = i1;
	for (int i = 0; i <alpha->size(); i++) {
		if ((*alpha)[i] >0 && (*alpha)[i] < c){
			(*f_cache)[i] += t1*get_kernel(i1,i)/*kern->calculate(elem->at(i1),elem->at(i))*/+t2*get_kernel(i2,i);/*kern->calculate(elem->at(i2),elem->at(i));*/
		}
		if (((*alpha)[i] >0 && (*alpha)[i] < c) || i==i1 || i==i2){
			if ((*f_cache)[i]>tmp_low){
				tmp_low = (*f_cache)[i];
				i_low = i;
			}
			if ((*f_cache)[i]<tmp_up){
				tmp_up = (*f_cache)[i];
				i_up = i;
			}
		}
	}	
	return 1;
}
コード例 #10
0
int main(int argc, char *argv[])
{
	int rc;
	TSS_HCONTEXT    hContext;
	TSS_HTPM	hTPM;
	int i;
	unsigned int pcr_len = 0;
	unsigned char *pcr_value;
	unsigned char sha1[20];

	/* The argument being the executable */
	if (argc <= 1) {
		printf("Must give atleast one argument\n");
		exit(1);
	}

	/* Find out the currently running kernel and return its hash */
	rc = get_kernel(sha1);
	if (rc != 0) {
		printf("Kernel read failed\n");
		exit(1);
	}

	/* Start creating the TSS context */
	rc = Tspi_Context_Create(&hContext);
	if (rc != TSS_SUCCESS)
		printf("Context creation failed!\n");

	rc = Tspi_Context_Connect(hContext, NULL);
	if (rc != TSS_SUCCESS)
		printf("Context connection failed!\n");

	rc = Tspi_Context_GetTpmObject(hContext, &hTPM);
	if (rc != TSS_SUCCESS)
		printf("Getting TPM Object failed\n");

	rc = Tspi_TPM_PcrRead(hTPM, 16, &pcr_len, &pcr_value);
	printf("Length of data read: %d\n", pcr_len);
	for (i = 0; i < pcr_len; i++)
		printf("%x ", pcr_value[i]);
	printf("\n");

	/* Trousers wonkiness - have to pass SHA1 hash */
	rc = Tspi_TPM_PcrExtend(hTPM, 16, 20, sha1, NULL, 
			&pcr_len, &pcr_value);
	if (rc != TSS_SUCCESS) {
		printf("Kernel Extend failed : %d\n", rc);
	}

	/* argv[1] is the path to secure_daemon */
	rc = get_hash(argv[1], sha1);
	if (rc != 0)
		exit(1);

	rc = Tspi_TPM_PcrExtend(hTPM, 16, 20, sha1, NULL, 
			&pcr_len, &pcr_value);
	if (rc != TSS_SUCCESS) {
		printf("Secure Daemon Extend failed : %d\n", rc);
	} 

	printf("Length of extended PCR value: %d\n", pcr_len);
	for (i = 0; i < pcr_len; i++)
		printf("%x ", pcr_value[i]);
	printf("\n");

	free(pcr_value);
	Tspi_Context_Close(hContext);

	return 0;
}
コード例 #11
0
ファイル: juniper.cpp プロジェクト: cran/JuniperKernel
// [[Rcpp::export]]
SEXP boot_kernel(SEXP kernel, int interrupt_event) {
  JuniperKernel* jk = get_kernel(kernel);
  return jk->start_bg_threads(interrupt_event);
}
コード例 #12
0
ファイル: tensor.cpp プロジェクト: hccl/cpuxnn
 void Tensor4d::copy(const Tensor4d & lhs) {
     for (UINT i = 0; i < dims_[0]; ++i)
         for (UINT j = 0; j < dims_[1]; ++j)
             get_kernel(i, j).copy(lhs.get_kernel(i, j));
 }
コード例 #13
0
ファイル: tensor.cpp プロジェクト: hccl/cpuxnn
 void Tensor4d::slrProduct(const float scalar) {
     /* simple implementation for test */
     for (UINT i = 0; i < dims_[0]; ++i)
         for (UINT j = 0; j < dims_[1]; ++j)
             get_kernel(i, j).slrProduct(scalar);
 }
コード例 #14
0
void convolution(int argc, char *argv[])
{
	if (argc != 3) {
		printf("Invalid number of arguments.\n");
		return;
	}

	Image *input_image;
	Image *output_image;
	int error;

	if ((error = TGA_readImage(argv[0], &input_image)) != 0) {
		printf("Error when opening image: %d\n", error);
		return;
	}

	if ((error = Image_new(input_image->width,
			       input_image->height,
			       input_image->channels,
			       &output_image)) != 0) {
		printf("Error when creating output image : %d\n", error);
		Image_delete(input_image);
		return;
	}

	Kernel kernel;

	if ((error = get_kernel(argv[1], &kernel)) != 0) {
		printf("Error when opening kernel : %d\n", error);
		Image_delete(input_image);
		Image_delete(output_image);
		return;
	}

	int radius_x, radius_y;
	int x, y, c;

	radius_x = (kernel.width - 1) / 2;
	radius_y = (kernel.height - 1) / 2;

	Benchmark bench;
	start_benchmark(&bench);

	for (c = 0; c < input_image->channels; ++c) {
		uint8_t *out_data = output_image->data[c];
		
		for (y = 0; y < input_image->height; ++y) {
			for (x = 0; x < input_image->width; ++x) {
				
				int kx, ky;
				float *kernel_data = kernel.data;
				float sum = 0;

				for (ky = -radius_y; ky <= radius_y; ++ky) {
					for (kx = -radius_x; kx <= radius_x; ++kx) {
						int xx = clip(x + kx, 0, input_image->width - 1);
						int yy = clip(y + ky, 0, input_image->height - 1);

						sum += Image_getPixel(input_image, xx, yy, c) * *kernel_data++;
					}
				}

				sum /= kernel.sum;
				*out_data++ = clip(sum, 0, 255);
			}
		}
	}

	end_benchmark(&bench);
	printf("%lu ", bench.elapsed_ticks);
	printf("%lf\n", bench.elapsed_time);

	if ((error = TGA_writeImage(argv[2], output_image)) != 0) {
		printf("Error when writing image: %d\n", error);
	}

	Image_delete(input_image);
	Image_delete(output_image);

	kernel_delete(kernel);
}
コード例 #15
0
ファイル: juniper.cpp プロジェクト: cran/JuniperKernel
// [[Rcpp::export]]
void execute_result(SEXP kernel, Rcpp::List data) {
  get_kernel(kernel)->_request_server->execute_result(from_list_r(data));
}
コード例 #16
0
ファイル: juniper.cpp プロジェクト: cran/JuniperKernel
// [[Rcpp::export]]
void jk_device(SEXP kernel, std::string bg, double width, double height, double pointsize, bool standalone, Rcpp::List aliases) {
  makeDevice(get_kernel(kernel), bg, width, height, pointsize, standalone, aliases);
}
コード例 #17
0
ファイル: tensor.cpp プロジェクト: hccl/cpuxnn
 void Tensor4d::rand_elt() {
     for (UINT i = 0; i < dims_[0]; ++i)
         for (UINT j = 0; j < dims_[1]; ++j)
             get_kernel(i, j).rand_elt();
 }
コード例 #18
0
ファイル: ocl.c プロジェクト: adambarta/spead
void *spead_api_setup(struct spead_api_module_shared *s)
{
  struct sapi_o *a;

  a = malloc(sizeof(struct sapi_o));
  if (a == NULL){
#ifdef DEBUG
    fprintf(stderr, "e: logic could not malloc api obj\n");
#endif
    return NULL;
  }

  if (setup_ocl(KERNELDIR KERNELS_FILE, &(a->ctx), &(a->cq), &(a->p)) != CL_SUCCESS){
#ifdef DEBUG
    fprintf(stderr, "e: setup_ocl error\n");
#endif
    spead_api_destroy(s, a);
    return NULL;
  }


  a->chirp = get_kernel("coherent_dedisperse", &(a->p));
  if (a->chirp == NULL){
#ifdef DEBUG
    fprintf(stderr, "e: get_kernel error\n");
#endif
    spead_api_destroy(s, a);
    return NULL;
  }
  a->power = get_kernel("power", &(a->p));
  if (a->power == NULL){
#ifdef DEBUG
    fprintf(stderr, "e: get_kernel error\n");
#endif
    spead_api_destroy(s, a);
    return NULL;
  }
  a->phase = get_kernel("phase", &(a->p));
  if (a->phase == NULL){
#ifdef DEBUG
    fprintf(stderr, "e: get_kernel error\n");
#endif
    spead_api_destroy(s, a);
    return NULL;
  }

  
#if 0
  a->k = get_kernel("ct", &(a->p));
  if (a->k == NULL){
#ifdef DEBUG
    fprintf(stderr, "e: get_kernel error\n");
#endif
    spead_api_destroy(a);
    return NULL;
  } 
#endif

  a->clin   = NULL;
  a->clout  = NULL;
  a->clpow  = NULL;
  a->host   = NULL;
  
  return a;
}
コード例 #19
0
ファイル: main.cpp プロジェクト: rshkarin/dfr-opencl-oclfft
void dfi_process_sinogram(const char* tiff_input, const char* tiff_output, int center_rotation)
{
  cl_event events[11];

  if(!tiff_input) {
    printf("The filename of input is not valid. (pointer tiff_input = %p)", tiff_input);
    return;
  }

  if(!tiff_output) {
    printf("The filename of output is not valid. (pointer tiff_output = %p)", tiff_output);
    return;
  }

  /////////////////////
  /* Input Data Part */
  /////////////////////

  /* Input a slice properties */
  int bits_per_sample;
  int samples_per_pixel;
  int theta_size;
  int slice_size;

  /* Read the slice */
  clFFT_Complex *data_tiff = tiff_read_complex(tiff_input,
						center_rotation,
						&bits_per_sample,
						&samples_per_pixel,
						&slice_size,
						&theta_size);
  
  //tiff_write_complex("resources/initial-sino.tif", data_tiff, slice_size, theta_size);

  /*
   * OpenCL
   */
  printf("Hey!1\n");
  cl_int status = CL_SUCCESS;
  cl_platform_id platform;

  printf("Hey!1.2\n");
  CL_CHECK_ERROR(clGetPlatformIDs(1, &platform, NULL));

  printf("Hey!2\n");
  cl_device_id devices[10];    // Compute device
  cl_context context;     // Compute context
  cl_uint n_devices = 0;

  printf("@Hey!3\n");
#if GPU
  printf("@Hey!GPU Choosed\n");
  CL_CHECK_ERROR(clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 3, devices, &n_devices));
#else
  printf("@Hey!CPU Choosed\n");
  CL_CHECK_ERROR(clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 3, devices, &n_devices));
#endif

  //cl_device_id current_device = devices[0];
#define current_device devices[0]
  printf("Hey!3.1  n_devices %d\n", n_devices);
  context = clCreateContext(NULL, 1, devices, NULL, NULL, &status);
  printf("Hey!3.2\n");
  CL_CHECK_ERROR(status);

	/*
	 * Device
	 */
	printf("Hey!3.3\n");

	cl_int device_max_cu = 0;
	CL_CHECK_ERROR(clGetDeviceInfo(current_device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &device_max_cu, NULL));
	size_t wg_count = device_max_cu * wavefronts_per_SIMD;
    size_t global_work_size = wg_count * local_work_size;
    printf("Hey!3.4\n");

    /*
     * Queues, Kernels
     */
    cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE;
    cl_command_queue command_queue = clCreateCommandQueue(context, current_device, properties, &status);
    CL_CHECK_ERROR(status);

    printf("Hey!3.5\n");
    cl_kernel kernel_linear_interp = get_kernel("linear_interp", &context, &current_device);
    cl_kernel kernel_zero_ifftshift = get_kernel("zero_ifftshift", &context, &current_device);
    cl_kernel kernel_fftshift = get_kernel("fftshift", &context, &current_device);
    cl_kernel kernel_2dshift = get_kernel("shift2d", &context, &current_device);
    cl_kernel kernel_crop_data = get_kernel("crop_data", &context, &current_device);

    printf("@Hey!3.6\n\n");

	////////////////////////
	/* OpenCL - DFI Part */
	////////////////////////

	/* Reconstruction properties */
	int oversampling_ratio = 2;
	int dx = 1; /* zoom times */
	//int size_s = slice_size * oversampling_ratio;

	int min_theta = 0;
	int max_theta = theta_size - 1;

	int size_zeropad_s = pow(2, ceil(log2((float)slice_size))); /* get length of FFT operations */
	int size_s = size_zeropad_s;

	float d_omega_s = 2 * M_PI / (size_zeropad_s * dx); //normalized ratio [0; 2PI]

  /* Start timer */
  timeval global_tim;
  gettimeofday(&global_tim, NULL);
  double t1_global = global_tim.tv_sec + (global_tim.tv_usec/1000000.0), t2_global = 0.0;

	/////////////////////////////////////
	/* Sinogram shifting + Zeropadding */
	/////////////////////////////////////
	long data_size = slice_size * theta_size * sizeof(clFFT_Complex);
	printf("6 ");
	long zeropad_data_size = theta_size * size_zeropad_s * sizeof(clFFT_Complex);

	/* Buffers */
	cl_mem original_data_buffer = clCreateBuffer(context,
						     CL_MEM_READ_WRITE,
						     data_size,
						     NULL,
						     &status);
	CL_CHECK_ERROR(status);

	CL_CHECK_ERROR(clEnqueueWriteBuffer(command_queue,
					    original_data_buffer,
					    CL_FALSE,
					    0,
					    data_size,
					    data_tiff,
					    0,
					    NULL,
					    &events[0]));

    	cl_mem zeropad_ifftshift_data_buffer = clCreateBuffer(context,
							  CL_MEM_READ_WRITE,
							  zeropad_data_size,
							  NULL,
							  &status);
    CL_CHECK_ERROR(status);

    float *zero_out = (float *)g_malloc0(zeropad_data_size);
    CL_CHECK_ERROR(clEnqueueWriteBuffer(command_queue,
					zeropad_ifftshift_data_buffer,
					CL_FALSE,
					0,
					zeropad_data_size,
					zero_out,
					0,
					NULL,
					&events[1]));

    /* Set arguments */
    CL_CHECK_ERROR(clSetKernelArg(kernel_zero_ifftshift, 0, sizeof(void *), (void *)&original_data_buffer));
    CL_CHECK_ERROR(clSetKernelArg(kernel_zero_ifftshift, 1, sizeof(theta_size), &theta_size));
    CL_CHECK_ERROR(clSetKernelArg(kernel_zero_ifftshift, 2, sizeof(slice_size), &slice_size));

    CL_CHECK_ERROR(clSetKernelArg(kernel_zero_ifftshift, 3, sizeof(void *), (void *)&zeropad_ifftshift_data_buffer));
    CL_CHECK_ERROR(clSetKernelArg(kernel_zero_ifftshift, 4, sizeof(theta_size), &theta_size));
    CL_CHECK_ERROR(clSetKernelArg(kernel_zero_ifftshift, 5, sizeof(size_zeropad_s), &size_zeropad_s));

    /* Run kernel */
    status = clEnqueueNDRangeKernel(command_queue,
				    kernel_zero_ifftshift,
				    1, // work dimensional 1D, 2D, 3D
				    NULL, // offset
				    &global_work_size, // total number of WI
				    &local_work_size, // number of WI in WG
				    2, // number events in wait list
				    events,  // event wait list
				    &events[2]); // event
    CL_CHECK_ERROR(status);

    // Copy result from device to host
    /*
    clFFT_Complex *fur_kernel_sino = (clFFT_Complex *)clEnqueueMapBuffer(command_queue,
    									     	 	 	 	 	 	 	 zeropad_ifftshift_data_buffer,
    									     	 	 	 	 	 	 	 CL_TRUE,
    									     	 	 	 	 	 	 	 CL_MAP_READ,
    									     	 	 	 	 	 	 	 0,
    									     	 	 	 	 	 	 	 zeropad_data_size,
                                                                 	     0, NULL, NULL, NULL );

    clFinish(command_queue);
    tiff_write_complex("resources/zeropad-sino.tif", fur_kernel_sino, size_zeropad_s, theta_size);
    */

    ////////////////////////////////////////////////////////////////////////
    /* Applying 1-D FFT to the each strip of the sinogram and shifting it */
    ////////////////////////////////////////////////////////////////////////

    cl_mem zeropadded_1dfft_data_buffer = clCreateBuffer(context,
							CL_MEM_READ_WRITE,
							zeropad_data_size,
							NULL,
							&status);
    CL_CHECK_ERROR(status);

    /* Setup clAmdFft */
    clFFT_Dim3 sino_fft;
    sino_fft.x = size_zeropad_s;
    sino_fft.y = 1;
    sino_fft.z = 1;

    /* Create FFT plan */
    clFFT_Plan plan_1dfft_sinogram = clFFT_CreatePlan(context,
						      sino_fft,
						      clFFT_1D,
						      clFFT_InterleavedComplexFormat,
						      &status);
    CL_CHECK_ERROR(status);

    /* Execute FFT */
    status = clFFT_ExecuteInterleaved(command_queue,
				      plan_1dfft_sinogram,
				      theta_size,
				      clFFT_Forward,
				      zeropad_ifftshift_data_buffer,
				      zeropadded_1dfft_data_buffer,
				      0,
				      NULL,
				      NULL);
    CL_CHECK_ERROR(status);
    // Free FFT plan
    //clFFT_DestroyPlan(plan_1dfft_sinogram);

    // Copy result from device to host
    /*
    clFFT_Complex *fourier_kernel_sinogram = (clFFT_Complex *)malloc(zeropad_data_size);
    clEnqueueReadBuffer(command_queue, zeropadded_1dfft_data_buffer, CL_TRUE, 0, zeropad_data_size, fourier_kernel_sinogram, 0, NULL, NULL);
    clFinish(command_queue);

	tiff_write_complex("resources/1dfft-sino.tif", fourier_kernel_sinogram, size_zeropad_s, theta_size);
    */

    ///////////////////
    /* Make fftshift */
    ///////////////////

    /* Buffers */
    cl_mem zeropad_fftshift_data_buffer = clCreateBuffer(context,
							CL_MEM_READ_WRITE,
							zeropad_data_size,
							NULL,
							&status);
    CL_CHECK_ERROR(status);

    /* Set arguments */
    CL_CHECK_ERROR(clSetKernelArg(kernel_fftshift, 0, sizeof(void *), (void *)&zeropadded_1dfft_data_buffer));
    CL_CHECK_ERROR(clSetKernelArg(kernel_fftshift, 1, sizeof(theta_size), &theta_size));
    CL_CHECK_ERROR(clSetKernelArg(kernel_fftshift, 2, sizeof(size_zeropad_s), &size_zeropad_s));

    CL_CHECK_ERROR(clSetKernelArg(kernel_fftshift, 3, sizeof(void *), (void *)&zeropad_fftshift_data_buffer));
    CL_CHECK_ERROR(clSetKernelArg(kernel_fftshift, 4, sizeof(theta_size), &theta_size));
    CL_CHECK_ERROR(clSetKernelArg(kernel_fftshift, 5, sizeof(size_zeropad_s), &size_zeropad_s));

    /* Run kernel */
    status = clEnqueueNDRangeKernel(command_queue,
				    kernel_fftshift,
				    1, // work dimensional 1D, 2D, 3D
				    NULL, // offset
				    &global_work_size, // total number of WI
				    &local_work_size, // number of WI in WG
				    0, // number events in wait list
				    NULL,  // event wait list
				    &events[3]); // event
    CL_CHECK_ERROR(status);

    /* Copy result from device to host */
    /*
    clFFT_Complex *fur_kernel_fftshift_sino = (clFFT_Complex *)clEnqueueMapBuffer(command_queue,
										      	  zeropad_fftshift_data_buffer,
										      	  CL_TRUE,
      									   	      CL_MAP_READ,0,zeropad_data_size,
      									   	      0, NULL, NULL, NULL );

    clFinish(command_queue);
    tiff_write_complex("resources/fftshift-sino.tif", fur_kernel_fftshift_sino, size_zeropad_s, theta_size);
    */

     ////////////////////////
     /* Data Interpolation */
     ////////////////////////

    /* Performing Interpolation */
    cl_long data_length = size_s * size_s;
    cl_int in_rows = theta_size;
    cl_int in_cols = size_zeropad_s;

    cl_float norm_ratio = d_omega_s/dx;

    cl_float in_rows_first_val = min_theta;
    cl_float in_rows_last_val = max_theta;

    cl_float in_cols_first_val = (-in_cols/2)*norm_ratio;
    cl_float in_cols_last_val = (in_cols/2-1)*norm_ratio;

    cl_int interp_rows = size_s;
    cl_int interp_cols = interp_rows;

    cl_int iparams[5];
    iparams[0] = in_rows;
    iparams[1] = in_cols;
    iparams[2] = dx;
    iparams[3] = interp_rows;
    iparams[4] = interp_cols;

    cl_float fparams[5];
    fparams[0] = in_rows_first_val;
    fparams[1] = in_rows_last_val;
    fparams[2] = in_cols_first_val;
    fparams[3] = in_cols_last_val;
    fparams[4] = norm_ratio;

    /* Buffers */
    cl_mem i_buffer = clCreateBuffer(context,
                                     CL_MEM_READ_ONLY,
                                     sizeof(cl_int) * 5,
                                     NULL,
                                     &status);
    CL_CHECK_ERROR(status);

    CL_CHECK_ERROR(clEnqueueWriteBuffer(command_queue,
					i_buffer,
					CL_FALSE,
					0,
					sizeof(cl_int) * 5,
					iparams,
					0,
					NULL,
					&events[4]));

    cl_mem f_buffer = clCreateBuffer(context,
                                     CL_MEM_READ_ONLY,
                                     sizeof(cl_float) * 5,
                                     NULL,
                                     &status);
    CL_CHECK_ERROR(status);

    CL_CHECK_ERROR(clEnqueueWriteBuffer(command_queue,
					f_buffer,
					CL_FALSE,
					0,
					sizeof(cl_float) * 5,
					fparams,
					0,
					NULL,
					&events[5]));

    cl_mem output_buffer = clCreateBuffer(context,
					  CL_MEM_READ_WRITE,
					  data_length * sizeof(clFFT_Complex),
					  NULL,
					  &status);
    CL_CHECK_ERROR(status);

    /* Set arguments */
    CL_CHECK_ERROR(clSetKernelArg(kernel_linear_interp, 0, sizeof(void *), (void *)&i_buffer));
    CL_CHECK_ERROR(clSetKernelArg(kernel_linear_interp, 1, sizeof(void *), (void *)&f_buffer));
    CL_CHECK_ERROR(clSetKernelArg(kernel_linear_interp, 2, sizeof(void *), (void *)&zeropad_fftshift_data_buffer));
    CL_CHECK_ERROR(clSetKernelArg(kernel_linear_interp, 3, sizeof(void *), (void *)&output_buffer));
    CL_CHECK_ERROR(clSetKernelArg(kernel_linear_interp, 4, sizeof(data_length), &data_length));

    /* Run kernel */
    status = clEnqueueNDRangeKernel(command_queue,
				    kernel_linear_interp,
				    1, // work dimensional 1D, 2D, 3D
				    NULL, // offset
				    &global_work_size, // total number of WI
				    &local_work_size, // nomber of WI in WG
				    3, // num events in wait list
				    events + 3,  // event wait list
				    &events[6]); // event
    CL_CHECK_ERROR(status);
    //clFinish(command_queue);

    // Copy result from device to host
    /*
    clFFT_Complex *interpolated_spectrum = (clFFT_Complex *)clEnqueueMapBuffer(command_queue,
                                                                  	  	   output_buffer,
                                                                  	  	   CL_TRUE,
                                                                  	  	   CL_MAP_READ,
                                                                 	  	   0,
                                                                  	  	   data_length * sizeof(clFFT_Complex),
                                                                  	  	   0, NULL, NULL, NULL );

    clFinish(command_queue);
    tiff_write_complex("resources/interpolated-sino.tif", interpolated_spectrum, size_s, size_s);
    */

    ///////////////////////////////////////////////////
    /* Applying 2-D FFT to the interpolated spectrum */
    ///////////////////////////////////////////////////

    /* Setup 2D IFFT */
    clFFT_Dim3 sino_2dfft;
    sino_2dfft.x = size_s;
    sino_2dfft.y = size_s;
    sino_2dfft.z = 1;

    /* Create 2D IFFT plan */
    clFFT_Plan plan_2difft = clFFT_CreatePlan(context,
					      sino_2dfft,
					      clFFT_2D,
					      clFFT_InterleavedComplexFormat,
					      &status);
    CL_CHECK_ERROR(status);

    /* Execute 2D IFFT */
    cl_mem reconstructed_image_buffer = clCreateBuffer(context,
						       CL_MEM_READ_WRITE,
						       data_length * sizeof(clFFT_Complex),
						       NULL,
						       &status);
    CL_CHECK_ERROR(status);

    status = clFFT_ExecuteInterleaved(command_queue,
				      plan_2difft,
				      1,
				      clFFT_Inverse,
				      output_buffer,
				      reconstructed_image_buffer,
				      0,
				      NULL,
				      NULL);
    CL_CHECK_ERROR(status);


    // Copy result from device to host
    /*
    clFFT_Complex *ifft2d_interpolated_spectrum = (clFFT_Complex *)malloc(data_length * sizeof(clFFT_Complex));
    clEnqueueReadBuffer(command_queue,
    					reconstructed_image_buffer,
    					CL_TRUE,
    					0,
    					data_length * sizeof(clFFT_Complex),
    					ifft2d_interpolated_spectrum,
    					0,
    					NULL,
    					NULL);
    tiff_write_complex("resources/ifft2d_interpolated_spectrum.tif", ifft2d_interpolated_spectrum, size_s, size_s);
    clFinish(command_queue);
    */

    /////////////////////////////////////////////////
    /* Applying 2-D fftshidt to the restored image */
    /////////////////////////////////////////////////

    /* Buffers */
    cl_mem two_dim_fftshifted_data_buffer = clCreateBuffer(context,
							   CL_MEM_READ_WRITE,
							   data_length * sizeof(clFFT_Complex),
							   NULL,
							   &status);
    CL_CHECK_ERROR(status);

    /* Set arguments */
    cl_int inverse_flag = 0;

    CL_CHECK_ERROR(clSetKernelArg(kernel_2dshift, 0, sizeof(void *), (void *)&reconstructed_image_buffer));
    CL_CHECK_ERROR(clSetKernelArg(kernel_2dshift, 1, sizeof(void *), (void *)&two_dim_fftshifted_data_buffer));
    CL_CHECK_ERROR(clSetKernelArg(kernel_2dshift, 2, sizeof(interp_rows), &interp_rows));
    CL_CHECK_ERROR(clSetKernelArg(kernel_2dshift, 3, sizeof(interp_cols), &interp_cols));
    CL_CHECK_ERROR(clSetKernelArg(kernel_2dshift, 4, sizeof(inverse_flag), &inverse_flag));

    /* Run kernel */
    status = clEnqueueNDRangeKernel(command_queue,
				    kernel_2dshift,
				    1, // work dimensional 1D, 2D, 3D
				    NULL, // offset
				    &global_work_size, // total number of WI
				    &local_work_size, // number of WI in WG
				    1, // number events in wait list
				    &events[6],  // event wait list
				    &events[7]); // event
    CL_CHECK_ERROR(status);

    /* Copy result from device to host */
    /*
    clFFT_Complex *two_dim_fftshifted_data = (clFFT_Complex *)clEnqueueMapBuffer(command_queue,
										     	 two_dim_fftshifted_data_buffer,
										     	 CL_TRUE,
										     	 CL_MAP_READ,
										     	 0,
										     	 data_length * sizeof(clFFT_Complex),
										     	 0, NULL, NULL, NULL );


    clFinish(command_queue);
    */

    ////////////////
    /* Crop data  */
    ///////////////
    float lt_offset = 0, rb_offset = 0;

    int dif_sides = interp_cols - slice_size;
    if (dif_sides%2) {
       lt_offset = floor(dif_sides / 2.0);
       rb_offset = ceil(dif_sides / 2.0);
    }
    else {
       lt_offset = rb_offset = dif_sides / 2.0;
    }

    /* Buffers */
    long cropped_data_length = slice_size * slice_size * sizeof(clFFT_Complex);
    cl_mem cropped_restored_image_data_buffer = 
	      clCreateBuffer(context,
			     CL_MEM_READ_WRITE,
			     cropped_data_length,
			     NULL,
			     &status);
    CL_CHECK_ERROR(status);

    /* Set arguments */
    CL_CHECK_ERROR(clSetKernelArg(kernel_crop_data, 0, sizeof(void *), (void *)&two_dim_fftshifted_data_buffer));
    CL_CHECK_ERROR(clSetKernelArg(kernel_crop_data, 1, sizeof(void *), (void *)&cropped_restored_image_data_buffer));
    CL_CHECK_ERROR(clSetKernelArg(kernel_crop_data, 2, sizeof(slice_size), &slice_size));
    CL_CHECK_ERROR(clSetKernelArg(kernel_crop_data, 3, sizeof(interp_cols), &interp_cols));
    CL_CHECK_ERROR(clSetKernelArg(kernel_crop_data, 4, sizeof(lt_offset), &lt_offset));
    CL_CHECK_ERROR(clSetKernelArg(kernel_crop_data, 5, sizeof(rb_offset), &rb_offset));

    /* Run kernel */
    status = clEnqueueNDRangeKernel(command_queue,
				    kernel_crop_data,
				    1, // work dimensional 1D, 2D, 3D
				    NULL, // offset
				    &global_work_size, // total number of WI
				    &local_work_size, // number of WI in WG
				    1, // number events in wait list
				    &events[7],  // event wait list
				    &events[8]); // event
    CL_CHECK_ERROR(status);

    CL_CHECK_ERROR(clFinish(command_queue));
    clFFT_DestroyPlan(plan_2difft);
    clFFT_DestroyPlan(plan_1dfft_sinogram);


    //timing
    float ms = 0.0, total_ms = 0.0, global_ms = 0.0, deg = 1.0e-6f;
 
      /* Stop timer */
  gettimeofday(&global_tim, NULL);
  t2_global = global_tim.tv_sec+(global_tim.tv_usec/1000000.0);
  printf("\n(Total time - timeofday) %f seconds elapsed\n", (t2_global-t1_global)*1000.0);


    cl_ulong start, end;

    CL_CHECK_ERROR(clGetEventProfilingInfo(events[0], CL_PROFILING_COMMAND_START,sizeof(cl_ulong), &start, NULL));
    CL_CHECK_ERROR(clGetEventProfilingInfo(events[0], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL));
    ms = (end - start) * deg;
    total_ms += ms;
    printf("\n(Sinogram shifting + Zeropadding  write_op1):%f", ms);

    CL_CHECK_ERROR(clGetEventProfilingInfo(events[1], CL_PROFILING_COMMAND_START,sizeof(cl_ulong), &start, NULL));
    CL_CHECK_ERROR(clGetEventProfilingInfo(events[1], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL));
    ms = (end - start) * deg;
    total_ms += ms;
    printf("\n(Sinogram shifting + Zeropadding  write_op2):%f", ms);

    CL_CHECK_ERROR(clGetEventProfilingInfo(events[2], CL_PROFILING_COMMAND_START,sizeof(cl_ulong), &start, NULL));
    CL_CHECK_ERROR(clGetEventProfilingInfo(events[2], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL));
    ms = (end - start) * deg;
    total_ms += ms;
    printf("\n(Sinogram shifting + Zeropadding):%f", ms);
    printf("\nTOTAL(Sinogram shifting + Zeropadding):%f\n", total_ms);
    global_ms += total_ms;
    total_ms = 0.0;

    CL_CHECK_ERROR(clGetEventProfilingInfo(events[2], CL_PROFILING_COMMAND_END,sizeof(cl_ulong), &start, NULL));
    CL_CHECK_ERROR(clGetEventProfilingInfo(events[3], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &end, NULL));
    ms = (end - start) * deg;
    total_ms += ms;
    printf("\n(Applying 1-D FFT):%f\n", total_ms);
    global_ms += total_ms;
    total_ms = 0.0;

    CL_CHECK_ERROR(clGetEventProfilingInfo(events[3], CL_PROFILING_COMMAND_START,sizeof(cl_ulong), &start, NULL));
    CL_CHECK_ERROR(clGetEventProfilingInfo(events[3], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL));
    ms = (end - start) * deg;
    total_ms += ms;
    printf("\n(Shift 1-D FFT data):%f\n", total_ms);
    global_ms += total_ms;
    total_ms = 0.0;

    CL_CHECK_ERROR(clGetEventProfilingInfo(events[4], CL_PROFILING_COMMAND_START,sizeof(cl_ulong), &start, NULL));
    CL_CHECK_ERROR(clGetEventProfilingInfo(events[4], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL));
    ms = (end - start) * deg;
    total_ms += ms;
    printf("\n(Data Interpolation write_op1):%f", ms);

    CL_CHECK_ERROR(clGetEventProfilingInfo(events[5], CL_PROFILING_COMMAND_START,sizeof(cl_ulong), &start, NULL));
    CL_CHECK_ERROR(clGetEventProfilingInfo(events[5], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL));
    ms = (end - start) * deg;
    total_ms += ms;
    printf("\n(Data Interpolation write_op2):%f", ms);

    CL_CHECK_ERROR(clGetEventProfilingInfo(events[6], CL_PROFILING_COMMAND_START,sizeof(cl_ulong), &start, NULL));
    CL_CHECK_ERROR(clGetEventProfilingInfo(events[6], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL));
    ms = (end - start) * deg;
    total_ms += ms;
    printf("\n(Data Interpolation):%f", ms);
    printf("\nTOTAL(Data Interpolation):%f\n", total_ms);
    global_ms += total_ms;
    total_ms = 0.0;

    CL_CHECK_ERROR(clGetEventProfilingInfo(events[6], CL_PROFILING_COMMAND_END,sizeof(cl_ulong), &start, NULL));
    CL_CHECK_ERROR(clGetEventProfilingInfo(events[7], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &end, NULL));
    ms = (end - start) * deg;
    total_ms += ms;
    printf("\n(Applying 2-D IFFT):%f\n", total_ms);
    global_ms += total_ms;
    total_ms = 0.0;

    CL_CHECK_ERROR(clGetEventProfilingInfo(events[7], CL_PROFILING_COMMAND_START,sizeof(cl_ulong), &start, NULL));
    CL_CHECK_ERROR(clGetEventProfilingInfo(events[7], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL));
    ms = (end - start) * deg;
    total_ms += ms;
    printf("\n(Applying 2-D Shift):%f\n", total_ms);
    global_ms += total_ms;
    total_ms = 0.0;

    CL_CHECK_ERROR(clGetEventProfilingInfo(events[8], CL_PROFILING_COMMAND_START,sizeof(cl_ulong), &start, NULL));
    CL_CHECK_ERROR(clGetEventProfilingInfo(events[8], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL));
    ms = (end - start) * deg;
    total_ms += ms;
    printf("\n(Cropping data):%f\n", total_ms);
    global_ms += total_ms;
    total_ms = 0.0;

    printf("\nTOTAL TIME:%f\n", global_ms);

    // Copy result from device to host
    clFFT_Complex *cropped_restored_image = 
	(clFFT_Complex *)clEnqueueMapBuffer(command_queue,
					    cropped_restored_image_data_buffer,
					    CL_TRUE,
					    CL_MAP_READ,
					    0,
					    cropped_data_length,
					    0, NULL, NULL, NULL );




    /* Write the restored slice */
    tiff_write_complex(tiff_output, cropped_restored_image, slice_size, slice_size);
}