int pm_overflow_handler(int irq, struct pt_regs *regs) { int is_kernel; int i, cpu; unsigned int pc, pfctl; unsigned int count[2]; pr_debug("get interrupt in %s\n", __FUNCTION__); if (oprofile_running == 0) { pr_debug("error: entering interrupt when oprofile is stopped.\n\r"); return -1; } is_kernel = get_kernel(); cpu = smp_processor_id(); pc = regs->pc; pfctl = ctr_read(); /* read the two event counter regs */ count_read(count); /* if the counter overflows, add sample to oprofile buffer */ for (i = 0; i < 2; ++i) { if (oprofile_running) { oprofile_add_sample(regs, i); } } /* reset the perfmon counter */ ctr_write(curr_pfctl); count_write(curr_count); return 0; }
void Tensor4d::eltProduct(const Tensor4d & rhs) { assert(dims_[0] == rhs.get_dim(0)); assert(dims_[1] == rhs.get_dim(1)); assert(dims_[2] == rhs.get_dim(2)); assert(dims_[3] == rhs.get_dim(3)); for (UINT i = 0; i < dims_[0]; ++i) for (UINT j = 0; j < dims_[1]; ++j) get_kernel(i, j).eltProduct(rhs.get_kernel(i, j)); }
static void power4_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) { unsigned long pc; int is_kernel; int val; int i; unsigned int mmcr0; unsigned long mmcra; mmcra = mfspr(SPRN_MMCRA); pc = get_pc(regs); is_kernel = get_kernel(pc, mmcra); /* set the PMM bit (see comment below) */ mtmsrd(mfmsr() | MSR_PMM); for (i = 0; i < cur_cpu_spec->num_pmcs; ++i) { val = ctr_read(i); if (val < 0) { if (oprofile_running && ctr[i].enabled) { oprofile_add_ext_sample(pc, regs, i, is_kernel); ctr_write(i, reset_value[i]); } else { ctr_write(i, 0); } } } mmcr0 = mfspr(SPRN_MMCR0); /* reset the perfmon trigger */ mmcr0 |= MMCR0_PMXE; /* * We must clear the PMAO bit on some (GQ) chips. Just do it * all the time */ mmcr0 &= ~MMCR0_PMAO; /* Clear the appropriate bits in the MMCRA */ mmcra &= ~cur_cpu_spec->oprofile_mmcra_clear; mtspr(SPRN_MMCRA, mmcra); /* * now clear the freeze bit, counting will not start until we * rfid from this exception, because only at that point will * the PMM bit be cleared */ mmcr0 &= ~MMCR0_FC; mtspr(SPRN_MMCR0, mmcr0); }
void FretData::init_grids (const Floats& d_grid_int, Float R0, Float Rmin, Float Rmax, bool do_limit) { // grid on distance between termini for(unsigned l=0; l<d_term_.size(); ++l){ // grid on sigma for(unsigned i=0; i<s_grid_.size(); ++i){ // grid on distance between center of GMM for(unsigned j=0; j<d_center_.size(); ++j){ Float marg=0.; Float norm=0.; unsigned kmin=0; unsigned kmax=d_grid_int.size(); // find boundaries for marginalization if(do_limit){ kmin=get_closest(d_grid_int,std::max(Rmin,d_term_[l]-Rmax)); kmax=get_closest(d_grid_int,d_term_[l]+Rmax); } // do the marginalization for(unsigned k=kmin+1; k<kmax; ++k){ Float dx = d_grid_int[k] - d_grid_int[k-1]; Float prob = get_probability(d_grid_int[k], d_center_[j], s_grid_[i]); Float probm1 = get_probability(d_grid_int[k-1], d_center_[j], s_grid_[i]); Float kernel = get_kernel( d_grid_int[k], R0 ); Float kernelm1 = get_kernel( d_grid_int[k-1], R0 ); marg += ( kernel * prob + kernelm1 * probm1 ) / 2.0 * dx; norm += ( prob + probm1 ) / 2.0 * dx; } // store in grid_ and norm_ grid_.push_back(marg); norm_.push_back(norm); } } } }
int main(int argc, char *argv[]) { prepare(); base_info vm_base_info; vm_base_info.issue = get_dis(); vm_base_info.hostname = get_hostname(); vm_base_info.kernel = get_kernel(); vm_base_info.arch = get_arch(); printf("%d %d\n", vm_base_info.issue, vm_issue); printf("%s\n", vm_base_info.hostname); printf("%s\n", vm_base_info.kernel); printf("%s\n", vm_base_info.arch); /* TODO: not forget memory free */ }
// [[Rcpp::export]] void rebroadcast_input(SEXP kernel, const std::string& execution_input, const int execution_count) { get_kernel(kernel)->_request_server->rebroadcast_input(execution_input, execution_count); }
// [[Rcpp::export]] void post_handle(SEXP kernel, Rcpp::List res, std::string sockName) { JuniperKernel* jk = get_kernel(kernel); jk->post_handle(res, sockName); }
// [[Rcpp::export]] SEXP sock_recv(SEXP kernel, std::string sockName) { JuniperKernel* jk = get_kernel(kernel); return jk->recv(sockName); }
bool smo::step(int i1, int i2){ if (i1==i2 ) return 0; double a1, a2; double alph1 = (*alpha)[i1]; double alph2 = (*alpha)[i2]; int y1 = (*ypsilon)[i1]; int y2 = (*ypsilon)[i2]; double f1 = (*f_cache)[i1]; double f2 = (alph2 > 0 && alph2 < c)?(*f_cache)[i2]:function(i2) -y2; int s = y1*y2; double L,H,Lobj,Hobj; if (y1==y2) { double gamma = alph1 + alph2; if (gamma > c) { L = gamma - c; H = c; }else{ L = 0; H = gamma; } }else { double gamma = alph1 - alph2; if (gamma > 0) { L = 0; H = c - gamma; }else{ L = -gamma; H = c; } } if ( L == H ) return false; double k12 = get_kernel(i1,i2);//kern->calculate(elem->at(i1),elem->at(i2)); double k11 = get_kernel(i1,i1);//kern->calculate(elem->at(i1),elem->at(i1)); double k22 = get_kernel(i2,i2);//kern->calculate(elem->at(i2),elem->at(i2)); double eta =2*k12 - k11 - k22; if (eta < 0) { a2 = alph2 + y2*(f2 - f1)/eta; //mjenjao if (a2<L) a2 =L; else if (a2>H) a2 =H; }else { double c1 = eta/2; double c2 = y2*(f1-f2) -eta*alph2; Lobj = c1*L*L+c2*L; Hobj = c1*H*H+c2*H; if (Lobj>Hobj) a2 = L; else if (Lobj < Hobj) a2 = H; else a2 = alph2; } if (fabs(a2 -alph2) < eps*(a2+alph2+eps)) return 0; a1 = alph1 - s*(a2 - alph2); if (a1 < 0) { a2 += s*a1; a1 =0; }else if (a1 > c) { double t = a1-c; a2+=s*t; a1=c; } double t1 = y1*(a1-alph1); double t2 = y2*(a2-alph2); (*alpha)[i1] = a1; (*alpha)[i2] = a2; double tmp_low = (*f_cache)[i1]; double tmp_up = (*f_cache)[i1]; int tmp_ilow = i1; int tmp_iup = i1; for (int i = 0; i <alpha->size(); i++) { if ((*alpha)[i] >0 && (*alpha)[i] < c){ (*f_cache)[i] += t1*get_kernel(i1,i)/*kern->calculate(elem->at(i1),elem->at(i))*/+t2*get_kernel(i2,i);/*kern->calculate(elem->at(i2),elem->at(i));*/ } if (((*alpha)[i] >0 && (*alpha)[i] < c) || i==i1 || i==i2){ if ((*f_cache)[i]>tmp_low){ tmp_low = (*f_cache)[i]; i_low = i; } if ((*f_cache)[i]<tmp_up){ tmp_up = (*f_cache)[i]; i_up = i; } } } return 1; }
int main(int argc, char *argv[]) { int rc; TSS_HCONTEXT hContext; TSS_HTPM hTPM; int i; unsigned int pcr_len = 0; unsigned char *pcr_value; unsigned char sha1[20]; /* The argument being the executable */ if (argc <= 1) { printf("Must give atleast one argument\n"); exit(1); } /* Find out the currently running kernel and return its hash */ rc = get_kernel(sha1); if (rc != 0) { printf("Kernel read failed\n"); exit(1); } /* Start creating the TSS context */ rc = Tspi_Context_Create(&hContext); if (rc != TSS_SUCCESS) printf("Context creation failed!\n"); rc = Tspi_Context_Connect(hContext, NULL); if (rc != TSS_SUCCESS) printf("Context connection failed!\n"); rc = Tspi_Context_GetTpmObject(hContext, &hTPM); if (rc != TSS_SUCCESS) printf("Getting TPM Object failed\n"); rc = Tspi_TPM_PcrRead(hTPM, 16, &pcr_len, &pcr_value); printf("Length of data read: %d\n", pcr_len); for (i = 0; i < pcr_len; i++) printf("%x ", pcr_value[i]); printf("\n"); /* Trousers wonkiness - have to pass SHA1 hash */ rc = Tspi_TPM_PcrExtend(hTPM, 16, 20, sha1, NULL, &pcr_len, &pcr_value); if (rc != TSS_SUCCESS) { printf("Kernel Extend failed : %d\n", rc); } /* argv[1] is the path to secure_daemon */ rc = get_hash(argv[1], sha1); if (rc != 0) exit(1); rc = Tspi_TPM_PcrExtend(hTPM, 16, 20, sha1, NULL, &pcr_len, &pcr_value); if (rc != TSS_SUCCESS) { printf("Secure Daemon Extend failed : %d\n", rc); } printf("Length of extended PCR value: %d\n", pcr_len); for (i = 0; i < pcr_len; i++) printf("%x ", pcr_value[i]); printf("\n"); free(pcr_value); Tspi_Context_Close(hContext); return 0; }
// [[Rcpp::export]] SEXP boot_kernel(SEXP kernel, int interrupt_event) { JuniperKernel* jk = get_kernel(kernel); return jk->start_bg_threads(interrupt_event); }
void Tensor4d::copy(const Tensor4d & lhs) { for (UINT i = 0; i < dims_[0]; ++i) for (UINT j = 0; j < dims_[1]; ++j) get_kernel(i, j).copy(lhs.get_kernel(i, j)); }
void Tensor4d::slrProduct(const float scalar) { /* simple implementation for test */ for (UINT i = 0; i < dims_[0]; ++i) for (UINT j = 0; j < dims_[1]; ++j) get_kernel(i, j).slrProduct(scalar); }
void convolution(int argc, char *argv[]) { if (argc != 3) { printf("Invalid number of arguments.\n"); return; } Image *input_image; Image *output_image; int error; if ((error = TGA_readImage(argv[0], &input_image)) != 0) { printf("Error when opening image: %d\n", error); return; } if ((error = Image_new(input_image->width, input_image->height, input_image->channels, &output_image)) != 0) { printf("Error when creating output image : %d\n", error); Image_delete(input_image); return; } Kernel kernel; if ((error = get_kernel(argv[1], &kernel)) != 0) { printf("Error when opening kernel : %d\n", error); Image_delete(input_image); Image_delete(output_image); return; } int radius_x, radius_y; int x, y, c; radius_x = (kernel.width - 1) / 2; radius_y = (kernel.height - 1) / 2; Benchmark bench; start_benchmark(&bench); for (c = 0; c < input_image->channels; ++c) { uint8_t *out_data = output_image->data[c]; for (y = 0; y < input_image->height; ++y) { for (x = 0; x < input_image->width; ++x) { int kx, ky; float *kernel_data = kernel.data; float sum = 0; for (ky = -radius_y; ky <= radius_y; ++ky) { for (kx = -radius_x; kx <= radius_x; ++kx) { int xx = clip(x + kx, 0, input_image->width - 1); int yy = clip(y + ky, 0, input_image->height - 1); sum += Image_getPixel(input_image, xx, yy, c) * *kernel_data++; } } sum /= kernel.sum; *out_data++ = clip(sum, 0, 255); } } } end_benchmark(&bench); printf("%lu ", bench.elapsed_ticks); printf("%lf\n", bench.elapsed_time); if ((error = TGA_writeImage(argv[2], output_image)) != 0) { printf("Error when writing image: %d\n", error); } Image_delete(input_image); Image_delete(output_image); kernel_delete(kernel); }
// [[Rcpp::export]] void execute_result(SEXP kernel, Rcpp::List data) { get_kernel(kernel)->_request_server->execute_result(from_list_r(data)); }
// [[Rcpp::export]] void jk_device(SEXP kernel, std::string bg, double width, double height, double pointsize, bool standalone, Rcpp::List aliases) { makeDevice(get_kernel(kernel), bg, width, height, pointsize, standalone, aliases); }
void Tensor4d::rand_elt() { for (UINT i = 0; i < dims_[0]; ++i) for (UINT j = 0; j < dims_[1]; ++j) get_kernel(i, j).rand_elt(); }
void *spead_api_setup(struct spead_api_module_shared *s) { struct sapi_o *a; a = malloc(sizeof(struct sapi_o)); if (a == NULL){ #ifdef DEBUG fprintf(stderr, "e: logic could not malloc api obj\n"); #endif return NULL; } if (setup_ocl(KERNELDIR KERNELS_FILE, &(a->ctx), &(a->cq), &(a->p)) != CL_SUCCESS){ #ifdef DEBUG fprintf(stderr, "e: setup_ocl error\n"); #endif spead_api_destroy(s, a); return NULL; } a->chirp = get_kernel("coherent_dedisperse", &(a->p)); if (a->chirp == NULL){ #ifdef DEBUG fprintf(stderr, "e: get_kernel error\n"); #endif spead_api_destroy(s, a); return NULL; } a->power = get_kernel("power", &(a->p)); if (a->power == NULL){ #ifdef DEBUG fprintf(stderr, "e: get_kernel error\n"); #endif spead_api_destroy(s, a); return NULL; } a->phase = get_kernel("phase", &(a->p)); if (a->phase == NULL){ #ifdef DEBUG fprintf(stderr, "e: get_kernel error\n"); #endif spead_api_destroy(s, a); return NULL; } #if 0 a->k = get_kernel("ct", &(a->p)); if (a->k == NULL){ #ifdef DEBUG fprintf(stderr, "e: get_kernel error\n"); #endif spead_api_destroy(a); return NULL; } #endif a->clin = NULL; a->clout = NULL; a->clpow = NULL; a->host = NULL; return a; }
void dfi_process_sinogram(const char* tiff_input, const char* tiff_output, int center_rotation) { cl_event events[11]; if(!tiff_input) { printf("The filename of input is not valid. (pointer tiff_input = %p)", tiff_input); return; } if(!tiff_output) { printf("The filename of output is not valid. (pointer tiff_output = %p)", tiff_output); return; } ///////////////////// /* Input Data Part */ ///////////////////// /* Input a slice properties */ int bits_per_sample; int samples_per_pixel; int theta_size; int slice_size; /* Read the slice */ clFFT_Complex *data_tiff = tiff_read_complex(tiff_input, center_rotation, &bits_per_sample, &samples_per_pixel, &slice_size, &theta_size); //tiff_write_complex("resources/initial-sino.tif", data_tiff, slice_size, theta_size); /* * OpenCL */ printf("Hey!1\n"); cl_int status = CL_SUCCESS; cl_platform_id platform; printf("Hey!1.2\n"); CL_CHECK_ERROR(clGetPlatformIDs(1, &platform, NULL)); printf("Hey!2\n"); cl_device_id devices[10]; // Compute device cl_context context; // Compute context cl_uint n_devices = 0; printf("@Hey!3\n"); #if GPU printf("@Hey!GPU Choosed\n"); CL_CHECK_ERROR(clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 3, devices, &n_devices)); #else printf("@Hey!CPU Choosed\n"); CL_CHECK_ERROR(clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 3, devices, &n_devices)); #endif //cl_device_id current_device = devices[0]; #define current_device devices[0] printf("Hey!3.1 n_devices %d\n", n_devices); context = clCreateContext(NULL, 1, devices, NULL, NULL, &status); printf("Hey!3.2\n"); CL_CHECK_ERROR(status); /* * Device */ printf("Hey!3.3\n"); cl_int device_max_cu = 0; CL_CHECK_ERROR(clGetDeviceInfo(current_device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &device_max_cu, NULL)); size_t wg_count = device_max_cu * wavefronts_per_SIMD; size_t global_work_size = wg_count * local_work_size; printf("Hey!3.4\n"); /* * Queues, Kernels */ cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; cl_command_queue command_queue = clCreateCommandQueue(context, current_device, properties, &status); CL_CHECK_ERROR(status); printf("Hey!3.5\n"); cl_kernel kernel_linear_interp = get_kernel("linear_interp", &context, ¤t_device); cl_kernel kernel_zero_ifftshift = get_kernel("zero_ifftshift", &context, ¤t_device); cl_kernel kernel_fftshift = get_kernel("fftshift", &context, ¤t_device); cl_kernel kernel_2dshift = get_kernel("shift2d", &context, ¤t_device); cl_kernel kernel_crop_data = get_kernel("crop_data", &context, ¤t_device); printf("@Hey!3.6\n\n"); //////////////////////// /* OpenCL - DFI Part */ //////////////////////// /* Reconstruction properties */ int oversampling_ratio = 2; int dx = 1; /* zoom times */ //int size_s = slice_size * oversampling_ratio; int min_theta = 0; int max_theta = theta_size - 1; int size_zeropad_s = pow(2, ceil(log2((float)slice_size))); /* get length of FFT operations */ int size_s = size_zeropad_s; float d_omega_s = 2 * M_PI / (size_zeropad_s * dx); //normalized ratio [0; 2PI] /* Start timer */ timeval global_tim; gettimeofday(&global_tim, NULL); double t1_global = global_tim.tv_sec + (global_tim.tv_usec/1000000.0), t2_global = 0.0; ///////////////////////////////////// /* Sinogram shifting + Zeropadding */ ///////////////////////////////////// long data_size = slice_size * theta_size * sizeof(clFFT_Complex); printf("6 "); long zeropad_data_size = theta_size * size_zeropad_s * sizeof(clFFT_Complex); /* Buffers */ cl_mem original_data_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, data_size, NULL, &status); CL_CHECK_ERROR(status); CL_CHECK_ERROR(clEnqueueWriteBuffer(command_queue, original_data_buffer, CL_FALSE, 0, data_size, data_tiff, 0, NULL, &events[0])); cl_mem zeropad_ifftshift_data_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, zeropad_data_size, NULL, &status); CL_CHECK_ERROR(status); float *zero_out = (float *)g_malloc0(zeropad_data_size); CL_CHECK_ERROR(clEnqueueWriteBuffer(command_queue, zeropad_ifftshift_data_buffer, CL_FALSE, 0, zeropad_data_size, zero_out, 0, NULL, &events[1])); /* Set arguments */ CL_CHECK_ERROR(clSetKernelArg(kernel_zero_ifftshift, 0, sizeof(void *), (void *)&original_data_buffer)); CL_CHECK_ERROR(clSetKernelArg(kernel_zero_ifftshift, 1, sizeof(theta_size), &theta_size)); CL_CHECK_ERROR(clSetKernelArg(kernel_zero_ifftshift, 2, sizeof(slice_size), &slice_size)); CL_CHECK_ERROR(clSetKernelArg(kernel_zero_ifftshift, 3, sizeof(void *), (void *)&zeropad_ifftshift_data_buffer)); CL_CHECK_ERROR(clSetKernelArg(kernel_zero_ifftshift, 4, sizeof(theta_size), &theta_size)); CL_CHECK_ERROR(clSetKernelArg(kernel_zero_ifftshift, 5, sizeof(size_zeropad_s), &size_zeropad_s)); /* Run kernel */ status = clEnqueueNDRangeKernel(command_queue, kernel_zero_ifftshift, 1, // work dimensional 1D, 2D, 3D NULL, // offset &global_work_size, // total number of WI &local_work_size, // number of WI in WG 2, // number events in wait list events, // event wait list &events[2]); // event CL_CHECK_ERROR(status); // Copy result from device to host /* clFFT_Complex *fur_kernel_sino = (clFFT_Complex *)clEnqueueMapBuffer(command_queue, zeropad_ifftshift_data_buffer, CL_TRUE, CL_MAP_READ, 0, zeropad_data_size, 0, NULL, NULL, NULL ); clFinish(command_queue); tiff_write_complex("resources/zeropad-sino.tif", fur_kernel_sino, size_zeropad_s, theta_size); */ //////////////////////////////////////////////////////////////////////// /* Applying 1-D FFT to the each strip of the sinogram and shifting it */ //////////////////////////////////////////////////////////////////////// cl_mem zeropadded_1dfft_data_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, zeropad_data_size, NULL, &status); CL_CHECK_ERROR(status); /* Setup clAmdFft */ clFFT_Dim3 sino_fft; sino_fft.x = size_zeropad_s; sino_fft.y = 1; sino_fft.z = 1; /* Create FFT plan */ clFFT_Plan plan_1dfft_sinogram = clFFT_CreatePlan(context, sino_fft, clFFT_1D, clFFT_InterleavedComplexFormat, &status); CL_CHECK_ERROR(status); /* Execute FFT */ status = clFFT_ExecuteInterleaved(command_queue, plan_1dfft_sinogram, theta_size, clFFT_Forward, zeropad_ifftshift_data_buffer, zeropadded_1dfft_data_buffer, 0, NULL, NULL); CL_CHECK_ERROR(status); // Free FFT plan //clFFT_DestroyPlan(plan_1dfft_sinogram); // Copy result from device to host /* clFFT_Complex *fourier_kernel_sinogram = (clFFT_Complex *)malloc(zeropad_data_size); clEnqueueReadBuffer(command_queue, zeropadded_1dfft_data_buffer, CL_TRUE, 0, zeropad_data_size, fourier_kernel_sinogram, 0, NULL, NULL); clFinish(command_queue); tiff_write_complex("resources/1dfft-sino.tif", fourier_kernel_sinogram, size_zeropad_s, theta_size); */ /////////////////// /* Make fftshift */ /////////////////// /* Buffers */ cl_mem zeropad_fftshift_data_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, zeropad_data_size, NULL, &status); CL_CHECK_ERROR(status); /* Set arguments */ CL_CHECK_ERROR(clSetKernelArg(kernel_fftshift, 0, sizeof(void *), (void *)&zeropadded_1dfft_data_buffer)); CL_CHECK_ERROR(clSetKernelArg(kernel_fftshift, 1, sizeof(theta_size), &theta_size)); CL_CHECK_ERROR(clSetKernelArg(kernel_fftshift, 2, sizeof(size_zeropad_s), &size_zeropad_s)); CL_CHECK_ERROR(clSetKernelArg(kernel_fftshift, 3, sizeof(void *), (void *)&zeropad_fftshift_data_buffer)); CL_CHECK_ERROR(clSetKernelArg(kernel_fftshift, 4, sizeof(theta_size), &theta_size)); CL_CHECK_ERROR(clSetKernelArg(kernel_fftshift, 5, sizeof(size_zeropad_s), &size_zeropad_s)); /* Run kernel */ status = clEnqueueNDRangeKernel(command_queue, kernel_fftshift, 1, // work dimensional 1D, 2D, 3D NULL, // offset &global_work_size, // total number of WI &local_work_size, // number of WI in WG 0, // number events in wait list NULL, // event wait list &events[3]); // event CL_CHECK_ERROR(status); /* Copy result from device to host */ /* clFFT_Complex *fur_kernel_fftshift_sino = (clFFT_Complex *)clEnqueueMapBuffer(command_queue, zeropad_fftshift_data_buffer, CL_TRUE, CL_MAP_READ,0,zeropad_data_size, 0, NULL, NULL, NULL ); clFinish(command_queue); tiff_write_complex("resources/fftshift-sino.tif", fur_kernel_fftshift_sino, size_zeropad_s, theta_size); */ //////////////////////// /* Data Interpolation */ //////////////////////// /* Performing Interpolation */ cl_long data_length = size_s * size_s; cl_int in_rows = theta_size; cl_int in_cols = size_zeropad_s; cl_float norm_ratio = d_omega_s/dx; cl_float in_rows_first_val = min_theta; cl_float in_rows_last_val = max_theta; cl_float in_cols_first_val = (-in_cols/2)*norm_ratio; cl_float in_cols_last_val = (in_cols/2-1)*norm_ratio; cl_int interp_rows = size_s; cl_int interp_cols = interp_rows; cl_int iparams[5]; iparams[0] = in_rows; iparams[1] = in_cols; iparams[2] = dx; iparams[3] = interp_rows; iparams[4] = interp_cols; cl_float fparams[5]; fparams[0] = in_rows_first_val; fparams[1] = in_rows_last_val; fparams[2] = in_cols_first_val; fparams[3] = in_cols_last_val; fparams[4] = norm_ratio; /* Buffers */ cl_mem i_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int) * 5, NULL, &status); CL_CHECK_ERROR(status); CL_CHECK_ERROR(clEnqueueWriteBuffer(command_queue, i_buffer, CL_FALSE, 0, sizeof(cl_int) * 5, iparams, 0, NULL, &events[4])); cl_mem f_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_float) * 5, NULL, &status); CL_CHECK_ERROR(status); CL_CHECK_ERROR(clEnqueueWriteBuffer(command_queue, f_buffer, CL_FALSE, 0, sizeof(cl_float) * 5, fparams, 0, NULL, &events[5])); cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, data_length * sizeof(clFFT_Complex), NULL, &status); CL_CHECK_ERROR(status); /* Set arguments */ CL_CHECK_ERROR(clSetKernelArg(kernel_linear_interp, 0, sizeof(void *), (void *)&i_buffer)); CL_CHECK_ERROR(clSetKernelArg(kernel_linear_interp, 1, sizeof(void *), (void *)&f_buffer)); CL_CHECK_ERROR(clSetKernelArg(kernel_linear_interp, 2, sizeof(void *), (void *)&zeropad_fftshift_data_buffer)); CL_CHECK_ERROR(clSetKernelArg(kernel_linear_interp, 3, sizeof(void *), (void *)&output_buffer)); CL_CHECK_ERROR(clSetKernelArg(kernel_linear_interp, 4, sizeof(data_length), &data_length)); /* Run kernel */ status = clEnqueueNDRangeKernel(command_queue, kernel_linear_interp, 1, // work dimensional 1D, 2D, 3D NULL, // offset &global_work_size, // total number of WI &local_work_size, // nomber of WI in WG 3, // num events in wait list events + 3, // event wait list &events[6]); // event CL_CHECK_ERROR(status); //clFinish(command_queue); // Copy result from device to host /* clFFT_Complex *interpolated_spectrum = (clFFT_Complex *)clEnqueueMapBuffer(command_queue, output_buffer, CL_TRUE, CL_MAP_READ, 0, data_length * sizeof(clFFT_Complex), 0, NULL, NULL, NULL ); clFinish(command_queue); tiff_write_complex("resources/interpolated-sino.tif", interpolated_spectrum, size_s, size_s); */ /////////////////////////////////////////////////// /* Applying 2-D FFT to the interpolated spectrum */ /////////////////////////////////////////////////// /* Setup 2D IFFT */ clFFT_Dim3 sino_2dfft; sino_2dfft.x = size_s; sino_2dfft.y = size_s; sino_2dfft.z = 1; /* Create 2D IFFT plan */ clFFT_Plan plan_2difft = clFFT_CreatePlan(context, sino_2dfft, clFFT_2D, clFFT_InterleavedComplexFormat, &status); CL_CHECK_ERROR(status); /* Execute 2D IFFT */ cl_mem reconstructed_image_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, data_length * sizeof(clFFT_Complex), NULL, &status); CL_CHECK_ERROR(status); status = clFFT_ExecuteInterleaved(command_queue, plan_2difft, 1, clFFT_Inverse, output_buffer, reconstructed_image_buffer, 0, NULL, NULL); CL_CHECK_ERROR(status); // Copy result from device to host /* clFFT_Complex *ifft2d_interpolated_spectrum = (clFFT_Complex *)malloc(data_length * sizeof(clFFT_Complex)); clEnqueueReadBuffer(command_queue, reconstructed_image_buffer, CL_TRUE, 0, data_length * sizeof(clFFT_Complex), ifft2d_interpolated_spectrum, 0, NULL, NULL); tiff_write_complex("resources/ifft2d_interpolated_spectrum.tif", ifft2d_interpolated_spectrum, size_s, size_s); clFinish(command_queue); */ ///////////////////////////////////////////////// /* Applying 2-D fftshidt to the restored image */ ///////////////////////////////////////////////// /* Buffers */ cl_mem two_dim_fftshifted_data_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, data_length * sizeof(clFFT_Complex), NULL, &status); CL_CHECK_ERROR(status); /* Set arguments */ cl_int inverse_flag = 0; CL_CHECK_ERROR(clSetKernelArg(kernel_2dshift, 0, sizeof(void *), (void *)&reconstructed_image_buffer)); CL_CHECK_ERROR(clSetKernelArg(kernel_2dshift, 1, sizeof(void *), (void *)&two_dim_fftshifted_data_buffer)); CL_CHECK_ERROR(clSetKernelArg(kernel_2dshift, 2, sizeof(interp_rows), &interp_rows)); CL_CHECK_ERROR(clSetKernelArg(kernel_2dshift, 3, sizeof(interp_cols), &interp_cols)); CL_CHECK_ERROR(clSetKernelArg(kernel_2dshift, 4, sizeof(inverse_flag), &inverse_flag)); /* Run kernel */ status = clEnqueueNDRangeKernel(command_queue, kernel_2dshift, 1, // work dimensional 1D, 2D, 3D NULL, // offset &global_work_size, // total number of WI &local_work_size, // number of WI in WG 1, // number events in wait list &events[6], // event wait list &events[7]); // event CL_CHECK_ERROR(status); /* Copy result from device to host */ /* clFFT_Complex *two_dim_fftshifted_data = (clFFT_Complex *)clEnqueueMapBuffer(command_queue, two_dim_fftshifted_data_buffer, CL_TRUE, CL_MAP_READ, 0, data_length * sizeof(clFFT_Complex), 0, NULL, NULL, NULL ); clFinish(command_queue); */ //////////////// /* Crop data */ /////////////// float lt_offset = 0, rb_offset = 0; int dif_sides = interp_cols - slice_size; if (dif_sides%2) { lt_offset = floor(dif_sides / 2.0); rb_offset = ceil(dif_sides / 2.0); } else { lt_offset = rb_offset = dif_sides / 2.0; } /* Buffers */ long cropped_data_length = slice_size * slice_size * sizeof(clFFT_Complex); cl_mem cropped_restored_image_data_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, cropped_data_length, NULL, &status); CL_CHECK_ERROR(status); /* Set arguments */ CL_CHECK_ERROR(clSetKernelArg(kernel_crop_data, 0, sizeof(void *), (void *)&two_dim_fftshifted_data_buffer)); CL_CHECK_ERROR(clSetKernelArg(kernel_crop_data, 1, sizeof(void *), (void *)&cropped_restored_image_data_buffer)); CL_CHECK_ERROR(clSetKernelArg(kernel_crop_data, 2, sizeof(slice_size), &slice_size)); CL_CHECK_ERROR(clSetKernelArg(kernel_crop_data, 3, sizeof(interp_cols), &interp_cols)); CL_CHECK_ERROR(clSetKernelArg(kernel_crop_data, 4, sizeof(lt_offset), <_offset)); CL_CHECK_ERROR(clSetKernelArg(kernel_crop_data, 5, sizeof(rb_offset), &rb_offset)); /* Run kernel */ status = clEnqueueNDRangeKernel(command_queue, kernel_crop_data, 1, // work dimensional 1D, 2D, 3D NULL, // offset &global_work_size, // total number of WI &local_work_size, // number of WI in WG 1, // number events in wait list &events[7], // event wait list &events[8]); // event CL_CHECK_ERROR(status); CL_CHECK_ERROR(clFinish(command_queue)); clFFT_DestroyPlan(plan_2difft); clFFT_DestroyPlan(plan_1dfft_sinogram); //timing float ms = 0.0, total_ms = 0.0, global_ms = 0.0, deg = 1.0e-6f; /* Stop timer */ gettimeofday(&global_tim, NULL); t2_global = global_tim.tv_sec+(global_tim.tv_usec/1000000.0); printf("\n(Total time - timeofday) %f seconds elapsed\n", (t2_global-t1_global)*1000.0); cl_ulong start, end; CL_CHECK_ERROR(clGetEventProfilingInfo(events[0], CL_PROFILING_COMMAND_START,sizeof(cl_ulong), &start, NULL)); CL_CHECK_ERROR(clGetEventProfilingInfo(events[0], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL)); ms = (end - start) * deg; total_ms += ms; printf("\n(Sinogram shifting + Zeropadding write_op1):%f", ms); CL_CHECK_ERROR(clGetEventProfilingInfo(events[1], CL_PROFILING_COMMAND_START,sizeof(cl_ulong), &start, NULL)); CL_CHECK_ERROR(clGetEventProfilingInfo(events[1], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL)); ms = (end - start) * deg; total_ms += ms; printf("\n(Sinogram shifting + Zeropadding write_op2):%f", ms); CL_CHECK_ERROR(clGetEventProfilingInfo(events[2], CL_PROFILING_COMMAND_START,sizeof(cl_ulong), &start, NULL)); CL_CHECK_ERROR(clGetEventProfilingInfo(events[2], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL)); ms = (end - start) * deg; total_ms += ms; printf("\n(Sinogram shifting + Zeropadding):%f", ms); printf("\nTOTAL(Sinogram shifting + Zeropadding):%f\n", total_ms); global_ms += total_ms; total_ms = 0.0; CL_CHECK_ERROR(clGetEventProfilingInfo(events[2], CL_PROFILING_COMMAND_END,sizeof(cl_ulong), &start, NULL)); CL_CHECK_ERROR(clGetEventProfilingInfo(events[3], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &end, NULL)); ms = (end - start) * deg; total_ms += ms; printf("\n(Applying 1-D FFT):%f\n", total_ms); global_ms += total_ms; total_ms = 0.0; CL_CHECK_ERROR(clGetEventProfilingInfo(events[3], CL_PROFILING_COMMAND_START,sizeof(cl_ulong), &start, NULL)); CL_CHECK_ERROR(clGetEventProfilingInfo(events[3], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL)); ms = (end - start) * deg; total_ms += ms; printf("\n(Shift 1-D FFT data):%f\n", total_ms); global_ms += total_ms; total_ms = 0.0; CL_CHECK_ERROR(clGetEventProfilingInfo(events[4], CL_PROFILING_COMMAND_START,sizeof(cl_ulong), &start, NULL)); CL_CHECK_ERROR(clGetEventProfilingInfo(events[4], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL)); ms = (end - start) * deg; total_ms += ms; printf("\n(Data Interpolation write_op1):%f", ms); CL_CHECK_ERROR(clGetEventProfilingInfo(events[5], CL_PROFILING_COMMAND_START,sizeof(cl_ulong), &start, NULL)); CL_CHECK_ERROR(clGetEventProfilingInfo(events[5], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL)); ms = (end - start) * deg; total_ms += ms; printf("\n(Data Interpolation write_op2):%f", ms); CL_CHECK_ERROR(clGetEventProfilingInfo(events[6], CL_PROFILING_COMMAND_START,sizeof(cl_ulong), &start, NULL)); CL_CHECK_ERROR(clGetEventProfilingInfo(events[6], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL)); ms = (end - start) * deg; total_ms += ms; printf("\n(Data Interpolation):%f", ms); printf("\nTOTAL(Data Interpolation):%f\n", total_ms); global_ms += total_ms; total_ms = 0.0; CL_CHECK_ERROR(clGetEventProfilingInfo(events[6], CL_PROFILING_COMMAND_END,sizeof(cl_ulong), &start, NULL)); CL_CHECK_ERROR(clGetEventProfilingInfo(events[7], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &end, NULL)); ms = (end - start) * deg; total_ms += ms; printf("\n(Applying 2-D IFFT):%f\n", total_ms); global_ms += total_ms; total_ms = 0.0; CL_CHECK_ERROR(clGetEventProfilingInfo(events[7], CL_PROFILING_COMMAND_START,sizeof(cl_ulong), &start, NULL)); CL_CHECK_ERROR(clGetEventProfilingInfo(events[7], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL)); ms = (end - start) * deg; total_ms += ms; printf("\n(Applying 2-D Shift):%f\n", total_ms); global_ms += total_ms; total_ms = 0.0; CL_CHECK_ERROR(clGetEventProfilingInfo(events[8], CL_PROFILING_COMMAND_START,sizeof(cl_ulong), &start, NULL)); CL_CHECK_ERROR(clGetEventProfilingInfo(events[8], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL)); ms = (end - start) * deg; total_ms += ms; printf("\n(Cropping data):%f\n", total_ms); global_ms += total_ms; total_ms = 0.0; printf("\nTOTAL TIME:%f\n", global_ms); // Copy result from device to host clFFT_Complex *cropped_restored_image = (clFFT_Complex *)clEnqueueMapBuffer(command_queue, cropped_restored_image_data_buffer, CL_TRUE, CL_MAP_READ, 0, cropped_data_length, 0, NULL, NULL, NULL ); /* Write the restored slice */ tiff_write_complex(tiff_output, cropped_restored_image, slice_size, slice_size); }