static int compute_mask_matrix(cl_mem cl_mask_matrix, int step_x, int step_y)
{
    int i, j, ret = 0;
    /* Initialize to NULL so the error path can free them unconditionally */
    uint32_t *mask_matrix = NULL, *mask_x = NULL, *mask_y = NULL;
    size_t size_matrix = sizeof(uint32_t) * (2 * step_x + 1) * (2 * step_y + 1);

    mask_x = av_mallocz(sizeof(uint32_t) * (2 * step_x + 1));
    if (!mask_x) {
        ret = AVERROR(ENOMEM);
        goto end;
    }
    mask_y = av_mallocz(sizeof(uint32_t) * (2 * step_y + 1));
    if (!mask_y) {
        ret = AVERROR(ENOMEM);
        goto end;
    }
    mask_matrix = av_mallocz(size_matrix);
    if (!mask_matrix) {
        ret = AVERROR(ENOMEM);
        goto end;
    }
    ret = compute_mask(step_x, mask_x);
    if (ret < 0)
        goto end;
    ret = compute_mask(step_y, mask_y);
    if (ret < 0)
        goto end;
    /* The 2D mask is the outer product of the two 1D masks */
    for (j = 0; j < 2 * step_y + 1; j++) {
        for (i = 0; i < 2 * step_x + 1; i++) {
            mask_matrix[i + j * (2 * step_x + 1)] = mask_y[j] * mask_x[i];
        }
    }
    ret = av_opencl_buffer_write(cl_mask_matrix, (uint8_t *)mask_matrix, size_matrix);
end:
    av_freep(&mask_x);
    av_freep(&mask_y);
    av_freep(&mask_matrix);
    return ret;
}
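The 2D kernel above is simply the outer product of the two 1D masks returned by compute_mask(). A minimal standalone sketch of that step, assuming a hypothetical fill_mask_1d() that produces a flat kernel of ones (the body of FFmpeg's compute_mask() is not shown above):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for compute_mask(): fill a (2*step + 1)-tap kernel with ones. */
static void fill_mask_1d(int step, uint32_t *mask)
{
    for (int i = 0; i < 2 * step + 1; i++)
        mask[i] = 1;
}

int main(void)
{
    enum { STEP_X = 2, STEP_Y = 1 };
    uint32_t mask_x[2 * STEP_X + 1], mask_y[2 * STEP_Y + 1];
    uint32_t matrix[2 * STEP_Y + 1][2 * STEP_X + 1];

    fill_mask_1d(STEP_X, mask_x);
    fill_mask_1d(STEP_Y, mask_y);

    /* Same indexing as compute_mask_matrix(): row j weighted by mask_y, column i by mask_x */
    for (int j = 0; j < 2 * STEP_Y + 1; j++)
        for (int i = 0; i < 2 * STEP_X + 1; i++)
            matrix[j][i] = mask_y[j] * mask_x[i];

    for (int j = 0; j < 2 * STEP_Y + 1; j++) {
        for (int i = 0; i < 2 * STEP_X + 1; i++)
            printf("%u ", matrix[j][i]);
        printf("\n");
    }
    return 0;
}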
hash_t *hash_init(hash_t *hash, hashcount_t maxcount,
                  hash_comp_t compfun, hash_fun_t hashfun,
                  hnode_t **table, hashcount_t nchains)
{
    if (hash_val_t_bit == 0)    /* 1 */
        compute_bits();

    assert (is_power_of_two(nchains));

    hash->table = table;        /* 2 */
    hash->nchains = nchains;
    hash->nodecount = 0;
    hash->maxcount = maxcount;
    hash->compare = compfun ? compfun : hash_comp_default;
    hash->function = hashfun ? hashfun : hash_fun_default;
    hash->dynamic = 0;          /* 3 */
    hash->mask = compute_mask(nchains);    /* 4 */
    clear_table(hash);          /* 5 */

    assert (hash_verify(hash));

    return hash;
}
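hash_init() asserts that nchains is a power of two so that compute_mask() can turn the chain count into a cheap bitmask for indexing. A minimal sketch of that idea, under the assumption that the mask is the conventional nchains - 1 (the library's actual compute_mask() is not shown above):

#include <assert.h>
#include <stdio.h>

typedef unsigned long hash_val_t;
typedef unsigned long hashcount_t;

static int is_power_of_two(hashcount_t n)
{
    return n != 0 && (n & (n - 1)) == 0;
}

/* Assumed behaviour: for a power-of-two table, the index mask is size - 1 */
static hash_val_t compute_mask(hashcount_t nchains)
{
    assert(is_power_of_two(nchains));
    return nchains - 1;
}

int main(void)
{
    hashcount_t nchains = 64;                       /* power-of-two chain count */
    hash_val_t h = 0xdeadbeefUL;                    /* some hash value */
    hash_val_t chain = h & compute_mask(nchains);   /* chain index without a modulo */
    printf("hash 0x%lx -> chain %lu of %lu\n", h, chain, nchains);
    return 0;
}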
int main_nlinv(int argc, char* argv[])
{
	int iter = 8;
	float l1 = -1.;
	bool waterfat = false;
	bool rvc = false;
	bool normalize = true;
	float restrict_fov = -1.;
	float csh[3] = { 0., 0., 0. };
	bool usegpu = false;
	const char* psf = NULL;

	const struct opt_s opts[] = {

		{ 'l', true, opt_float, &l1, NULL },
		{ 'i', true, opt_int, &iter, NULL },
		{ 'c', false, opt_set, &rvc, NULL },
		{ 'N', false, opt_clear, &normalize, NULL },
		{ 'f', true, opt_float, &restrict_fov, NULL },
		{ 'p', true, opt_string, &psf, NULL },
		{ 'g', false, opt_set, &usegpu, NULL },
	};

	cmdline(&argc, argv, 2, 3, usage_str, help_str, ARRAY_SIZE(opts), opts);

	num_init();

	assert(iter > 0);

	long ksp_dims[DIMS];
	complex float* kspace_data = load_cfl(argv[1], DIMS, ksp_dims);

	long dims[DIMS];
	md_copy_dims(DIMS, dims, ksp_dims);

	if (waterfat)
		dims[CSHIFT_DIM] = 2;

	long img_dims[DIMS];
	md_select_dims(DIMS, FFT_FLAGS|CSHIFT_FLAG, img_dims, dims);

	long img_strs[DIMS];
	md_calc_strides(DIMS, img_strs, img_dims, CFL_SIZE);

	complex float* image = create_cfl(argv[2], DIMS, img_dims);

	long msk_dims[DIMS];
	md_select_dims(DIMS, FFT_FLAGS, msk_dims, dims);

	long msk_strs[DIMS];
	md_calc_strides(DIMS, msk_strs, msk_dims, CFL_SIZE);

	complex float* mask;
	complex float* norm = md_alloc(DIMS, msk_dims, CFL_SIZE);
	complex float* sens;

	if (4 == argc) {

		sens = create_cfl(argv[3], DIMS, ksp_dims);

	} else {

		sens = md_alloc(DIMS, ksp_dims, CFL_SIZE);
	}

	complex float* pattern = NULL;
	long pat_dims[DIMS];

	if (NULL != psf) {

		pattern = load_cfl(psf, DIMS, pat_dims);
		// FIXME: check compatibility

	} else {

		pattern = md_alloc(DIMS, img_dims, CFL_SIZE);
		estimate_pattern(DIMS, ksp_dims, COIL_DIM, pattern, kspace_data);
	}

	if (waterfat) {

		size_t size = md_calc_size(DIMS, msk_dims);
		md_copy(DIMS, msk_dims, pattern + size, pattern, CFL_SIZE);

		long shift_dims[DIMS];
		md_select_dims(DIMS, FFT_FLAGS, shift_dims, msk_dims);

		long shift_strs[DIMS];
		md_calc_strides(DIMS, shift_strs, shift_dims, CFL_SIZE);

		complex float* shift = md_alloc(DIMS, shift_dims, CFL_SIZE);

		unsigned int X = shift_dims[READ_DIM];
		unsigned int Y = shift_dims[PHS1_DIM];
		unsigned int Z = shift_dims[PHS2_DIM];

		for (unsigned int x = 0; x < X; x++)
			for (unsigned int y = 0; y < Y; y++)
				for (unsigned int z = 0; z < Z; z++)
					shift[(z * Z + y) * Y + x] = cexp(2.i * M_PI * ((csh[0] * x) / X + (csh[1] * y) / Y + (csh[2] * z) / Z));

		md_zmul2(DIMS, msk_dims, msk_strs, pattern + size, msk_strs, pattern + size, shift_strs, shift);
		md_free(shift);
	}

#if 0
	float scaling = 1. / estimate_scaling(ksp_dims, NULL, kspace_data);
#else
	float scaling = 100. / md_znorm(DIMS, ksp_dims, kspace_data);
#endif
	debug_printf(DP_INFO, "Scaling: %f\n", scaling);
	md_zsmul(DIMS, ksp_dims, kspace_data, kspace_data, scaling);

	if (-1. == restrict_fov) {

		mask = md_alloc(DIMS, msk_dims, CFL_SIZE);
		md_zfill(DIMS, msk_dims, mask, 1.);

	} else {

		float restrict_dims[DIMS] = { [0 ... DIMS - 1] = 1. };
		restrict_dims[0] = restrict_fov;
		restrict_dims[1] = restrict_fov;
		restrict_dims[2] = restrict_fov;

		mask = compute_mask(DIMS, msk_dims, restrict_dims);
	}

#ifdef USE_CUDA
	if (usegpu) {

		complex float* kspace_gpu = md_alloc_gpu(DIMS, ksp_dims, CFL_SIZE);
		md_copy(DIMS, ksp_dims, kspace_gpu, kspace_data, CFL_SIZE);

		noir_recon(dims, iter, l1, image, NULL, pattern, mask, kspace_gpu, rvc, usegpu);
		md_free(kspace_gpu);

		md_zfill(DIMS, ksp_dims, sens, 1.);

	} else
#endif
	noir_recon(dims, iter, l1, image, sens, pattern, mask, kspace_data, rvc, usegpu);

	if (normalize) {

		md_zrss(DIMS, ksp_dims, COIL_FLAG, norm, sens);
		md_zmul2(DIMS, img_dims, img_strs, image, img_strs, image, msk_strs, norm);
	}

	if (4 == argc) {

		long strs[DIMS];
		md_calc_strides(DIMS, strs, ksp_dims, CFL_SIZE);

		if (norm)
			md_zdiv2(DIMS, ksp_dims, strs, sens, strs, sens, img_strs, norm);

		fftmod(DIMS, ksp_dims, FFT_FLAGS, sens, sens);

		unmap_cfl(DIMS, ksp_dims, sens);

	} else {

		md_free(sens);
	}

	md_free(norm);
	md_free(mask);

	if (NULL != psf)
		unmap_cfl(DIMS, pat_dims, pattern);
	else
		md_free(pattern);

	unmap_cfl(DIMS, img_dims, image);
	unmap_cfl(DIMS, ksp_dims, kspace_data);

	exit(0);
}
// Caller is responsible to provide enough storage space in res
// Select bit range [start, end]
// NOTE: Bit indices are, as usual, zero-based
void get_bit_range(I start, I end, C *res) {
#ifdef EXPENSIVE_SANITY_CHECKS
	if (end > n || start > n || start >= end) {
		// TODO: Throw an exception instead
		std::cerr << "Internal error: Bit index bogus" << std::endl;
		std::cerr << "(start=" << start << ", end=" << end
			  << ", n=" << n << ")" << std::endl;
		std::exit(-1);
	}
#endif
	memset(res, 0, ceil(((double)(end - start + 1))/8));

	// Compute chain elements and bit within this element for the
	// start and end positions
	I start_chunk = start/bits_per_type;
	I end_chunk = end/bits_per_type;
	unsigned short start_bit = start % bits_per_type;
	unsigned short end_bit;

	I dest_chunk = 0;
	C chunk;
	C mask;

	if (start_chunk == end_chunk)
		end_bit = end % bits_per_type;
	else
		end_bit = bits_per_type - 1;

	mask = compute_mask(start_bit, end_bit);
	res[dest_chunk] = (data[start_chunk] & mask) >> start_bit;

	if (start_chunk == end_chunk)
		return;

	I dest_start_bit = end_bit - start_bit + 1;

	if (dest_start_bit == bits_per_type) {
		dest_chunk++;
		dest_start_bit = 0;
	}

	I dest_bits;

	for (I curr_chunk = start_chunk+1; curr_chunk < end_chunk; curr_chunk++) {
		// For the inner chunks, we can always select the full chunk from
		// the input data, but need to split it across the output
		// data field
		chunk = data[curr_chunk];

		// How many bits remain in the destination chunk
		dest_bits = bits_per_type - dest_start_bit;

		// Fill up the current destination chunk
		mask = compute_mask(0, dest_bits - 1);
		res[dest_chunk] |= ((chunk & mask) << dest_start_bit);
		dest_chunk++;

		// ... and fill the next destination chunk as far as
		// possible unless the previous chunk was completely
		// drained and there is nothing left for the new chunk
		if (dest_bits != bits_per_type) {
			mask = compute_mask(dest_bits, bits_per_type - 1);
			res[dest_chunk] = (chunk & mask) >> dest_bits;
		}

		// Compute new starting position in the destination chunk
		dest_start_bit = bits_per_type - dest_bits;
	}
	end_bit = end % bits_per_type;
	dest_bits = bits_per_type - dest_start_bit;
	mask = compute_mask(0, end_bit);
	chunk = data[end_chunk] & mask;

#ifdef EXPENSIVE_SANITY_CHECKS
	if (debug_level >= EXCESSIVE_INFO) {
		std::cerr << "end_bit: " << end_bit << ", dest_bits: " << dest_bits
			  << ", mask: " << std::bitset<32>(mask)
			  << ", end_chunk: " << end_chunk << std::endl;
		std::cerr << "data[end_chunk]: " << std::bitset<32>(data[end_chunk])
			  << std::endl;
		std::cerr << "masked data: " << std::bitset<32>(chunk) << std::endl;
		std::cerr << "dest_start_bit: " << dest_start_bit << std::endl;
	}
#endif

	// Any excess bits that do not fit into the current result chunk
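The excerpt breaks off right after masking the final source chunk. A plausible sketch of how that last spill-over write could look, assuming the remaining end_bit + 1 bits are placed at dest_start_bit and any overflow continues in the next result chunk (the names mirror the excerpt, but the original ending is not shown above):

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

#define BITS_PER_TYPE 32u

/* Place the low `nbits` bits of `chunk` into `res`, starting at bit
 * `dest_start_bit` of res[dest_chunk]; bits that do not fit spill over
 * into res[dest_chunk + 1]. Assumes dest_start_bit < BITS_PER_TYPE. */
static void write_final_chunk(uint32_t *res, size_t dest_chunk,
                              unsigned dest_start_bit, uint32_t chunk,
                              unsigned nbits)
{
	unsigned dest_bits = BITS_PER_TYPE - dest_start_bit;  /* room left in this chunk */

	res[dest_chunk] |= chunk << dest_start_bit;

	/* Any excess bits that do not fit into the current result chunk
	 * are shifted down and start the following chunk. */
	if (nbits > dest_bits)
		res[dest_chunk + 1] = chunk >> dest_bits;
}

int main(void)
{
	uint32_t res[2] = { 0, 0 };

	/* Write six bits (101101b) at offset 28: four land in res[0], two spill into res[1] */
	write_final_chunk(res, 0, 28, 0x2Du, 6);
	printf("res[1] = 0x%08" PRIx32 ", res[0] = 0x%08" PRIx32 "\n", res[1], res[0]);
	return 0;
}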