/**
 * Build the 2-D mask matrix as the outer product of two 1-D masks and
 * upload it into an OpenCL buffer.
 *
 * The matrix has (2 * step_x + 1) columns and (2 * step_y + 1) rows, stored
 * row-major; entry (i, j) is mask_y[j] * mask_x[i].
 *
 * @param cl_mask_matrix destination OpenCL buffer; must be able to hold
 *                       sizeof(uint32_t) * (2*step_x+1) * (2*step_y+1) bytes
 * @param step_x         horizontal step; the x mask has 2*step_x+1 entries
 * @param step_y         vertical step; the y mask has 2*step_y+1 entries
 * @return the result of av_opencl_buffer_write() on the success path,
 *         AVERROR(ENOMEM) if a temporary allocation fails, or the negative
 *         value returned by compute_mask() if it fails
 */
static int compute_mask_matrix(cl_mem cl_mask_matrix, int step_x, int step_y)
{
    int i, j, ret = 0;
    /*
     * All three pointers must start out NULL: every error path jumps to the
     * common cleanup below, which frees all of them regardless of how far the
     * allocations got. Freeing an uninitialized pointer is undefined behaviour.
     */
    uint32_t *mask_matrix = NULL, *mask_x = NULL, *mask_y = NULL;
    size_t size_matrix = sizeof(uint32_t) * (2 * step_x + 1) * (2 * step_y + 1);

    mask_x = av_mallocz(sizeof(uint32_t) * (2 * step_x + 1));
    if (!mask_x) {
        ret = AVERROR(ENOMEM);
        goto end;
    }
    mask_y = av_mallocz(sizeof(uint32_t) * (2 * step_y + 1));
    if (!mask_y) {
        ret = AVERROR(ENOMEM);
        goto end;
    }
    mask_matrix = av_mallocz(size_matrix);
    if (!mask_matrix) {
        ret = AVERROR(ENOMEM);
        goto end;
    }

    /* Fill the two 1-D masks (presumably 2*step+1 entries each -- see compute_mask()). */
    ret = compute_mask(step_x, mask_x);
    if (ret < 0)
        goto end;
    ret = compute_mask(step_y, mask_y);
    if (ret < 0)
        goto end;

    /* Outer product, row-major with x as the fastest-varying index. */
    for (j = 0; j < 2 * step_y + 1; j++) {
        for (i = 0; i < 2 * step_x + 1; i++) {
            mask_matrix[i + j * (2 * step_x + 1)] = mask_y[j] * mask_x[i];
        }
    }

    ret = av_opencl_buffer_write(cl_mask_matrix, (uint8_t *)mask_matrix, size_matrix);
end:
    av_freep(&mask_x);
    av_freep(&mask_y);
    av_freep(&mask_matrix);
    return ret;
}
Example #2
0
/*
 * Set up a caller-supplied hash structure on top of a caller-supplied
 * chain table.
 *
 * - Runs compute_bits() first if hash_val_t_bit is still zero.
 * - nchains must be a power of two (asserted).
 * - A NULL compfun or hashfun selects hash_comp_default or
 *   hash_fun_default respectively.
 * - The hash is marked non-dynamic (dynamic = 0), its mask is derived from
 *   nchains via compute_mask(), and the table is reset with clear_table().
 *
 * Returns the same `hash` pointer that was passed in.
 */
hash_t *hash_init(hash_t *hash, hashcount_t maxcount,
                  hash_comp_t compfun, hash_fun_t hashfun, hnode_t **table,
                  hashcount_t nchains)
{
    if (!hash_val_t_bit)
        compute_bits();

    assert (is_power_of_two(nchains));

    /* Storage and sizing supplied by the caller. */
    hash->table = table;
    hash->nchains = nchains;
    hash->nodecount = 0;
    hash->maxcount = maxcount;

    /* Fall back to the default callbacks when none are given. */
    if (compfun)
        hash->compare = compfun;
    else
        hash->compare = hash_comp_default;

    if (hashfun)
        hash->function = hashfun;
    else
        hash->function = hash_fun_default;

    hash->dynamic = 0;
    hash->mask = compute_mask(nchains);

    /* Needs table and nchains to be in place already. */
    clear_table(hash);

    assert (hash_verify(hash));

    return hash;
}
Example #3
0
File: nlinv.c Project: nckz/bart
/*
 * Command-line entry point for the nlinv reconstruction.
 *
 * Positional arguments (after option parsing by cmdline()):
 *   argv[1]  input k-space file (loaded with load_cfl)
 *   argv[2]  output image file (created with create_cfl)
 *   argv[3]  optional output file for the sensitivities
 *
 * Options handled here:
 *   -l <float>  value passed to noir_recon as l1
 *   -i <int>    iteration count (must be > 0)
 *   -c          sets rvc
 *   -N          clears normalize
 *   -f <float>  restrict_fov; -1 (the default) means an all-ones mask
 *   -p <file>   load the sampling pattern from file instead of estimating it
 *   -g          request GPU use (only effective when built with USE_CUDA)
 *
 * The function does not return: it ends with exit(0).
 */
int main_nlinv(int argc, char* argv[])
{
    int iter = 8;
    float l1 = -1.;
    bool waterfat = false;      // no option in opts[] sets this; the water/fat code below is currently unreachable
    bool rvc = false;
    bool normalize = true;
    float restrict_fov = -1.;
    float csh[3] = { 0., 0., 0. };
    bool usegpu = false;
    const char* psf = NULL;

    const struct opt_s opts[] = {

        { 'l', true, opt_float, &l1, NULL },
        { 'i', true, opt_int, &iter, NULL },
        { 'c', false, opt_set, &rvc, NULL },
        { 'N', false, opt_clear, &normalize, NULL },
        { 'f', true, opt_float, &restrict_fov, NULL },
        { 'p', true, opt_string, &psf, NULL },
        { 'g', false, opt_set, &usegpu, NULL },
    };

    cmdline(&argc, argv, 2, 3, usage_str, help_str, ARRAY_SIZE(opts), opts);

    num_init();

    assert(iter > 0);


    long ksp_dims[DIMS];
    complex float* kspace_data = load_cfl(argv[1], DIMS, ksp_dims);

    long dims[DIMS];
    md_copy_dims(DIMS, dims, ksp_dims);

    if (waterfat)
        dims[CSHIFT_DIM] = 2;

    // Image: spatial dimensions plus the chemical-shift dimension.
    long img_dims[DIMS];
    md_select_dims(DIMS, FFT_FLAGS|CSHIFT_FLAG, img_dims, dims);

    long img_strs[DIMS];
    md_calc_strides(DIMS, img_strs, img_dims, CFL_SIZE);


    complex float* image = create_cfl(argv[2], DIMS, img_dims);

    // Mask / normalization buffers: spatial dimensions only.
    long msk_dims[DIMS];
    md_select_dims(DIMS, FFT_FLAGS, msk_dims, dims);

    long msk_strs[DIMS];
    md_calc_strides(DIMS, msk_strs, msk_dims, CFL_SIZE);

    complex float* mask;
    complex float* norm = md_alloc(DIMS, msk_dims, CFL_SIZE);
    complex float* sens;

    // Sensitivities go to a file only when a third positional argument was given.
    if (4 == argc) {

        sens = create_cfl(argv[3], DIMS, ksp_dims);

    } else {

        sens = md_alloc(DIMS, ksp_dims, CFL_SIZE);
    }


    complex float* pattern = NULL;
    long pat_dims[DIMS];

    if (NULL != psf) {

        pattern = load_cfl(psf, DIMS, pat_dims);

        // FIXME: check compatibility
    } else {

        pattern = md_alloc(DIMS, img_dims, CFL_SIZE);
        estimate_pattern(DIMS, ksp_dims, COIL_DIM, pattern, kspace_data);
    }


    if (waterfat) {

        // Duplicate the pattern into the second chemical-shift slot and
        // modulate that copy with a linear phase derived from csh[].
        size_t size = md_calc_size(DIMS, msk_dims);
        md_copy(DIMS, msk_dims, pattern + size, pattern, CFL_SIZE);

        long shift_dims[DIMS];
        md_select_dims(DIMS, FFT_FLAGS, shift_dims, msk_dims);

        long shift_strs[DIMS];
        md_calc_strides(DIMS, shift_strs, shift_dims, CFL_SIZE);

        complex float* shift = md_alloc(DIMS, shift_dims, CFL_SIZE);

        unsigned int X = shift_dims[READ_DIM];
        unsigned int Y = shift_dims[PHS1_DIM];
        unsigned int Z = shift_dims[PHS2_DIM];

        // The buffer holds X * Y * Z elements and is read back below through
        // shift_strs, so element (x, y, z) must live at x + X * (y + Y * z).
        // NOTE(review): assumes md_calc_strides yields compact strides with
        // dimension 0 fastest and READ/PHS1/PHS2 being dimensions 0/1/2 -- confirm.
        // The previous index (z * Z + y) * Y + x was only correct for
        // X == Y == Z and could write out of bounds otherwise.
        for (unsigned int x = 0; x < X; x++)
            for (unsigned int y = 0; y < Y; y++)
                for (unsigned int z = 0; z < Z; z++)
                    shift[((size_t)z * Y + y) * X + x] = cexp(2.i * M_PI * ((csh[0] * x) / X + (csh[1] * y) / Y + (csh[2] * z) / Z));

        md_zmul2(DIMS, msk_dims, msk_strs, pattern + size, msk_strs, pattern + size, shift_strs, shift);
        md_free(shift);
    }

    // Rescale k-space in place so that its norm (md_znorm) becomes 100.
#if 0
    float scaling = 1. / estimate_scaling(ksp_dims, NULL, kspace_data);
#else
    float scaling = 100. / md_znorm(DIMS, ksp_dims, kspace_data);
#endif
    debug_printf(DP_INFO, "Scaling: %f\n", scaling);
    md_zsmul(DIMS, ksp_dims, kspace_data, kspace_data, scaling);

    if (-1. == restrict_fov) {

        // No FOV restriction requested: mask of all ones.
        mask = md_alloc(DIMS, msk_dims, CFL_SIZE);
        md_zfill(DIMS, msk_dims, mask, 1.);

    } else {

        float restrict_dims[DIMS] = { [0 ... DIMS - 1] = 1. };
        restrict_dims[0] = restrict_fov;
        restrict_dims[1] = restrict_fov;
        restrict_dims[2] = restrict_fov;
        mask = compute_mask(DIMS, msk_dims, restrict_dims);
    }

#ifdef  USE_CUDA
    if (usegpu) {

        complex float* kspace_gpu = md_alloc_gpu(DIMS, ksp_dims, CFL_SIZE);
        md_copy(DIMS, ksp_dims, kspace_gpu, kspace_data, CFL_SIZE);
        // On this path no sensitivities are requested from noir_recon (NULL) ...
        noir_recon(dims, iter, l1, image, NULL, pattern, mask, kspace_gpu, rvc, usegpu);
        md_free(kspace_gpu);

        // ... so sens is filled with ones for the steps below.
        md_zfill(DIMS, ksp_dims, sens, 1.);

    } else
#endif
        noir_recon(dims, iter, l1, image, sens, pattern, mask, kspace_data, rvc, usegpu);

    if (normalize) {

        // Scale the image by the root-sum-of-squares of the sensitivities over coils.
        md_zrss(DIMS, ksp_dims, COIL_FLAG, norm, sens);
        md_zmul2(DIMS, img_dims, img_strs, image, img_strs, image, msk_strs, norm);
    }

    if (4 == argc) {

        long strs[DIMS];

        md_calc_strides(DIMS, strs, ksp_dims, CFL_SIZE);

        if (norm)
            md_zdiv2(DIMS, ksp_dims, strs, sens, strs, sens, img_strs, norm);

        fftmod(DIMS, ksp_dims, FFT_FLAGS, sens, sens);

        unmap_cfl(DIMS, ksp_dims, sens);

    } else {

        md_free(sens);
    }

    md_free(norm);
    md_free(mask);

    // The pattern was either mapped from a file or allocated; release accordingly.
    if (NULL != psf)
        unmap_cfl(DIMS, pat_dims, pattern);
    else
        md_free(pattern);


    unmap_cfl(DIMS, img_dims, image);
    unmap_cfl(DIMS, ksp_dims, kspace_data);
    exit(0);
}
Example #4
0
	// Caller is responsible to provide enough storage space in res
	// Select bit range [start, end]
	// NOTE: Bit indices are, as usual, zero-based
	void get_bit_range(I start, I end, C *res) {
#ifdef EXPENSIVE_SANITY_CHECKS
		if (end > n || start > n || start >= end) {
			// TODO: Throw an exception instead
			std::cerr << "Internal error: Bit index bogous" << std::endl;
			std::cerr << "(start=" << start << ", end=" << end
				  << ", n=" << n << ")" << std::endl;
			std::exit(-1);
		}
#endif

		memset(res, 0, ceil(((double)(end - start + 1))/8));
		// Compute chain elements and bit within this element for the
		// start and end positions
		I start_chunk = start/bits_per_type;
		I end_chunk = end/bits_per_type;

		unsigned short start_bit = start % bits_per_type;
		unsigned short end_bit;

		I dest_chunk = 0;
		C chunk;
		C mask;

		if (start_chunk == end_chunk)
			end_bit = end % bits_per_type;
		else
			end_bit = bits_per_type - 1;

		mask = compute_mask(start_bit, end_bit);
		res[dest_chunk] = (data[start_chunk] & mask) >> start_bit;

		if (start_chunk == end_chunk)
			return;

		I dest_start_bit = end_bit - start_bit + 1;
		if (dest_start_bit == bits_per_type) {
			dest_chunk++;
			dest_start_bit = 0;
		}

		I dest_bits;

		for (I curr_chunk = start_chunk+1; curr_chunk < end_chunk; curr_chunk++) {
			// For the inner chunks, we can always select the full chunk from
			// the input data, but need to split it across the output
			// data field
			chunk = data[curr_chunk];

			// How many bits remain in the destination chunk
			dest_bits = bits_per_type - dest_start_bit;

			// Fill up the current destination chunk
			mask = compute_mask(0, dest_bits - 1);
			res[dest_chunk] |= ((chunk & mask) << dest_start_bit);
			dest_chunk++;

			// ... and fill the next destination chunk as far as
			// possible unless the previous chunk was completely
			// drained and there is nothing left for the new chunk
			if (dest_bits != bits_per_type) {
				mask = compute_mask(dest_bits, bits_per_type - 1);
				res[dest_chunk] = (chunk & mask) >> dest_bits;
			}

			// Compute new starting position in the destination chunk
			dest_start_bit = bits_per_type - dest_bits;
		}
Example #5
0
			// ... and fill the next destination chunk as far as
			// possible unless the previous chunk was completely
			// drained and there is nothing left for the new chunk
			if (dest_bits != bits_per_type) {
				mask = compute_mask(dest_bits, bits_per_type - 1);
				res[dest_chunk] = (chunk & mask) >> dest_bits;
			}

			// Compute new starting position in the destination chunk
			dest_start_bit = bits_per_type - dest_bits;
		}

		end_bit = end % bits_per_type;
		dest_bits =  bits_per_type - dest_start_bit;
		
		mask = compute_mask(0, end_bit);
		chunk = data[end_chunk] & mask;

#ifdef EXPENSIVE_SANITY_CHECKS
		if (debug_level >= EXCESSIVE_INFO) {
			std::cerr << "end_bit: " << end_bit << ", dest_bits: " << dest_bits
				  << ", mask: " << std::bitset<32>(mask) << ", end_chunk: "
				  << end_chunk << std::endl;
			std::cerr << "data[end_chunk]: " << std::bitset<32>(data[end_chunk])
				  << std::endl;
			std::cerr << "masked data:     " << std::bitset<32>(chunk)
				  << std::endl;
			std::cerr << "dest_start_bit: " << dest_start_bit << std::endl;
		}
#endif
		// Any excess bits that do not fit into the current result chunk