void fwt1(unsigned int N, unsigned int d, const long dims[N], const long ostr[N], complex float* low, complex float* hgh, const long istr[N], const complex float* in, const long flen, const float filter[2][2][flen]) { debug_printf(DP_DEBUG4, "fwt1: %d/%d\n", d, N); debug_print_dims(DP_DEBUG4, N, dims); assert(dims[d] >= 2); long odims[N]; md_copy_dims(N, odims, dims); odims[d] = bandsize(dims[d], flen); debug_print_dims(DP_DEBUG4, N, odims); long o = d + 1; long u = N - o; // 0 1 2 3 4 5 6|7 // --d-- * --u--|N // ---o--- assert(d == md_calc_blockdim(d, dims + 0, istr + 0, CFL_SIZE)); assert(u == md_calc_blockdim(u, dims + o, istr + o, CFL_SIZE * md_calc_size(o, dims))); assert(d == md_calc_blockdim(d, odims + 0, ostr + 0, CFL_SIZE)); assert(u == md_calc_blockdim(u, odims + o, ostr + o, CFL_SIZE * md_calc_size(o, odims))); // merge dims long wdims[3] = { md_calc_size(d, dims), dims[d], md_calc_size(u, dims + o) }; long wistr[3] = { CFL_SIZE, istr[d], CFL_SIZE * md_calc_size(o, dims) }; long wostr[3] = { CFL_SIZE, ostr[d], CFL_SIZE * md_calc_size(o, odims) }; #ifdef USE_CUDA if (cuda_ondevice(in)) { assert(cuda_ondevice(low)); assert(cuda_ondevice(hgh)); float* flow = md_gpu_move(1, MD_DIMS(flen), filter[0][0], FL_SIZE); float* fhgh = md_gpu_move(1, MD_DIMS(flen), filter[0][1], FL_SIZE); wl3_cuda_down3(wdims, wostr, low, wistr, in, flen, flow); wl3_cuda_down3(wdims, wostr, hgh, wistr, in, flen, fhgh); md_free(flow); md_free(fhgh); return; } #endif // no clear needed wavelet_down3(wdims, wostr, low, wistr, in, flen, filter[0][0]); wavelet_down3(wdims, wostr, hgh, wistr, in, flen, filter[0][1]); }
void iwt1(unsigned int N, unsigned int d, const long dims[N], const long ostr[N], complex float* out, const long istr[N], const complex float* low, const complex float* hgh, const long flen, const float filter[2][2][flen]) { debug_printf(DP_DEBUG4, "ifwt1: %d/%d\n", d, N); debug_print_dims(DP_DEBUG4, N, dims); assert(dims[d] >= 2); long idims[N]; md_copy_dims(N, idims, dims); idims[d] = bandsize(dims[d], flen); debug_print_dims(DP_DEBUG4, N, idims); long o = d + 1; long u = N - o; // 0 1 2 3 4 5 6|7 // --d-- * --u--|N // ---o--- assert(d == md_calc_blockdim(d, dims + 0, ostr + 0, CFL_SIZE)); assert(u == md_calc_blockdim(u, dims + o, ostr + o, CFL_SIZE * md_calc_size(o, dims))); assert(d == md_calc_blockdim(d, idims + 0, istr + 0, CFL_SIZE)); assert(u == md_calc_blockdim(u, idims + o, istr + o, CFL_SIZE * md_calc_size(o, idims))); long wdims[3] = { md_calc_size(d, dims), dims[d], md_calc_size(u, dims + o) }; long wistr[3] = { CFL_SIZE, istr[d], CFL_SIZE * md_calc_size(o, idims) }; long wostr[3] = { CFL_SIZE, ostr[d], CFL_SIZE * md_calc_size(o, dims) }; md_clear(3, wdims, out, CFL_SIZE); // we cannot clear because we merge outputs #ifdef USE_CUDA if (cuda_ondevice(out)) { assert(cuda_ondevice(low)); assert(cuda_ondevice(hgh)); float* flow = md_gpu_move(1, MD_DIMS(flen), filter[1][0], FL_SIZE); float* fhgh = md_gpu_move(1, MD_DIMS(flen), filter[1][1], FL_SIZE); wl3_cuda_up3(wdims, wostr, out, wistr, low, flen, flow); wl3_cuda_up3(wdims, wostr, out, wistr, hgh, flen, fhgh); md_free(flow); md_free(fhgh); return; } #endif wavelet_up3(wdims, wostr, out, wistr, low, flen, filter[1][0]); wavelet_up3(wdims, wostr, out, wistr, hgh, flen, filter[1][1]); }
static int siemens_adc_read(bool vd, int fd, bool linectr, bool partctr, const long dims[DIMS], long pos[DIMS], complex float* buf) { char scan_hdr[vd ? 192 : 0]; xread(fd, scan_hdr, sizeof(scan_hdr)); for (pos[COIL_DIM] = 0; pos[COIL_DIM] < dims[COIL_DIM]; pos[COIL_DIM]++) { char chan_hdr[vd ? 32 : 128]; xread(fd, chan_hdr, sizeof(chan_hdr)); struct mdh2 mdh; memcpy(&mdh, vd ? (scan_hdr + 40) : (chan_hdr + 20), sizeof(mdh)); if (0 == pos[COIL_DIM]) { // TODO: rethink this pos[PHS1_DIM] = mdh.sLC[0] + (linectr ? mdh.linectr : 0); pos[AVG_DIM] = mdh.sLC[1]; pos[SLICE_DIM] = mdh.sLC[2]; pos[PHS2_DIM] = mdh.sLC[3] + (partctr ? mdh.partctr : 0); pos[TE_DIM] = mdh.sLC[4]; pos[TIME_DIM] = mdh.sLC[6]; pos[TIME2_DIM] = mdh.sLC[7]; } debug_print_dims(DP_DEBUG1, DIMS, pos); if (dims[READ_DIM] != mdh.samples) { debug_printf(DP_WARN, "Wrong number of samples: %d != %d.\n", dims[READ_DIM], mdh.samples); return -1; } if ((0 != mdh.channels) && (dims[COIL_DIM] != mdh.channels)) { debug_printf(DP_WARN, "Wrong number of channels: %d != %d.\n", dims[COIL_DIM], mdh.channels); return -1; } xread(fd, buf + pos[COIL_DIM] * dims[READ_DIM], dims[READ_DIM] * CFL_SIZE); } pos[COIL_DIM] = 0; return 0; }
/** * Efficiently chain two matrix linops by multiplying the actual matrices together. * Stores a copy of the new matrix. * Returns: C = B A * * @param a first matrix (applied to input) * @param b second matrix (applied to output of first matrix) */ struct linop_s* linop_matrix_chain(const struct linop_s* a, const struct linop_s* b) { const struct operator_matrix_s* a_data = CAST_DOWN(operator_matrix_s, linop_get_data(a)); const struct operator_matrix_s* b_data = CAST_DOWN(operator_matrix_s, linop_get_data(b)); // check compatibility assert(linop_codomain(a)->N == linop_domain(b)->N); assert(md_check_compat(linop_codomain(a)->N, 0u, linop_codomain(a)->dims, linop_domain(b)->dims)); unsigned int D = linop_domain(a)->N; unsigned long outB_flags = md_nontriv_dims(D, linop_codomain(b)->dims); unsigned long inB_flags = md_nontriv_dims(D, linop_domain(b)->dims); unsigned long delB_flags = inB_flags & ~outB_flags; unsigned int N = a_data->N; assert(N == 2 * D); long in_dims[N]; md_copy_dims(N, in_dims, a_data->in_dims); long matA_dims[N]; md_copy_dims(N, matA_dims, a_data->mat_dims); long matB_dims[N]; md_copy_dims(N, matB_dims, b_data->mat_dims); long out_dims[N]; md_copy_dims(N, out_dims, b_data->out_dims); for (unsigned int i = 0; i < D; i++) { if (MD_IS_SET(delB_flags, i)) { matA_dims[2 * i + 0] = a_data->mat_dims[2 * i + 1]; matA_dims[2 * i + 1] = a_data->mat_dims[2 * i + 0]; in_dims[2 * i + 0] = a_data->in_dims[2 * i + 1]; in_dims[2 * i + 1] = a_data->in_dims[2 * i + 0]; } } long matrix_dims[N]; md_singleton_dims(N, matrix_dims); unsigned long iflags = md_nontriv_dims(N, in_dims); unsigned long oflags = md_nontriv_dims(N, out_dims); unsigned long flags = iflags | oflags; // we combine a and b and sum over dims not in input or output md_max_dims(N, flags, matrix_dims, matA_dims, matB_dims); debug_printf(DP_DEBUG1, "tensor chain: %ld x %ld -> %ld\n", md_calc_size(N, matA_dims), md_calc_size(N, matB_dims), md_calc_size(N, matrix_dims)); complex float* matrix = md_alloc(N, matrix_dims, CFL_SIZE); debug_print_dims(DP_DEBUG2, N, matrix_dims); debug_print_dims(DP_DEBUG2, N, in_dims); debug_print_dims(DP_DEBUG2, N, matA_dims); debug_print_dims(DP_DEBUG2, N, matB_dims); debug_print_dims(DP_DEBUG2, N, out_dims); md_ztenmul(N, matrix_dims, matrix, matA_dims, a_data->mat, matB_dims, b_data->mat); // priv2 takes our doubled dimensions struct operator_matrix_s* data = linop_matrix_priv2(N, out_dims, in_dims, matrix_dims, matrix); /* although we internally use different dimensions we define the * correct interface */ struct linop_s* c = linop_create(linop_codomain(b)->N, linop_codomain(b)->dims, linop_domain(a)->N, linop_domain(a)->dims, CAST_UP(data), linop_matrix_apply, linop_matrix_apply_adjoint, linop_matrix_apply_normal, NULL, linop_matrix_del); md_free(matrix); return c; }
int main_twixread(int argc, char* argv[argc]) { long adcs = 0; bool autoc = false; bool linectr = false; bool partctr = false; long dims[DIMS]; md_singleton_dims(DIMS, dims); struct opt_s opts[] = { OPT_LONG('x', &(dims[READ_DIM]), "X", "number of samples (read-out)"), OPT_LONG('y', &(dims[PHS1_DIM]), "Y", "phase encoding steps"), OPT_LONG('z', &(dims[PHS2_DIM]), "Z", "partition encoding steps"), OPT_LONG('s', &(dims[SLICE_DIM]), "S", "number of slices"), OPT_LONG('v', &(dims[AVG_DIM]), "V", "number of averages"), OPT_LONG('c', &(dims[COIL_DIM]), "C", "number of channels"), OPT_LONG('n', &(dims[TIME_DIM]), "N", "number of repetitions"), OPT_LONG('a', &adcs, "A", "total number of ADCs"), OPT_SET('A', &autoc, "automatic [guess dimensions]"), OPT_SET('L', &linectr, "use linectr offset"), OPT_SET('P', &partctr, "use partctr offset"), }; cmdline(&argc, argv, 2, 2, usage_str, help_str, ARRAY_SIZE(opts), opts); if (0 == adcs) adcs = dims[PHS1_DIM] * dims[PHS2_DIM] * dims[SLICE_DIM] * dims[TIME_DIM]; debug_print_dims(DP_DEBUG1, DIMS, dims); int ifd; if (-1 == (ifd = open(argv[1], O_RDONLY))) error("error opening file."); struct hdr_s hdr; bool vd = siemens_meas_setup(ifd, &hdr); long off[DIMS] = { 0 }; if (autoc) { long max[DIMS] = { [COIL_DIM] = 1000 }; long min[DIMS] = { 0 }; // min is always 0 adcs = 0; while (true) { if (-1 == siemens_bounds(vd, ifd, min, max)) break; debug_print_dims(DP_DEBUG3, DIMS, max); adcs++; } for (unsigned int i = 0; i < DIMS; i++) { off[i] = -min[i]; dims[i] = max[i] + off[i]; } debug_printf(DP_DEBUG2, "Dimensions: "); debug_print_dims(DP_DEBUG2, DIMS, dims); debug_printf(DP_DEBUG2, "Offset: "); debug_print_dims(DP_DEBUG2, DIMS, off); siemens_meas_setup(ifd, &hdr); // reset } complex float* out = create_cfl(argv[2], DIMS, dims); md_clear(DIMS, dims, out, CFL_SIZE); long adc_dims[DIMS]; md_select_dims(DIMS, READ_FLAG|COIL_FLAG, adc_dims, dims); void* buf = md_alloc(DIMS, adc_dims, CFL_SIZE); while (adcs--) { long pos[DIMS] = { [0 ... DIMS - 1] = 0 }; if (-1 == siemens_adc_read(vd, ifd, linectr, partctr, dims, pos, buf)) { debug_printf(DP_WARN, "Stopping.\n"); break; } for (unsigned int i = 0; i < DIMS; i++) pos[i] += off[i]; debug_print_dims(DP_DEBUG1, DIMS, pos); if (!md_is_index(DIMS, pos, dims)) { debug_printf(DP_WARN, "Index out of bounds.\n"); continue; } md_copy_block(DIMS, pos, dims, out, adc_dims, buf, CFL_SIZE); } md_free(buf); unmap_cfl(DIMS, dims, out); exit(0); }
unsigned int optimize_dims(unsigned int D, unsigned int N, long dims[N], long (*strs[D])[N]) { merge_dims(D, N, dims, strs); unsigned int ND = remove_empty_dims(D, N, dims, strs); if (0 == ND) { // atleast return a single dimension dims[0] = 1; for (unsigned int j = 0; j < D; j++) (*strs[j])[0] = 0; ND = 1; } debug_print_dims(DP_DEBUG4, ND, dims); float blocking[N]; #ifdef BERKELEY_SVN // actually those are not the blocking factors // as used below but relative to fast memory //demmel_factors(D, ND, blocking, strs); UNUSED(demmel_factors); #endif #if 0 debug_printf(DP_DEBUG4, "DB: "); for (unsigned int i = 0; i < ND; i++) debug_printf(DP_DEBUG4, "%f\t", blocking[i]); debug_printf(DP_DEBUG4, "\n"); #endif #if 1 for (unsigned int i = 0; i < ND; i++) blocking[i] = 0.5; // blocking[i] = 1.; #endif // try to split dimensions according to blocking factors // use space up to N bool split = false; do { if (N == ND) break; split = split_dims(D, ND, dims, strs, blocking); if (split) ND++; } while(split); // printf("Split %c :", split ? 'y' : 'n'); // print_dims(ND, dims); long max_strides[ND]; for (unsigned int i = 0; i < ND; i++) { max_strides[i] = 0; for (unsigned int j = 0; j < D; j++) max_strides[i] = MAX(max_strides[i], (*strs[j])[i]); } unsigned int ord[ND]; compute_permutation(ND, ord, max_strides); // for (unsigned int i = 0; i < ND; i++) // printf("%d: %ld %d\n", i, max_strides[i], ord[i]); #if 1 for (unsigned int j = 0; j < D; j++) reorder_long(ND, ord, *strs[j]); reorder_long(ND, ord, dims); #endif #if 0 printf("opt dims\n"); print_dims(ND, dims); if (D > 0) print_dims(ND, *strs[0]); if (D > 1) print_dims(ND, *strs[1]); if (D > 2) print_dims(ND, *strs[2]); #endif return ND; }
int main_twixread(int argc, char* argv[argc]) { int c; long adcs = 0; bool autoc = false; bool linectr = false; bool partctr = false; long dims[DIMS]; md_singleton_dims(DIMS, dims); while (-1 != (c = getopt(argc, argv, "x:y:z:s:c:a:n:PLAh"))) { switch (c) { case 'x': dims[READ_DIM] = atoi(optarg); break; case 'y': dims[PHS1_DIM] = atoi(optarg); break; case 'z': dims[PHS2_DIM] = atoi(optarg); break; case 's': dims[SLICE_DIM] = atoi(optarg); break; case 'v': dims[AVG_DIM] = atoi(optarg); break; case 'n': dims[TIME_DIM] = atoi(optarg); break; case 'a': adcs = atoi(optarg); break; case 'A': autoc = true; break; case 'c': dims[COIL_DIM] = atoi(optarg); break; case 'P': partctr = true; break; case 'L': linectr = true; break; case 'h': usage(argv[0], stdout); help(); exit(0); default: usage(argv[0], stderr); exit(1); } } if (argc - optind != 2) { usage(argv[0], stderr); exit(1); } if (0 == adcs) adcs = dims[PHS1_DIM] * dims[PHS2_DIM] * dims[SLICE_DIM] * dims[TIME_DIM]; debug_print_dims(DP_DEBUG1, DIMS, dims); int ifd; if (-1 == (ifd = open(argv[optind + 0], O_RDONLY))) error("error opening file."); struct hdr_s hdr; bool vd = siemens_meas_setup(ifd, &hdr); long off[DIMS] = { 0 }; if (autoc) { long max[DIMS] = { [COIL_DIM] = 1000 }; long min[DIMS] = { 0 }; // min is always 0 adcs = 0; while (true) { if (-1 == siemens_bounds(vd, ifd, min, max)) break; debug_print_dims(DP_DEBUG3, DIMS, max); adcs++; } for (unsigned int i = 0; i < DIMS; i++) { off[i] = -min[i]; dims[i] = max[i] + off[i]; } debug_printf(DP_INFO, "Dimensions: "); debug_print_dims(DP_INFO, DIMS, dims); debug_printf(DP_INFO, "Offset: "); debug_print_dims(DP_INFO, DIMS, off); siemens_meas_setup(ifd, &hdr); // reset } complex float* out = create_cfl(argv[optind + 1], DIMS, dims); md_clear(DIMS, dims, out, CFL_SIZE); long adc_dims[DIMS]; md_select_dims(DIMS, READ_FLAG|COIL_FLAG, adc_dims, dims); void* buf = md_alloc(DIMS, adc_dims, CFL_SIZE); while (adcs--) { long pos[DIMS] = { [0 ... DIMS - 1] = 0 }; if (-1 == siemens_adc_read(vd, ifd, linectr, partctr, dims, pos, buf)) { debug_printf(DP_WARN, "Stopping.\n"); break; } for (unsigned int i = 0; i < DIMS; i++) pos[i] += off[i]; debug_print_dims(DP_DEBUG1, DIMS, pos); if (!md_is_index(DIMS, pos, dims)) { debug_printf(DP_WARN, "Index out of bounds.\n"); continue; } md_copy_block(DIMS, pos, dims, out, adc_dims, buf, CFL_SIZE); } md_free(buf); unmap_cfl(DIMS, dims, out); exit(0); }