/*
 * Copy a rnk-dimensional block of vn-element tuples from `out` into `in`.
 *
 *   in     destination; the last dimension is stepped with stride `is`,
 *          and each earlier dimension with `is` times the product of the
 *          extents of all later dimensions
 *   rnk    number of dimensions of the block (0 means a single tuple)
 *   n      block extent along each dimension (rnk entries)
 *   start  first index of the block along each dimension of `out`
 *   is     stride, in elements, between consecutive tuples in `in`
 *   os     per-dimension strides, in elements, of `out`
 *   vn     number of contiguous elements copied per tuple
 *   out    source array; the block origin is offset by start[d] * os[d]
 *          in every dimension d
 */
static void copy_block_in(bench_real *in, int rnk, ptrdiff_t *n, ptrdiff_t *start, ptrdiff_t is, ptrdiff_t *os, ptrdiff_t vn, const bench_real *out)
{
    ptrdiff_t k;

    if (rnk == 0) {
        /* Zero-dimensional block: exactly one tuple. */
        for (k = 0; k < vn; ++k) {
            in[k] = out[k];
        }
        return;
    }

    if (rnk == 1) {
        /* Fast path only: the general branch below would yield the same
           result by recursing down to rnk == 0 for every row. */
        bench_real *dst = in;
        const bench_real *src = out + start[0] * os[0];
        ptrdiff_t row;

        for (row = 0; row < n[0]; ++row, dst += is, src += os[0]) {
            for (k = 0; k < vn; ++k) {
                dst[k] = src[k];
            }
        }
        return;
    }

    /* General case: peel off the last dimension and recurse on the rest.
       Iterating over n[0] here would give better locality, but peeling
       the last dimension keeps the code simpler. */
    {
        const int last = rnk - 1;

        for (k = 0; k < n[last]; ++k) {
            copy_block_in(in + k * is,
                          last, n, start,
                          is * n[last], os, vn,
                          out + (start[last] + k) * os[last]);
        }
    }
}
/*
 * Write previously saved DC coefficients back into the first subband of
 * each of the three components for the slice at (slice_x, slice_y).
 *
 * dc_values is consumed sequentially: the luma codeblock first, then
 * chroma1, then chroma2, each occupying width * height entries of the
 * codeblock returned by schro_frame_data_get_codeblock().
 *
 * `frame` is not referenced by this function.
 *
 * NOTE(review): copy_block_in is called here with (codeblock, values);
 * that does not match the eight-parameter copy_block_in defined elsewhere
 * in this file -- confirm which definition these calls are meant to bind to.
 */
static void
restore_dc_values (SchroEncoderFrame *frame, int16_t *dc_values, SchroLowDelay *lowdelay, int slice_x, int slice_y)
{
  SchroFrameData cb;
  int16_t *cursor = dc_values;

  /* Luma */
  schro_frame_data_get_codeblock (&cb, &lowdelay->luma_subbands[0],
      slice_x, slice_y,
      lowdelay->n_horiz_slices, lowdelay->n_vert_slices);
  copy_block_in (&cb, cursor);
  cursor += cb.width * cb.height;

  /* First chroma component */
  schro_frame_data_get_codeblock (&cb, &lowdelay->chroma1_subbands[0],
      slice_x, slice_y,
      lowdelay->n_horiz_slices, lowdelay->n_vert_slices);
  copy_block_in (&cb, cursor);
  cursor += cb.width * cb.height;

  /* Second chroma component; nothing follows it, so no further advance. */
  schro_frame_data_get_codeblock (&cb, &lowdelay->chroma2_subbands[0],
      slice_x, slice_y,
      lowdelay->n_horiz_slices, lowdelay->n_vert_slices);
  copy_block_in (&cb, cursor);
}
/*
 * Distribute the array `in` across all processes.
 *
 * For every process p in [0, n_pes) the block described by
 * all_local_ni[p*rnk ..] / all_local_starti[p*rnk ..] is copied out of
 * `in` (addressed through istrides) into the staging buffer all_local_in,
 * one process's block after another, each block taking isend_cnt[p]
 * elements.  The staging buffer is then scattered from rank 0 over
 * MPI_COMM_WORLD, using isend_cnt / isend_off as counts and displacements,
 * into local_in.
 *
 * The packing loop runs on whichever rank calls this function; there is
 * no rank check here.
 * NOTE(review): presumably `in` and the staging buffer only need to be
 * meaningful on rank 0 -- confirm against the callers.
 */
static void do_scatter_in(bench_real *in)
{
    bench_real *dst;
    int pe;

    /* When the flag is set, replace the staging buffer with a fresh
       iNtot-element allocation and clear the flag. */
    if (all_local_in_alloc) {
        bench_free(all_local_in);
        all_local_in = (bench_real*) bench_malloc(iNtot*sizeof(bench_real));
        all_local_in_alloc = 0;
    }

    /* Pack each process's block contiguously, in rank order. */
    dst = all_local_in;
    pe = 0;
    while (pe < n_pes) {
        copy_block_in(dst,
                      rnk,
                      all_local_ni + pe * rnk,
                      all_local_starti + pe * rnk,
                      vn, istrides, vn,
                      in);
        dst += isend_cnt[pe];
        ++pe;
    }

    MPI_Scatterv(all_local_in, isend_cnt, isend_off, BENCH_MPI_TYPE,
                 local_in, isend_cnt[my_pe], BENCH_MPI_TYPE,
                 0, MPI_COMM_WORLD);
}