/*
 * Transpose a matrix in place. Supported shapes are 2^n x 2^n,
 * 2^n x (2*2^n) and (2*2^n) x 2^n; any other shape aborts.
 *
 * Returns 1 on success and 0 when the half-row swap reports failure.
 */
int
transpose_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols)
{
    mpd_size_t nelem = mul_size_t(rows, cols);

    assert(ispower2(rows));
    assert(ispower2(cols));

    /* Square matrix: a single square transposition suffices. */
    if (cols == rows) {
        squaretrans_pow2(matrix, rows);
        return 1;
    }

    /* Twice as wide as tall: permute the half-rows first, then transpose
     * the two resulting square halves. */
    if (cols == mul_size_t(2, rows)) {
        mpd_uint_t *second_half = matrix + (nelem/2);

        if (!swap_halfrows_pow2(matrix, rows, cols, FORWARD_CYCLE)) {
            return 0;
        }
        squaretrans_pow2(matrix, rows);
        squaretrans_pow2(second_half, rows);
        return 1;
    }

    /* Twice as tall as wide: transpose the two square halves first, then
     * undo the half-row permutation. */
    if (rows == mul_size_t(2, cols)) {
        mpd_uint_t *second_half = matrix + (nelem/2);

        squaretrans_pow2(matrix, cols);
        squaretrans_pow2(second_half, cols);
        if (!swap_halfrows_pow2(matrix, cols, rows, BACKWARD_CYCLE)) {
            return 0;
        }
        return 1;
    }

    abort(); /* GCOV_NOT_REACHED */

    return 1;
}
/*
 * uint8_t variant of the in-place transposition. Supported shapes are
 * 2^n x 2^n, 2^n x (2*2^n) and (2*2^n) x 2^n; any other shape is reported
 * through mpd_err_fatal().
 *
 * Returns 1 on success and 0 when the half-row swap reports failure.
 */
static int
transpose_pow2_c(uint8_t *matrix, mpd_size_t rows, mpd_size_t cols)
{
    mpd_size_t nelem = mul_size_t(rows, cols);

    assert(ispower2(rows));
    assert(ispower2(cols));

    /* Square matrix: a single square transposition suffices. */
    if (cols == rows) {
        squaretrans_pow2_c(matrix, rows);
        return 1;
    }

    /* Twice as wide as tall: permute the half-rows first, then transpose
     * the two resulting square halves. */
    if (cols == mul_size_t(2, rows)) {
        uint8_t *second_half = matrix + (nelem/2);

        if (!swap_halfrows_pow2_c(matrix, rows, cols, FORWARD_CYCLE)) {
            return 0;
        }
        squaretrans_pow2_c(matrix, rows);
        squaretrans_pow2_c(second_half, rows);
        return 1;
    }

    /* Twice as tall as wide: transpose the two square halves first, then
     * undo the half-row permutation. */
    if (rows == mul_size_t(2, cols)) {
        uint8_t *second_half = matrix + (nelem/2);

        squaretrans_pow2_c(matrix, cols);
        squaretrans_pow2_c(second_half, cols);
        if (!swap_halfrows_pow2_c(matrix, cols, rows, BACKWARD_CYCLE)) {
            return 0;
        }
        return 1;
    }

    mpd_err_fatal("transpose_pow2_c: illegal matrix size");

    return 1;
}
/*
 * Check an in-place uint8_t transposition routine against std_trans_c().
 *
 * Two identical rows x cols matrices are filled with random bytes. One is
 * transposed out of place by std_trans_c(), the other in place by func, and
 * the two results are compared element by element. Both runs are timed with
 * clock() and the timings are printed to stderr.
 *
 * Returns 1 after a successful comparison and 0 if one of the three buffers
 * could not be allocated. On the first mismatch a diagnostic is printed and
 * the process exits with status 1.
 */
static int
testit_uchar(void (* func)(uint8_t *, mpd_size_t, mpd_size_t), mpd_size_t rows, mpd_size_t cols)
{
    uint8_t *a = NULL, *src = NULL, *dest = NULL;
    clock_t start_fast, end_fast, start_std, end_std;
    mpd_size_t msize;
    mpd_size_t i;
    int ret = 0; /* stays 0 unless the whole comparison completes */

    msize = mul_size_t(rows, cols);
    if ((a = mpd_alloc(msize, sizeof *a)) == NULL) {
        goto out;
    }
    if ((src = mpd_alloc(msize, sizeof *src)) == NULL) {
        goto out;
    }
    if ((dest = mpd_alloc(msize, sizeof *dest)) == NULL) {
        goto out;
    }

    /* Same random content for the in-place input and the reference input. */
    for (i = 0; i < msize; i++) {
        a[i] = src[i] = random();
    }

    /* Reference result: out-of-place transposition of src into dest. */
    start_std = clock();
    std_trans_c(dest, src, rows, cols);
    end_std = clock();

    /* Routine under test: in-place transposition of a. */
    start_fast = clock();
    func(a, rows, cols);
    end_fast = clock();

    for (i = 0; i < msize; i++) {
        if (a[i] != dest[i]) {
            fprintf(stderr, "FAIL: a[%"PRI_mpd_size_t"] = %d\t"
                            "dest[%"PRI_mpd_size_t"] = %d\n",
                    i, a[i], i, dest[i]);
            exit(1);
        }
    }

    fprintf(stderr, "size: %10"PRI_mpd_size_t"\tstd_trans: %6.2f sec\t "
                    "in_place_trans: %6.2f sec\n",
            msize,
            (double)(end_std-start_std)/(double)CLOCKS_PER_SEC,
            (double)(end_fast-start_fast)/(double)CLOCKS_PER_SEC);

    ret = 1;

out:
    /*
     * Release through mpd_free, the counterpart of the mpd_alloc calls
     * above. The NULL guards are kept because mpd_free's handling of NULL
     * is not visible from this function.
     */
    if (a) mpd_free(a);
    if (src) mpd_free(src);
    if (dest) mpd_free(dest);
    return ret;
}
int main(void) { mpd_size_t rows, cols; fprintf(stderr, "Running test_transpose ... \n"); fprintf(stderr, "\n2^n * 2^n mpd_uint_t matrices:\n\n"); for (rows = 1; mul_size_t(rows, rows) <= UMOD_ARRAY; rows *= 2) { if (!testit_uint(transfunc_uint transpose_pow2, rows, rows)) { break; } } fprintf(stderr, "\n2^n * 2*2^n mpd_uint_t matrices:\n\n"); for (rows = 8, cols = 16; mul_size_t(rows, cols) <= UMOD_ARRAY; rows *= 2, cols*=2) { if (!testit_uint(transfunc_uint transpose_pow2, rows, cols)) { break; } } fprintf(stderr, "\n2*2^n * 2^n mpd_uint_t matrices:\n\n"); for (rows = 16, cols = 8; mul_size_t(rows, cols) <= UMOD_ARRAY; rows *= 2, cols*=2) { if (!testit_uint(transfunc_uint transpose_pow2, rows, cols)) { break; } } fprintf(stderr, "\n2^n * 2^n uint8_t matrices:\n\n"); for (rows = 1; mul_size_t(rows, rows) <= UCHAR_ARRAY; rows *= 2) { if (!testit_uchar(transfunc_uchar transpose_pow2_c, rows, rows)) { break; } } fprintf(stderr, "\n2^n * 2*2^n uint8_t matrices:\n\n"); for (rows = 8, cols = 16; mul_size_t(rows, cols) <= UCHAR_ARRAY; rows *= 2, cols*=2) { if (!testit_uchar(transfunc_uchar transpose_pow2_c, rows, cols)) { break; } } fprintf(stderr, "\n2*2^n * 2^n uint8_t matrices:\n\n"); for (rows = 16, cols = 8; mul_size_t(rows, cols) <= UCHAR_ARRAY; rows *= 2, cols*=2) { if (!testit_uchar(transfunc_uchar transpose_pow2_c, rows, cols)) { break; } } fprintf(stderr, "\ntest_transpose: PASS\n\n"); return 0; }
/*
 * Permute the half-rows of a 2^n x (2*2^n) matrix in place.
 *
 *   FORWARD_CYCLE:  even/odd permutation of the half-rows.
 *   BACKWARD_CYCLE: reverse of the even/odd permutation.
 *
 * The contents of half-row h move to half-row (h * mult) mod (cols - 1),
 * where mult is rows for the forward and 2 for the backward direction.
 * Each cycle is walked in chunks of at most BUFSIZE elements, so two
 * stack buffers are enough. Half-rows that were already handled are
 * recorded in a heap-allocated marker array.
 *
 * Returns 1 on success and 0 if the marker array cannot be allocated.
 */
static int
swap_halfrows_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols, int dir)
{
    mpd_uint_t buf_a[BUFSIZE];
    mpd_uint_t buf_b[BUFSIZE];
    mpd_uint_t *rd, *wr, *halfrow;
    mpd_size_t *visited;
    mpd_size_t word_bits;
    mpd_size_t chunk_max = BUFSIZE;
    mpd_size_t chunk;
    mpd_size_t half_cols;
    mpd_size_t first, last; /* half-row numbers */
    mpd_size_t modulus, mult = 0;
    mpd_size_t off;
    mpd_size_t cur;

    assert(cols == mul_size_t(2, rows));

    if (dir == FORWARD_CYCLE) {
        mult = rows;
    }
    else if (dir == BACKWARD_CYCLE) {
        mult = 2;
    }
    else {
        abort(); /* GCOV_NOT_REACHED */
    }

    modulus = cols - 1;
    last = rows; /* cycles start at odd half-rows */
    half_cols = cols / 2;
    word_bits = 8 * sizeof *visited;

    visited = mpd_calloc(last/(sizeof *visited) + 1, sizeof *visited);
    if (visited == NULL) {
        return 0;
    }

    for (first = 1; first <= last; first += 2) {

        /* Already moved as part of an earlier cycle. */
        if (visited[first/word_bits] & mpd_bits[first%word_bits]) {
            continue;
        }

        rd = buf_a;
        wr = buf_b;

        for (off = 0; off < half_cols; off += chunk_max) {

            /* A full buffer, or whatever is left of the half-row. */
            if (off + chunk_max < half_cols) {
                chunk = chunk_max;
            }
            else {
                chunk = half_cols - off;
            }

            /* Pick up the chunk of the half-row that starts the cycle. */
            halfrow = matrix + first*cols/2;
            memcpy(rd, halfrow+off, chunk*(sizeof *rd));
            pointerswap(&rd, &wr);

            /* Walk the cycle: save the chunk at each stop, then overwrite
             * it with the chunk carried over from the previous stop. */
            for (cur = mulmod_size_t(first, mult, modulus);
                 cur != first;
                 cur = mulmod_size_t(cur, mult, modulus)) {

                halfrow = matrix + cur*cols/2;
                memcpy(rd, halfrow+off, chunk*(sizeof *rd));
                memcpy(halfrow+off, wr, chunk*(sizeof *wr));
                pointerswap(&rd, &wr);

                visited[cur/word_bits] |= mpd_bits[cur%word_bits];
            }

            /* Close the cycle: the last carried chunk lands at the start. */
            halfrow = matrix + first*cols/2;
            memcpy(halfrow+off, wr, chunk*(sizeof *wr));

            visited[first/word_bits] |= mpd_bits[first%word_bits];
        }
    }

    mpd_free(visited);
    return 1;
}