int main(void) { mp_bitcnt_t depth, w; mp_size_t iters, j; double truncation; flint_rand_t state; printf("mul_truncate_sqrt2...."); fflush(stdout); flint_randinit(state); _flint_rand_init_gmp(state); depth = 13; w = 1; iters = 1; truncation = 1; { mp_size_t n = (1UL<<depth); mp_bitcnt_t bits1 = (n*w - (depth + 1))/2; mp_bitcnt_t bits = 2*n*bits1; mp_size_t int_limbs = ((mp_size_t)(truncation*bits))/FLINT_BITS; mp_size_t j; mp_limb_t * i1, *i2, *r1, *r2; printf("bits = %ld\n", int_limbs*FLINT_BITS); i1 = flint_malloc(6*int_limbs*sizeof(mp_limb_t)); i2 = i1 + int_limbs; r1 = i2 + int_limbs; r2 = r1 + 2*int_limbs; mpn_urandomb(i1, state->gmp_state, int_limbs*FLINT_BITS); mpn_urandomb(i2, state->gmp_state, int_limbs*FLINT_BITS); //mpn_mul(r2, i1, int_limbs, i2, int_limbs); for (j = 0; j < iters; j++) mul_truncate_sqrt2(r1, i1, int_limbs, i2, int_limbs, depth, w); flint_free(i1); } flint_randclear(state); printf("done\n"); return 0; }
int main(void) { int i; mp_size_t j; flint_rand_t state; printf("split/combine_bits...."); fflush(stdout); flint_randinit(state); _flint_rand_init_gmp(state); for (i = 0; i < 10000; i++) { mp_size_t total_limbs = n_randint(state, 1000) + 1; mp_limb_t * in = flint_malloc(total_limbs*sizeof(mp_limb_t)); mp_limb_t * out = flint_calloc(total_limbs, sizeof(mp_limb_t)); mp_bitcnt_t bits = n_randint(state, 200) + 1; mp_size_t limbs = (2*bits - 1)/FLINT_BITS + 1; long length = (total_limbs*FLINT_BITS - 1)/bits + 1; mp_limb_t ** poly; poly = flint_malloc(length*sizeof(mp_limb_t *)); for (j = 0; j < length; j++) poly[j] = flint_malloc((limbs + 1)*sizeof(mp_limb_t)); mpn_urandomb(in, state->gmp_state, total_limbs*FLINT_BITS); fft_split_bits(poly, in, total_limbs, bits, limbs); fft_combine_bits(out, poly, length, bits, limbs, total_limbs); for (j = 0; j < total_limbs; j++) { if (in[j] != out[j]) { printf("FAIL:\n"); printf("Error in limb %ld, %lu != %lu\n", j, in[j], out[j]); abort(); } } flint_free(in); flint_free(out); for (j = 0; j < length; j++) flint_free(poly[j]); flint_free(poly); } flint_randclear(state); printf("PASS\n"); return 0; }
int main(void) { int i; mp_size_t j; gmp_randstate_t state; tests_start(); fflush(stdout); gmp_randinit_default(state); for (i = 0; i < 10000; i++) { mp_limb_t total_limbs; mp_limb_t * in; mp_limb_t * out; mp_bitcnt_t bits; mp_size_t limbs; long length; mp_limb_t ** poly; mpn_rrandom(&total_limbs, state, 1); total_limbs = total_limbs % 1000 + 1; in = malloc(total_limbs*sizeof(mp_limb_t)); out = calloc(total_limbs, sizeof(mp_limb_t)); mpn_rrandom(&bits, state, 1); bits = bits % 200 + 1; limbs = (2*bits - 1)/GMP_LIMB_BITS + 1; length = (total_limbs*GMP_LIMB_BITS - 1)/bits + 1; poly = malloc(length*sizeof(mp_limb_t *)); for (j = 0; j < length; j++) poly[j] = malloc((limbs + 1)*sizeof(mp_limb_t)); mpn_urandomb(in, state, total_limbs*GMP_LIMB_BITS); mpir_fft_split_bits(poly, in, total_limbs, bits, limbs); mpir_fft_combine_bits(out, poly, length, bits, limbs, total_limbs); for (j = 0; j < total_limbs; j++) { if (in[j] != out[j]) { printf("FAIL:\n"); gmp_printf("Error in limb %ld, %Mu != %Mu\n", j, in[j], out[j]); abort(); } } free(in); free(out); for (j = 0; j < length; j++) free(poly[j]); free(poly); } gmp_randclear(state); tests_end(); return 0; }
int main(void) { mp_bitcnt_t depth, w; flint_rand_t state; printf("mul_fft_main...."); fflush(stdout); flint_randinit(state); _flint_rand_init_gmp(state); for (depth = 6; depth <= 13; depth++) { for (w = 1; w <= 3 - (depth >= 12); w++) { int iter = 1 + 200*(depth <= 8) + 80*(depth <= 9) + 10*(depth <= 10), i; for (i = 0; i < iter; i++) { mp_size_t n = (1UL<<depth); mp_bitcnt_t bits1 = (n*w - (depth + 1))/2; mp_size_t len1 = 2*n + n_randint(state, 2*n) + 1; mp_size_t len2 = 2*n + 2 - len1 + n_randint(state, 2*n); mp_bitcnt_t b1 = len1*bits1, b2; mp_size_t n1, n2; mp_size_t j; mp_limb_t * i1, *i2, *r1, *r2; if (len2 <= 0) len2 = 2*n + n_randint(state, 2*n) + 1; b2 = len2*bits1; n1 = (b1 - 1)/FLINT_BITS + 1; n2 = (b2 - 1)/FLINT_BITS + 1; if (n1 < n2) /* ensure b1 >= b2 */ { mp_size_t t = n1; mp_bitcnt_t tb = b1; n1 = n2; b1 = b2; n2 = t; b2 = tb; } i1 = flint_malloc(3*(n1 + n2)*sizeof(mp_limb_t)); i2 = i1 + n1; r1 = i2 + n2; r2 = r1 + n1 + n2; mpn_urandomb(i1, state->gmp_state, b1); mpn_urandomb(i2, state->gmp_state, b2); mpn_mul(r2, i1, n1, i2, n2); mpn_mul_fft_main(r1, i1, n1, i2, n2); for (j = 0; j < n1 + n2; j++) { if (r1[j] != r2[j]) { printf("error in limb %ld, %lx != %lx\n", j, r1[j], r2[j]); abort(); } } flint_free(i1); } } } flint_randclear(state); printf("PASS\n"); return 0; }
int main(void) { mp_bitcnt_t depth, w, depth1, w1; clock_t start, end; double elapsed; double best = 0.0; mp_size_t best_off, off, best_d, best_w; gmp_randstate_t state; printf("/* fft_tuning.h -- autogenerated by tune-fft */\n\n"); printf("#ifndef FFT_TUNING_H\n"); printf("#define FFT_TUNING_H\n\n"); printf("#include \"mpir.h\"\n\n"); printf("#define FFT_TAB \\\n"); fflush(stdout); gmp_randinit_default(state); printf(" { "); fflush(stdout); for (depth = 6; depth <= 10; depth++) { printf("{ "); fflush(stdout); for (w = 1; w <= 2; w++) { int iters = 100*((mp_size_t) 1 << (3*(10 - depth)/2)), i; mp_size_t n = ((mp_limb_t)1<<depth); mp_bitcnt_t bits1 = (n*w - (depth + 1))/2; mp_size_t len1 = 2*n; mp_size_t len2 = 2*n; mp_bitcnt_t b1 = len1*bits1, b2 = len2*bits1; mp_size_t n1, n2; mp_size_t j; mp_limb_t * i1, *i2, *r1; n1 = (b1 - 1)/GMP_LIMB_BITS + 1; n2 = (b2 - 1)/GMP_LIMB_BITS + 1; i1 = malloc(2*(n1 + n2)*sizeof(mp_limb_t)); i2 = i1 + n1; r1 = i2 + n2; mpn_urandomb(i1, state, b1); mpn_urandomb(i2, state, b2); best_off = -1; for (off = 0; off <= 4; off++) { start = clock(); for (i = 0; i < iters; i++) mpn_mul_trunc_sqrt2(r1, i1, n1, i2, n2, depth - off, w*((mp_size_t)1 << (off*2))); end = clock(); elapsed = ((double) (end - start)) / CLOCKS_PER_SEC; if (elapsed < best || best_off == -1) { best_off = off; best = elapsed; } } printf("%ld", best_off); if (w != 2) printf(","); printf(" "); fflush(stdout); free(i1); } printf("}"); if (depth != 10) printf(","); printf(" "); fflush(stdout); } printf("}\n\n"); best_d = 12; best_w = 1; best_off = -1; printf("#define MULMOD_TAB \\\n"); fflush(stdout); printf(" { "); fflush(stdout); for (depth = 12; best_off != 1 ; depth++) { for (w = 1; w <= 2; w++) { int iters = 100*((mp_size_t) 1 << (3*(18 - depth)/2)), i; mp_size_t n = ((mp_limb_t)1<<depth); mp_bitcnt_t bits = n*w; mp_size_t int_limbs = (bits - 1)/GMP_LIMB_BITS + 1; mp_size_t j; mp_limb_t c, * i1, * i2, * r1, * tt; if (depth <= 21) iters = 32*((mp_size_t) 1 << (21 - depth)); else iters = MAX(32/((mp_size_t) 1 << (depth - 21)), 1); i1 = malloc(6*(int_limbs+1)*sizeof(mp_limb_t)); i2 = i1 + int_limbs + 1; r1 = i2 + int_limbs + 1; tt = r1 + 2*(int_limbs + 1); mpn_urandomb(i1, state, int_limbs*GMP_LIMB_BITS); mpn_urandomb(i2, state, int_limbs*GMP_LIMB_BITS); i1[int_limbs] = 0; i2[int_limbs] = 0; depth1 = 1; while ((((mp_limb_t)1)<<depth1) < bits) depth1++; depth1 = depth1/2; w1 = bits/(((mp_limb_t)1)<<(2*depth1)); best_off = -1; for (off = 0; off <= 4; off++) { start = clock(); for (i = 0; i < iters; i++) mpir_fft_mulmod_2expp1(r1, i1, i2, int_limbs, depth1 - off, w1*((mp_size_t)1 << (off*2))); end = clock(); elapsed = ((double) (end - start)) / CLOCKS_PER_SEC; if (best_off == -1 || elapsed < best) { best_off = off; best = elapsed; } } start = clock(); for (i = 0; i < iters; i++) mpn_mulmod_2expp1_basecase(r1, i1, i2, 0, bits, tt); end = clock(); elapsed = ((double) (end - start)) / CLOCKS_PER_SEC; if (elapsed < best) { best_d = depth + (w == 2); best_w = w + 1 - 2*(w == 2); } printf("%ld", best_off); if (w != 2) printf(", "); fflush(stdout); free(i1); } printf(", "); fflush(stdout); } printf("1 }\n\n"); printf("#define FFT_N_NUM %ld\n\n", 2*(depth - 12) + 1); printf("#define FFT_MULMOD_2EXPP1_CUTOFF %ld\n\n", ((mp_limb_t) 1 << best_d)*best_w/(2*GMP_LIMB_BITS)); gmp_randclear(state); printf("#endif\n"); return 0; }