示例#1
0
/*
 * Tune the balanced multiplication routines.
 *
 * For word sizes n running from BESTMIN + 1 up to max(tablesz, BESTMIN),
 * time gf2x_mul_kara, gf2x_mul_tc3, gf2x_mul_tc3w and gf2x_mul_tc4 (each
 * one only once n has reached its own threshold), print the timings on
 * stdout, write a "toom <n> <k>" line to rp and store the selector k of
 * the fastest routine in best_tab[].
 *
 * Sizes advance by a factor of mulstep (but by at least one word); every
 * size stepped over inherits the selector of the size that was measured.
 *
 * Exits with status 1 when the requested range exceeds
 * GF2X_TOOM_TUNING_LIMIT or when the work buffers cannot be allocated.
 */
void tunetoom(long tablesz)
{
    long high, n;
    long tsp;                   /* scratch words needed by the Toom routines */
    int k;
    double T3[1], TK[1], TW[1], T4[1];
    double mint;
    unsigned long *a, *b, *c, *d, *t;

    high = tablesz;
    if (high < BESTMIN)
        high = BESTMIN;

    if (high > GF2X_TOOM_TUNING_LIMIT) {
        fprintf(stderr,
            "Increase constant GF2X_TOOM_TUNING_LIMIT in thresholds.h to %ld\n",
             high);
        exit(1);
    }

    /* a, b: operands; c: Karatsuba product; d: product under test;
     * t: scratch space shared by all routines. */
    tsp = gf2x_toomspace(high);
    a = (unsigned long *) malloc(high * sizeof(unsigned long));
    b = (unsigned long *) malloc(high * sizeof(unsigned long));
    c = (unsigned long *) malloc(2 * high * sizeof(unsigned long));
    d = (unsigned long *) malloc(2 * high * sizeof(unsigned long));
    t = (unsigned long *) malloc(tsp * sizeof(unsigned long));

    /* malloc may legitimately return NULL for a zero-byte request, so a
     * NULL pointer only counts as a failure when words were asked for. */
    if ((high > 0 && (!a || !b || !c || !d)) || (tsp > 0 && !t)) {
        fprintf(stderr, "tunetoom: out of memory\n");
        free(a);
        free(b);
        free(c);
        free(d);
        free(t);
        exit(1);
    }

    for (n = BESTMIN + 1; n <= high; ) {
        /* Reseed so that every size is measured on reproducible operands. */
        srandom(1);
        TK[0] = T3[0] = TW[0] = T4[0] = 0.0;
        printf("%ld ", n);
        fflush(stdout);
        random_wordstring(a, n);
        random_wordstring(b, n);
        /* NOTE(review): the checks below compare against c, which only
         * holds a product when the Karatsuba branch ran; presumably
         * BESTMIN guarantees n >= GF2X_MUL_KARA_THRESHOLD -- confirm. */
        if (n >= GF2X_MUL_KARA_THRESHOLD)
            TIME(TK[0], gf2x_mul_kara(c, a, b, n, t));
        if (n >= MINI_GF2X_MUL_TOOM_THRESHOLD) {
            TIME(T3[0], gf2x_mul_tc3(d, a, b, n, t));
            check(a, n, b, n, "Kara", c, "TC3", d);
        }
        if (n >= MINI_GF2X_MUL_TOOMW_THRESHOLD) {
            TIME(TW[0], gf2x_mul_tc3w(d, a, b, n, t));
            check(a, n, b, n, "Kara", c, "TC3W", d);
        }
        if (n >= MINI_GF2X_MUL_TOOM4_THRESHOLD) {
            TIME(T4[0], gf2x_mul_tc4(d, a, b, n, t));
            check(a, n, b, n, "Kara", c, "TC4", d);
        }
        printf("TC2:%1.2e TC3:%1.2e TC3W:%1.2e TC4:%1.2e ",
               TK[0], T3[0], TW[0], T4[0]);

        /* Pick the fastest routine; a routine that was not run keeps a
         * timing of 0.0, hence the repeated threshold tests. */
        mint = TK[0];
        k = GF2X_SELECT_KARA;
        if ((T3[0] < mint) && (n >= MINI_GF2X_MUL_TOOM_THRESHOLD)) {
            mint = T3[0];
            k = GF2X_SELECT_TC3;
        }
        if ((TW[0] < mint) && (n >= MINI_GF2X_MUL_TOOMW_THRESHOLD)) {
            mint = TW[0];
            k = GF2X_SELECT_TC3W;
        }
        if ((T4[0] < mint) && (n >= MINI_GF2X_MUL_TOOM4_THRESHOLD)) {
            mint = T4[0];
            k = GF2X_SELECT_TC4;
        }
        printf("best:%1.2e %s\n", mint, gf2x_toom_select_string[k]);
        fprintf(rp, "toom %ld %d\n", n, k);
        fflush(stdout);

        /* Advance to the next measured size, filling the table for every
         * size in between with the selector just found. */
        long nn = MAX(n * mulstep, n + 1);
        for( ; n < nn && n <= high ; n++) {
            best_tab[n - 1] = k;
        }
    }

    free(a);
    free(b);
    free(c);
    free(d);
    free(t);

    return;
}
示例#2
0
/*
 * Tune the unbalanced multiplication routines.
 *
 * For sizes sa running from BESTMINU + 1 up to max(tabsz, BESTMINU),
 * multiply an sa-word operand by an sb = (sa + 1) / 2 word operand with
 * gf2x_mul21 and, once sa reaches MINI_GF2X_MUL_TOOMU_THRESHOLD, with
 * gf2x_mul_tc3u.  Timings go to stdout, a "utoom <sa> <k>" line goes to
 * rp and the selector k of the faster routine is stored in best_utab[].
 *
 * Sizes advance by a factor of mulstep (but by at least one word); every
 * size stepped over inherits the selector of the size that was measured.
 *
 * Exits with status 1 when the requested range exceeds
 * GF2X_TOOM_TUNING_LIMIT or when the work buffers cannot be allocated.
 */
void tuneutoom(long tabsz)
{
    long high;
    int k;
    double T3[1], TK[1];
    double mint;
    unsigned long *a, *b, *c, *d, *t;

    high = tabsz;
    if (high < BESTMINU)
        high = BESTMINU;

    /* NOTE(review): tunetoom's equivalent message names thresholds.h
     * rather than thresholds.c -- confirm which file is meant. */
    if (high > GF2X_TOOM_TUNING_LIMIT) {
        fprintf(stderr,
                "Increase constant GF2X_TOOM_TUNING_LIMIT in thresholds.c to %ld\n",
             high);
        exit(1);
    }

    /* Size every buffer for the largest case that will be measured. */
    long sa = high;
    long sb = (sa + 1) / 2;

    long sp1 = gf2x_toomuspace(sa);	// space for mul_toom3u
    long sp2 = gf2x_toomspace(sb) + 2 * sb;	// space for mul21
    long sp = (sp1 > sp2) ? sp1 : sp2;

    /* a, b: operands; c: gf2x_mul21 product; d: gf2x_mul_tc3u product;
     * t: scratch space large enough for either routine. */
    a = (unsigned long *) malloc(sa * sizeof(unsigned long));
    b = (unsigned long *) malloc(sb * sizeof(unsigned long));
    c = (unsigned long *) malloc(3 * sb * sizeof(unsigned long));
    d = (unsigned long *) malloc(3 * sb * sizeof(unsigned long));
    t = (unsigned long *) malloc(sp * sizeof(unsigned long));

    /* malloc may legitimately return NULL for a zero-byte request, so a
     * NULL pointer only counts as a failure when words were asked for. */
    if ((high > 0 && (!a || !b || !c || !d)) || (sp > 0 && !t)) {
        fprintf(stderr, "tuneutoom: out of memory\n");
        free(a);
        free(b);
        free(c);
        free(d);
        free(t);
        exit(1);
    }

    for (sa = BESTMINU + 1; sa <= high; ) {
        sb = (sa + 1) / 2;
        random_wordstring(a, sa);
        random_wordstring(b, sb);
        TK[0] = T3[0] = 0.0;
        printf("%ld ", sa);
        fflush(stdout);
        TIME(TK[0], gf2x_mul21(c, a, sa, b, t));
        if (sa >= MINI_GF2X_MUL_TOOMU_THRESHOLD) {
            TIME(T3[0], gf2x_mul_tc3u(d, a, sa, b, t));
            checku(c, d, sa + sb);
        }
        printf("default:%1.2e TC3U:%1.2e ", TK[0], T3[0]);

        /* TC3U keeps a timing of 0.0 when it was not run, hence the
         * repeated threshold test. */
        mint = TK[0];
        k = GF2X_SELECT_UNB_DFLT;
        if ((T3[0] < mint) && (sa >= MINI_GF2X_MUL_TOOMU_THRESHOLD)) {
            mint = T3[0];
            k = GF2X_SELECT_UNB_TC3U;
        }
        printf("best:%1.2e %s\n", mint, gf2x_utoom_select_string[k]);
        fflush(stdout);
        fprintf(rp, "utoom %ld %d\n", sa, k);

        /* Advance to the next measured size, filling the table for every
         * size in between with the selector just found. */
        long nn = MAX(sa * mulstep, sa + 1);
        for( ; sa < nn && sa <= high ; sa++) {
            best_utab[sa - 1] = k;
        }
    }

    free(a);
    free(b);
    free(c);
    free(d);
    free(t);

    return;
}
示例#3
0
文件: tunefft.c 项目: nesciens/gf2x
/*
 * Entry point of the FFT tuning program.
 *
 * Command line: options --help, --no-toom, -k0 <K0>, whatever
 * handle_tuning_mulstep() and handle_tuning_outfile() accept, followed by
 * one or two sizes in words.  A single size sets maxn; with two sizes the
 * first becomes minn and the second maxn.  At least one size is required.
 *
 * The range [minn, maxn] is walked in "stairs" delimited by next_step().
 * At the midpoint of each stair Toom-Cook (unless disabled) is timed
 * against gf2x_mul_fft with K = K0, 3*K0 and 9*K0, each also with -K once
 * K >= GF2X_WORDSIZE.  The winner is printed on stdout and written to rp
 * as "fft <start> <bestK>"; K0 is moved up or down so that the middle
 * candidate tends to be the fastest one.
 *
 * Returns 0 on success and 1 when the work buffers cannot be allocated.
 */
int main(int argc, char *argv[])
{
    long minn, maxn, mid, n, n2, ns, i;
    long besti;			/* 0 for TC, 1, 2, ... for FFT(K0*3^(bestK-1)) */
    long bestK;
    long K, K0 = 3;		/* try K0, 3*K0, 9*K0 */
    double T[4];		/* T[0] is for TC, T[1] for K0, T[2] for 3*K0, T[3] for 9*K0 */
    double t1[4], t2[4];
    unsigned long *a, *b, *c, *t, *u, *v;
    int nsz = 0;
    int tc_takes_too_long = 0;
    const char * reference = "TC";

    maxn = 1000000;		// default
    minn = GF2X_MUL_FFT_BEGIN_TUNE / 2 + 1;

    char * progname = argc ? argv[0] : "";

    argc--,argv++;
    for( ; argc ; argc--,argv++) {
        int r;

        if (strcmp(argv[0], "--help") == 0) {
            usage(0);
        }

        if (strcmp(argv[0], "--no-toom") == 0) {
            tc_takes_too_long = 1;
            reference = "F1(K0)";
            continue;
        }

        r = handle_tuning_mulstep(&argc, &argv);
        if (r < 0) usage(1); else if (r) continue;
        r = handle_tuning_outfile(&argc, &argv);
        if (r < 0) usage(1); else if (r) continue;

        if (strcmp(argv[0], "-k0") == 0) {
            argc--,argv++;
            if (! argc) usage(1);
            K0 = atoi(argv[0]);
            continue;
        }

        /* First positional argument: upper bound. */
        if (nsz == 0) {
            maxn = atoi(argv[0]);
            nsz++;
            continue;
        }
        /* Second positional argument: the first one was really the lower
         * bound, this one is the upper bound. */
        if (nsz == 1) {
            minn = maxn;
            maxn = atoi(argv[0]);
            nsz++;
            continue;
        }
        usage(1);
    }

    if (nsz == 0)
        usage(1);

    set_tuning_output();

    /* Emit a header line describing how and where this run was made. */
    {
        char date[40];
        time_t now;
        size_t len;
        struct utsname buf;
        time(&now);
        ctime_r(&now, date);
        /* Strip the trailing newline/whitespace left by ctime_r.  The cast
         * keeps the isspace() argument in the range it is defined for. */
        len = strlen(date);
        while (len && isspace((unsigned char) date[len - 1]))
            date[--len] = '\0';
        uname(&buf);

        /* strip the dirname */
        char * ptr = strrchr(progname, '/');
        if (ptr) {
            ptr++;
        } else {
            ptr = progname;
        }

        fprintf(rp, "info-fft \"%s -s %.2f %ld run on %s on %s ; based on %s\"\n",
                ptr,mulstep,maxn,buf.nodename,date,GF2X_TOOM_TUNING_INFO);
    }

    printf("Tuning FFT multiplication to wordsize %ld\n\n", maxn);

    /* a, b: operands; c: FFT (F1) product; u: reference product;
     * v: FFT (F2) product; t: Toom-Cook scratch space. */
    long tsp = gf2x_toomspace(maxn);
    a = (unsigned long *) malloc(maxn * sizeof(unsigned long));
    b = (unsigned long *) malloc(maxn * sizeof(unsigned long));
    c = (unsigned long *) malloc(2 * maxn * sizeof(unsigned long));
    u = (unsigned long *) malloc(2 * maxn * sizeof(unsigned long));
    v = (unsigned long *) malloc(2 * maxn * sizeof(unsigned long));
    t = (unsigned long *) malloc(tsp * sizeof(unsigned long));

    /* maxn comes straight from the command line, so this is where an
     * oversized request shows up.  malloc may legitimately return NULL for
     * a zero-byte request, hence the tsp test for the scratch buffer. */
    if (maxn > 0 && (!a || !b || !c || !u || !v || (tsp > 0 && !t))) {
        fprintf(stderr, "tunefft: out of memory\n");
        free(a);
        free(b);
        free(c);
        free(t);
        free(u);
        free(v);
        return 1;
    }

    random_wordstring(a, maxn);
    random_wordstring(b, maxn);

/* Skip n if (2*n < GF2X_MUL_FFT_BEGIN_TUNE) as this is too small for the FFT */


    for (n = minn; n <= maxn;) {
        n2 = next_step(n, 3 * K0);	// End of interval
        if (n2 > maxn)		// Only go as far
            n2 = maxn;		// as maxn.
        mid = (n + n2) / 2;	// Mid-point
        printf("%ld..%ld ", n, n2);
        fflush(stdout);

        if (tc_takes_too_long) {
            T[0] = DBL_MAX;
        } else {
            TIME(T[0], gf2x_mul_toom(u, a, b, mid, t));	// Time Toom-Cook
            printf("TC:%1.1e ", T[0]);
        }
        fflush(stdout);
        besti = 0;
        bestK = 1;
        K = K0;
        i = 1;
ugly_label:
        for ( ; i <= 3; i++, K *= 3) {
            TIME(t1[i], gf2x_mul_fft(c, a, mid, b, mid, K));
            /* Without Toom-Cook there is no independent reference, so the
             * F1 result itself is copied into the reference buffer. */
            if (tc_takes_too_long) {
                memcpy(u, c, 2 * maxn * sizeof(unsigned long));
            }
            check(a, mid, b, mid, reference, u, "F1", c);
            if (K >= GF2X_WORDSIZE) {
                TIME(t2[i], gf2x_mul_fft(v, a, mid, b, mid, -K));
                check(a, mid, b, mid, "F1", c, "F2", v);
            } else {
                t2[i] = DBL_MAX;
            }
            if (t1[i] < t2[i]) {
                T[i] = t1[i];
                printf("F1(%ld):%1.1e ", K, T[i]);
            } else {
                T[i] = t2[i];
                printf("F2(%ld):%1.1e ", K, T[i]);
            }
            fflush(stdout);
            if (T[i] < T[besti]) {
                besti = i;
                bestK = (t2[i] > t1[i]) ? K : -K;	/* -K for FFT2(|K|) */
            }
        }

        if (T[3] < T[1] && T[3] < T[2]) {
            /* The largest K won: shift the window of candidates up by one
             * and measure only the new top candidate. */
            if (besti) {
                if (besti == 1)
                    abort();
                besti--;
            }
            K0 *= 3;
            /* K just stays as it was */
            i = 3;
            T[1] = T[2];
            T[2] = T[3];
            goto ugly_label;
            /* Notice that we can't loop forever here. If we have T[3] <
             * T[2], this will ensure T[2] < T[1] at the next turn,
             * thereby forcing the other case not to happen */
        } else if (T[1] < T[2] && T[1] < T[3] && K0 > 3) {
            K0 /= 3;
        }

        /* OK, this stair is done */

        if (bestK == 1)
            printf("TC");
        else {
            if (bestK > 0)
                printf("F1(%ld)", bestK);
            else
                printf("F2(%ld)", -bestK);
        }
        printf("\n");
        fflush(stdout);

        if (T[0] >= 4 * T[besti] && !tc_takes_too_long) {
            printf("TC is taking too long, disabling for next sizes\n");
            tc_takes_too_long = 1;
            reference = "F1(K0)";
        }

        /* go to next size */
        ns = n;
        n = next_step(n, 3 * K0);	/* middle value of K */
        if (n > n2)
            n = n2;		/* end of last stair if K0 increased */
        n++;
        if (n < mid) {		/* redo the last stair if K0 decreased */
            n = ns;
        } else {
            /* The very first stair is recorded as size 1 / selector 1. */
            fprintf(rp, "fft %ld %ld\n",
                    ns == minn ? 1 : ns, ns == minn ? 1 : bestK);
        }
    }

    free(a);
    free(b);
    free(c);
    free(t);
    free(u);
    free(v);

    return 0;
}