Ejemplo n.º 1
0
/*
 * Entry point of the Toom tuning driver.
 *
 * Command line, as parsed below:
 *   --help            calls usage(0)
 *   <tuning options>  whatever handle_tuning_mulstep() and
 *                     handle_tuning_outfile() recognise and consume
 *   <size1> [<size2>] one or two positional sizes; with a single size the
 *                     same value is used for both tuning passes
 *
 * After parsing, an "info-toom" header line (program name, step, sizes,
 * host name and date) is written to rp, then tunetoom(size1) and
 * tuneutoom(size2) are run.
 *
 * Returns 0 when the end of the function is reached.  Usage errors are
 * reported through usage(1), which is presumably noreturn -- TODO confirm.
 */
int main(int argc, char *argv[])
{
    long tabsz1 = 0, tabsz2 = 0;
    int nsz = 0;                /* number of positional sizes seen so far */

    char * progname = argc ? argv[0] : "";

    /* Skip the program name, but only if there is one: with argc == 0 an
     * unconditional decrement would make the count negative and send the
     * loop below past the end of argv. */
    if (argc) {
        argc--,argv++;
    }

    for( ; argc ; argc--,argv++) {
        int r;

        if (strcmp(argv[0], "--help") == 0) {
            usage(0);
        }

        /* The handlers are passed &argc / &argv; the calls below only rely
         * on the return value: < 0 is an error, > 0 means the argument was
         * handled, 0 means "not mine". */
        r = handle_tuning_mulstep(&argc, &argv);
        if (r < 0) usage(1); else if (r) continue;
        r = handle_tuning_outfile(&argc, &argv);
        if (r < 0) usage(1); else if (r) continue;

        if (nsz < 2) {
            /* strtol instead of atoi: the whole argument must be a decimal
             * integer, and the result is not squeezed through an int. */
            char * end;
            long val = strtol(argv[0], &end, 10);

            if (end == argv[0] || *end != '\0')
                usage(1);

            if (nsz == 0) {
                tabsz1 = tabsz2 = val;
            } else {
                tabsz2 = val;
            }
            nsz++;
            continue;
        }

        /* a third positional argument is a usage error */
        usage(1);
    }

    if (nsz == 0)
        usage(1);

    set_tuning_output();

    {
        char date[40];
        time_t now;
        size_t len;
        struct utsname buf;
        const char * nodename;
        char * base;

        /* Human-readable timestamp; fall back to a fixed text rather than
         * reading an unset buffer if the conversion fails. */
        time(&now);
        if (ctime_r(&now, date) == NULL)
            snprintf(date, sizeof date, "%s", "unknown date");

        /* Trim trailing whitespace (ctime_r output ends with a newline).
         * The cast keeps the <ctype.h> argument in unsigned char range. */
        len = strlen(date);
        while (len && isspace((unsigned char) date[len - 1]))
            date[--len] = '\0';

        /* uname() reports failure with a negative return value */
        nodename = (uname(&buf) < 0) ? "unknown" : buf.nodename;

        /* strip the dirname */
        base = strrchr(progname, '/');
        if (base) {
            base++;
        } else {
            base = progname;
        }

        fprintf(rp, "info-toom \"%s -s %.2f %ld %ld run on %s on %s\"\n",
                base,mulstep,tabsz1,tabsz2,nodename,date);
    }

    tunetoom(tabsz1);		// Tune balanced routines
    tuneutoom(tabsz2);		// Tune unbalanced routines

    fflush(stdout);

    return 0;
}
Ejemplo n.º 2
0
/*
 * Entry point of the FFT tuning driver.
 *
 * Command line, as parsed below:
 *   --help            calls usage(0)
 *   --no-toom         never time Toom-Cook; FFT results are checked against
 *                     a copy of an FFT result instead
 *   -k0 <K0>          initial value of K0 (must be a positive integer)
 *   <tuning options>  whatever handle_tuning_mulstep() and
 *                     handle_tuning_outfile() recognise and consume
 *   <maxn>            tune sizes from the built-in lower bound up to maxn
 *   <minn> <maxn>     tune sizes in [minn, maxn]
 *
 * An "info-fft" header line is written to rp, followed by one
 * "fft <size> <K>" line per completed stair.  Progress goes to stdout.
 *
 * Returns 0 on normal completion and EXIT_FAILURE when the work buffers
 * cannot be allocated.  Usage errors are reported through usage(1), which
 * is presumably noreturn -- TODO confirm.
 */
int main(int argc, char *argv[])
{
    long minn, maxn, mid, n, n2, ns, i;
    long besti;			/* 0 for TC, 1, 2, ... for FFT(K0*3^(bestK-1)) */
    long bestK;
    long K, K0 = 3;		/* try K0, 3*K0, 9*K0 */
    double T[4];		/* T[0] is for TC, T[1] for K0, T[2] for 3*K0, T[3] for 9*K0 */
    double t1[4], t2[4];
    unsigned long *a, *b, *c, *t, *u, *v;
    size_t tsz;                 /* word count requested for the t buffer */
    int nsz = 0;                /* number of positional sizes seen so far */
    int tc_takes_too_long = 0;
    const char * reference = "TC";

    maxn = 1000000;		// default
    minn = GF2X_MUL_FFT_BEGIN_TUNE / 2 + 1;

    char * progname = argc ? argv[0] : "";

    /* Skip the program name, but only if there is one: with argc == 0 an
     * unconditional decrement would make the count negative and send the
     * loop below past the end of argv. */
    if (argc) {
        argc--,argv++;
    }

    for( ; argc ; argc--,argv++) {
        int r;
        char * end;
        long val;

        if (strcmp(argv[0], "--help") == 0) {
            usage(0);
        }

        if (strcmp(argv[0], "--no-toom") == 0) {
            tc_takes_too_long = 1;
            reference = "F1(K0)";
            continue;
        }

        /* The handlers are passed &argc / &argv; the calls below only rely
         * on the return value: < 0 is an error, > 0 means the argument was
         * handled, 0 means "not mine". */
        r = handle_tuning_mulstep(&argc, &argv);
        if (r < 0) usage(1); else if (r) continue;
        r = handle_tuning_outfile(&argc, &argv);
        if (r < 0) usage(1); else if (r) continue;

        if (strcmp(argv[0], "-k0") == 0) {
            argc--,argv++;
            if (! argc) usage(1);
            /* K0 must be strictly positive: negative K values are used
             * further down to denote the F2 variant. */
            val = strtol(argv[0], &end, 10);
            if (end == argv[0] || *end != '\0' || val <= 0)
                usage(1);
            K0 = val;
            continue;
        }

        if (nsz < 2) {
            /* strtol instead of atoi: the whole argument must be a decimal
             * integer, and the result is not squeezed through an int. */
            val = strtol(argv[0], &end, 10);
            if (end == argv[0] || *end != '\0')
                usage(1);

            /* a second size turns the first one into the lower bound */
            if (nsz == 1)
                minn = maxn;
            maxn = val;
            nsz++;
            continue;
        }

        /* a third positional argument is a usage error */
        usage(1);
    }

    /* maxn sizes every work buffer below, so it has to be positive */
    if (nsz == 0 || maxn <= 0)
        usage(1);

    set_tuning_output();

    {
        char date[40];
        time_t now;
        size_t len;
        struct utsname buf;
        const char * nodename;
        char * base;

        /* Human-readable timestamp; fall back to a fixed text rather than
         * reading an unset buffer if the conversion fails. */
        time(&now);
        if (ctime_r(&now, date) == NULL)
            snprintf(date, sizeof date, "%s", "unknown date");

        /* Trim trailing whitespace (ctime_r output ends with a newline).
         * The cast keeps the <ctype.h> argument in unsigned char range. */
        len = strlen(date);
        while (len && isspace((unsigned char) date[len - 1]))
            date[--len] = '\0';

        /* uname() reports failure with a negative return value */
        nodename = (uname(&buf) < 0) ? "unknown" : buf.nodename;

        /* strip the dirname */
        base = strrchr(progname, '/');
        if (base) {
            base++;
        } else {
            base = progname;
        }

        fprintf(rp, "info-fft \"%s -s %.2f %ld run on %s on %s ; based on %s\"\n",
                base,mulstep,maxn,nodename,date,GF2X_TOOM_TUNING_INFO);
    }

    printf("Tuning FFT multiplication to wordsize %ld\n\n", maxn);

    /* Operands a, b hold maxn words; c, u, v hold 2*maxn words; t is
     * sized by gf2x_toomspace(maxn). */
    tsz = gf2x_toomspace(maxn);
    a = malloc((size_t) maxn * sizeof *a);
    b = malloc((size_t) maxn * sizeof *b);
    c = malloc(2 * (size_t) maxn * sizeof *c);
    u = malloc(2 * (size_t) maxn * sizeof *u);
    v = malloc(2 * (size_t) maxn * sizeof *v);
    t = malloc(tsz * sizeof *t);

    /* A zero-sized request may legitimately yield NULL, so t is only
     * treated as a failure when space was actually asked for. */
    if (!a || !b || !c || !u || !v || (tsz != 0 && !t)) {
        fprintf(stderr, "%s: cannot allocate work buffers for %ld words\n",
                progname, maxn);
        free(a);
        free(b);
        free(c);
        free(t);
        free(u);
        free(v);
        return EXIT_FAILURE;
    }

    random_wordstring(a, maxn);
    random_wordstring(b, maxn);

/* Skip n if (2*n < GF2X_MUL_FFT_BEGIN_TUNE) as this is too small for the FFT */


    /* One iteration per "stair" [n, n2].  n is only advanced at the bottom
     * of the body, and a stair may be redone when K0 has decreased. */
    for (n = minn; n <= maxn;) {
        n2 = next_step(n, 3 * K0);	// End of interval
        if (n2 > maxn)		// Only go as far
            n2 = maxn;		// as maxn.
        mid = (n + n2) / 2;	// Mid-point
        printf("%ld..%ld ", n, n2);
        fflush(stdout);

        if (tc_takes_too_long) {
            /* Toom-Cook is out of the race: it can never be the best */
            T[0] = DBL_MAX;
        } else {
            TIME(T[0], gf2x_mul_toom(u, a, b, mid, t));	// Time Toom-Cook
            printf("TC:%1.1e ", T[0]);
        }
        fflush(stdout);
        besti = 0;
        bestK = 1;
        K = K0;
        i = 1;
ugly_label:
        /* Time K0, 3*K0 and 9*K0 (slots 1..3).  When re-entered through
         * the goto below, only slot 3 is measured. */
        for ( ; i <= 3; i++, K *= 3) {
            TIME(t1[i], gf2x_mul_fft(c, a, mid, b, mid, K));
            if (tc_takes_too_long) {
                /* No Toom-Cook result in u, so use the FFT result as the
                 * reference.
                 * NOTE(review): this copy is refreshed for every K, so the
                 * check just below appears to compare c with a copy of
                 * itself -- confirm whether it was meant for i == 1 only. */
                memcpy(u, c, 2 * maxn * sizeof(unsigned long));
            }
            check(a, mid, b, mid, reference, u, "F1", c);
            if (K >= GF2X_WORDSIZE) {
                /* a negative K selects the F2 variant */
                TIME(t2[i], gf2x_mul_fft(v, a, mid, b, mid, -K));
                check(a, mid, b, mid, "F1", c, "F2", v);
            } else {
                t2[i] = DBL_MAX;
            }
            if (t1[i] < t2[i]) {
                T[i] = t1[i];
                printf("F1(%ld):%1.1e ", K, T[i]);
            } else {
                T[i] = t2[i];
                printf("F2(%ld):%1.1e ", K, T[i]);
            }
            fflush(stdout);
            if (T[i] < T[besti]) {
                besti = i;
                bestK = (t2[i] > t1[i]) ? K : -K;	/* -K for FFT2(|K|) */
            }
        }

        if (T[3] < T[1] && T[3] < T[2]) {
            /* The largest K won: slide the window up by a factor of 3.
             * Timings move down one slot, and so does besti unless it
             * designates Toom-Cook (0). */
            if (besti) {
                if (besti == 1)
                    abort();
                besti--;
            }
            K0 *= 3;
            /* K just stays as it was */
            i = 3;
            T[1] = T[2];
            T[2] = T[3];
            goto ugly_label;
            /* Notice that we can't loop forever here. If we have T[3] <
             * T[2], this will ensure T[2] < T[1] at the next turn,
             * thereby forcing the other case not to happen */
        } else if (T[1] < T[2] && T[1] < T[3] && K0 > 3) {
            /* The smallest K won: slide the window down for next time */
            K0 /= 3;
        }

        /* OK, this stair is done */

        if (bestK == 1)
            printf("TC");
        else {
            if (bestK > 0)
                printf("F1(%ld)", bestK);
            else
                printf("F2(%ld)", -bestK);
        }
        printf("\n");
        fflush(stdout);

        /* Stop timing Toom-Cook once it is at least 4 times slower than
         * the best FFT variant. */
        if (T[0] >= 4 * T[besti] && !tc_takes_too_long) {
            printf("TC is taking too long, disabling for next sizes\n");
            tc_takes_too_long = 1;
            reference = "F1(K0)";
        }

        /* go to next size */
        ns = n;
        n = next_step(n, 3 * K0);	/* middle value of K */
        if (n > n2)
            n = n2;		/* end of last stair if K0 increased */
        n++;
        if (n < mid) {		/* redo the last stair if K0 decreased */
            n = ns;
        } else {
            /* The very first stair is always recorded as "fft 1 1" */
            fprintf(rp, "fft %ld %ld\n",
                    ns == minn ? 1 : ns, ns == minn ? 1 : bestK);
        }
    }

    free(a);
    free(b);
    free(c);
    free(t);
    free(u);
    free(v);

    return 0;
}