/* Time FUNC once at precision P (in bits) and return the value given back
   by speed_measure.

   Two operands of (P - 1) / GMP_NUMB_BITS + 1 limbs each are carved out of
   a single malloc'ed buffer and filled by mpn_random; the buffer is freed
   before returning.  The x, y and w alignments in the speed parameters are
   all set to 64 and the size field is set to P.

   THRESHOLD is accepted but not referenced by this routine.

   The process is aborted, after a message on stderr, if the buffer cannot
   be allocated or if speed_measure reports -1.0.  */
static double
domeasure (mpfr_prec_t *threshold,
           double (*func) (struct speed_params *),
           mpfr_prec_t p)
{
  struct speed_params s;
  mp_size_t nlimbs;
  double elapsed;

  s.align_wp = 64;
  s.align_yp = 64;
  s.align_xp = 64;
  s.size = p;

  /* Number of limbs needed to hold P bits.  */
  nlimbs = (p - 1) / GMP_NUMB_BITS + 1;

  /* One allocation serves both operands: x first, y right behind it.  */
  s.xp = malloc (2 * nlimbs * sizeof (mp_limb_t));
  if (s.xp == NULL)
    {
      fprintf (stderr, "Can't allocate memory.\n");
      abort ();
    }
  mpn_random (s.xp, nlimbs);
  s.yp = s.xp + nlimbs;
  mpn_random (s.yp, nlimbs);

  elapsed = speed_measure (func, &s);
  if (elapsed == -1.0)
    {
      fprintf (stderr, "Failed to measure function!\n");
      abort ();
    }

  free (s.xp);
  return elapsed;
}
/* Fetch a previously recorded time from the text file FILENAME.

   The first line whose leading integer (as read by atoi) equals SIZE is
   selected.  Whitespace separated tokens are then read from it; token 0 is
   the size itself and token COLUMN is the wanted value.  Any '#' in that
   token is replaced by a blank before it is converted with atof.

   Returns the converted value, or -1.0 when no line matches SIZE or the
   matching line has too few tokens.  Prints a message and exits with
   status 1 if the file cannot be opened.  */
static double
run_one_read_column_time (const char *filename, int column, mp_size_t size)
{
  FILE *colfp;
  char line[1024], token[1024], *p;
  double result;
  int j;

  result = -1.0;

  colfp = fopen (filename, "rt");
  if (colfp == NULL)
    {
      printf ("Cant open %s\n", filename);
      exit (1);
    }

  while (fgets (line, sizeof line, colfp) != NULL)
    {
      if (atoi (line) != size)
        continue;

      /* token[] is as large as line[], so an unbounded %s cannot overrun. */
      p = line;
      for (j = 0; j <= column; j++)
        {
          if (sscanf (p, " %s", token) != 1)
            break;
          p = strstr (p, token) + strlen (token);
        }

      if (j == column + 1)
        {
          /* exclude # */
          while ((p = strstr (token, "#")) != NULL)
            *p = ' ';
          result = atof (token);
        }

      /* Only the first matching line is considered.  */
      break;
    }

  fclose (colfp);
  return result;
}

/* Measure (or look up) every entry of choice[] at size s->size and write
   one row of results to FP.

   PREV_SIZE is the size used for the previous row, or -1 for the first
   row; it only matters under CMP_DIFFPREV.

   For each choice[i]:
     - nsum != 0:      the time is the sum of the times of the entries
                       listed in choice[i].sum[], and is unavailable if any
                       of those is unavailable.
     - colfile != -1:  the time is read from column choice[i].colfile of
                       the file choice[i].filename.
     - otherwise:      the time comes from speed_measure.
   A time of -1.0 marks the entry as unavailable (no_time); available times
   are multiplied by choice[i].scale.

   The row is printed either in gnuplot form (option_gnuplot) or as a
   human readable table in which the fastest column is flagged with '#'.
   Columns listed in xcol[] are never flagged as fastest.  */
void
run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size)
{
  const char *first_open_fastest, *first_open_notfastest, *first_close;
  int j, i, fastest, want_data;
  double fastest_time;
  TMP_DECL;

  TMP_MARK;

  /* allocate data, unless all routines are NODATA */
  want_data = 0;
  for (i = 0; i < num_choices; i++)
    want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0);

  if (want_data)
    {
      /* NOTE(review): the buffers are allocated into the global sp but
         filled through s; this presumably relies on callers passing &sp --
         confirm against the callers.  */
      SPEED_TMP_ALLOC_LIMBS (sp.xp, s->size, s->align_xp);
      SPEED_TMP_ALLOC_LIMBS (sp.yp, s->size, s->align_yp);

      data_fill (s->xp, s->size);
      data_fill (s->yp, s->size);
    }
  else
    {
      sp.xp = NULL;
      sp.yp = NULL;
    }

  /* The first row under CMP_DIFFPREV is shown in parentheses.  */
  if (prev_size == -1 && option_cmp == CMP_DIFFPREV)
    {
      first_open_fastest = "(#";
      first_open_notfastest = " (";
      first_close = ")";
    }
  else
    {
      first_open_fastest = "#";
      first_open_notfastest = " ";
      first_close = "";
    }

  fastest = -1;
  fastest_time = -1.0;

  /* Pass 1: obtain the raw time of every non-sum entry.  */
  for (i = 0; i < num_choices; i++)
    {
      if (choice[i].nsum != 0)
        continue;

      s->r = choice[i].r;
      if (choice[i].colfile == -1)
        choice[i].time = speed_measure (choice[i].p->fun, s);
      else
        choice[i].time = run_one_read_column_time (choice[i].filename,
                                                   choice[i].colfile,
                                                   s->size);

      choice[i].no_time = (choice[i].time == -1.0);
      if (! choice[i].no_time)
        choice[i].time *= choice[i].scale;

      /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time
         is before any differences.  Entries read from a file are left
         untouched.  */
      if (choice[i].colfile == -1)
        {
          double t;
          t = choice[i].time;
          if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1)
            {
              if (choice[i].prev_time == -1.0)
                choice[i].no_time = 1;
              else
                choice[i].time = choice[i].time - choice[i].prev_time;
            }
          choice[i].prev_time = t;
        }
    }

  /* Pass 2: build the sum entries from their components.  */
  for (i = 0; i < num_choices; i++)
    {
      if (choice[i].nsum == 0)
        continue;

      choice[i].time = 0;
      choice[i].no_time = 0;
      for (j = 0; j < choice[i].nsum; j++)
        {
          choice[i].time += choice[choice[i].sum[j]].time;
          if (choice[choice[i].sum[j]].no_time)
            choice[i].no_time = 1;
        }
    }

  /* Pass 3: convert to the requested unit.  Entries read from a file are
     not converted.  */
  for (i = 0; i < num_choices; i++)
    {
      if (choice[i].no_time || choice[i].colfile != -1)
        continue;

      if (option_cmp == CMP_DIFFPREV)
        {
          /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */
          if (option_unit == UNIT_CYCLES)
            choice[i].time /= speed_cycletime;
          else if (option_unit == UNIT_CYCLESPERLIMB)
            {
              if (prev_size == -1)
                choice[i].time /= speed_cycletime;
              else
                choice[i].time /= (speed_cycletime
                                   * (SIZE_TO_DIVISOR(s->size)
                                      - SIZE_TO_DIVISOR(prev_size)));
            }
        }
      else
        {
          if (option_unit == UNIT_CYCLES)
            choice[i].time /= speed_cycletime;
          else if (option_unit == UNIT_CYCLESPERLIMB)
            choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size));
        }
    }

  /* Pass 4: find the fastest column.  */
  for (i = 0; i < num_choices; i++)
    {
      if (choice[i].no_time)
        continue;

      /* Look for the fastest after CMP_DIFFPREV has been applied, but
         before CMP_RATIO or CMP_DIFFERENCE.  There's only a fastest shown
         if there's more than one routine.  */
      for (j = 0; j < xcoln; j++)
        if (xcol[j] == i)
          break;                /* excluded from fastest choice */

      if (j == xcoln && num_choices > 1
          && (fastest == -1 || choice[i].time < fastest_time))
        {
          fastest = i;
          fastest_time = choice[i].time;
        }
    }

  /* Pass 5: make columns relative to the reference column option_cmp_pos
     for CMP_RATIO and CMP_DIFFERENCE.  The reference column itself is
     never modified here.  */
  for (i = 0; i < num_choices; i++)
    {
      if (choice[i].no_time)
        continue;

      if (option_cmp != CMP_DIFFPREV)
        {
          if (option_cmp == CMP_RATIO && i != option_cmp_pos)
            {
              /* A ratio isn't affected by the units chosen. */
              if (choice[option_cmp_pos].no_time
                  || choice[option_cmp_pos].time == 0.0)
                choice[i].no_time = 1;
              else
                choice[i].time /= choice[option_cmp_pos].time;
            }
          else if (option_cmp == CMP_DIFFERENCE && i != option_cmp_pos)
            {
              if (choice[option_cmp_pos].no_time)
                {
                  choice[i].no_time = 1;
                  continue;
                }
              choice[i].time -= choice[option_cmp_pos].time;
            }
        }
    }

  if (option_gnuplot)
    {
      /* In CMP_DIFFPREV, don't print anything for the first size, start
         with the second where an actual difference is available.

         In CMP_RATIO, print the "first" ie option_cmp_pos column as 1.0.

         The 9 decimals printed is much more than the expected precision of
         the measurements actually. */
      if (! (option_cmp == CMP_DIFFPREV && prev_size == -1))
        {
          /* Cast: mp_size_t is not guaranteed to be long on every build. */
          fprintf (fp, "%-6ld ", (long) s->size);
          for (i = 0; i < num_choices; i++)
            fprintf (fp, " %.9e",
                     choice[i].no_time ? 0.0
                     : (option_cmp == CMP_RATIO && i == option_cmp_pos) ? 1.0
                     : choice[i].time);
          fprintf (fp, "\n");
        }
    }
  else
    {
      fprintf (fp, "%-6ld ", (long) s->size);
      for (i = 0; i < num_choices; i++)
        {
          char buf[128];
          int decimals;

          if (choice[i].no_time)
            {
              fprintf (fp, " %*s", COLUMN_WIDTH, "n/a");
            }
          else
            {
              if (option_unit == UNIT_CYCLESPERLIMB
                  || (option_cmp == CMP_RATIO && i != option_cmp_pos))
                decimals = 4;
              else if (option_unit == UNIT_CYCLES)
                decimals = 2;
              else
                decimals = 9;

              /* Bounded: "%f" of a very large value (for instance one read
                 from a column file) needs far more than 128 bytes.  */
              snprintf (buf, sizeof buf, "%s%.*f%s",
                        i == fastest ? first_open_fastest
                                     : first_open_notfastest,
                        decimals, choice[i].time, first_close);
              fprintf (fp, " %*s", COLUMN_WIDTH, buf);
            }
        }
      fprintf (fp, "\n");
    }

  TMP_FREE;
}
int main (int argc, char *argv[]) { int i; int opt; /* Unbuffered so output goes straight out when directed to a pipe or file and isn't lost on killing the program half way. */ setbuf (stdout, NULL); for (;;) { #if _GNU_SOURCE opt = getopt(argc, argv, "a:CcDd::EFf:o:p:P:r::Rs:t:ux:y:w:W:z"); #else opt = getopt(argc, argv, "a:CcDd:EFf:o:p:P:r:Rs:t:ux:y:w:W:z"); #endif if (opt == EOF) break; switch (opt) { case 'a': if (strcmp (optarg, "random") == 0) option_data = DATA_RANDOM; else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2; else if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS; else if (strcmp (optarg, "aas") == 0) option_data = DATA_AAS; else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS; else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD; else { fprintf (stderr, "unrecognised data option: %s\n", optarg); exit (1); } break; case 'C': if (option_unit != UNIT_SECONDS) goto bad_unit; option_unit = UNIT_CYCLESPERLIMB; break; case 'c': if (option_unit != UNIT_SECONDS) { bad_unit: fprintf (stderr, "cannot use more than one of -c, -C\n"); exit (1); } option_unit = UNIT_CYCLES; break; case 'D': if (option_cmp != CMP_ABSOLUTE) goto bad_cmp; option_cmp = CMP_DIFFPREV; break; case 'd': if (option_cmp != CMP_ABSOLUTE) { bad_cmp: fprintf (stderr, "cannot use more than one of -d, -D, -r\n"); exit (1); } option_cmp = CMP_DIFFERENCE; option_cmp_pos=0; if(optarg!=0)option_cmp_pos=atoi(optarg)-1; break; case 'E': option_square = 1; break; case 'F': option_square = 2; break; case 'f': option_factor = atof (optarg); if (option_factor <= 1.0) { fprintf (stderr, "-f factor must be > 1.0\n"); exit (1); } break; case 'o': speed_option_set (optarg); break; case 'P': option_gnuplot = 1; option_gnuplot_basename = optarg; break; case 'p': speed_precision = atoi (optarg); break; case 'R': option_seed = time (NULL); break; case 'r': if (option_cmp != CMP_ABSOLUTE) goto bad_cmp; option_cmp = CMP_RATIO; option_cmp_pos=0; 
if(optarg!=0)option_cmp_pos = atoi(optarg)-1; break; case 's': { char *s; for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ",")) { if (size_num == size_allocnum) { size_array = (struct size_array_t *) __gmp_allocate_or_reallocate (size_array, size_allocnum * sizeof(size_array[0]), (size_allocnum+10) * sizeof(size_array[0])); size_allocnum += 10; } size_array[size_num].inc = 0; if (sscanf (s, "%ld(%ld)%ld", &size_array[size_num].start, &size_array[size_num].inc, &size_array[size_num].end) != 3) { if (sscanf (s, "%ld-%ld", &size_array[size_num].start, &size_array[size_num].end) != 2) { size_array[size_num].start = size_array[size_num].end = atol (s); } } if (size_array[size_num].start < 0 || size_array[size_num].end < 0 || size_array[size_num].start > size_array[size_num].end) { fprintf (stderr, "invalid size parameter: %s\n", s); exit (1); } size_num++; } } break; case 't': option_step = atol (optarg); if (option_step < 1) { fprintf (stderr, "-t step must be >= 1\n"); exit (1); } break; case 'u': option_resource_usage = 1; break; case 'z': sp.cache = 1; break; case 'x': sp.align_xp = atol (optarg); check_align_option ("-x", sp.align_xp); break; case 'y': sp.align_yp = atol (optarg); check_align_option ("-y", sp.align_yp); break; case 'w': sp.align_wp = atol (optarg); check_align_option ("-w", sp.align_wp); break; case 'W': sp.align_wp2 = atol (optarg); check_align_option ("-W", sp.align_wp2); break; case '?': exit(1); } } if (optind >= argc) { usage (); exit (1); } if (size_num == 0) { fprintf (stderr, "-s <size> must be specified\n"); exit (1); } gmp_randinit_default (__gmp_rands); __gmp_rands_initialized = 1; gmp_randseed_ui (__gmp_rands, option_seed); choice = (struct choice_t *) (*__gmp_allocate_func) ((argc - optind) * sizeof(choice[0])); for ( ; optind < argc; optind++) { struct choice_t c; routine_find (&c, argv[optind]); choice[num_choices] = c; num_choices++; } if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) && num_choices < 2) { 
fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n"); } speed_time_init (); if (option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB) speed_cycletime_need_cycles (); else speed_cycletime_need_seconds (); if (option_gnuplot) { run_gnuplot (argc, argv); } else { if (option_unit == UNIT_SECONDS) printf ("overhead %.9f secs", speed_measure (speed_noop, NULL)); else printf ("overhead %.2f cycles", speed_measure (speed_noop, NULL) / speed_cycletime); printf (", precision %d units of %.2e secs", speed_precision, speed_unittime); if (speed_cycletime == 1.0 || speed_cycletime == 0.0) printf (", CPU freq unknown\n"); else printf (", CPU freq %.2f MHz\n", 1e-6/speed_cycletime); printf (" "); for (i = 0; i < num_choices; i++) printf (" %*s", COLUMN_WIDTH, choice[i].name); printf ("\n"); run_all (stdout); } if (option_resource_usage) { #if HAVE_GETRUSAGE { /* This doesn't give data sizes on linux 2.0.x, only utime. */ struct rusage r; if (getrusage (RUSAGE_SELF, &r) != 0) perror ("getrusage"); else printf ("getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\n", r.ru_utime.tv_sec, r.ru_utime.tv_usec, r.ru_idrss, r.ru_isrss, r.ru_ixrss); } #else printf ("getrusage() not available\n"); #endif /* Linux kernel. */ { char buf[128]; sprintf (buf, "/proc/%d/status", getpid()); if (access (buf, R_OK) == 0) { sprintf (buf, "cat /proc/%d/status", getpid()); system (buf); } } } return 0; }
void generate_2D_sample (FILE *output, struct speed_params2D param) { mpfr_t temp; double incr_prec; mpfr_t incr_x; mpfr_t x, x2; double prec; struct speed_params s; int i; int test; int nb_functions; double *t; /* store the timing of each implementation */ /* We first determine how many implementations we have */ nb_functions = 0; while (param.speed_funcs[nb_functions] != NULL) nb_functions++; t = malloc (nb_functions * sizeof (double)); if (t == NULL) { fprintf (stderr, "Can't allocate memory.\n"); abort (); } mpfr_init2 (temp, MPFR_SMALL_PRECISION); /* The precision is sampled from min_prec to max_prec with */ /* approximately nb_points_prec points. If logarithmic_scale_prec */ /* is true, the precision is multiplied by incr_prec at each */ /* step. Otherwise, incr_prec is added at each step. */ if (param.logarithmic_scale_prec) { mpfr_set_ui (temp, (unsigned long int)param.max_prec, MPFR_RNDU); mpfr_div_ui (temp, temp, (unsigned long int)param.min_prec, MPFR_RNDU); mpfr_root (temp, temp, (unsigned long int)param.nb_points_prec, MPFR_RNDU); incr_prec = mpfr_get_d (temp, MPFR_RNDU); } else { incr_prec = (double)param.max_prec - (double)param.min_prec; incr_prec = incr_prec/((double)param.nb_points_prec); } /* The points x are sampled according to the following rule: */ /* If logarithmic_scale_x = 0: */ /* nb_points_x points are equally distributed between min_x and max_x */ /* If logarithmic_scale_x = 1: */ /* nb_points_x points are sampled from 2^(min_x) to 2^(max_x). At */ /* each step, the current point is multiplied by incr_x. */ /* If logarithmic_scale_x = -1: */ /* nb_points_x/2 points are sampled from -2^(max_x) to -2^(min_x) */ /* (at each step, the current point is divided by incr_x); and */ /* nb_points_x/2 points are sampled from 2^(min_x) to 2^(max_x) */ /* (at each step, the current point is multiplied by incr_x). 
*/ mpfr_init2 (incr_x, param.max_prec); if (param.logarithmic_scale_x == 0) { mpfr_set_d (incr_x, (param.max_x - param.min_x)/(double)param.nb_points_x, MPFR_RNDU); } else if (param.logarithmic_scale_x == -1) { mpfr_set_d (incr_x, 2.*(param.max_x - param.min_x)/(double)param.nb_points_x, MPFR_RNDU); mpfr_exp2 (incr_x, incr_x, MPFR_RNDU); } else { /* other values of param.logarithmic_scale_x are considered as 1 */ mpfr_set_d (incr_x, (param.max_x - param.min_x)/(double)param.nb_points_x, MPFR_RNDU); mpfr_exp2 (incr_x, incr_x, MPFR_RNDU); } /* Main loop */ mpfr_init2 (x, param.max_prec); mpfr_init2 (x2, param.max_prec); prec = (double)param.min_prec; while (prec <= param.max_prec) { printf ("prec = %d\n", (int)prec); if (param.logarithmic_scale_x == 0) mpfr_set_d (temp, param.min_x, MPFR_RNDU); else if (param.logarithmic_scale_x == -1) { mpfr_set_d (temp, param.max_x, MPFR_RNDD); mpfr_exp2 (temp, temp, MPFR_RNDD); mpfr_neg (temp, temp, MPFR_RNDU); } else { mpfr_set_d (temp, param.min_x, MPFR_RNDD); mpfr_exp2 (temp, temp, MPFR_RNDD); } /* We perturb x a little bit, in order to avoid trailing zeros that */ /* might change the behavior of algorithms. 
*/ mpfr_const_pi (x, MPFR_RNDN); mpfr_div_2ui (x, x, 7, MPFR_RNDN); mpfr_add_ui (x, x, 1, MPFR_RNDN); mpfr_mul (x, x, temp, MPFR_RNDN); test = 1; while (test) { mpfr_fprintf (output, "%e\t", mpfr_get_d (x, MPFR_RNDN)); mpfr_fprintf (output, "%Pu\t", (mpfr_prec_t)prec); s.r = (mp_limb_t)mpfr_get_exp (x); s.size = (mpfr_prec_t)prec; s.align_xp = (mpfr_sgn (x) > 0)?1:2; mpfr_set_prec (x2, (mpfr_prec_t)prec); mpfr_set (x2, x, MPFR_RNDU); s.xp = x2->_mpfr_d; for (i=0; i<nb_functions; i++) { t[i] = speed_measure (param.speed_funcs[i], &s); mpfr_fprintf (output, "%e\t", t[i]); } fprintf (output, "%d\n", 1 + find_best (t, nb_functions)); if (param.logarithmic_scale_x == 0) { mpfr_add (x, x, incr_x, MPFR_RNDU); if (mpfr_cmp_d (x, param.max_x) > 0) test=0; } else { if (mpfr_sgn (x) < 0 ) { /* if x<0, it means that logarithmic_scale_x=-1 */ mpfr_div (x, x, incr_x, MPFR_RNDU); mpfr_abs (temp, x, MPFR_RNDD); mpfr_log2 (temp, temp, MPFR_RNDD); if (mpfr_cmp_d (temp, param.min_x) < 0) mpfr_neg (x, x, MPFR_RNDN); } else { mpfr_mul (x, x, incr_x, MPFR_RNDU); mpfr_set (temp, x, MPFR_RNDD); mpfr_log2 (temp, temp, MPFR_RNDD); if (mpfr_cmp_d (temp, param.max_x) > 0) test=0; } } } prec = ( (param.logarithmic_scale_prec) ? (prec * incr_prec) : (prec + incr_prec) ); fprintf (output, "\n"); } free (t); mpfr_clear (incr_x); mpfr_clear (x); mpfr_clear (x2); mpfr_clear (temp); return; }
/* Time every routine in choice[] at size s->size with speed_measure and
   write one row of results to FP.

   PREV_SIZE is the size used for the previous row, or -1 for the first
   row; it only matters under CMP_DIFFPREV, where each time is replaced by
   its difference from the same routine's previous measurement.

   A speed_measure result of -1.0 marks the routine as unavailable
   (no_time, printed as "n/a", or as 0.0 in gnuplot form).  Available times
   are multiplied by choice[i].scale, converted to the unit selected by
   option_unit, and, under CMP_RATIO / CMP_DIFFERENCE, made relative to
   choice[0].

   The row is printed either in gnuplot form (option_gnuplot) or as a human
   readable table in which the fastest column is flagged with '#'.  */
void
run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size)
{
  const char *first_open_fastest, *first_open_notfastest, *first_close;
  int i, fastest, want_data;
  double fastest_time;
  TMP_DECL;

  TMP_MARK;

  /* allocate data, unless all routines are NODATA */
  want_data = 0;
  for (i = 0; i < num_choices; i++)
    want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0);

  if (want_data)
    {
      /* NOTE(review): the buffers are allocated into the global sp but
         filled through s; this presumably relies on callers passing &sp --
         confirm against the callers.  */
      SPEED_TMP_ALLOC_LIMBS (sp.xp, s->size, s->align_xp);
      SPEED_TMP_ALLOC_LIMBS (sp.yp, s->size, s->align_yp);

      data_fill (s->xp, s->size);
      data_fill (s->yp, s->size);
    }
  else
    {
      sp.xp = NULL;
      sp.yp = NULL;
    }

  /* The first row under CMP_DIFFPREV is shown in parentheses.  */
  if (prev_size == -1 && option_cmp == CMP_DIFFPREV)
    {
      first_open_fastest = "(#";
      first_open_notfastest = " (";
      first_close = ")";
    }
  else
    {
      first_open_fastest = "#";
      first_open_notfastest = " ";
      first_close = "";
    }

  fastest = -1;
  fastest_time = -1.0;
  for (i = 0; i < num_choices; i++)
    {
      s->r = choice[i].r;
      choice[i].time = speed_measure (choice[i].p->fun, s);
      choice[i].no_time = (choice[i].time == -1.0);
      if (! choice[i].no_time)
        choice[i].time *= choice[i].scale;

      /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time
         is before any differences.  */
      {
        double t;
        t = choice[i].time;
        if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1)
          {
            if (choice[i].prev_time == -1.0)
              choice[i].no_time = 1;
            else
              choice[i].time = choice[i].time - choice[i].prev_time;
          }
        choice[i].prev_time = t;
      }

      if (choice[i].no_time)
        continue;

      /* Look for the fastest after CMP_DIFFPREV has been applied, but
         before CMP_RATIO or CMP_DIFFERENCE.  There's only a fastest shown
         if there's more than one routine.  */
      if (num_choices > 1
          && (fastest == -1 || choice[i].time < fastest_time))
        {
          fastest = i;
          fastest_time = choice[i].time;
        }

      if (option_cmp == CMP_DIFFPREV)
        {
          /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */
          if (option_unit == UNIT_CYCLES)
            choice[i].time /= speed_cycletime;
          else if (option_unit == UNIT_CYCLESPERLIMB)
            {
              if (prev_size == -1)
                choice[i].time /= speed_cycletime;
              else
                choice[i].time /= (speed_cycletime
                                   * (SIZE_TO_DIVISOR(s->size)
                                      - SIZE_TO_DIVISOR(prev_size)));
            }
        }
      else
        {
          if (option_unit == UNIT_CYCLES)
            choice[i].time /= speed_cycletime;
          else if (option_unit == UNIT_CYCLESPERLIMB)
            choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size));

          /* choice[0] has already been converted above on its own
             iteration and is never made relative to itself.  */
          if (option_cmp == CMP_RATIO && i > 0)
            {
              /* A ratio isn't affected by the units chosen. */
              if (choice[0].no_time || choice[0].time == 0.0)
                choice[i].no_time = 1;
              else
                choice[i].time /= choice[0].time;
            }
          else if (option_cmp == CMP_DIFFERENCE && i > 0)
            {
              if (choice[0].no_time)
                {
                  choice[i].no_time = 1;
                  continue;
                }
              choice[i].time -= choice[0].time;
            }
        }
    }

  if (option_gnuplot)
    {
      /* In CMP_DIFFPREV, don't print anything for the first size, start
         with the second where an actual difference is available.

         In CMP_RATIO, print the first column as 1.0.

         The 9 decimals printed is much more than the expected precision of
         the measurements actually. */
      if (! (option_cmp == CMP_DIFFPREV && prev_size == -1))
        {
          /* Cast: mp_size_t is not guaranteed to be long on every build. */
          fprintf (fp, "%-6ld ", (long) s->size);
          for (i = 0; i < num_choices; i++)
            fprintf (fp, " %.9e",
                     choice[i].no_time ? 0.0
                     : (option_cmp == CMP_RATIO && i == 0) ? 1.0
                     : choice[i].time);
          fprintf (fp, "\n");
        }
    }
  else
    {
      fprintf (fp, "%-6ld ", (long) s->size);
      for (i = 0; i < num_choices; i++)
        {
          char buf[128];
          int decimals;

          if (choice[i].no_time)
            {
              fprintf (fp, " %*s", COLUMN_WIDTH, "n/a");
            }
          else
            {
              if (option_unit == UNIT_CYCLESPERLIMB
                  || (option_cmp == CMP_RATIO && i > 0))
                decimals = 4;
              else if (option_unit == UNIT_CYCLES)
                decimals = 2;
              else
                decimals = 9;

              /* Bounded: "%f" of a very large value needs far more than
                 128 bytes, so an unchecked sprintf could overrun buf.  */
              snprintf (buf, sizeof buf, "%s%.*f%s",
                        i == fastest ? first_open_fastest
                                     : first_open_notfastest,
                        decimals, choice[i].time, first_close);
              fprintf (fp, " %*s", COLUMN_WIDTH, buf);
            }
        }
      fprintf (fp, "\n");
    }

  TMP_FREE;
}