int main(int argc, char **argv) { int j, k, t; /**< indices */ int d; /**< number of dimensions */ int N; /**< number of source nodes */ int M; /**< number of target nodes */ int n; /**< expansion degree */ int m; /**< cut-off parameter */ int p; /**< degree of smoothness */ const char *s; /**< name of kernel */ C (*kernel)(R, int, const R *); /**< kernel function */ R c; /**< parameter for kernel */ fastsum_plan my_fastsum_plan; /**< plan for fast summation */ C *direct; /**< array for direct computation */ ticks t0, t1; /**< for time measurement */ R time; /**< for time measurement */ R error = K(0.0); /**< for error computation */ R eps_I; /**< inner boundary */ R eps_B; /**< outer boundary */ FILE *fid1, *fid2; R temp; if (argc != 11) { printf("\nfastsum_test d N M n m p kernel c\n\n"); printf(" d dimension \n"); printf(" N number of source nodes \n"); printf(" M number of target nodes \n"); printf(" n expansion degree \n"); printf(" m cut-off parameter \n"); printf(" p degree of smoothness \n"); printf(" kernel kernel function (e.g., gaussian)\n"); printf(" c kernel parameter \n"); printf(" eps_I inner boundary \n"); printf(" eps_B outer boundary \n\n"); exit(-1); } else { d = atoi(argv[1]); N = atoi(argv[2]); c = K(1.0) / POW((R)(N), K(1.0) / ((R)(d))); M = atoi(argv[3]); n = atoi(argv[4]); m = atoi(argv[5]); p = atoi(argv[6]); s = argv[7]; c = (R)(atof(argv[8])); eps_I = (R)(atof(argv[9])); eps_B = (R)(atof(argv[10])); if (strcmp(s, "gaussian") == 0) kernel = gaussian; else if (strcmp(s, "multiquadric") == 0) kernel = multiquadric; else if (strcmp(s, "inverse_multiquadric") == 0) kernel = inverse_multiquadric; else if (strcmp(s, "logarithm") == 0) kernel = logarithm; else if (strcmp(s, "thinplate_spline") == 0) kernel = thinplate_spline; else if (strcmp(s, "one_over_square") == 0) kernel = one_over_square; else if (strcmp(s, "one_over_modulus") == 0) kernel = one_over_modulus; else if (strcmp(s, "one_over_x") == 0) kernel = one_over_x; else if (strcmp(s, 
"inverse_multiquadric3") == 0) kernel = inverse_multiquadric3; else if (strcmp(s, "sinc_kernel") == 0) kernel = sinc_kernel; else if (strcmp(s, "cosc") == 0) kernel = cosc; else if (strcmp(s, "cot") == 0) kernel = kcot; else { s = "multiquadric"; kernel = multiquadric; } } printf( "d=%d, N=%d, M=%d, n=%d, m=%d, p=%d, kernel=%s, c=%" __FGS__ ", eps_I=%" __FGS__ ", eps_B=%" __FGS__ " \n", d, N, M, n, m, p, s, c, eps_I, eps_B); /** init two dimensional fastsum plan */ fastsum_init_guru(&my_fastsum_plan, d, N, M, kernel, &c, 0, n, m, p, eps_I, eps_B); /*fastsum_init_guru(&my_fastsum_plan, d, N, M, kernel, &c, EXACT_NEARFIELD, n, m, p);*/ /** load source knots and coefficients */ fid1 = fopen("x.dat", "r"); fid2 = fopen("alpha.dat", "r"); for (k = 0; k < N; k++) { for (t = 0; t < d; t++) { fscanf(fid1, __FR__, &my_fastsum_plan.x[k * d + t]); } fscanf(fid2, __FR__, &temp); my_fastsum_plan.alpha[k] = temp; fscanf(fid2, __FR__, &temp); my_fastsum_plan.alpha[k] += temp * II; } fclose(fid1); fclose(fid2); /** load target knots */ fid1 = fopen("y.dat", "r"); for (j = 0; j < M; j++) { for (t = 0; t < d; t++) { fscanf(fid1, __FR__, &my_fastsum_plan.y[j * d + t]); } } fclose(fid1); /** direct computation */ printf("direct computation: "); fflush(NULL); t0 = getticks(); fastsum_exact(&my_fastsum_plan); t1 = getticks(); time = NFFT(elapsed_seconds)(t1, t0); printf(__FI__ "sec\n", time); /** copy result */ direct = (C *) NFFT(malloc)((size_t)(my_fastsum_plan.M_total) * (sizeof(C))); for (j = 0; j < my_fastsum_plan.M_total; j++) direct[j] = my_fastsum_plan.f[j]; /** precomputation */ printf("pre-computation: "); fflush(NULL); t0 = getticks(); fastsum_precompute(&my_fastsum_plan); t1 = getticks(); time = NFFT(elapsed_seconds)(t1, t0); printf(__FI__ "sec\n", time); /** fast computation */ printf("fast computation: "); fflush(NULL); t0 = getticks(); fastsum_trafo(&my_fastsum_plan); t1 = getticks(); time = NFFT(elapsed_seconds)(t1, t0); printf(__FI__ "sec\n", time); /** compute max 
error */ error = K(0.0); for (j = 0; j < my_fastsum_plan.M_total; j++) { if (CABS(direct[j] - my_fastsum_plan.f[j]) / CABS(direct[j]) > error) error = CABS(direct[j] - my_fastsum_plan.f[j]) / CABS(direct[j]); } printf("max relative error: " __FE__ "\n", error); /** write result to file */ fid1 = fopen("f.dat", "w+"); fid2 = fopen("f_direct.dat", "w+"); if (fid1 == NULL) { printf("Fehler!\n"); exit(EXIT_FAILURE); } for (j = 0; j < M; j++) { temp = CREAL(my_fastsum_plan.f[j]); fprintf(fid1, " % .16" __FES__ "", temp); temp = CIMAG(my_fastsum_plan.f[j]); fprintf(fid1, " % .16" __FES__ "\n", temp); temp = CREAL(direct[j]); fprintf(fid2, " % .16" __FES__ "", temp); temp = CIMAG(direct[j]); fprintf(fid2, " % .16" __FES__ "\n", temp); } fclose(fid1); fclose(fid2); /** finalise the plan */ fastsum_finalize(&my_fastsum_plan); return EXIT_SUCCESS; }
int main(int argc, char **argv) { int j,k,t; /**< indices */ int d; /**< number of dimensions */ int N; /**< number of source nodes */ int M; /**< number of target nodes */ int n; /**< expansion degree */ int m; /**< cut-off parameter */ int p; /**< degree of smoothness */ char *s; /**< name of kernel */ double _Complex (*kernel)(double , int , const double *); /**< kernel function */ double c; /**< parameter for kernel */ fastsum_plan my_fastsum_plan; /**< plan for fast summation */ double _Complex *direct; /**< array for direct computation */ ticks t0, t1; /**< for time measurement */ double time; /**< for time measurement */ double error=0.0; /**< for error computation */ double eps_I; /**< inner boundary */ double eps_B; /**< outer boundary */ if (argc!=11) { printf("\nfastsum_test d N M n m p kernel c eps_I eps_B\n\n"); printf(" d dimension \n"); printf(" N number of source nodes \n"); printf(" M number of target nodes \n"); printf(" n expansion degree \n"); printf(" m cut-off parameter \n"); printf(" p degree of smoothness \n"); printf(" kernel kernel function (e.g., gaussian)\n"); printf(" c kernel parameter \n"); printf(" eps_I inner boundary \n"); printf(" eps_B outer boundary \n\n"); exit(-1); } else { d=atoi(argv[1]); N=atoi(argv[2]); c=1.0/pow((double)N,1.0/(double)d); M=atoi(argv[3]); n=atoi(argv[4]); m=atoi(argv[5]); p=atoi(argv[6]); s=argv[7]; c=atof(argv[8]); eps_I=atof(argv[9]); eps_B=atof(argv[10]); if (strcmp(s,"gaussian")==0) kernel = gaussian; else if (strcmp(s,"multiquadric")==0) kernel = multiquadric; else if (strcmp(s,"inverse_multiquadric")==0) kernel = inverse_multiquadric; else if (strcmp(s,"logarithm")==0) kernel = logarithm; else if (strcmp(s,"thinplate_spline")==0) kernel = thinplate_spline; else if (strcmp(s,"one_over_square")==0) kernel = one_over_square; else if (strcmp(s,"one_over_modulus")==0) kernel = one_over_modulus; else if (strcmp(s,"one_over_x")==0) kernel = one_over_x; else if (strcmp(s,"inverse_multiquadric3")==0) kernel = 
inverse_multiquadric3; else if (strcmp(s,"sinc_kernel")==0) kernel = sinc_kernel; else if (strcmp(s,"cosc")==0) kernel = cosc; else if (strcmp(s,"cot")==0) kernel = kcot; else { s="multiquadric"; kernel = multiquadric; } } printf("d=%d, N=%d, M=%d, n=%d, m=%d, p=%d, kernel=%s, c=%g, eps_I=%g, eps_B=%g \n",d,N,M,n,m,p,s,c,eps_I,eps_B); #ifdef NF_KUB printf("nearfield correction using piecewise cubic Lagrange interpolation\n"); #elif defined(NF_QUADR) printf("nearfield correction using piecewise quadratic Lagrange interpolation\n"); #elif defined(NF_LIN) printf("nearfield correction using piecewise linear Lagrange interpolation\n"); #endif #ifdef _OPENMP #pragma omp parallel { #pragma omp single { printf("nthreads=%d\n", omp_get_max_threads()); } } fftw_init_threads(); #endif /** init d-dimensional fastsum plan */ fastsum_init_guru(&my_fastsum_plan, d, N, M, kernel, &c, 0, n, m, p, eps_I, eps_B); //fastsum_init_guru(&my_fastsum_plan, d, N, M, kernel, &c, NEARFIELD_BOXES, n, m, p, eps_I, eps_B); if (my_fastsum_plan.flags & NEARFIELD_BOXES) printf("determination of nearfield candidates based on partitioning into boxes\n"); else printf("determination of nearfield candidates based on search tree\n"); /** init source knots in a d-ball with radius 0.25-eps_b/2 */ k = 0; while (k < N) { double r_max = 0.25 - my_fastsum_plan.eps_B/2.0; double r2 = 0.0; for (j=0; j<d; j++) my_fastsum_plan.x[k*d+j] = 2.0 * r_max * (double)rand()/(double)RAND_MAX - r_max; for (j=0; j<d; j++) r2 += my_fastsum_plan.x[k*d+j] * my_fastsum_plan.x[k*d+j]; if (r2 >= r_max * r_max) continue; k++; } for (k=0; k<N; k++) { /* double r=(0.25-my_fastsum_plan.eps_B/2.0)*pow((double)rand()/(double)RAND_MAX,1.0/d); my_fastsum_plan.x[k*d+0] = r; for (j=1; j<d; j++) { double phi=2.0*KPI*(double)rand()/(double)RAND_MAX; my_fastsum_plan.x[k*d+j] = r; for (t=0; t<j; t++) { my_fastsum_plan.x[k*d+t] *= cos(phi); } my_fastsum_plan.x[k*d+j] *= sin(phi); } */ my_fastsum_plan.alpha[k] = (double)rand()/(double)RAND_MAX + 
_Complex_I*(double)rand()/(double)RAND_MAX; } /** init target knots in a d-ball with radius 0.25-eps_b/2 */ k = 0; while (k < M) { double r_max = 0.25 - my_fastsum_plan.eps_B/2.0; double r2 = 0.0; for (j=0; j<d; j++) my_fastsum_plan.y[k*d+j] = 2.0 * r_max * (double)rand()/(double)RAND_MAX - r_max; for (j=0; j<d; j++) r2 += my_fastsum_plan.y[k*d+j] * my_fastsum_plan.y[k*d+j]; if (r2 >= r_max * r_max) continue; k++; } /* for (k=0; k<M; k++) { double r=(0.25-my_fastsum_plan.eps_B/2.0)*pow((double)rand()/(double)RAND_MAX,1.0/d); my_fastsum_plan.y[k*d+0] = r; for (j=1; j<d; j++) { double phi=2.0*KPI*(double)rand()/(double)RAND_MAX; my_fastsum_plan.y[k*d+j] = r; for (t=0; t<j; t++) { my_fastsum_plan.y[k*d+t] *= cos(phi); } my_fastsum_plan.y[k*d+j] *= sin(phi); } } */ /** direct computation */ printf("direct computation: "); fflush(NULL); t0 = getticks(); fastsum_exact(&my_fastsum_plan); t1 = getticks(); time=nfft_elapsed_seconds(t1,t0); printf("%fsec\n",time); /** copy result */ direct = (double _Complex *)nfft_malloc(my_fastsum_plan.M_total*(sizeof(double _Complex))); for (j=0; j<my_fastsum_plan.M_total; j++) direct[j]=my_fastsum_plan.f[j]; /** precomputation */ printf("pre-computation: "); fflush(NULL); t0 = getticks(); fastsum_precompute(&my_fastsum_plan); t1 = getticks(); time=nfft_elapsed_seconds(t1,t0); printf("%fsec\n",time); /** fast computation */ printf("fast computation: "); fflush(NULL); t0 = getticks(); fastsum_trafo(&my_fastsum_plan); t1 = getticks(); time=nfft_elapsed_seconds(t1,t0); printf("%fsec\n",time); /** compute max error */ error=0.0; for (j=0; j<my_fastsum_plan.M_total; j++) { if (cabs(direct[j]-my_fastsum_plan.f[j])/cabs(direct[j])>error) error=cabs(direct[j]-my_fastsum_plan.f[j])/cabs(direct[j]); } printf("max relative error: %e\n",error); /** finalise the plan */ fastsum_finalize(&my_fastsum_plan); return 0; }
/**
 * Runs one fast summation benchmark on a test case read from @p infile and
 * prints one line of timings to stdout.
 *
 * Expected input layout: "d L M", then L*d source coordinates, then L
 * coefficients as "re im" pairs, then M*d target coordinates. All
 * coordinates are scaled by 0.25 - eps_B/2 (taken from the initialised
 * plan) before being stored.
 *
 * The timed section covers fastsum_precompute() and fastsum_trafo(). FFTW
 * wisdom is imported before and exported after plan creation, using a file
 * name that depends on whether _OPENMP is defined.
 *
 * @param infile  open stream positioned at the start of the test case
 * @param n       expansion degree passed to fastsum_init_guru()
 * @param m       cut-off parameter passed to fastsum_init_guru()
 * @param p       degree of smoothness passed to fastsum_init_guru()
 * @param kernel  kernel function
 * @param c       kernel parameter
 * @param eps_I   inner boundary
 * @param eps_B   outer boundary
 * @return 0 on success, 1 if @p infile is truncated or malformed (nothing
 *         is printed to stdout in that case).
 */
int bench_openmp(FILE *infile, int n, int m, int p, C (*kernel)(R, int, const R *), R c, R eps_I, R eps_B)
{
  fastsum_plan my_fastsum_plan;
  int d, L, M;
  int t, j, i;
  int status = 1; /* stays non-zero until all input was read and the run finished */
  R re, im;
  R r_max;
  R tt_rest;
  ticks t0, t1;
  R tt_total;

  /* Without a valid header the sizes are indeterminate, so stop before using them. */
  if (fscanf(infile, "%d %d %d", &d, &L, &M) != 3)
  {
    fprintf(stderr, "bench_openmp: cannot read problem size (d L M)\n");
    return 1;
  }

#ifdef _OPENMP
  FFTW(import_wisdom_from_filename)("fastsum_benchomp_detail_threads.plan");
#else
  FFTW(import_wisdom_from_filename)("fastsum_benchomp_detail_single.plan");
#endif

  fastsum_init_guru(&my_fastsum_plan, d, L, M, kernel, &c, NEARFIELD_BOXES, n, m, p, eps_I, eps_B);

#ifdef _OPENMP
  FFTW(export_wisdom_to_filename)("fastsum_benchomp_detail_threads.plan");
#else
  FFTW(export_wisdom_to_filename)("fastsum_benchomp_detail_single.plan");
#endif

  /*
   * The scaling radius depends on the plan's eps_B, so it may only be
   * computed once fastsum_init_guru() has filled in the plan.
   */
  r_max = K(0.25) - my_fastsum_plan.eps_B / K(2.0);

  /** source knots */
  for (j = 0; j < L; j++)
  {
    for (t = 0; t < d; t++)
    {
      R v;
      if (fscanf(infile, __FR__, &v) != 1)
        goto done;
      my_fastsum_plan.x[d * j + t] = v * r_max;
    }
  }

  /** coefficients */
  for (j = 0; j < L; j++)
  {
    if (fscanf(infile, __FR__ " " __FR__, &re, &im) != 2)
      goto done;
    my_fastsum_plan.alpha[j] = re + II * im;
  }

  /** target knots */
  for (j = 0; j < M; j++)
  {
    for (t = 0; t < d; t++)
    {
      R v;
      if (fscanf(infile, __FR__, &v) != 1)
        goto done;
      my_fastsum_plan.y[d * j + t] = v * r_max;
    }
  }

  /** precomputation */
  t0 = getticks();
  fastsum_precompute(&my_fastsum_plan);

  /** fast computation */
  fastsum_trafo(&my_fastsum_plan);
  t1 = getticks();
  tt_total = NFFT(elapsed_seconds)(t1, t0);

  /* Counters belonging to a disabled measurement are zeroed before they are printed. */
#ifndef MEASURE_TIME
  for (i = 0; i < 8; i++)
    my_fastsum_plan.MEASURE_TIME_t[i] = K(0.0);

  my_fastsum_plan.mv1.MEASURE_TIME_t[0] = K(0.0);
  my_fastsum_plan.mv1.MEASURE_TIME_t[2] = K(0.0);
  my_fastsum_plan.mv2.MEASURE_TIME_t[0] = K(0.0);
  my_fastsum_plan.mv2.MEASURE_TIME_t[2] = K(0.0);
#endif
#ifndef MEASURE_TIME_FFTW
  my_fastsum_plan.mv1.MEASURE_TIME_t[1] = K(0.0);
  my_fastsum_plan.mv2.MEASURE_TIME_t[1] = K(0.0);
#endif

  /* Time not attributed to any of the eight fastsum counters. */
  tt_rest = tt_total;
  for (i = 0; i < 8; i++)
    tt_rest = tt_rest - my_fastsum_plan.MEASURE_TIME_t[i];

  /*
   * Columns: fastsum counters 0..7, unattributed time, total time, then
   * counters 0..2 of mv1 and of mv2.
   */
  printf(
      "%.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ " %.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ "\n",
      my_fastsum_plan.MEASURE_TIME_t[0], my_fastsum_plan.MEASURE_TIME_t[1],
      my_fastsum_plan.MEASURE_TIME_t[2], my_fastsum_plan.MEASURE_TIME_t[3],
      my_fastsum_plan.MEASURE_TIME_t[4], my_fastsum_plan.MEASURE_TIME_t[5],
      my_fastsum_plan.MEASURE_TIME_t[6], my_fastsum_plan.MEASURE_TIME_t[7],
      tt_rest, tt_total,
      my_fastsum_plan.mv1.MEASURE_TIME_t[0], my_fastsum_plan.mv1.MEASURE_TIME_t[1],
      my_fastsum_plan.mv1.MEASURE_TIME_t[2], my_fastsum_plan.mv2.MEASURE_TIME_t[0],
      my_fastsum_plan.mv2.MEASURE_TIME_t[1], my_fastsum_plan.mv2.MEASURE_TIME_t[2]);

  status = 0;

done:
  if (status != 0)
    fprintf(stderr, "bench_openmp: input is truncated or malformed\n");

  /* The plan was initialised on every path that reaches this point. */
  fastsum_finalize(&my_fastsum_plan);

  return status;
}