void CompareLocale() { json::Value output; for (auto& pair : compareList) { json::Value lhs, rhs; File flhs("locale" / pair.name); File frhs("locale_diff" / pair.name); if (!frhs) continue; if (flhs) json::parse(flhs, lhs, json::mJSCall); json::parse(frhs, rhs, json::mJSCall); pair.func(lhs, rhs, output); } json::write(File("locale_diff/stringlist.js", "w"), output, json::mJSON); }
void main(int argc, char** argv) { //int k = atoi(argv[1]); //int N = pow(2,k); int N=1024; int k=10; float * a = (float *) malloc(sizeof(float)*N* N * 2); float * b = (float *) malloc(sizeof(float) *N*N * 2); float * c = (float *) malloc(sizeof(float) * N*N* 2); float p = 2*M_PI ; for (int i =0; i< N*N; i++) { a[2*i] = 1; a[2*i+1] = 0; b[2*i] = 1; b[2*i+1] = 0; } #if 0 srand(1); for(int i =0;i<N*N;i++) { a[2*i]=sin(i%N *2 *M_PI); //printf("%f\n",uu[2*i]); a[2*i+1] =0 ; } #endif print_platforms_devices(); cl_context ctx; cl_command_queue queue; create_context_on("NVIDIA","GeForce GTX 590",0,&ctx,&queue,0); cl_int status; cl_mem buf_a = clCreateBuffer(ctx, CL_MEM_READ_WRITE, sizeof(float) *N *N* 2 , 0, &status); CHECK_CL_ERROR(status, "clCreateBuffer"); cl_mem buf_b = clCreateBuffer(ctx, CL_MEM_READ_WRITE, sizeof(float) * N *N* 2 , 0, &status); CHECK_CL_ERROR(status, "clCreateBuffer"); cl_mem buf_c = clCreateBuffer(ctx, CL_MEM_READ_WRITE, sizeof(float) * N *N* 2 , 0, &status); CHECK_CL_ERROR(status, "clCreateBuffer"); cl_mem buf_d = clCreateBuffer(ctx, CL_MEM_READ_WRITE, sizeof(float)*N *N* 2 , 0, &status); CHECK_CL_ERROR(status, "clCreateBuffer"); cl_mem buf_e = clCreateBuffer(ctx, CL_MEM_READ_WRITE, sizeof(float) *N *N* 2 , 0, &status); CHECK_CL_ERROR(status, "clCreateBuffer"); cl_mem buf_f = clCreateBuffer(ctx, CL_MEM_READ_WRITE, sizeof(float) *N *N* 2 , 0, &status); CHECK_CL_ERROR(status, "clCreateBuffer"); cl_mem buf_g = clCreateBuffer(ctx, CL_MEM_READ_WRITE, sizeof(float) *N *N* 2 , 0, &status); CHECK_CL_ERROR(status, "clCreateBuffer"); CALL_CL_GUARDED(clEnqueueWriteBuffer, ( queue, buf_a, /*blocking*/ CL_TRUE, /*offset*/ 0, sizeof(float) *N*N*2, a, 0, NULL, NULL)); CALL_CL_GUARDED(clEnqueueWriteBuffer, ( queue, buf_b, /*blocking*/ CL_TRUE, /*offset*/ 0, sizeof(float) *N *N* 2, b, 0, NULL, NULL)); CALL_CL_GUARDED(clEnqueueWriteBuffer, ( queue, buf_c, /*blocking*/ CL_TRUE, /*offset*/ 0, sizeof(float) *N* N*2, c, 0, NULL, NULL)); char *knl_text = 
read_file("vec_add.cl"); cl_kernel vec_add = kernel_from_string(ctx, knl_text, "sum", NULL); free(knl_text); knl_text = read_file("mat_etr_mul.cl"); cl_kernel mat_etr_mul = kernel_from_string(ctx, knl_text, "mult", NULL); free(knl_text); knl_text = read_file("radix-4-float.cl"); cl_kernel fft1D = kernel_from_string(ctx, knl_text, "fft1D", NULL); free(knl_text); knl_text = read_file("radix-4-init.cl"); cl_kernel fft_init = kernel_from_string(ctx, knl_text, "fft1D_init", NULL); free(knl_text); knl_text = read_file("radix-4-interm.cl"); cl_kernel fft_interm = kernel_from_string(ctx, knl_text, "fft1D", NULL); free(knl_text); knl_text = read_file("transpose-soln-gpu.cl"); cl_kernel mat_trans = kernel_from_string(ctx, knl_text, "transpose", NULL); free(knl_text); knl_text = read_file("radix-4-modi.cl"); cl_kernel fft_init_w = kernel_from_string(ctx, knl_text, "fft1D_init", NULL); free(knl_text); knl_text = read_file("vec_zero.cl"); cl_kernel vec_zero = kernel_from_string(ctx, knl_text, "zero", NULL); free(knl_text); knl_text = read_file("reduction.cl"); cl_kernel reduct_mul = kernel_from_string(ctx, knl_text, "reduction_mult", NULL); free(knl_text); knl_text = read_file("reduction1D.cl"); cl_kernel reduct = kernel_from_string(ctx, knl_text, "reduction", NULL); free(knl_text); knl_text = read_file("reduction-init.cl"); cl_kernel reduct_init = kernel_from_string(ctx, knl_text, "reduction_init", NULL); free(knl_text); knl_text = read_file("reduct-energy.cl"); cl_kernel reduct_eng = kernel_from_string(ctx, knl_text, "reduction_eng", NULL); free(knl_text); knl_text = read_file("resid.cl"); cl_kernel resid = kernel_from_string(ctx, knl_text, "resid", NULL); free(knl_text); knl_text = read_file("resid-init.cl"); cl_kernel resid_init = kernel_from_string(ctx, knl_text, "resid_init", NULL); free(knl_text); knl_text = read_file("radix-4-big.cl"); cl_kernel fft_big = kernel_from_string(ctx, knl_text, "fft1D_big", NULL); free(knl_text); knl_text = read_file("radix-4-big-clean.cl"); 
cl_kernel fft_clean = kernel_from_string(ctx, knl_text, "fft1D_clean", NULL); free(knl_text); knl_text = read_file("radix-4-2D.cl"); cl_kernel fft_2D = kernel_from_string(ctx, knl_text, "fft2D_big", NULL); free(knl_text); knl_text = read_file("radix-4-2D-clean.cl"); cl_kernel fft_2D_clean = kernel_from_string(ctx, knl_text, "fft2D_clean", NULL); free(knl_text); knl_text = read_file("mat-trans-3D.cl"); cl_kernel mat_trans_3D = kernel_from_string(ctx, knl_text, "transpose_3D", NULL); free(knl_text); int Ns =1 ; int direction = 1; timestamp_type time1, time2; struct parameter param; param.N = N; param.epsilon = 0.1; param.s =1; float kk =1e-4; param.h = 2*PI/N; param.N = N; param.maxCG = 1000; param.maxN = 5; //Minimum and starting time step float mink = 1e-7; float startk = 1e-4; // Tolerances param.Ntol = 1e-4; param.cgtol = 1e-7; float ksafety = 0.8; float kfact = 1.3; float kfact2 = 1/1.3; float Nfact = 0.7; float CGfact = 0.7; double elapsed ; CALL_CL_GUARDED(clFinish, (queue)); get_timestamp(&time1); //for(int s=0;s<100;s++) //fft_1D_big(buf_a,buf_b,buf_c,N,fft_big,fft_clean,mat_trans,queue,direction,0); //fft_1D_new(buf_a,buf_b,buf_c,N,fft_init,fft_interm, fft1D,queue,direction,0); //fft_1D(buf_a,buf_b,buf_c,N,fft_init, fft1D,queue,direction,0); //fft2D(buf_a,buf_b,buf_c,buf_d,N,fft_init,fft1D,mat_trans,queue, 1); //fft2D_new(buf_a,buf_b,buf_c,buf_d,N,fft_init,fft_interm,fft1D,mat_trans,queue, 1); //fft2D_big(buf_a,buf_b,buf_c,buf_d,N,fft_big,fft_clean,mat_trans,queue,direction); //fft2D_big_new(buf_a,buf_b,buf_c,buf_d,N,fft_2D,fft_2D_clean, //mat_trans,mat_trans_3D,queue,direction); //fft_w(buf_a,buf_b,buf_c,buf_d,buf_e,N,0.1,0,1,fft_init_w,fft_init,fft1D,mat_trans,queue); #if 0 frhs(buf_a,buf_b,buf_c,buf_d,buf_e,¶m,fft1D_init,fft1D,mat_trans, vec_add, queue); #endif #if 0 float E1 = energy(buf_a, buf_b, buf_c,buf_d, buf_e,buf_f,1e-4, ¶m, fft_init,fft1D,mat_trans,reduct_eng, reduct,queue); #endif //float reside = 
residual(buf_a,buf_b,resid,resid_init,queue,N*N); /*fft_d_q(buf_a,buf_b,buf_c,buf_d, N,0.1,k ,1, fft1D_init, fft1D,mat_trans,queue);*/ //for(int j= 0;j<N;j++) //{ //fft_1D_w_orig(buf_a,buf_b,buf_c,N,fft1D_init,fft1D,queue,1,j); //} //fft_shar(buf_a,buf_b,buf_c,buf_d,N,0.1,0,1,fft1D_init,fft1D,mat_trans,queue); //mat__trans(buf_a,buf_b,N,mat_trans,queue,4,0.1,0,1); //double elapsed = reduction_mult(buf_a, buf_b,buf_c,N*N,reduct_mul,reduct,queue); CALL_CL_GUARDED(clFinish, (queue)); get_timestamp(&time1); fft_1D_big(buf_a,buf_b,buf_c,N*N,fft_big,fft_clean,mat_trans,queue,direction,0); CALL_CL_GUARDED(clFinish, (queue)); get_timestamp(&time2); elapsed = timestamp_diff_in_seconds(time1,time2); printf("Hierarchy 1D FFT of size %d array on gpu takes %f s\n", N*N,elapsed); printf("achieve %f GFLOPS \n",6*2*N*N*k/elapsed*1e-9); printf("---------------------------------------------\n"); CALL_CL_GUARDED(clFinish, (queue)); get_timestamp(&time1); fft2D(buf_a,buf_b,buf_c,buf_d,N,fft_init,fft1D,mat_trans,queue, 1); CALL_CL_GUARDED(clFinish, (queue)); get_timestamp(&time2); elapsed = timestamp_diff_in_seconds(time1,time2); printf("Navie 2D FFT of size %d * %d matrix on gpu takes %f s\n", N,N,elapsed); printf("achieve %f GFLOPS \n",6*2*N*N*k/elapsed*1e-9); printf("---------------------------------------------\n"); //printf("data access from global achieve %f GB/s\n",sizeof(float)*2*16*N*N/elapsed*1e-9); CALL_CL_GUARDED(clFinish, (queue)); get_timestamp(&time1); fft2D_new(buf_a,buf_b,buf_c,buf_d,N,fft_init,fft_interm,fft1D,mat_trans,queue, 1); CALL_CL_GUARDED(clFinish, (queue)); get_timestamp(&time2); elapsed = timestamp_diff_in_seconds(time1,time2); printf("local data exchange 2D FFT of size %d * %d matrix on gpu takes %f s\n", N,N,elapsed); printf("achieve %f GFLOPS \n",6*2*N*N*k/elapsed*1e-9); printf("---------------------------------------------\n"); CALL_CL_GUARDED(clFinish, (queue)); get_timestamp(&time1); 
fft2D_big(buf_a,buf_b,buf_c,buf_d,N,fft_big,fft_clean,mat_trans,queue,direction); CALL_CL_GUARDED(clFinish, (queue)); get_timestamp(&time2); elapsed = timestamp_diff_in_seconds(time1,time2); printf("Hierarchy 2D FFT of size %d * %d matrix on gpu takes %f s\n", N,N,elapsed); printf("achieve %f GFLOPS \n",6*2*N*N*k/elapsed*1e-9); printf("---------------------------------------------\n"); CALL_CL_GUARDED(clFinish, (queue)); get_timestamp(&time1); fft2D_big_new(buf_a,buf_b,buf_c,buf_d,N,fft_2D,fft_2D_clean, mat_trans,mat_trans_3D,queue,direction); CALL_CL_GUARDED(clFinish, (queue)); get_timestamp(&time2); elapsed = timestamp_diff_in_seconds(time1,time2); printf("Using 2D kernel 2D FFT of size %d * %d matrix on gpu takes %f s\n", N,N,elapsed); printf("achieve %f GFLOPS \n",6*2*N*N*k/elapsed*1e-9); printf("---------------------------------------------\n"); get_timestamp(&time1); direction = -1; //fft_1D(buf_b,buf_c,buf_d,N,fft_init, fft1D,queue,direction,0); fft2D(buf_b,buf_c,buf_d,buf_e,N,fft_init,fft1D,mat_trans,queue, direction); //fft2D_new(buf_b,buf_c,buf_e,buf_d,N,fft_init,fft_interm,fft1D,mat_trans,queue, -1); //fft2D_big(buf_b,buf_c,buf_d,buf_e,N,fft_big,fft_clean,mat_trans,queue,direction); CALL_CL_GUARDED(clFinish, (queue)); get_timestamp(&time2); elapsed = timestamp_diff_in_seconds(time1,time2); //printf("1D inverse %f s\n", elapsed); #if 0 float test; CALL_CL_GUARDED(clFinish, (queue)); CALL_CL_GUARDED(clEnqueueReadBuffer, ( queue, buf_b, /*blocking*/ CL_TRUE, /*offset*/ 0, sizeof(float), &test, 0, NULL, NULL)); printf("test success and %f \n",test); #endif #if 0 CALL_CL_GUARDED(clFinish, (queue)); CALL_CL_GUARDED(clEnqueueReadBuffer, ( queue, buf_c, /*blocking*/ CL_TRUE, /*offset*/ 0, 2*N*N* sizeof(float), c, 0, NULL, NULL)); /*for(int i =0; i< N; i++) { printf("a%f+ i*",a[2*i]); printf("%f\n",a[2*i+1]); }*/ int T = 10<N? 
10:N ; for(int i =0; i< T; i++) { printf("%f + i*",a[2*i]); printf("%f\t",a[2*i+1]); printf("%f + i*",c[2*i]); printf("%f\n",c[2*i+1]); } #endif /* for( Ns = 1;Ns < N; Ns *= 2 ) { for (int j = 0; j<N/2; j++) { fftiteration(j,N,Ns,a,b); } float * d; d = a ; a = b; b = d; //printf("ok\n"); } */ CALL_CL_GUARDED(clReleaseMemObject, (buf_a)); CALL_CL_GUARDED(clReleaseMemObject, (buf_b)); CALL_CL_GUARDED(clReleaseMemObject, (buf_c)); CALL_CL_GUARDED(clReleaseMemObject, (buf_d)); CALL_CL_GUARDED(clReleaseMemObject, (buf_e)); CALL_CL_GUARDED(clReleaseKernel, (fft1D)); CALL_CL_GUARDED(clReleaseKernel, (fft_init)); CALL_CL_GUARDED(clReleaseKernel, (vec_add)); CALL_CL_GUARDED(clReleaseKernel, (reduct_mul)); CALL_CL_GUARDED(clReleaseKernel, (reduct)); CALL_CL_GUARDED(clReleaseKernel, (mat_trans)); CALL_CL_GUARDED(clReleaseCommandQueue, (queue)); CALL_CL_GUARDED(clReleaseContext, (ctx)); }
/*---------------------------------RK5------------------------------*/
/*
 * ode5 - take one adaptive step of an embedded 5th/4th-order Runge-Kutta
 * scheme (coefficients are the Dormand-Prince tableau; the original note
 * credits Numerical Recipes).
 *
 * Parameters
 *   yold              state at time t; numberofequations entries, not modified
 *   ynew              receives the 5th-order state after the accepted step
 *   h                 in:  trial step size
 *                     out: step size suggested for the next call. While the
 *                          scaled error estimate exceeds the tolerance the
 *                          step is divided by 5 and retried; after acceptance
 *                          the step is enlarged by
 *                          (tolerance / error)^(1/5), capped at a factor of 5.
 *   t                 current time
 *   numberofequations number of equations in the system
 *   frhs              right-hand side: frhs(y, t, dydt) fills dydt
 *
 * Returns
 *   The step size that was actually used for the accepted step.
 *   0 if the work array could not be allocated.
 *   h unchanged (and nothing else done) if numberofequations <= 0.
 *
 * If the step has to shrink below 1e-10 the integration cannot proceed: a
 * diagnostic is written to stderr and the process exits with EXIT_FAILURE.
 */
double ode5(double yold[], double ynew[], double &h, double t,
            int numberofequations, void frhs(double[], double, double[]))
{
    // Stage time fractions.
    const double c2 = 0.2, c3 = 0.3, c4 = 0.8, c5 = 8.0 / 9.0;

    // Stage coefficients.
    const double a21 = 0.2;
    const double a31 = 3.0 / 40.0, a32 = 9.0 / 40.0;
    const double a41 = 44.0 / 45.0, a42 = -56.0 / 15.0, a43 = 32.0 / 9.0;
    const double a51 = 19372.0 / 6561.0, a52 = -25360.0 / 2187.0,
                 a53 = 64448.0 / 6561.0, a54 = -212.0 / 729.0;
    const double a61 = 9017.0 / 3168.0, a62 = -355.0 / 33.0,
                 a63 = 46732.0 / 5247.0, a64 = 49.0 / 176.0,
                 a65 = -5103.0 / 18656.0;
    const double a71 = 35.0 / 384.0, a73 = 500.0 / 1113.0,
                 a74 = 125.0 / 192.0, a75 = -2187.0 / 6784.0,
                 a76 = 11.0 / 84.0;

    // Weights of the error estimate (difference of the two embedded orders).
    const double e1 = 71.0 / 57600.0, e3 = -71.0 / 16695.0,
                 e4 = 71.0 / 1920.0, e5 = -17253.0 / 339200.0,
                 e6 = 22.0 / 525.0, e7 = -1.0 / 40.0;

    const double hmin = 1.0e-10;            // below this the step is abandoned
    const double tolerable_error = 1.0e-7;  // accepted scaled error
    const double error_min = 1.0e-6;        // keeps the scale away from zero
    const double shrink_divisor = 5.0;      // step reduction after a rejection
    const double max_growth = 5.0;          // cap on step enlargement

    // Nothing to integrate; also avoids reading error[0] of an empty array.
    if (numberofequations <= 0)
    {
        return h;
    }
    const int n = numberofequations;

    // One allocation carved into ten work arrays of n doubles each.
    double *k1 = new double[10 * static_cast<size_t>(n)];
    if (NULL == k1)
    {
        printf("Cannot allocate k1 in ode5() \n");
        return (0);
    }
    double *k2 = k1 + n;
    double *k3 = k2 + n;
    double *k4 = k3 + n;
    double *k5 = k4 + n;
    double *k6 = k5 + n;
    double *temp = k6 + n;
    double *ynewstar = temp + n;   // derivative at the new point (7th stage)
    double *error = ynewstar + n;  // scaled error estimate per equation
    double *scale = error + n;     // per-equation error scale

    // The first stage does not depend on h, so it is evaluated once and
    // reused by every retry.
    frhs(yold, t, k1);

    double h_used;
    double max_error;
    int i;

    for (;;)
    {
        h_used = h;  // the step size this attempt is made with

        for (i = 0; i < n; i++)
            temp[i] = yold[i] + a21 * h * k1[i];
        frhs(temp, t + c2 * h, k2);

        for (i = 0; i < n; i++)
            temp[i] = yold[i] + h * (a31 * k1[i] + a32 * k2[i]);
        frhs(temp, t + c3 * h, k3);

        for (i = 0; i < n; i++)
            temp[i] = yold[i] + h * (a41 * k1[i] + a42 * k2[i] + a43 * k3[i]);
        frhs(temp, t + c4 * h, k4);

        for (i = 0; i < n; i++)
            temp[i] = yold[i] + h * (a51 * k1[i] + a52 * k2[i] + a53 * k3[i]
                                     + a54 * k4[i]);
        frhs(temp, t + c5 * h, k5);

        for (i = 0; i < n; i++)
            temp[i] = yold[i] + h * (a61 * k1[i] + a62 * k2[i] + a63 * k3[i]
                                     + a64 * k4[i] + a65 * k5[i]);
        frhs(temp, t + h, k6);

        for (i = 0; i < n; i++)
            ynew[i] = yold[i] + h * (a71 * k1[i] + a73 * k3[i] + a74 * k4[i]
                                     + a75 * k5[i] + a76 * k6[i]);
        frhs(ynew, t + h, ynewstar);

        for (i = 0; i < n; i++)
            scale[i] = fabs(yold[i]) + fabs(h * k1[i]) + fabs(error_min);

        for (i = 0; i < n; i++)
            error[i] = (h * (e1 * k1[i] + e3 * k3[i] + e4 * k4[i] + e5 * k5[i]
                             + e6 * k6[i] + e7 * ynewstar[i])) / scale[i];

        // Largest scaled error over all equations.
        max_error = fabs(error[0]);
        for (i = 1; i < n; i++)
        {
            if (fabs(error[i]) > max_error)
                max_error = fabs(error[i]);
        }

        if (!(max_error > tolerable_error))
        {
            break;  // step accepted
        }

        // Step rejected: shrink and retry, unless the step has collapsed.
        h = h / shrink_divisor;
        if (h < hmin)
        {
            fprintf(stderr,
                    "ode5(): step size fell below %g at t = %g; cannot continue\n",
                    hmin, t);
            delete[] k1;
            exit(EXIT_FAILURE);
        }
    }

    if (max_error < tolerable_error)
    {
        // A zero error estimate would make the ratio infinite, and a tiny one
        // would request an absurdly large step, so the enlargement is capped.
        double growth = max_growth;
        if (max_error > 0.0)
        {
            growth = pow(tolerable_error / max_error, 0.2);
            if (growth > max_growth)
                growth = max_growth;
        }
        h = h * growth;
    }

    delete[] k1;
    return (h_used);
}//end ode5