int main (int argc, const char * argv[]) { printf("4. OpenCL Profile No Overhead in the Loop\n"); float range = BIG_RANGE; float *in, *out; // ======== Initialize init_all_perfs(); create_data(&in, &out); start_perf_measurement(&total_perf); // ======== Setup OpenCL setup_cl(argc, argv, &opencl_device, &opencl_context, &opencl_queue); // ======== Setup the computation setup_cl_compute(); start_perf_measurement(&write_perf); copy_data_to_device(in, out); stop_perf_measurement(&write_perf); // ======== Compute while (range > LIMIT) { // Calculation start_perf_measurement(&update_perf); update_cl(get_in_buffer(), get_out_buffer()); stop_perf_measurement(&update_perf); // Read back the data start_perf_measurement(&read_perf); read_back_data(get_out_buffer(), out); stop_perf_measurement(&read_perf); // Compute Range start_perf_measurement(&range_perf); range = find_range(out, SIZE*SIZE); stop_perf_measurement(&range_perf); iterations++; printf("Iteration %d, range=%f.\n", iterations, range); } // ======== Finish and cleanup OpenCL start_perf_measurement(&finish_perf); clFinish(opencl_queue); stop_perf_measurement(&finish_perf); start_perf_measurement(&cleanup_perf); cleanup_cl(); stop_perf_measurement(&cleanup_perf); stop_perf_measurement(&total_perf); print_perfs(); free(in); free(out); }
void test_BS (cl_uint sizeMin, cl_uint sizeMax, int warmup_num_iters, int run_num_iters, size_t blockMin, size_t blockMax, size_t gridMin, size_t gridMax, char *filename, cl_float(*gen)(cl_float low, cl_float high)) { BS_test_t t; t.num = sizeMin; t.blockSize = blockMin; t.gridSize = gridMin; t.Riskfree = R; t.Volatility = V; bool result = false; FILE * csv = fopen(filename, "a"); if (csv == NULL) fprintf(stderr, "can't open output file\n"); if (csv != NULL) fprintf(csv, "size, block size, grid size, total time\n"); setup_cl(&t); setup_buffers(&t, gen); if (t.maxBlockSize < blockMin) blockMin = t.maxBlockSize; if (t.maxBlockSize < blockMax) blockMax = t.maxBlockSize; printf("blockMIN: %d blockMAX: %d t.maxBlockSize: %d\n", blockMin, blockMax, t.maxBlockSize); warmup(&t, warmup_num_iters); for (t.num = sizeMin; t.num <= sizeMax; t.num <<= 1) { for (t.gridSize = gridMin; t.gridSize <= gridMax; t.gridSize += 16*1024) { for (t.blockSize = blockMin; t.blockSize <= blockMax; t.blockSize += 64) { if (t.gridSize % t.blockSize != 0) continue; setup_buffers(&t, gen); result = run(&t, run_num_iters); post(&t, run_num_iters, result); if (result && csv != NULL) output(&t, csv, run_num_iters); cleanup(&t); } } } if (csv != NULL) fclose(csv); }
MUST COMPILE WITH QIO FOR THE SCRATCH FILE #endif /* Comment these out if you want to suppress detailed timing */ /*#define IOTIME*/ /*#define PRTIME*/ int main(int argc, char *argv[]) { int meascount; int prompt; Real avm_iters,avs_iters; double starttime,endtime; #ifdef IOTIME double dtime; int iotime = 1; #else int iotime = 0; #endif int MinCG,MaxCG; Real RsdCG, RRsdCG; int spin,color,j,k; int flag; int status; w_prop_file *fp_in_w[MAX_KAP]; /* For reading binary propagator files */ w_prop_file *fp_out_w[MAX_KAP]; /* For writing binary propagator files */ w_prop_file *fp_scr[MAX_KAP]; quark_source wqs_scr; /* scratch file */ char scratch_file[MAX_KAP][MAXFILENAME]; wilson_vector *psi = NULL; wilson_prop_field *quark_propagator = NULL; wilson_prop_field *quark_prop2 = NULL; int cg_cl = CL_CG; int source_type; initialize_machine(&argc,&argv); /* Remap standard I/O */ if(remap_stdio_from_args(argc, argv) == 1)terminate(1); g_sync(); /* set up */ prompt = setup_cl(); /* loop over input sets */ psi = create_wv_field(); quark_propagator = create_wp_field(3); quark_prop2 = create_wp_field(3); while( readin(prompt) == 0) { starttime=dclock(); MaxCG=niter; avm_iters=0.0; meascount=0; spectrum_cl_hl_init(); if( fixflag == COULOMB_GAUGE_FIX) { if(this_node == 0) printf("Fixing to Coulomb gauge\n"); #ifdef IOTIME dtime = -dclock(); #endif gaugefix(TUP,(Real)1.5,500,GAUGE_FIX_TOL); #ifdef IOTIME dtime += dclock(); if(this_node==0)printf("Time to gauge fix = %e\n",dtime); #endif invalidate_this_clov(gen_clov); } else if(this_node == 0)printf("COULOMB GAUGE FIXING SKIPPED.\n"); /* save lattice if requested */ if( saveflag != FORGET ){ savelat_p = save_lattice( saveflag, savefile, stringLFN ); } if(this_node==0)printf("END OF HEADER\n"); /* if(this_node==0) printf("num_kap = %d\n", num_kap); */ /* Loop over kappas to compute and store quark propagator */ for(k=0;k<num_kap;k++){ kappa=kap[k]; RsdCG=resid[k]; RRsdCG=relresid[k]; if(this_node==0)printf("Kappa= %g r0= %g residue= %g rel= %g\n", (double)kappa,(double)wqs.r0,(double)RsdCG, (double)RRsdCG); /* open files for wilson propagators */ #ifdef IOTIME dtime = -dclock(); #endif wqstmp = wqs; /* For clover_info.c */ fp_in_w[k] = r_open_wprop(startflag_w[k], startfile_w[k]); fp_out_w[k] = w_open_wprop(saveflag_w[k], savefile_w[k], wqs.type); #ifdef IOTIME dtime += dclock(); if(startflag_w[k] != FRESH) node0_printf("Time to open prop = %e\n",dtime); #endif /* Open scratch file and write header */ sprintf(scratch_file[k],"%s_%02d",scratchstem_w,k); source_type = UNKNOWN; fp_scr[k] = w_open_wprop(scratchflag, scratch_file[k], source_type); init_qs(&wqs_scr); /* Loop over source spins */ for(spin=0;spin<4;spin++){ /* Loop over source colors */ for(color=0;color<3;color++){ meascount ++; /*if(this_node==0)printf("color=%d spin=%d\n",color,spin);*/ if(startflag_w[k] == CONTINUE) { if(k == 0) { node0_printf("Can not continue propagator here! Zeroing it instead\n"); startflag_w[k] = FRESH; } else { copy_wv_from_wp(psi, quark_propagator, color, spin); } } /* Saves one multiplication by zero in cgilu */ if(startflag_w[k] == FRESH)flag = 0; else flag = 1; /* load psi if requested */ status = reload_wprop_sc_to_field( startflag_w[k], fp_in_w[k], &wqs, spin, color, psi, iotime); if(status != 0) { node0_printf("control_cl_hl: Recovering from error by resetting initial guess to zero\n"); reload_wprop_sc_to_field( FRESH, fp_in_w[k], &wqs, spin, color, psi,0); flag = 0; } /* Complete the source structure */ wqs.color = color; wqs.spin = spin; /* If we are starting afresh, we set a minimum number of iterations */ if(startflag_w[k] == FRESH || status != 0)MinCG = nt/2; else MinCG = 0; /* Load inversion control structure */ qic.prec = PRECISION; qic.min = 0; qic.max = MaxCG; qic.nrestart = nrestart; qic.parity = EVENANDODD; qic.start_flag = flag; qic.nsrc = 1; qic.resid = RsdCG; qic.relresid = RRsdCG; /* Load Dirac matrix parameters */ dcp.Kappa = kappa; dcp.Clov_c = clov_c; dcp.U0 = u0; switch (cg_cl) { case BICG: avs_iters = (Real)wilson_invert_field_wqs(&wqs, w_source_field, psi, bicgilu_cl_field, &qic,(void *)&dcp); break; case HOP: avs_iters = (Real)wilson_invert_field_wqs(&wqs, w_source_field, psi, hopilu_cl_field, &qic,(void *)&dcp); break; case MR: avs_iters = (Real)wilson_invert_field_wqs(&wqs, w_source_field, psi, mrilu_cl_field, &qic,(void *)&dcp); break; case CG: avs_iters = (Real)wilson_invert_field_wqs(&wqs, w_source_field, psi, cgilu_cl_field, &qic,(void *)&dcp); break; default: node0_printf("main(%d): Inverter choice %d not supported\n", cg_cl, this_node); } avm_iters += avs_iters; copy_wp_from_wv(quark_propagator, psi, color, spin); /* Write psi to scratch disk */ #ifdef IOTIME dtime = -dclock(); #endif save_wprop_sc_from_field(scratchflag, fp_scr[k], &wqs_scr, spin, color, psi, "Scratch record", iotime); #ifdef IOTIME dtime += dclock(); if(this_node==0) printf("Time to dump prop spin %d color %d %e\n", spin,color,dtime); #endif /* save psi if requested */ save_wprop_sc_from_field( saveflag_w[k],fp_out_w[k], &wqs, spin,color,psi,"", iotime); } /* source colors */ } /* source spins */ /* Close and release scratch file */ w_close_wprop(scratchflag, fp_scr[k]); /*if(this_node==0)printf("Dumped prop to file %s\n", scratch_file[k]); */ /* close files for wilson propagators */ #ifdef IOTIME dtime = -dclock(); #endif r_close_wprop(startflag_w[k],fp_in_w[k]); w_close_wprop(saveflag_w[k],fp_out_w[k]); #ifdef IOTIME dtime += dclock(); if(saveflag_w[k] != FORGET) node0_printf("Time to close prop = %e\n",dtime); #endif } /* kappas */ /* Loop over heavy kappas for the point sink spectrum */ for(k=0;k<num_kap;k++){ /* Read the propagator from the scratch file */ #ifdef IOTIME dtime = -dclock(); #endif kappa=kap[k]; init_qs(&wqs_scr); reload_wprop_to_wp_field(scratchflag, scratch_file[k], &wqs_scr, quark_propagator, iotime); #ifdef IOTIME dtime += dclock(); if(this_node==0) { printf("Time to read 12 spin,color combinations %e\n",dtime); fflush(stdout); } #endif /*if(this_node==0) printf("Closed scratch file %s\n",scratch_file[k]); fflush(stdout); */ /* Diagonal spectroscopy */ spectrum_cl_hl_diag_baryon(quark_propagator, k); spectrum_cl_hl_diag_meson(quark_propagator, k); spectrum_cl_hl_diag_rot_meson(quark_propagator, k); if(strstr(spectrum_request,",sink_smear,") != NULL){ spectrum_cl_hl_diag_smeared_meson(quark_propagator, k); } /* Heavy-light spectroscopy */ /* Loop over light kappas for the point sink spectrum */ for(j=k+1;j<num_kap;j++){ #ifdef IOTIME dtime = -dclock(); #endif /* Read the propagator from the scratch file */ kappa=kap[j]; init_qs(&wqs_scr); reload_wprop_to_wp_field(scratchflag, scratch_file[j], &wqs_scr, quark_prop2, iotime); #ifdef IOTIME dtime += dclock(); if(this_node==0) { printf("Time to read 12 spin,color combinations %e\n",dtime); fflush(stdout); } #endif #ifdef PRTIME dtime = -dclock(); #endif spectrum_cl_hl_offdiag_baryon( quark_propagator, quark_prop2, j, k); spectrum_cl_hl_offdiag_meson( quark_propagator, quark_prop2, j, k); spectrum_cl_hl_offdiag_rot_meson( quark_propagator, quark_prop2, j, k); #ifdef PRTIME dtime = -dclock(); #endif } /* light kappas */ /* Smear the heavy propagator in place */ sink_smear_prop( quark_propagator ); /* Write the smeared propagator to the scratch file (overwriting)*/ kappa=kap[k]; #ifdef IOTIME dtime = -dclock(); #endif save_wprop_from_wp_field(scratchflag, scratch_file[k], &wqs_scr, quark_propagator, "Scratch propagator", iotime); #ifdef IOTIME dtime += dclock(); if(this_node==0) { printf("Time to dump convolution %d %e\n",k,dtime); fflush(stdout); } #endif } /* heavy kappas */ /* Loop over heavy kappas for the shell sink spectrum */ if(strstr(spectrum_request,",sink_smear,") != NULL) for(k=0;k<num_kap;k++){ #ifdef IOTIME dtime = -dclock(); #endif /* Read the propagator from the scratch file */ kappa=kap[k]; init_qs(&wqs_scr); reload_wprop_to_wp_field(scratchflag, scratch_file[k], &wqs_scr, quark_propagator, iotime); #ifdef IOTIME dtime += dclock(); if(this_node==0) { printf("Time to read convolution %d %e\n",k,dtime); fflush(stdout); } #endif /* Diagonal spectroscopy */ spectrum_cl_hl_diag_smeared_meson(quark_propagator, k); /* Heavy-light spectroscopy */ /* Loop over light kappas for the shell sink spectrum */ for(j=k+1;j<num_kap;j++){ #ifdef PRTIME dtime = -dclock(); #endif /* Read the propagator from the scratch file */ kappa=kap[j]; init_qs(&wqs_scr); reload_wprop_to_wp_field(scratchflag, scratch_file[j], &wqs_scr, quark_prop2, iotime); /* Compute the spectrum */ spectrum_cl_hl_offdiag_smeared_meson( quark_propagator, quark_prop2, j, k); #ifdef PRTIME dtime += dclock(); if(this_node==0) { printf("Time to read and do off diagonal mesons %d %d %e\n", j,k,dtime); fflush(stdout); } #endif } /* light kappas */ } /* heavy kappas */ spectrum_cl_hl_print(wqs.t0); spectrum_cl_hl_cleanup(); if(this_node==0)printf("RUNNING COMPLETED\n"); if(meascount>0){ if(this_node==0)printf("total cg iters for measurement= %e\n", (double)avm_iters); if(this_node==0)printf("cg iters for measurement= %e\n", (double)avm_iters/(double)meascount); } endtime=dclock(); if(this_node==0){ printf("Time = %e seconds\n",(double)(endtime-starttime)); printf("total_iters = %d\n",total_iters); } fflush(stdout); } destroy_wv_field(psi); destroy_wp_field(quark_propagator); return 0; }
int main(int argc, char *argv[]) { int meascount; int prompt; Real avm_iters,avs_iters; double starttime,endtime; #ifdef IOTIME double dtime; int iotime = 1; #else int iotime = 0; #endif int MaxCG; Real RsdCG, RRsdCG; int spin,color,k; int flag; int status; int cl_cg = CL_CG; w_prop_file *fp_in_w[MAX_KAP]; /* For propagator files */ w_prop_file *fp_out_w[MAX_KAP]; /* For propagator files */ wilson_vector *psi = NULL; wilson_prop_field quark_propagator = NULL; initialize_machine(&argc,&argv); #ifdef HAVE_QDP QDP_initialize(&argc, &argv); #endif /* Remap standard I/O */ if(remap_stdio_from_args(argc, argv) == 1)terminate(1); g_sync(); /* set up */ prompt = setup_cl(); /* loop over input sets */ psi = create_wv_field(); quark_propagator = create_wp_field(); while( readin(prompt) == 0) { starttime=dclock(); MaxCG=niter; wqstmp = wqs; /* For clover_info.c */ avm_iters=0.0; meascount=0; if( fixflag == COULOMB_GAUGE_FIX) { if(this_node == 0) printf("Fixing to Coulomb gauge\n"); #ifdef IOTIME dtime = -dclock(); #endif gaugefix(TUP,(Real)1.5,500,GAUGE_FIX_TOL); #ifdef IOTIME dtime += dclock(); if(this_node==0)printf("Time to gauge fix = %e\n",dtime); #endif invalidate_this_clov(gen_clov); } else if(this_node == 0)printf("COULOMB GAUGE FIXING SKIPPED.\n"); /* save lattice if requested */ if( saveflag != FORGET ){ savelat_p = save_lattice( saveflag, savefile, stringLFN ); } if(this_node==0)printf("END OF HEADER\n"); /* if(this_node==0) printf("num_kap = %d\n", num_kap); */ /* Loop over kappas */ for(k=0;k<num_kap;k++){ kappa=kap[k]; RsdCG=resid[k]; RRsdCG=relresid[k]; if(this_node==0)printf("Kappa= %g r0= %g residue= %g rel= %g\n", (double)kappa,(double)wqs.r0,(double)RsdCG, (double)RRsdCG); /* open files for wilson propagators */ #ifdef IOTIME dtime = -dclock(); #endif fp_in_w[k] = r_open_wprop(startflag_w[k], startfile_w[k]); #ifdef IOTIME dtime += dclock(); if(startflag_w[k] != FRESH) node0_printf("Time to open prop = %e\n",dtime); #endif fp_out_w[k] = w_open_wprop(saveflag_w[k], savefile_w[k], wqs.type); /* Loop over source spins */ for(spin=0;spin<4;spin++){ /* Loop over source colors */ for(color=0;color<3;color++){ meascount ++; /*if(this_node==0)printf("color=%d spin=%d\n",color,spin); */ if(startflag_w[k] == CONTINUE) { node0_printf("Can not continue propagator here! Zeroing it instead\n"); startflag_w[k] = FRESH; } /* Saves one multiplication by zero in cgilu */ if(startflag_w[k] == FRESH)flag = 0; else flag = 1; /* load psi if requested */ status = reload_wprop_sc_to_field( startflag_w[k], fp_in_w[k], &wqs, spin, color, psi, iotime); if(status != 0) { node0_printf("control_cl: Recovering from error by resetting initial guess to zero\n"); reload_wprop_sc_to_field( FRESH, fp_in_w[k], &wqs, spin, color, psi, 0); flag = 0; } /* Complete the source structure */ wqs.color = color; wqs.spin = spin; /* Load inversion control structure */ qic.prec = PRECISION; qic.max = MaxCG; qic.nrestart = nrestart; qic.resid = RsdCG; qic.relresid = RRsdCG; qic.start_flag = flag; /* Load Dirac matrix parameters */ dcp.Kappa = kappa; dcp.Clov_c = clov_c; dcp.U0 = u0; /* compute the propagator. Result in psi. */ switch (cl_cg) { case BICG: avs_iters = (Real)wilson_invert_field_wqs(&wqs, w_source_field, psi, bicgilu_cl_field, &qic,(void *)&dcp); break; case HOP: avs_iters = (Real)wilson_invert_field_wqs(&wqs, w_source_field, psi, hopilu_cl_field, &qic,(void *)&dcp); break; case MR: avs_iters = (Real)wilson_invert_field_wqs(&wqs, w_source_field, psi, mrilu_cl_field, &qic,(void *)&dcp); break; case CG: avs_iters = (Real)wilson_invert_field_wqs(&wqs, w_source_field, psi, cgilu_cl_field, &qic,(void *)&dcp); break; default: node0_printf("main(%d): Inverter choice %d not supported\n", this_node,cl_cg); } avm_iters += avs_iters; copy_wp_from_wv(quark_propagator, psi, color, spin); /* save psi if requested */ save_wprop_sc_from_field( saveflag_w[k],fp_out_w[k], &wqs, spin,color,psi,"Fill in record info here",iotime); } /* source spins */ } /* source colors */ /* close files for wilson propagators */ r_close_wprop(startflag_w[k],fp_in_w[k]); #ifdef IOTIME dtime = -dclock(); #endif w_close_wprop(saveflag_w[k],fp_out_w[k]); #ifdef IOTIME dtime += dclock(); if(saveflag_w[k] != FORGET) node0_printf("Time to close prop = %e\n",dtime); #endif /* spectrum, if requested */ if(strstr(spectrum_request,",spectrum,") != NULL) spectrum_cl(quark_propagator, wqs.t0, k); } /* kappas */ if(this_node==0)printf("RUNNING COMPLETED\n"); if(meascount>0){ if(this_node==0)printf("total cg iters for measurement= %e\n", (double)avm_iters); if(this_node==0)printf("cg iters for measurement= %e\n", (double)avm_iters/(double)meascount); } endtime=dclock(); if(this_node==0){ printf("Time = %e seconds\n",(double)(endtime-starttime)); printf("total_iters = %d\n",total_iters); } fflush(stdout); } destroy_wv_field(psi); destroy_wp_field(quark_propagator); return 0; }