/*
 * Benchmark driver for chol().
 *
 * Usage: <prog> <matrix-file> <n> <event>
 *
 * Loads a matrix via load_matrix(<matrix-file>, n), runs chol() on it and
 * prints one measurement to stdout:
 *   - event == 5: the elapsed dclock() time around chol();
 *   - otherwise:  the value returned by papi_stop() for the given event.
 * The return code of chol() is written to stderr as "RET:<code>", after which
 * check(A, n) is called on the result.
 *
 * Returns 0 on a completed run, 1 when arguments are missing or the matrix
 * could not be obtained.
 */
int main(int argc, char** argv)
{
    double *A;
    int n, ret, event;
    double startTime = 0.0;
    double endTime;
    long long value;

    /* argv[1], argv[2] and argv[3] are all read below; bail out instead of
       dereferencing a missing argument. */
    if (argc < 4) {
        fprintf(stderr, "Usage: %s <matrix-file> <n> <event>\n",
                argc > 0 ? argv[0] : "chol");
        return 1;
    }

    n = atoi(argv[2]);
    A = load_matrix(argv[1], n);
    /* Guard in case load_matrix() hands back a null pointer on failure
       (its contract is not visible here). */
    if (!A) {
        fprintf(stderr, "Error: could not load matrix from %s\n", argv[1]);
        return 1;
    }
    event = atoi(argv[3]);

    if (event != 5) {
        papi_init(event);
        papi_start();
    } else {
        startTime = dclock();
    }

    ret = chol(A, n);

    if (event != 5) {
        value = papi_stop();
        printf("%lld\n", value);
    } else {
        endTime = dclock();
        printf("%lf\n", endTime - startTime);
    }

    fprintf(stderr, "RET:%d\n", ret);
    check(A, n);
    free(A);
    return 0;
}
/*
 * Times a single mm() call on two SIZE x SIZE operands.
 *
 * The operands are filled with first[r][c] = r + c and second[r][c] = r - c.
 * The elapsed dclock() time is printed, followed by the sum of every entry of
 * the product array as a cheap sanity value. The process exit status is the
 * value returned by mm().
 */
int main( int argc, const char* argv[] )
{
    double first[SIZE][SIZE];
    double second[SIZE][SIZE];
    double multiply[SIZE][SIZE];
    int row, col, iret;
    double elapsed;
    double check = 0.0;

    /* Populate both inputs in one pass. */
    for (row = 0; row < SIZE; row++) {
        for (col = 0; col < SIZE; col++) {
            first[row][col] = row + col;
            second[row][col] = row - col;
        }
    }

    /* Only the multiplication itself is timed. */
    elapsed = dclock();
    iret = mm(first, second, multiply);
    elapsed = dclock() - elapsed;

    printf( "Time: %le \n", elapsed);
    fflush( stdout );

    /* Sum of all result entries. */
    for (row = 0; row < SIZE; row++) {
        for (col = 0; col < SIZE; col++) {
            check += multiply[row][col];
        }
    }
    printf("check %le \n", check);
    fflush( stdout );

    return iret;
}
/*
 * Contract the light two-point correlators for one (zonked, spectator) pair.
 *
 * The storage position inside corr[] is derived from TWOPT_FORM_WHERE and the
 * actual contraction is delegated to meson_cont_mom(). When IF_VERBOSE_ON(1)
 * is active, the elapsed dclock() time is printed.
 */
void contract_light_twopt(complex *corr, field_offset q_zonked, field_offset q_sequential,
                          int zonked_pt, int spect_pt)
{
  const double t_start = dclock();

  /* Partial offset for storing the result in corr[], plus the strides obtained
     by setting only the fourth / fifth macro argument to 1. */
  const int base_pt   = TWOPT_FORM_WHERE(0, zonked_pt, spect_pt, 0, 0);
  const int q_stride  = TWOPT_FORM_WHERE(0, 0, 0, 1, 0);
  const int op_stride = TWOPT_FORM_WHERE(0, 0, 0, 0, 1);

  meson_cont_mom(corr, q_zonked, q_sequential,
                 base_pt, q_stride, op_stride,
                 two_pt, MAX_TWOPT);

  IF_VERBOSE_ON(1)
    printf("Time to Wick contract light 2pt correlators = %g sec\n", dclock() - t_start);
}
/*
 * Benchmark driver for chol_left_looking().
 *
 * Seeds rand() from the wall clock, obtains a SIZE x SIZE matrix from
 * generateSPDmatrix(), prints it, then times chol_left_looking() with dclock()
 * and reports the elapsed time and the achieved rate in Gflops.
 */
int main()
{
    double *A;
    double dtime;
    double gflops;
    int i, j;

    srand((unsigned int)time(NULL));

    A = generateSPDmatrix();

    /* Dump the input matrix, one row per output line. */
    for (i = 0; i < SIZE; i++) {
        for (j = 0; j < SIZE; j++) {
            printf("%le \t", A[IDX(i, j, SIZE)]);
        }
        printf("\n");
    }

    dtime = dclock();
    chol_left_looking(A, SIZE);
    dtime = dclock() - dtime;

    /* The operation count is taken as SIZE^3 / 3. The scale factor from flops
       to Gflops is 1e-9; note that the literal 10e-9 would be 1e-8 and inflate
       the result by a factor of ten. */
    gflops = ((1.0/3.0) * SIZE * SIZE * SIZE * 1e-9) / dtime;

    printf( "Time: %le \n", dtime);
    printf("Gflops: %le \n", gflops);

    return 0;
}
/*
 * Contract the light-light two-point correlators for one (zonked, spectator)
 * pair using meson_cont_mom_lean2().
 *
 * The storage position inside corr[] comes from LL_TWOPT_FORM_WHERE. When
 * IF_VERBOSE_ON(1) is active, the elapsed dclock() time is printed.
 */
void contract_LL2(complex *corr, field_offset q_zonked, field_offset q_spectator,
                  int zonked_pt, int spect_pt)
{
  const double t_start = dclock();

  /* Partial offset for storing the result in corr[], plus the strides obtained
     by setting only the fourth / fifth macro argument to 1. */
  const int base_pt   = LL_TWOPT_FORM_WHERE(0, zonked_pt, spect_pt, 0, 0);
  const int q_stride  = LL_TWOPT_FORM_WHERE(0, 0, 0, 1, 0);
  const int op_stride = LL_TWOPT_FORM_WHERE(0, 0, 0, 0, 1);

  meson_cont_mom_lean2(corr, q_zonked, q_spectator,
                       base_pt, q_stride, op_stride,
                       w_meson_store_t, w_meson_my_t, w_meson_nstore,
                       no_k_values, k_momstore,
                       MAX_TWOPT, two_pt,
                       F_OFFSET(QTMP), DIMQTMP);

  IF_VERBOSE_ON(1)
    printf("Time to Wick contract light-light 2pt correlators = %g sec\n", dclock() - t_start);
}
/*
 * Build the HISQ auxiliary links and the fn link sets.
 *
 * On return:
 *   *aux      - freshly created auxiliary structure, loaded from `links`;
 *   fn[i]     - one new fn_links_t per Naik term (i < ap->n_naiks);
 *   *fn_deps  - a new fn_links_t when want_deps is nonzero, otherwise NULL;
 *   info->final_sec  - elapsed dclock() time for the whole routine;
 *   info->final_flop - sum of the flop counts that the two load stages left
 *                      in info->final_flop.
 */
void create_hisq_links_milc(info_t *info, fn_links_t **fn, fn_links_t **fn_deps,
                            hisq_auxiliary_t **aux, ks_action_paths_hisq *ap,
                            su3_matrix *links, int want_deps, int want_back){
  //char myname[] = "create_hisq_links_milc";

  int n_naiks = ap->n_naiks;
  int i;
  double final_flop = 0.;
  double dtime = -dclock();

  *aux = create_hisq_auxiliary_t(ap, links);

  /* Stage 1: auxiliary links. Each stage reports through info->final_flop, so
     capture the value before the next stage overwrites it. */
  load_hisq_aux_links(info, ap, *aux, links);
  final_flop += info->final_flop;

  for(i = 0; i < n_naiks; i++)
    fn[i] = create_fn_links();

  if(want_deps)
    *fn_deps = create_fn_links();
  else
    *fn_deps = NULL;

  /* Stage 2: the fn links themselves. */
  load_hisq_fn_links(info, fn, *fn_deps, *aux, ap, links, want_deps, want_back);
  final_flop += info->final_flop;

  dtime += dclock();
  info->final_sec = dtime;
  /* Publish the accumulated total; without this store the caller would only
     see the flop count of the last stage. */
  info->final_flop = final_flop;
}
/*
 * Create QOP Wilson fermion links for the clover coefficient `clov`.
 *
 * The site gauge field is first mapped to a temporary QOP_GaugeField, which is
 * destroyed again once the fermion links have been created from it. The time
 * spent in the mapping is reported only when both FFTIME and REMAP are
 * defined; the link-creation timing from the QOP info structure is reported
 * whenever FFTIME is defined.
 */
static QOP_FermionLinksWilson *
create_qop_wilson_fermion_links( Real clov )
{
  QOP_info_t info;
  QOP_wilson_coeffs_t coeffs;
  QOP_GaugeField *gauge;
  QOP_FermionLinksWilson *result;
  double remaptime;

  /* Load coeffs structure */
  load_qop_wilson_coeffs(&coeffs, clov);

  /* Map SU(3) gauge field to G type, timing only the mapping */
  remaptime = -dclock();
  gauge = create_G_from_site4(F_OFFSET(link),EVENANDODD);
  remaptime += dclock();

  /* Create links; the temporary gauge field is not needed afterwards */
  result = QOP_wilson_create_L_from_G(&info, &coeffs, gauge);
  QOP_destroy_G(gauge);

#ifdef FFTIME
#ifdef REMAP
  node0_printf("FFREMAP: time = %e\n",remaptime);
#endif
  node0_printf("FFTIME: time = %e (cl_qop) terms = 1 mflops = %e\n",
               info.final_sec, (Real)info.final_flop/(1e6*info.final_sec) );
#endif

  return result;
}
// Generate the rational approximation x^(pnum/pden) void AlgRemez::generateApprox() { char *fname = "generateApprox()"; Float time = -dclock(); iter = 0; spread = 1.0e37; if (approx_type == RATIONAL_APPROX_ZERO_POLE) { n--; neq--; } initialGuess(); stpini(step); while (spread > tolerance) { //iterate until convergance if (iter++%100==0) VRB.Result(cname,fname,"Iteration %d, spread %e delta %e\n", iter-1,(Float)spread,(Float)delta); equations(); if (delta < tolerance) ERR.General(cname, fname,"Delta too small, try increasing precision\n"); search(step); } int sign; Float error = (Float)getErr(mm[0],&sign); VRB.Result(cname,fname,"Converged at %d iterations, error = %e\n", iter,error); //!< Once the approximation has been generated, calculate the roots if(!root()) ERR.General(cname,fname,"Root finding failed\n"); if (approx_type == RATIONAL_APPROX_ZERO_POLE) { roots[n] = (bigfloat)0.0; n++; neq++; } //!< Now find the partial fraction expansions if (remez_arg->field_type == BOSON) { getPFE(remez_arg->residue, remez_arg->pole, &(remez_arg->norm)); getIPFE(remez_arg->residue_inv, remez_arg->pole_inv, &(remez_arg->norm_inv)); } else { getIPFE(remez_arg->residue, remez_arg->pole, &(remez_arg->norm)); getPFE(remez_arg->residue_inv, remez_arg->pole_inv, &(remez_arg->norm_inv)); } remez_arg->error = error; time += dclock(); print_time(cname,fname,time); }
CPS_START_NAMESPACE /*!\file \brief Definitions of functions that perform operations on complex matrices and vectors. $Id: vector_util.C,v 1.10 2013-04-19 20:25:52 chulwoo Exp $ */ //-------------------------------------------------------------------- // CVS keywords // // $Author: chulwoo $ // $Date: 2013-04-19 20:25:52 $ // $Header: /home/chulwoo/CPS/repo/CVS/cps_only/cps_pp/src/util/vector/comsrc/vector_util.C,v 1.10 2013-04-19 20:25:52 chulwoo Exp $ // $Id: vector_util.C,v 1.10 2013-04-19 20:25:52 chulwoo Exp $ // $Name: not supported by cvs2svn $ // $Locker: $ // $Revision: 1.10 $ // $Source: /home/chulwoo/CPS/repo/CVS/cps_only/cps_pp/src/util/vector/comsrc/vector_util.C,v $ // $State: Exp $ // //-------------------------------------------------------------------- /*------------------------------------------------------------------*/ /* For these functions there exists optimized assembly code. */ /*------------------------------------------------------------------*/ CPS_END_NAMESPACE #include <string.h> /* memcpy */ #include <util/vector.h> #include <util/time_cps.h> //#include<omp.h> CPS_START_NAMESPACE /*! \param b The vector to be copied to \param a The vector to be copied from. \param len The number of bytes to be copied. The arrays \a c and \a b must not alias each other. */ //---------------------------------------------------------------// void moveMem(void *b, const void *a, int len) { #undef PROFILE #ifdef PROFILE double time = -dclock(); #endif memcpy(b, a, len); #ifdef PROFILE time += dclock(); print_flops("","moveMem",len,time); #endif }
//Parallel transport of a vector through one hop void PT::vec(int n, IFloat **vout, IFloat **vin, const int *dir){ int i; static int call_num=0; SCUDirArgIR *SCUarg_p[2*n]; call_num++; //for(int s = 0; s < GJP.VolNodeSites(); s++) // { // for(int t = 0; t < 4; t++) // { // printf("site = %d, direction = %d\n",s,t); // for(int u = 0; u < 9; u++) // printf("%e %e\n",*(gauge_field_addr+4*GAUGE_LEN*s + GAUGE_LEN*t + 2*u),*(gauge_field_addr+4*GAUGE_LEN*s + GAUGE_LEN*t + 2*u+1)); // } // } #ifdef PROFILE Float dtime = - dclock(); #endif int wire[n]; SCUDirArgMulti SCUmulti; char *fname="pt_1vec"; // VRB.Func("",fname); int non_local_dir=0; for(i=0;i<n;i++) wire[i] = dir[i]; // from (x,y,z,t) to (t,x,y,z) // for(i=0;i<n;i++) printf("wire[%d]=%d\n",i,dir[i]); for(i=0;i<n;i++) if (!local[wire[i]/2]){ IFloat * addr = (vin[i]+VECT_LEN*offset[wire[i]]); SCUarg_p[2*non_local_dir] = SCUarg[0][2*wire[i]]; SCUarg_p[2*non_local_dir+1] = SCUarg[0][2*wire[i]+1]; SCUarg_p[2*non_local_dir+1]->Addr((void *)addr); non_local_dir++; } if(non_local_dir){ SCUmulti.Init(SCUarg_p,non_local_dir*2); SCUmulti.SlowStartTrans(); } for(i=0;i<n;i++) partrans_cmv_agg(local_chi[wire[i]],(long)uc_l[wire[i]], (long)vin[i],(long)vout[i]); if(non_local_dir){ SCUmulti.TransComplete(); } for(i=0;i<n;i++) partrans_cmv_agg(non_local_chi[wire[i]],(long)uc_nl[wire[i]], (long)rcv_buf[wire[i]],(long)vout[i]); #ifdef PROFILE dtime +=dclock(); print_flops("",fname,66*n*vol,dtime); #endif Flops +=66*n*vol; }
/*!
  Copies \a len objects of size sizeof(Vector) from \a a to \a b with memcpy.

  \param b The destination array.
  \param a The source array.
  \param len The number of Vector-sized elements to copy.

  The two arrays must not overlap (memcpy is used).
*/
void moveVec(Float *b, const Float *a, int len) {
// Profiling is switched off here: PROFILE is undefined just before it is
// tested, so the timing statements below are compiled out.
#undef PROFILE
#ifdef PROFILE
  double time = -dclock();
#endif

//  for(int i =0;i<len*6;i++) *b++ = *a++;
  memcpy(b, a, len*sizeof(Vector));

#ifdef PROFILE
  time += dclock();
  // Report the number of bytes actually copied above (Vector-sized elements,
  // not Float-sized ones).
  print_flops("","moveVec",len*sizeof(Vector),time);
#endif
}
// Adds dt times the gauge force to the momentum on every link of the local
// volume and collects norms of the force along the way.
//
// Returns ForceArg(dt*L1, dt*sqrt(L2), dt*Linf), where L1 and L2 are averaged
// over 4*GJP.VolSites() links after the global sums and Linf is the global
// maximum.
ForceArg GimprRect::EvolveMomGforce(Matrix *mom, Float dt){
  char *fname = "EvolveMomGforce(M*,F)";
  VRB.Func(cname,fname);

  Float L1 = 0.0;
  Float L2 = 0.0;
  Float Linf = 0.0;

#ifdef PROFILE
  Float time = -dclock();
  ForceFlops = 0;
#endif

  setCbufCntrlReg(4, CBUF_MODE4);

  int x[4];

  for(x[0] = 0; x[0] < GJP.XnodeSites(); ++x[0])
  for(x[1] = 0; x[1] < GJP.YnodeSites(); ++x[1])
  for(x[2] = 0; x[2] < GJP.ZnodeSites(); ++x[2])
  for(x[3] = 0; x[3] < GJP.TnodeSites(); ++x[3]) {

    const int site_off = GsiteOffset(x);

    for (int mu = 0; mu < 4; ++mu) {
      // The force for link (x, mu) is written into the scratch matrix *mp0.
      GforceSite(*mp0, x, mu);

      IFloat *mom_link = (IFloat *)(mom + site_off + mu);
      IFloat *force    = (IFloat *)mp0;

      // Update the 18 real components of this momentum link in place.
      fTimesV1PlusV2(mom_link, dt, force, mom_link, 18);

      const Float force_norm = ((Matrix*)force)->norm();
      const Float force_root = sqrt(force_norm);

      L1 += force_root;
      L2 += force_norm;
      if (force_root > Linf) {
        Linf = force_root;
      }
    }
  }

  // Counted regardless of PROFILE; only the reset above is profile-only.
  ForceFlops += GJP.VolNodeSites()*4*18*2;

#ifdef PROFILE
  time += dclock();
  print_flops(cname,fname,ForceFlops,time);
#endif

  glb_sum(&L1);
  glb_sum(&L2);
  glb_max(&Linf);

  L1 /= 4.0*GJP.VolSites();
  L2 /= 4.0*GJP.VolSites();

  VRB.FuncEnd(cname,fname);
  return ForceArg(dt*L1, dt*sqrt(L2), dt*Linf);
}
//!< Calculate gauge contribution to the Hamiltonian Float AlgMomentum::energy() { Float dtime = -dclock(); const char *fname = "energy()"; Lattice &lat = LatticeFactory::Create(F_CLASS_NONE, G_CLASS_NONE); Float h = lat.MomHamiltonNode(mom); LatticeFactory::Destroy(); dtime += dclock(); print_flops(cname, fname, 0, dtime); return h; }
//!< evolve method evolves the gauge field due to the momentum void AlgMomentum::evolve(Float dt, int steps) { const char *fname = "evolve()"; Float dtime = -dclock(); Lattice &lat = LatticeFactory::Create(F_CLASS_NONE, G_CLASS_NONE); for (int i=0; i<steps; i++) lat.EvolveGfield(mom, dt); lat.MdTimeInc(dt*steps); VRB.Flow(cname,fname,"%s%f\n", md_time_str, IFloat(lat.MdTime())); LatticeFactory::Destroy(); dtime += dclock(); print_flops(cname, fname, 1968. * 4. * GJP.VolNodeSites() * steps, dtime); }
/*
 * Benchmark driver for chol() on a random matrix.
 *
 * Usage: <prog> <n> [papi-event]
 *
 * Without a second argument the elapsed dclock() time of chol() is measured;
 * with one, the counter returned by papi_stop() for that event is measured.
 * stdout receives the checksum over the entries A[IDX(i, j, n)] with j >= i
 * (only when chol() reports success); stderr receives the measurement, or an
 * error message when chol() returns nonzero.
 *
 * Returns -1 when <n> is missing, 0 otherwise.
 */
int main(int argc, char *argv[])
{
    unsigned n;
    int evt;
    int failed;
    double *A;
    int i, j;
    double checksum = 0;
    double startTime = 0.0;
    double elapsed = 0.0;
    long long counter = 0;

    if (argc < 2) {
        fprintf(stderr, "Usage: %s <n> [papi-event]\n",
                argc > 0 ? argv[0] : "chol");
        return -1;
    }

    n = atoi(argv[1]);
    evt = (argc > 2) ? atoi(argv[2]) : -1;

    A = randomMatrix(n);
    assert(A != NULL);

    if (evt == -1) {
        startTime = dclock();
    } else {
        papi_init(evt);
        papi_start();
    }

    failed = chol(A, n) != 0;

    /* Stop the measurement right after chol() so that the checksum loop and
       the printing below do not leak into the reported time / counter. */
    if (evt == -1) {
        elapsed = dclock() - startTime;
    } else {
        counter = papi_stop();
    }

    if (failed) {
        fprintf(stderr, "Error: matrix is either not symmetric or not positive definite.\n");
    } else {
        for (i = 0; i < n; i++) {
            for (j = i; j < n; j++) {
                checksum += A[IDX(i, j, n)];
            }
        }
        printf("Checksum: %f \n", checksum);
    }

    /* The measurement is still printed after the checksum, as before. */
    if (evt == -1) {
        fprintf(stderr, "%f\n", elapsed);
    } else {
        fprintf(stderr, "%lld\n", counter);
    }

    free(A);
    return 0;
}
void wilson_vector_hqet_src(field_offset out, field_offset in, int spin, int tB) { register int i; register site *s; int colour ; double t_start ; t_start = dclock() ; FORALLSITES(i,s) { /*** zero the hqet source **/ zero_zu3_matrix( (su3_matrix *)F_PT(s,out) ); if( s->t == tB) { for(colour = 0 ; colour < 3 ; ++colour) ((su3_matrix *)F_PT(s,out))->e[colour][colour] = ((wilson_vector *)F_PT(s,in))->d[spin].c[colour] ; } } /** end of the loop over lattice sites ****/
void hopping(field_offset src, field_offset temp, field_offset light_quark, int nhop, Real kappa_c, int parity_of_source, int color, int spin, int wallflag, FILE * fp_m_out, int fb_m_out) { double dtime ; /** double dtime1; ****/ int N_iter; register int i; register site *s; Real size_src, size_r; int old_parity, new_parity = 0x00, channel; double **meson_prop; wilson_vector *light_wall = NULL, *heavy_wall = NULL; /* Start Hopping */ dtime = -dclock(); /* Normalisation */ size_src = 0.0; FORSOMEPARITY(i, s, parity_of_source) { size_src += magsq_wvec(((wilson_vector *) F_PT(s, src))); }
/*
 * Contract the hqet-->light correlators for one (velocity, zonked, spectator)
 * combination.
 *
 * Two contractions are performed with meson_cont_mom(): first with q_zonked,
 * then with q_zonked_rot. The second result is stored op_stride*MAX_THREEPT
 * entries further into corr[] than the first. When IF_VERBOSE_ON(1) is active,
 * the elapsed dclock() time is printed.
 */
void contract_hqet_to_light(complex *corr, field_offset q_zonked, field_offset q_zonked_rot,
                            field_offset q_sequential, int vel_pt, int zonked_pt, int spect_pt)
{
  const double t_start = dclock();

  /* Partial offset for storing the result in corr[], plus the strides obtained
     by setting only the fourth / sixth macro argument to 1. */
  const int first_base = HQET_FORM_WHERE(0, zonked_pt, spect_pt, 0, vel_pt, 0);
  const int q_stride   = HQET_FORM_WHERE(0, 0, 0, 1, 0, 0);
  const int op_stride  = HQET_FORM_WHERE(0, 0, 0, 0, 0, 1);

  /* Results of the rotated contraction go to the second half of corr */
  const int second_base = first_base + op_stride*MAX_THREEPT;

  /* First, contract zonked and sequential */
  meson_cont_mom(corr, q_zonked, q_sequential,
                 first_base, q_stride, op_stride,
                 hqet_to_light, MAX_THREEPT);

  /* Second, contract rotated zonked and sequential */
  meson_cont_mom(corr, q_zonked_rot, q_sequential,
                 second_base, q_stride, op_stride,
                 hqet_to_light, MAX_THREEPT);

  IF_VERBOSE_ON(1)
    printf("contract_hqet_to_light::Time to Wick contract hqet-->light correlators = %g sec\n",dclock() - t_start);
}
/*
 * Adds the dclock() time elapsed since the previous call (tracked in `cprev`)
 * to the running total `runtime`, then remembers the current reading.
 */
void update_time(void)
{
    const double now = dclock();

    runtime += (now - cprev);
    cprev = now;
}
/* Returns the current dclock() reading converted to SPDP. */
SPDP dtime()
{
    const SPDP now = dclock();

    return now;
}
//!< Heat Bath for the conjugate momentum void AlgMomentum::heatbath() { const char *fname = "heatbath()"; Float dtime = -dclock(); Lattice &lat = LatticeFactory::Create(F_CLASS_NONE, G_CLASS_NONE); lat.RandGaussAntiHermMatrix(mom, 1.0); //!< reset MD time in Lattice (a momentum refresh means a new trajectory) lat.MdTime(0.0); VRB.Flow(cname,fname,"%s%f\n", md_time_str, IFloat(lat.MdTime())); LatticeFactory::Destroy(); dtime += dclock(); print_flops(cname, fname, 0, dtime); }
/* Returns the current dclock() reading. */
double dtime()
{
    const double now = dclock();

    return now;
}
/*!
  Copies \a len Float values from \a a to \a b.

  With USE_OMP defined the copy is an element-wise loop under
  "omp parallel for"; otherwise a single memcpy is used.

  \param b The destination array.
  \param a The source array.
  \param len The number of Float elements to copy.
*/
void moveFloat(Float *b, const Float *a, int len) {
// Profiling is switched off here: PROFILE is undefined just before it is
// tested, so the timing statements below are compiled out.
#undef PROFILE
#ifdef PROFILE
  double elapsed = -dclock();
#endif

#ifdef USE_OMP
#pragma omp parallel for
  for (int idx = 0; idx < len; idx++) {
    b[idx] = a[idx];
  }
#else
  memcpy(b, a, len*sizeof(Float));
#endif

#ifdef PROFILE
  elapsed += dclock();
  print_flops("","moveFloat",len*sizeof(Float),elapsed);
#endif
}
/*--------------------------------------------------------------------*/
/*
 * Completes a timing measurement and prints it.
 *
 * dtime : value to which the current dclock() reading is added before
 *         printing (start_timing-style negative start stamps give elapsed time)
 * str   : label inserted into the message
 *
 * Does nothing unless PRTIME is defined.
 */
void print_timing(double dtime, char *str){
#ifdef PRTIME
  const double elapsed = dtime + dclock();

  node0_printf("Time for %s %e\n",str, elapsed);
  fflush(stdout);
#endif
}
/*
 * Rebuild fn->QOP_L from the fat and long link fields held in fn.
 *
 * Any existing QOP links are destroyed first. The elapsed time is printed
 * only when both LLTIME and REMAP are defined.
 */
static void
create_qop_links_from_milc_fn(ferm_links_t *fn)
{
  char myname[] = "create_qop_links_from_milc";
  double remaptime = -dclock();

  /* Drop the old links before creating the replacement. */
  DESTROY_QOP_ASQTAD_FERMION_LINKS(fn);
  fn->QOP_L = CREATE_L_FROM_FIELDS(fn->fat, fn->lng, EVENANDODD);

  remaptime += dclock();

#ifdef LLTIME
#ifdef REMAP
  node0_printf("LLREMAP: time = %e\n",remaptime);
#endif
#endif
}
/*
 * Load the fat and long links through load_fatlonglinks_gpu(), then either
 * load or destroy the back links depending on want_back.
 *
 * On return info->final_sec holds the elapsed dclock() time and
 * info->final_flop is set to 0.
 *
 * NOTE(review): whatever load_fatlonglinks_gpu() may have left in
 * info->final_flop is overwritten with 0 here -- confirm that reporting no
 * flops for this path is intended.
 */
void load_fn_links_gpu(info_t *info, fn_links_t *fn, ks_action_paths *ap,
                       su3_matrix *links, int want_back)
{
  const double final_flop = 0;
  ks_component_paths *paths = &ap->p;
  double dtime = -dclock();

  load_fatlonglinks_gpu(info, fn->fat, fn->lng, paths, links);

  if(want_back) {
    load_fn_backlinks(fn);
  } else {
    destroy_fn_backlinks(fn);
  }

  dtime += dclock();

  info->final_sec = dtime;
  info->final_flop = final_flop;
}
/*
 * Reset a timer so that its start and stop stamps both hold the current time.
 *
 * With XT3 defined the stamps are dclock() readings stored in
 * starttime/stoptime; otherwise gettimeofday() fills start_time, which is
 * then copied to stop_time.
 */
static void __timer_reset(mtimer_t * timer)
{
#if defined(XT3)
  timer->stoptime = dclock();
  timer->starttime = timer->stoptime;
#else
  gettimeofday(&timer->start_time, 0);
  timer->stop_time = timer->start_time;
#endif
}
/*
 * Sweeps SIZE from 8 to 512 in steps of 8 and, for each size, times one mm()
 * call on freshly initialised SIZE x SIZE operands.
 *
 * Per size, the file opened by init_file() receives "SIZE, " followed by
 * whatever papi_results() writes, and stdout receives SIZE, the elapsed
 * dclock() time and the rate computed as 2*SIZE^3*1e-9 / time.
 * The exit status is the value returned by the last mm() call.
 */
int main( int argc, const char* argv[] )
{
    /* The file name is derived from argv[0] with its first two characters
       skipped. */
    FILE *fp = init_file(argv[0] + 2);
    int iret;

    for (SIZE = 8; SIZE <= 512; SIZE += 8) {
        double first[SIZE][SIZE];
        double second[SIZE][SIZE];
        double multiply[SIZE][SIZE];
        double elapsed;
        double gflops;
        int row, col;

        /* first[r][c] = r + c, second[r][c] = r - c, result cleared. */
        for (row = 0; row < SIZE; row++) {
            for (col = 0; col < SIZE; col++) {
                first[row][col] = row + col;
                second[row][col] = row - col;
                multiply[row][col] = 0.0;
            }
        }

        papi_init();

        elapsed = dclock();
        iret = mm(first, second, multiply);
        elapsed = dclock() - elapsed;

        fprintf(fp, "%d, ", SIZE);
        papi_results(fp);

        gflops = 2.0 * SIZE * SIZE * SIZE * 1e-9 / elapsed;
        printf( "%d, %le, %f\n", SIZE, elapsed, gflops);

        //double check=0.0;
        //for(i=0;i<SIZE;i++){
        //  for(j=0;j<SIZE;j++){
        //    check+=multiply[i][j];
        //  }
        //}
        //printf("check %le \n",check);

        fflush( stdout );
    }

    return iret;
}
/* cray timers */
/*
 * Stores the current timer reading in values[idx].
 *
 * With CRAY_TIMERS and TAU_CATAMOUNT defined the reading is dclock() * 1.0e6;
 * with only CRAY_TIMERS defined it is _rtc() divided by HZ. Without
 * CRAY_TIMERS the function leaves values[] untouched. `tid` is not used.
 */
void metric_read_craytimers(int tid, int idx, double values[]) {
#if defined(CRAY_TIMERS)
#if defined(TAU_CATAMOUNT)
  /* for Cray XT3 */
  values[idx] = dclock() * 1.0e6;
#else
  /* for Cray X1 */
  long long ticks = _rtc();
  values[idx] = (double)ticks / HZ;
#endif /* TAU_CATAMOUNT */
#endif /* CRAY_TIMERS */
}
/*--------------------------------------------------------------------*/
/*
 * Returns the start stamp for a timing measurement: the negated dclock()
 * reading when PRTIME is defined, 0 otherwise.
 */
double start_timing(void){
#ifdef PRTIME
  return -dclock();
#else
  return 0;
#endif
}