Beispiel #1
0
/*
 * Benchmark driver for chol().
 *
 * Usage: <prog> <matrix-file> <n> <event>
 *
 * The matrix is read with load_matrix(argv[1], n).  When <event> is 5 the
 * factorisation is timed with dclock() and the elapsed time is printed;
 * for any other value the event is handed to papi_init() and the counter
 * value returned by papi_stop() is printed instead.  The return code of
 * chol() goes to stderr as "RET:<code>".
 *
 * Returns 0 on completion, 1 when arguments are missing or the matrix
 * could not be loaded.
 */
int
main(int argc, char** argv)
{
	double *A;
	int n, ret, event;
	double startTime = 0.0;
	double endTime;
	long long value;

	/* argv[1], argv[2] and argv[3] are all dereferenced below. */
	if (argc < 4) {
		fprintf(stderr, "usage: %s <matrix-file> <n> <event>\n",
		    (argc > 0 && argv[0] != NULL) ? argv[0] : "chol");
		return 1;
	}

	n = atoi(argv[2]);
	A = load_matrix(argv[1], n);
	if (A == NULL) {
		fprintf(stderr, "could not load matrix from %s\n", argv[1]);
		return 1;
	}
	event = atoi(argv[3]);

	/* Start either the hardware counter or the wall clock, never both. */
	if (event != 5) {
		papi_init(event);
		papi_start();
	} else {
		startTime = dclock();
	}

	ret = chol(A, n);

	if (event != 5) {
		value = papi_stop();
		printf("%lld\n", value);
	} else {
		endTime = dclock();
		printf("%lf\n", endTime - startTime);
	}

	fprintf(stderr, "RET:%d\n", ret);
	check(A,n);
	free(A);
	return 0;
}
Beispiel #2
0
Datei: mm3.c Projekt: msiwek/oora
/*
 * Times one call of mm() on SIZE x SIZE matrices and prints the elapsed
 * time followed by the sum of all elements of the result as a checksum.
 *
 * Inputs are first[i][j] = i + j and second[i][j] = i - j.  The value
 * returned by mm() becomes the process exit status.
 */
int main( int argc, const char* argv[] )
{
  int i,j,iret;
  double first[SIZE][SIZE];
  double second[SIZE][SIZE];
  double multiply[SIZE][SIZE];
  double dtime;

  for (i = 0; i < SIZE; i++) { //rows in first
    for (j = 0; j < SIZE; j++) { //columns in first
      first[i][j]=i+j;
      second[i][j]=i-j;
      /* Clear the result up front (outside the timed region) so the
         checksum below never reads indeterminate values should mm()
         accumulate into its output rather than overwrite it. */
      multiply[i][j]=0.0;
    }
  }

  dtime = dclock();
  iret=mm(first,second,multiply);
  dtime = dclock()-dtime;
  printf( "Time: %le \n", dtime);
  fflush( stdout );

  /* Sum of every result element, printed so runs can be compared. */
  double check=0.0;
  for(i=0;i<SIZE;i++){
    for(j=0;j<SIZE;j++){
      check+=multiply[i][j];
    }
  }
  printf("check %le \n",check);
  fflush( stdout );

  return iret;
}
Beispiel #3
0
/*
 * Contract the light two-point correlators into corr[] by delegating to
 * meson_cont_mom() with the two_pt operator table (MAX_TWOPT entries).
 * When verbosity level 1 is enabled the elapsed dclock() time is printed.
 */
void contract_light_twopt(complex *corr, field_offset q_zonked, 
			  field_offset q_sequential,
			  int zonked_pt, int spect_pt)
{
  /* Taken first so the reported time covers the whole call. */
  double t_begin = dclock();

  /* Partial offsets for storing the result in corr[] */
  int corr_base  = TWOPT_FORM_WHERE(0, zonked_pt, spect_pt, 0, 0);
  int quark_step = TWOPT_FORM_WHERE(0, 0, 0, 1, 0);
  int oper_step  = TWOPT_FORM_WHERE(0, 0, 0, 0, 1);

  meson_cont_mom(corr, q_zonked, q_sequential,
		 corr_base, quark_step, oper_step,
		 two_pt, MAX_TWOPT);

  IF_VERBOSE_ON(1)
  {
    printf("Time to Wick contract light 2pt correlators = %g sec\n",
	   dclock() - t_begin);
  }
}
Beispiel #4
0
/*
 * Generates a matrix with generateSPDmatrix(), prints it, then times
 * chol_left_looking() on it and reports the elapsed time and the
 * achieved GFLOP/s, counting SIZE^3 / 3 floating-point operations.
 */
int
main(void)
{
	srand((unsigned int)time(NULL));
	double *A;
	double dtime;
	int i, j;

	A = generateSPDmatrix();

	/* Dump the input matrix, one row per line. */
	for(i = 0; i < SIZE; i++){
		for(j = 0; j < SIZE; j++){
			printf("%le \t", A[IDX(i, j, SIZE)]);
		}
		printf("\n");
	}

	dtime = dclock();
	chol_left_looking(A, SIZE);
	dtime = dclock()-dtime;

	/* 1e-9 converts flop/s to Gflop/s.  (The literal 10e-9 is 1e-8 and
	   would overstate the rate by a factor of ten.) */
	double gflops = ((1.0/3.0) * SIZE * SIZE * SIZE * 1e-9) / dtime;
	printf( "Time: %le \n", dtime);
	printf("Gflops: %le \n", gflops);

	return 0;
}
/*
 * Contract the light-light two-point correlators into corr[] by
 * delegating to meson_cont_mom_lean2().  When verbosity level 1 is
 * enabled the elapsed dclock() time is printed.
 */
void contract_LL2(complex *corr, field_offset q_zonked, field_offset q_spectator,
int zonked_pt, int spect_pt)
{
  /* Taken first so the reported time covers the whole call. */
  double t_begin = dclock();

  /* Partial offsets for storing the result in corr[] */
  int corr_base  = LL_TWOPT_FORM_WHERE(0, zonked_pt, spect_pt, 0, 0);
  int quark_step = LL_TWOPT_FORM_WHERE(0, 0, 0, 1, 0);
  int oper_step  = LL_TWOPT_FORM_WHERE(0, 0, 0, 0, 1);

  meson_cont_mom_lean2(corr, q_zonked, q_spectator,
		 corr_base, quark_step, oper_step,
		 w_meson_store_t, w_meson_my_t, w_meson_nstore,
		 no_k_values, k_momstore,
		 MAX_TWOPT, two_pt,
		 F_OFFSET(QTMP), DIMQTMP);

  IF_VERBOSE_ON(1)
  {
    printf("Time to Wick contract light-light 2pt correlators = %g sec\n",
	   dclock() - t_begin);
  }
}
/*
 * Create and load the HISQ link structures.
 *
 *   info      receives the total wall time (final_sec) and the summed
 *             flop count (final_flop) of both load steps
 *   fn        array of ap->n_naiks entries, each filled with a new
 *             fn_links_t from create_fn_links()
 *   fn_deps   set to a new fn_links_t when want_deps is nonzero,
 *             otherwise to NULL
 *   aux       receives the auxiliary structure built from ap and links
 *   want_deps, want_back
 *             forwarded to load_hisq_fn_links()
 */
void 
create_hisq_links_milc(info_t *info, fn_links_t **fn, fn_links_t **fn_deps,
		       hisq_auxiliary_t **aux, ks_action_paths_hisq *ap, 
		       su3_matrix *links, int want_deps, int want_back){
  int n_naiks = ap->n_naiks;
  int i;
  double final_flop = 0.;
  double dtime = -dclock();

  *aux = create_hisq_auxiliary_t(ap, links);
  
  /* Each load step reports its own count in info->final_flop; collect
     it before the next step overwrites it. */
  load_hisq_aux_links(info, ap, *aux, links);
  final_flop += info->final_flop;
  
  for(i = 0; i < n_naiks; i++)
    fn[i] = create_fn_links();

  if(want_deps)
    *fn_deps = create_fn_links();
  else
    *fn_deps = NULL;

  load_hisq_fn_links(info, fn, *fn_deps, *aux, ap, links, 
		     want_deps, want_back);
  final_flop += info->final_flop;

  dtime += dclock();
  info->final_sec = dtime;
  /* Publish the accumulated total; without this the caller would only
     see the count of the last load step. */
  info->final_flop = final_flop;
}
Beispiel #7
0
/*
 * Build QOP Wilson fermion links from the site gauge field.
 *
 * The coefficient structure is filled from clov, the gauge field is
 * remapped with create_G_from_site4(), the links are created from it and
 * the temporary gauge field is destroyed.  Timing lines are printed only
 * when FFTIME (and, for the remap time, REMAP) are defined.
 */
static QOP_FermionLinksWilson *
create_qop_wilson_fermion_links( Real clov )
{
  QOP_wilson_coeffs_t wilson_coeffs;
  QOP_info_t qop_info;
  QOP_GaugeField *gauge;
  QOP_FermionLinksWilson *wilson_links;
  double t_remap;

  load_qop_wilson_coeffs(&wilson_coeffs, clov);

  /* Time only the remapping of the gauge field. */
  t_remap = -dclock();
  gauge = create_G_from_site4(F_OFFSET(link),EVENANDODD);
  t_remap += dclock();

  wilson_links = QOP_wilson_create_L_from_G(&qop_info, &wilson_coeffs, gauge);

  /* The intermediate gauge field is no longer needed once the links exist. */
  QOP_destroy_G(gauge);

#ifdef FFTIME
#ifdef REMAP
  node0_printf("FFREMAP:  time = %e\n",t_remap);
#endif
  node0_printf("FFTIME:  time = %e (cl_qop) terms = 1 mflops = %e\n",
	       qop_info.final_sec,
	       (Real)qop_info.final_flop/(1e6*qop_info.final_sec) );
#endif

  return wilson_links;
}
Beispiel #8
0
// Generate the rational approximation x^(pnum/pden)
void AlgRemez::generateApprox()
{
  char *fname = "generateApprox()";

  Float time = -dclock();
  iter = 0;
  spread = 1.0e37;

  if (approx_type == RATIONAL_APPROX_ZERO_POLE) {
    n--;
    neq--;
  }

  initialGuess();
  stpini(step);

  while (spread > tolerance) { //iterate until convergance

    if (iter++%100==0) 
      VRB.Result(cname,fname,"Iteration %d, spread %e delta %e\n", iter-1,(Float)spread,(Float)delta);
    equations();
    if (delta < tolerance)
      ERR.General(cname, fname,"Delta too small, try increasing precision\n");

    search(step);

  }

  int sign;
  Float error = (Float)getErr(mm[0],&sign);
  VRB.Result(cname,fname,"Converged at %d iterations, error = %e\n",
	     iter,error);

  //!< Once the approximation has been generated, calculate the roots
  if(!root()) ERR.General(cname,fname,"Root finding failed\n");
  
  if (approx_type == RATIONAL_APPROX_ZERO_POLE) {
    roots[n] = (bigfloat)0.0;
    n++;
    neq++;
  }

  //!< Now find the partial fraction expansions
  if (remez_arg->field_type == BOSON) {
    getPFE(remez_arg->residue, remez_arg->pole, &(remez_arg->norm));
    getIPFE(remez_arg->residue_inv, remez_arg->pole_inv, &(remez_arg->norm_inv));
  } else {
    getIPFE(remez_arg->residue, remez_arg->pole, &(remez_arg->norm));
    getPFE(remez_arg->residue_inv, remez_arg->pole_inv, &(remez_arg->norm_inv));
  }

  remez_arg->error = error;
  time += dclock();
  print_time(cname,fname,time);

}
Beispiel #9
0
CPS_START_NAMESPACE
/*!\file
  \brief  Definitions of functions that perform operations on complex matrices
  and vectors.

  $Id: vector_util.C,v 1.10 2013-04-19 20:25:52 chulwoo Exp $
*/
//--------------------------------------------------------------------
//  CVS keywords
//
//  $Author: chulwoo $
//  $Date: 2013-04-19 20:25:52 $
//  $Header: /home/chulwoo/CPS/repo/CVS/cps_only/cps_pp/src/util/vector/comsrc/vector_util.C,v 1.10 2013-04-19 20:25:52 chulwoo Exp $
//  $Id: vector_util.C,v 1.10 2013-04-19 20:25:52 chulwoo Exp $
//  $Name: not supported by cvs2svn $
//  $Locker:  $
//  $Revision: 1.10 $
//  $Source: /home/chulwoo/CPS/repo/CVS/cps_only/cps_pp/src/util/vector/comsrc/vector_util.C,v $
//  $State: Exp $
//
//--------------------------------------------------------------------
/*------------------------------------------------------------------*/
/*
   For these functions there exists optimized assembly 
   code.
*/
/*------------------------------------------------------------------*/

CPS_END_NAMESPACE
#include <string.h>		/* memcpy */
#include <util/vector.h>
#include <util/time_cps.h>
//#include<omp.h>
CPS_START_NAMESPACE


/*!
  \param b The vector to be copied to
  \param a The vector to be copied from.
  \param len The number of bytes to be copied.

   The arrays \a a and \a b must not alias each other.
*/
//---------------------------------------------------------------//
void moveMem(void *b, const void *a, int len) 
{
    /* len is a signed int but memcpy takes a size_t: a negative value
       would be converted to an enormous byte count.  Treat any
       non-positive length as "nothing to copy". */
    if (len <= 0) return;

    /* Profiling of this routine is switched off unconditionally. */
#undef PROFILE
#ifdef PROFILE
    double time  = -dclock();
#endif
    memcpy(b, a, (size_t)len); 
#ifdef PROFILE
    time += dclock();
    print_flops("","moveMem",len,time);
#endif
}
Beispiel #10
0
// Parallel transport of n vectors through one hop.
//
//   n     number of (vin, vout, dir) triples to process
//   vout  vout[i] receives the result for entry i
//   vin   vin[i] is the input for entry i
//   dir   dir[i] selects the direction index used for entry i
//
// Communication for the directions that are not flagged in local[] is
// started first, the local part is computed while it is in flight, and the
// non-local part is computed from rcv_buf[] once the transfer completed.
// Flops is increased by 66*n*vol.
void PT::vec(int n, IFloat **vout, IFloat **vin, const int *dir){
  int i;
  // Counts invocations; not read anywhere else in this function.
  static int call_num=0;
  // Two SCU argument slots per direction that needs communication.
  SCUDirArgIR *SCUarg_p[2*n];
  call_num++;
  //for(int s = 0; s < GJP.VolNodeSites(); s++)
  //  {
  //    for(int t = 0; t < 4; t++)
  //	{
  //	  printf("site = %d, direction = %d\n",s,t);
  //	  for(int u = 0; u < 9; u++)
  //	    printf("%e %e\n",*(gauge_field_addr+4*GAUGE_LEN*s + GAUGE_LEN*t + 2*u),*(gauge_field_addr+4*GAUGE_LEN*s + GAUGE_LEN*t + 2*u+1));
  //	}
  //  }

#ifdef PROFILE
  Float dtime  = - dclock();
#endif
  int wire[n];
  SCUDirArgMulti SCUmulti;

  char *fname="pt_1vec";
//  VRB.Func("",fname);
	
  // Number of entries that require communication.
  int non_local_dir=0;
  // NOTE(review): the original remark here read "from (x,y,z,t) to
  // (t,x,y,z)", but the loop is a plain copy of dir[] into wire[].
  for(i=0;i<n;i++) wire[i] = dir[i];
//  for(i=0;i<n;i++) printf("wire[%d]=%d\n",i,dir[i]);
  // Collect the pair of SCU arguments for every entry whose direction is
  // not flagged in local[], and point the second one of the pair at the
  // input vector shifted by offset[wire[i]] vectors.
  for(i=0;i<n;i++)
  if (!local[wire[i]/2]){
    IFloat * addr = (vin[i]+VECT_LEN*offset[wire[i]]);
    SCUarg_p[2*non_local_dir] = SCUarg[0][2*wire[i]];
    SCUarg_p[2*non_local_dir+1] = SCUarg[0][2*wire[i]+1];
    SCUarg_p[2*non_local_dir+1]->Addr((void *)addr);
    non_local_dir++;
  }
  // Start the transfer only if at least one entry needs it.
  if(non_local_dir){
    SCUmulti.Init(SCUarg_p,non_local_dir*2);
    SCUmulti.SlowStartTrans();
  }
	
  // Local contribution, computed before waiting for the transfer.
  for(i=0;i<n;i++) 
    partrans_cmv_agg(local_chi[wire[i]],(long)uc_l[wire[i]], (long)vin[i],(long)vout[i]);
	
  // The receive buffers are read below, so the transfer must be complete.
  if(non_local_dir){ SCUmulti.TransComplete(); }

  // Non-local contribution, taken from the receive buffers.
  for(i=0;i<n;i++) 
    partrans_cmv_agg(non_local_chi[wire[i]],(long)uc_nl[wire[i]], (long)rcv_buf[wire[i]],(long)vout[i]);

#ifdef PROFILE
  dtime +=dclock();
  print_flops("",fname,66*n*vol,dtime);
#endif
  Flops +=66*n*vol;
}
Beispiel #11
0
/*
 * Copy len objects of type Vector from a to b.  The two regions must not
 * overlap (memcpy is used).  A non-positive len copies nothing.
 */
void moveVec(Float *b, const Float *a, int len) {
    /* A negative len would turn into an enormous size_t in the byte
       count below. */
    if (len <= 0) return;

    /* Profiling of this routine is switched off unconditionally. */
#undef PROFILE
#ifdef PROFILE
    double time  = -dclock();
#endif
    memcpy(b, a, (size_t)len*sizeof(Vector)); 
#ifdef PROFILE
    time += dclock();
    /* Report the same byte count that was actually copied. */
    print_flops("","moveVec",len*sizeof(Vector),time);
#endif
}
// Add dt times the gauge force to the momentum of every link on this node
// and return the L1, L2 and L-infinity measures of the force, each scaled
// by dt.  The L1 and L2 sums are reduced with glb_sum() and divided by
// 4*GJP.VolSites(); the maximum is reduced with glb_max().
ForceArg GimprRect::EvolveMomGforce(Matrix *mom, Float dt){
  char *fname = "EvolveMomGforce(M*,F)";
  VRB.Func(cname,fname);

  Float sum_abs = 0.0;   // running sum of sqrt(norm)
  Float sum_sq  = 0.0;   // running sum of norm
  Float max_abs = 0.0;   // largest sqrt(norm) seen so far

#ifdef PROFILE
  Float elapsed = -dclock();
  ForceFlops = 0;
#endif
  
  setCbufCntrlReg(4, CBUF_MODE4);

  int site[4];
  
  for(site[0] = 0; site[0] < GJP.XnodeSites(); ++site[0])
  for(site[1] = 0; site[1] < GJP.YnodeSites(); ++site[1])
  for(site[2] = 0; site[2] < GJP.ZnodeSites(); ++site[2])
  for(site[3] = 0; site[3] < GJP.TnodeSites(); ++site[3]) {

    int link_base = GsiteOffset(site);

    for (int mu = 0; mu < 4; ++mu) {
      // The force for this link is left in *mp0.
      GforceSite(*mp0, site, mu);

      IFloat *mom_link = (IFloat *)(mom+link_base+mu);
      IFloat *force = (IFloat *)mp0;

      // mom_link = dt * force + mom_link over 18 components
      fTimesV1PlusV2(mom_link, dt, force, mom_link, 18);

      Float force_sq = ((Matrix*)force)->norm();
      Float force_mag = sqrt(force_sq);
      sum_abs += force_mag;
      sum_sq += force_sq;
      if (force_mag > max_abs) {
        max_abs = force_mag;
      }
    }
  }
  ForceFlops +=GJP.VolNodeSites()*4*18*2;
#ifdef PROFILE
  elapsed += dclock();
  print_flops(cname,fname,ForceFlops,elapsed);
#endif

  glb_sum(&sum_abs);
  glb_sum(&sum_sq);
  glb_max(&max_abs);

  sum_abs /= 4.0*GJP.VolSites();
  sum_sq /= 4.0*GJP.VolSites();

  VRB.FuncEnd(cname,fname);
  return ForceArg(dt*sum_abs, dt*sqrt(sum_sq), dt*max_abs);
}
Beispiel #13
0
//!< Return Lattice::MomHamiltonNode(mom) for this algorithm's momentum
//!< field, reporting the elapsed time through print_flops (flop count 0).
Float AlgMomentum::energy() {
  const char *fname = "energy()";
  Float elapsed = -dclock();

  Lattice &lattice = LatticeFactory::Create(F_CLASS_NONE, G_CLASS_NONE);
  Float hamiltonian = lattice.MomHamiltonNode(mom);
  LatticeFactory::Destroy();

  elapsed += dclock();
  print_flops(cname, fname, 0, elapsed);

  return hamiltonian;
}
Beispiel #14
0
//!< Evolve the gauge field with the momentum: apply EvolveGfield(mom, dt)
//!< `steps` times, then advance the lattice MD time by dt*steps.
void AlgMomentum::evolve(Float dt, int steps) 
{
  const char *fname = "evolve()";
  Float elapsed = -dclock();

  Lattice &lattice = LatticeFactory::Create(F_CLASS_NONE, G_CLASS_NONE);

  int done = 0;
  while (done < steps) {
    lattice.EvolveGfield(mom, dt);
    ++done;
  }

  lattice.MdTimeInc(dt*steps);
  VRB.Flow(cname,fname,"%s%f\n", md_time_str, IFloat(lattice.MdTime()));
  LatticeFactory::Destroy();

  elapsed += dclock();
  print_flops(cname, fname, 1968. * 4. * GJP.VolNodeSites() * steps, elapsed);
}
Beispiel #15
0
/*
 * Benchmark driver for chol() on a random n x n matrix.
 *
 * Usage: <prog> <n> [<event>]
 *
 * Without <event> the factorisation is timed with dclock(); with it the
 * event is passed to papi_init() and the counter from papi_stop() is
 * reported.  The measurement is written to stderr; on success a checksum
 * of the upper triangle is written to stdout.
 *
 * Returns -1 when <n> is missing or not positive, 0 otherwise.
 */
int main(int argc, char *argv[]) {
  unsigned n;
  int parsed_n;
  int evt;
  int failed;
  double *A;
  int i, j;
  double checksum = 0;
  double startTime = 0.0, endTime = 0.0;
  long long counter = 0;

  if (argc < 2) {
    return -1;
  }

  /* Reject non-positive sizes before they wrap into a huge unsigned. */
  parsed_n = atoi(argv[1]);
  if (parsed_n <= 0) {
    return -1;
  }
  n = (unsigned)parsed_n;
  evt = (argc > 2) ? atoi(argv[2]) : -1;

  A = randomMatrix(n);
  assert(A != NULL);

  if (evt == -1) {
    startTime = dclock();
  } else {
    papi_init(evt);
    papi_start();
  }

  failed = chol(A, n);

  /* Stop measuring right after the factorisation so that the checksum
     loop and its printf are not part of the reported figure. */
  if (evt == -1) {
    endTime = dclock();
  } else {
    counter = papi_stop();
  }

  if (failed) {
    fprintf(stderr, "Error: matrix is either not symmetric or not positive definite.\n");
  } else {
    for (i = 0; i < n; i++) {
        for (j = i; j < n; j++) {
            checksum += A[IDX(i, j, n)];
        }
    }
    printf("Checksum: %f \n", checksum);
  }

  if (evt == -1) {
    fprintf(stderr, "%f\n", endTime - startTime);
  } else {
    fprintf(stderr, "%lld\n", counter);
  }

  free(A);
  return 0;
}
/*
 * Build an HQET source in the su3_matrix field `out` from the
 * wilson_vector field `in`.
 *
 * On every site the matrix at `out` is cleared; on sites whose time
 * coordinate equals tB the three colour components of spin component
 * `spin` of the vector at `in` are then written onto its diagonal.
 *
 * NOTE(review): only the part of the definition up to the end of the
 * site loop is visible here.
 */
void wilson_vector_hqet_src(field_offset out, field_offset in, int spin, int tB)
{
  register int i;
  register site *s; 
  int colour ; 
  double t_start ;


  /* Wall-clock reference; its use lies beyond the visible part. */
  t_start = dclock() ; 


  FORALLSITES(i,s) 
  {

    /*** zero the hqet source   **/
    zero_zu3_matrix( (su3_matrix *)F_PT(s,out) );

    /* Only the time slice tB carries a non-zero source. */
    if( s->t == tB)
    {
      for(colour = 0 ; colour < 3 ; ++colour)
	((su3_matrix *)F_PT(s,out))->e[colour][colour] 
	  = ((wilson_vector *)F_PT(s,in))->d[spin].c[colour] ;

    }



  } /** end of the loop over lattice sites ****/
Beispiel #17
0
/*
 * Hopping routine.  The visible part starts the timer and accumulates
 * the squared magnitude of the source over the sites of parity
 * parity_of_source.
 *
 * NOTE(review): the definition is cut off after the normalisation loop,
 * so the role of the remaining parameters and locals cannot be
 * documented from here.
 */
void hopping(field_offset src, field_offset temp,
	       field_offset light_quark,
	       int nhop, Real kappa_c, int parity_of_source,
	   int color, int spin, int wallflag, FILE * fp_m_out, int fb_m_out)
{
  double dtime ;
/**  double dtime1;  ****/
  int N_iter;
  register int i;
  register site *s;
  Real size_src, size_r;
  int old_parity, new_parity = 0x00, channel;
  double **meson_prop;
  wilson_vector *light_wall = NULL, *heavy_wall = NULL;



  /* Start Hopping */

  /* Negated start reading, so a later "+= dclock()" yields elapsed time. */
  dtime = -dclock();


  /* Normalisation: sum of magsq_wvec() of the source over the sites of
     the source parity */
  size_src = 0.0;
  FORSOMEPARITY(i, s, parity_of_source)
  {
    size_src += magsq_wvec(((wilson_vector *) F_PT(s, src)));
  }
/*
 * Contract the hqet-->light correlators into corr[].
 *
 * meson_cont_mom() is called twice with the hqet_to_light operator table:
 * first for q_zonked, then for q_zonked_rot, whose results are stored
 * op_stride*MAX_THREEPT entries further on (the second half of corr).
 * When verbosity level 1 is enabled the elapsed dclock() time is printed.
 */
void contract_hqet_to_light(complex *corr, 
			    field_offset q_zonked, 
			    field_offset q_zonked_rot,
			    field_offset q_sequential,
			    int vel_pt,  int zonked_pt, int spect_pt)
{
  /* Taken first so the reported time covers the whole call. */
  double t_begin = dclock();

  /* Partial offsets for storing the result in corr[] */
  int first_base = HQET_FORM_WHERE(0, zonked_pt, spect_pt, 0, vel_pt, 0);
  int quark_step = HQET_FORM_WHERE(0, 0, 0, 1, 0, 0);
  int oper_step  = HQET_FORM_WHERE(0, 0, 0, 0, 0, 1);

  /* Start of the second half of corr, used by the rotated contraction */
  int second_base = first_base + oper_step*MAX_THREEPT;

  /* First, contract zonked and sequential */
  meson_cont_mom(corr, q_zonked, q_sequential,
		 first_base, quark_step, oper_step,
		 hqet_to_light, MAX_THREEPT);

  /* Second, contract rotated zonked and sequential */
  meson_cont_mom(corr, q_zonked_rot, q_sequential,
		 second_base, quark_step, oper_step,
		 hqet_to_light, MAX_THREEPT);

  IF_VERBOSE_ON(1)
  {
    printf("contract_hqet_to_light::Time to Wick contract hqet-->light correlators = %g sec\n",dclock() - t_begin) ;
  }
}
/*
 * Add the time elapsed since the previous call to `runtime` and remember
 * the current dclock() reading in `cprev` for the next call.
 */
void update_time(void)
{
  const double now = dclock();

  runtime += now - cprev;
  cprev = now;
}
Beispiel #20
0
/* Return the current dclock() reading converted to SPDP. */
SPDP dtime()
{
 SPDP now = dclock();

 return now;
}
Beispiel #21
0
//!< Heat Bath for the conjugate momentum
void AlgMomentum::heatbath() {

  const char *fname = "heatbath()";
  Float dtime = -dclock();

  Lattice &lat = LatticeFactory::Create(F_CLASS_NONE, G_CLASS_NONE);
  lat.RandGaussAntiHermMatrix(mom, 1.0);

  //!< reset MD time in Lattice (a momentum refresh means a new trajectory)
  lat.MdTime(0.0);
  VRB.Flow(cname,fname,"%s%f\n", md_time_str, IFloat(lat.MdTime()));
      
  LatticeFactory::Destroy();
  
  dtime += dclock();
  print_flops(cname, fname, 0, dtime);
}
Beispiel #22
0
/* Return the current dclock() reading as a double. */
double dtime()
{
 double now = dclock();

 return now;
}
Beispiel #23
0
/*
 * Copy len Floats from a to b.  With USE_OMP the copy is an OpenMP
 * parallel loop, otherwise a single memcpy; in both cases the regions
 * must not overlap.  A non-positive len copies nothing.
 */
void moveFloat(Float *b, const Float *a, int len) {
    /* A negative len would turn into an enormous size_t in the memcpy
       byte count; the loop variant already copies nothing in that case. */
    if (len <= 0) return;

    /* Profiling of this routine is switched off unconditionally. */
#undef PROFILE
#ifdef PROFILE
    double time  = -dclock();
#endif

#ifdef USE_OMP
#pragma omp parallel for
    for(int i =0;i<len;i++) b[i] = a[i];
#else
    memcpy(b, a, (size_t)len*sizeof(Float)); 
#endif
#ifdef PROFILE
    time += dclock();
    print_flops("","moveFloat",len*sizeof(Float),time);
#endif
}
/*--------------------------------------------------------------------*/
/*
 * When PRTIME is defined, print "Time for <str> <t>" where <t> is dtime
 * plus the current dclock() reading, then flush stdout.  Without PRTIME
 * the call does nothing.  dtime is expected to be the value returned by
 * start_timing().
 */
void print_timing(double dtime, char *str){
#ifdef PRTIME
  const double elapsed = dtime + dclock();

  node0_printf("Time for %s %e\n",str, elapsed);
  fflush(stdout);
#endif
}
/*
 * Rebuild fn->QOP_L from the fat and long link fields of fn: any existing
 * QOP links are destroyed first, then new ones are created for
 * EVENANDODD.  The elapsed time is printed only when both LLTIME and
 * REMAP are defined.
 */
static void
create_qop_links_from_milc_fn(ferm_links_t *fn)
{
  char myname[] = "create_qop_links_from_milc";
  double t_remap = -dclock();

  DESTROY_QOP_ASQTAD_FERMION_LINKS(fn);
  fn->QOP_L = CREATE_L_FROM_FIELDS(fn->fat, fn->lng, EVENANDODD);

  t_remap += dclock();

#if defined(LLTIME) && defined(REMAP)
  node0_printf("LLREMAP:  time = %e\n",t_remap);
#endif
}
/*
 * Load the fat and long links of fn through load_fatlonglinks_gpu(), then
 * either load the back links (want_back nonzero) or destroy them.
 * On return info->final_sec holds the elapsed dclock() time and
 * info->final_flop is set to 0.
 */
void load_fn_links_gpu(info_t *info, fn_links_t *fn, ks_action_paths *ap,
		       su3_matrix *links, int want_back)
{
  ks_component_paths *paths = &ap->p;
  double flops_reported = 0;
  double elapsed = -dclock();

  load_fatlonglinks_gpu(info, fn->fat, fn->lng, paths, links);

  if(!want_back)
    destroy_fn_backlinks(fn);
  else
    load_fn_backlinks(fn);

  elapsed += dclock();

  info->final_sec = elapsed;
  info->final_flop = flops_reported;
}
Beispiel #27
0
/*
 * Set both the start and the stop stamp of `timer` to the current time,
 * using dclock() when XT3 is defined and gettimeofday() otherwise.
 */
static void __timer_reset(mtimer_t * timer)
{
#ifdef XT3
	/* Stop stamp first, then copy it, as the chained assignment did. */
	timer->stoptime = dclock();
	timer->starttime = timer->stoptime;
#else
	gettimeofday(&timer->start_time, 0);
	timer->stop_time = timer->start_time;
#endif
}
Beispiel #28
0
/*
 * Sweeps the global SIZE from 8 to 512 in steps of 8 and, for each size,
 * times mm() on SIZE x SIZE matrices.  Per size, "<SIZE>, " followed by
 * the output of papi_results() goes to the file opened by init_file(),
 * and "<SIZE>, <seconds>, <gflops>" goes to stdout, counting 2*SIZE^3
 * floating-point operations.
 *
 * init_file() receives the program name with a leading "./" removed.
 * Returns the value of the last mm() call, or 1 if the result file could
 * not be opened.
 */
int main( int argc, const char* argv[] )
{
  const char *prog = (argc > 0 && argv[0] != NULL) ? argv[0] : "";
  FILE *fp;
  int iret = 0;

  /* Skip a leading "./" only when it is really there; unconditionally
     adding 2 would run past the end of a shorter name and mangle names
     started through any other path. */
  if (prog[0] == '.' && prog[1] == '/') {
    prog += 2;
  }

  fp = init_file(prog);
  if (fp == NULL) {
    fprintf(stderr, "could not open result file for %s\n", prog);
    return 1;
  }

  for (SIZE = 8; SIZE <= 512; SIZE += 8) {
    int i,j;
    double first[SIZE][SIZE];
    double second[SIZE][SIZE];
    double multiply[SIZE][SIZE];
    double dtime;
    double gflops;

    for (i = 0; i < SIZE; i++) { //rows in first
      for (j = 0; j < SIZE; j++) { //columns in first
        first[i][j]=i+j;
        second[i][j]=i-j;
        multiply[i][j]=0.0;
      }
    }

    papi_init();
    dtime = dclock();
    iret = mm(first,second,multiply);
    dtime = dclock()-dtime;
    fprintf(fp, "%d, ", SIZE);
    papi_results(fp);

    gflops = 2.0 * SIZE * SIZE * SIZE * 1e-9 / dtime;

    printf( "%d, %le, %f\n", SIZE, dtime, gflops);
    fflush( stdout );
  }

  /* Flush and release the result file before exiting. */
  fclose(fp);

  return iret;
}
Beispiel #29
0
/*
 * Cray timers: store the current timer reading in values[idx].
 * With CRAY_TIMERS and TAU_CATAMOUNT (Cray XT3) the reading is dclock()
 * scaled by 1.0e6; with CRAY_TIMERS alone (Cray X1) it is _rtc() divided
 * by HZ.  Without CRAY_TIMERS nothing is written.
 */
void metric_read_craytimers(int tid, int idx, double values[]) {
#if defined(CRAY_TIMERS) && defined(TAU_CATAMOUNT)
  /* Cray XT3 */
  values[idx] = dclock() * 1.0e6;
#elif defined(CRAY_TIMERS)
  /* Cray X1 */
  long long rtc_ticks = _rtc();
  values[idx] = (double)rtc_ticks / HZ;
#endif
}
/*--------------------------------------------------------------------*/
/*
 * Return the starting value for a timing interval: the negated dclock()
 * reading when PRTIME is defined (so that adding a later reading gives
 * the elapsed time), and 0 otherwise.
 */
double start_timing(void){
#ifdef PRTIME
  return -dclock();
#else
  return 0;
#endif
}