void bfmcommIroIro<Float>::comm(int result_cb, Fermion_t psi, int dag)
{
  /*
   * Blocking communication: launch the exchange for psi and then wait
   * for it to finish before returning. Callers that want to overlap
   * other work with the exchange can call comm_start / comm_complete
   * themselves instead of this convenience wrapper.
   */
  comm_start(result_cb, psi, dag);
  comm_complete(result_cb, psi);
}
void wfm::dslash(Float *chi, Float *u, Float *psi, int cb, int dag) { /* * To a first approximation, we simply * remap the arguments into a form acceptable * to the assembler, then call it */ /* *Pull in the first Psi to cache early */ cache_touch(psi); cache_touch(psi+4); cache_touch(psi+8); cache_touch(psi+12); cache_touch(psi+16); cache_touch(psi+20); decom(psi,u,cb,dag); #ifdef DEBUG_BENCHMARK_COMMS double ndata = 2*2*allbound * 12 * TwoSpinSize() * 1.0E-6 * 100; struct timeval start,stop,delta; gettimeofday(&start,NULL); for(int i=0;i<100;i++) { #endif comm_start(cb); /* * Hackers: you could split here and do something else... * Such as DWF fith dimension, or a clover term etc... * Might as well pull in a few sites worth of pointer table * while we're waiting for the comms */ comm_complete(cb); #ifdef DEBUG_BENCHMARK_COMMS } gettimeofday(&stop,NULL); timersub(&stop,&start,&delta); double seconds = delta.tv_usec * 1.0E-6 + delta.tv_sec; if ( isBoss() ) printf("Comms %le MB in %le seconds = %le MB/s\n",ndata,seconds,ndata/seconds); ndata = 2*2*allbound * 12 * TwoSpinSize() ; if ( isBoss() ) printf("ndata = %d \n",ndata); #endif #ifdef DEBUG_OUTPUT_VECTORS static int file_cnt; { char buf[256]; sprintf(buf,"2spin.%d.%d",UniqueID(),file_cnt); FILE *fp = fopen(buf,"w"); for(int i=0;i<vol;i++) { for(int pm=0;pm<2;pm++){ for(int mu=0;mu<4;mu++){ int offset = interleave_site(pm,mu,i); for(int s=0;s<2;s++){ for(int c=0;c<3;c++){ for(int r=0;r<2;r++){ int scri; if ( WFM_BGL ) scri = r + s*6+c*2; else scri = r + s*2+c*4; int gbl[4]; site_to_global(cb, i, gbl, local_latt ); if ( SloppyPrecision ) { float * pointer = (float *) two_spinor; fprintf(fp,"%d %d %d %d %d %d %d %d %d %e\n",gbl[0],gbl[1],gbl[2],gbl[3], pm,mu,s,c,r,pointer[PAD_HALF_SPINOR_SIZE*offset+scri]); } else { Float * pointer = (Float *) two_spinor; fprintf(fp,"%d %d %d %d %d %d %d %d %d %e\n",gbl[0],gbl[1],gbl[2],gbl[3], pm,mu,s,c,r,pointer[PAD_HALF_SPINOR_SIZE*offset+scri]); } }}} } } } fclose(fp);} #endif 
cache_touch(two_spinor); cache_touch(two_spinor+4); cache_touch(two_spinor+8); recon(chi,u,cb,dag); #ifdef DEBUG_OUTPUT_VECTORS { char buf[256]; sprintf(buf,"recon.%d.%d",UniqueID(),file_cnt++); FILE *fp = fopen(buf,"w"); for(int i=0;i<vol;i++) { for(int pm=0;pm<2;pm++){ for(int mu=0;mu<4;mu++){ int offset = interleave_site(pm,mu,i); for(int s=0;s<4;s++){ for(int c=0;c<3;c++){ for(int r=0;r<2;r++){ int scri; scri = r + s*6+c*2; Float * pointer = (Float *) chi; int gbl[4]; site_to_global(cb, i, gbl, local_latt ); fprintf(fp,"%d %d %d %d %d %d %d %d %d %d %e\n",gbl[0],gbl[1],gbl[2],gbl[3], i,pm,mu,s,c,r,pointer[SPINOR_SIZE*offset+scri]); }}} } } } fclose(fp);} exit(0); #endif return; }