/*
 * wfm::recon
 *
 * Packs the arguments for recon_internal and runs it over the local
 * checkerboard volume.
 *
 *   chi : output spinor field (advanced by SPINOR_SIZE per site per worker)
 *   u   : gauge field; the half starting at GAUGE_SIZE*vol is used when the
 *         incoming cb is 0, the base otherwise
 *   cb  : global checkerboard; remapped with base_parity to the node-local one
 *   dag : passed through unchanged to recon_internal
 *
 * Side effects: forces nthread to 1 and adds 1320*vol to
 * CPS_NAMESPACE::DiracOp::CGflops.  Exits if vol is not divisible by nthread.
 */
void wfm::recon(Float *chi, Float *u, int cb, int dag)
{
  /* Node-local checkerboard, derived before anything else is touched */
  const int node_cb = (cb + base_parity) & 1;

  nthread = 1;

  /* The gauge half is chosen from the *global* checkerboard value */
  Float *gauge_notpar = ( cb == 0 ) ? (Float *)u + GAUGE_SIZE*vol
                                    : (Float *)u;
  cache_touch(gauge_notpar);

  cb = node_cb;

  /* Sites handled by each worker; must tile the volume exactly */
  const int sites_per_thread = vol/nthread;
  if ( (sites_per_thread * nthread) != vol ) {
    if ( isBoss() ) printf("Bagel threading model broke\n");
    exit(-1);
  }

  recon_internal_arg *work = new recon_internal_arg[nthread];

  /* Slot 0 holds the settings common to every worker */
  recon_internal_arg &shared = work[0];
  shared.cb     = cb;
  shared.dag    = dag;
  shared.vol    = sites_per_thread;
  shared.bgl    = WFM_BGL;
  shared.sloppy = SloppyPrecision;

  for ( int t = 0; t < nthread; ++t ) {
    recon_internal_arg &slot = work[t];
    slot            = shared;
    slot.chi        = chi + sites_per_thread*t*SPINOR_SIZE;
    slot.u          = gauge_notpar + sites_per_thread*t*GAUGE_SIZE;
    slot.two_spinor = two_spinor
      + (sites_per_thread*t*8*PAD_HALF_SPINOR_SIZE*TwoSpinSize())/sizeof(Float);
  }

  /* Workers 1..nthread-1 run in created threads; worker 0 runs here */
  for ( int t = 1; t < nthread; ++t ) {
    thread_create(recon_internal,(void *)&work[t],t);
  }
  recon_internal(&work[0]);
  for ( int t = 1; t < nthread; ++t ) {
    thread_join(t);
  }

  delete [] work;

  CPS_NAMESPACE::DiracOp::CGflops += 1320*vol;
}
/*
 * wfm::decom
 *
 * Packs the arguments for decom_internal and runs it over the local
 * checkerboard volume.
 *
 *   psi : input spinor field (advanced by SPINOR_SIZE per site per worker)
 *   u   : gauge field; the half starting at GAUGE_SIZE*vol is used when the
 *         incoming cb is 1, the base otherwise
 *   cb  : global checkerboard; remapped with base_parity to the node-local one
 *   dag : passed through unchanged to decom_internal
 *
 * Side effects: forces nthread to 1.  Exits if vol is not divisible by
 * nthread.
 */
void wfm::decom(Float *psi, Float *u, int cb, int dag)
{
  /* Node-local checkerboard, derived from the global one */
  const int node_cb = (cb + base_parity) & 1;

  /* The gauge half is chosen from the *global* checkerboard value */
  Float *gauge_par = ( cb == 1 ) ? (Float *)u + GAUGE_SIZE*vol
                                 : (Float *)u;

  nthread = 1;

  /* From here on cb is the checkerboard within this node */
  cb = node_cb;

  /* Sites handled by each worker; must tile the volume exactly */
  const int sites_per_thread = vol/nthread;
  if ( (sites_per_thread * nthread) != vol ) {
    if ( isBoss() ) printf("Bagel threading model broke\n");
    exit(-1);
  }

  decom_internal_arg *work = new decom_internal_arg[nthread];

  /* Slot 0 holds the settings common to every worker */
  decom_internal_arg &shared = work[0];
  shared.cb     = cb;
  shared.dag    = dag;
  shared.vol    = sites_per_thread;
  shared.bgl    = WFM_BGL;
  shared.sloppy = SloppyPrecision;

  for ( int t = 0; t < nthread; ++t ) {
    decom_internal_arg &slot = work[t];
    slot             = shared;
    slot.psi         = psi + sites_per_thread*t*SPINOR_SIZE;
    slot.u           = gauge_par + sites_per_thread*t*GAUGE_SIZE;
    slot.shift_table = &shift_table[cb][t*sites_per_thread*8];
  }

  /* Workers 1..nthread-1 run in created threads; worker 0 runs here */
  for ( int t = 1; t < nthread; ++t ) {
    thread_create(decom_internal,(void *)&work[t],t);
  }
  decom_internal(&work[0]);
  for ( int t = 1; t < nthread; ++t ) {
    thread_join(t);
  }

  delete [] work;
}
void wfm::init(WilsonArg *wilson_p) /* pointer to Wilson type structure */ { int spinor_words; /* size of the spinor field on the */ /* sublattice checkerboard */ int half_spinor_words; /* size of the spin-projected "half_spinors*/ /* on the sublattice checkerboard including*/ /* the communications padding */ int slx; /* x-direction size of node sublattice */ int sly; /* y-direction size of node sublattice */ int slz; /* z-direction size of node sublattice */ int slt; /* t-direction size of node sublattice */ int i; int mu; SloppyPrecision = wilson_p->SloppyPrecision; WFM_BGL = wilson_p->WFM_BGL; // if ( isBoss() ) printf("wfm::init setting up BG/L MMU state\n"); mmu_optimise(); mmu_print(); // CoreCount( wilson_p->CoreCount ); CoreCount( 1 ); if ( WFM_BGL ) PAD_HALF_SPINOR_SIZE = 12; else PAD_HALF_SPINOR_SIZE = 16; if ( WFM_BGL && (nthread > 1) && SloppyPrecision ) { if ( isBoss() ) printf("Bagel does not maintain L1 coherence in dual core + single precision mode on BlueGene\n"); if ( isBoss() ) printf("Get on to IBM to give me access to SWOA MMU options, or even better a non-cache image of DRAM\n"); if ( isBoss() ) printf("If they give me the tools, I'm happy to do the heroics of mainting sfw coherence\n"); if ( isBoss() ) printf("Bagel insanity check exiting\n"); exit(-1); } IR = wilson_p->instruction_reg_num; /*--------------------------------------------------------------------------*/ /* Set sublattice direction sizes */ /*--------------------------------------------------------------------------*/ local_latt[0] = wilson_p->local_latt[0]; local_latt[1] = wilson_p->local_latt[1]; local_latt[2] = wilson_p->local_latt[2]; local_latt[3] = wilson_p->local_latt[3]; slx = local_latt[0]; sly = local_latt[1]; slz = local_latt[2]; slt = local_latt[3]; #if (defined USE_COMMS_QMP) && (!defined UNIFORM_SEED_NO_COMMS) QMP_bool_t qmp_inited=QMP_is_initialized(); if( !qmp_inited ) { if ( isBoss() ) printf("QMP_not_initialized\n"); exit(-1); } const int *ncoor = 
QMP_get_logical_coordinates(); base_parity =(ncoor[0]*local_latt[0] + ncoor[1]*local_latt[1] + ncoor[2]*local_latt[2] + ncoor[3]*local_latt[3])&0x1; #else base_parity = 0; #endif /*--------------------------------------------------------------------------*/ /* Set periodic wrap back or not */ /*--------------------------------------------------------------------------*/ local_comm[0] = wilson_p->local_comm[0]; local_comm[1] = wilson_p->local_comm[1]; local_comm[2] = wilson_p->local_comm[2]; local_comm[3] = wilson_p->local_comm[3]; #ifdef UNIFORM_SEED_NO_COMMS for(int i=0;i<4;i++) if(local_comm[0]!=1){ fprintf(stderr,"wfm::local_comm[%d]=%d!\n",i,local_comm[i]); exit(-33); } #endif /*-----------------------------------------------------------------------*/ /* compute the subgrd volume of each chkbd ... at least two local dims */ /* must be even for this code to be correct. */ /*-----------------------------------------------------------------------*/ vol = (slx * sly * slz * slt)/2; nbound[0] = (sly * slz * slt)/2; nbound[1] = (slx * slz * slt)/2; nbound[2] = (slx * sly * slt)/2; nbound[3] = (slx * sly * slz)/2; allbound = nbound[0] + nbound[1] + nbound[2] + nbound[3]; if ( nbound[0] * slx * 2 != (slx*sly*slz*slt) ) { if ( isBoss() ) printf("wfm::init Even x logic bomb\n"); exit(-1); } if ( nbound[1] * sly * 2 != (slx*sly*slz*slt) ) { if ( isBoss() ) printf("wfm::init Even y logic bomb\n"); exit(-1); } if ( nbound[2] * slz * 2 != (slx*sly*slz*slt) ) { if ( isBoss() ) printf("wfm::init Even z logic bomb\n"); exit(-1); } if ( nbound[3] * slt * 2 != (slx*sly*slz*slt) ) { if ( isBoss() ) printf("wfm::init Even t logic bomb\n"); exit(-1); } /*------------------------------------------------------------------------*/ /* Check shape */ /*------------------------------------------------------------------------*/ if ( (slx&1) ) { if ( isBoss() ) printf("Bagel is refusing to run as x-sub latt is odd\n"); exit(-1); } if ( (sly&1) &&(slz&1)&&(slt&1) ) { if ( isBoss() ) 
printf("Bagel is refusing to run as y,z,t sub latts are all odd\n"); exit(-1); } /*--------------------------------------------------------------------------*/ /* Reserve memory for 1 temporary spinor (needed by mdagm) */ /*--------------------------------------------------------------------------*/ spinor_words = SPINOR_SIZE * vol; spinor_tmp = (Float *)ALLOC(spinor_words*sizeof(Float)*2); //printf("wfm_init::spinor_tmp=%p\n",spinor_tmp); // VRB.Flow(cname,fname,"spinor_tmp=%p\n",spinor_tmp); #ifdef USE_QALLOC // If we used QALLOC, and the ALLOC macro failed we can try // qalloc but without the QFAST flag. Even tho the spinor_tmp is // not communicated we leave the QCOMMS bit on in case it puts // spinor tmp into a better place in the memory map if(spinor_tmp == 0) { if ( isBoss() ) printf("BAGEL: Warning spinor_tmp has spilled out of Edram\n"); spinor_tmp = (Float *) qalloc(QCOMMS,spinor_words*sizeof(Float)*2); } #endif // USE QALLOC if(spinor_tmp == 0){ if ( isBoss() ) printf("wfm::spinor_tmp allocate\n"); exit(-1); } //~~ //~~ twisted mass fermions: sets WilsonArg.spinor_tmp tp //~~ address of temporary spinor in wfm class //~~ wilson_p->spinor_tmp = spinor_tmp; //~~ /*--------------------------------------------------------------------------*/ /* Reserve memory for the 4 forward and 4 backward spin projected half */ /* spinors. */ /*--------------------------------------------------------------------------*/ /*PAB 10/1/2001 */ half_spinor_words = NMinusPlus * ND * PAD_HALF_SPINOR_SIZE * vol; #ifndef USE_COMMS_QMP two_spinor = (Float *)ALLOC(half_spinor_words*sizeof(Float)); #ifdef USE_QALLOC // If we are using QALLOC and the ALLOC macro failed we can still // try to get slow memory. 
Leave on the QCOMMS bit for good memory map // placement if(two_spinor == 0) { if ( isBoss() ) printf("BAGEL : warning two spinors have spilled out of Edram\n"); two_spinor = (Float *)qalloc(QCOMMS,half_spinor_words*sizeof(Float)); } #endif // USE_QALLOC if(two_spinor == 0){ if ( isBoss() ) printf("wfm::two_spinor allocate\n"); exit(-1); } #else // Since two spinor is now communicated because of the Tface // receive I have to allocate it in the style of QMP two_spinor_mem_t = QMP_allocate_aligned_memory( half_spinor_words*sizeof(Float), WFM_ALIGN_ARG, (QMP_MEM_COMMS|QMP_MEM_FAST)); if( two_spinor_mem_t == 0x0 ) { // Try slow allocation two_spinor_mem_t = QMP_allocate_aligned_memory( half_spinor_words*sizeof(Float), WFM_ALIGN_ARG, QMP_MEM_COMMS); if( two_spinor_mem_t == 0x0 ) { if ( isBoss() ) printf("wfm_init::could not allocate two spinor_mem_t\n"); exit(-1); } } two_spinor = (Float *)QMP_get_memory_pointer(two_spinor_mem_t); if (two_spinor == 0x0) { if ( isBoss() ) printf("wfm::init QMP_get_memory_pointer returned NULL pointer from non NULL QMP_mem_t\n"); exit(-1); } #endif /*--------------------------------------------------------------------------*/ /* Reserve memory for the 4 forward and 4 backward spin projected half */ /* spinors. 
*/ /*--------------------------------------------------------------------------*/ for ( int pm = 0;pm<2;pm++ ) { for ( mu = 0 ; mu < 4 ; mu++) if (local_comm[mu]==0) { half_spinor_words = PAD_HALF_SPINOR_SIZE * nbound[mu]; // These things are (potentially) communicated so need QMP Style // allocation if using QMP // // Note: I am allocating the buffers in all directions regardless // of whether we are communicating in that dir or not (Copying CPS) #ifndef USE_COMMS_QMP // Not using QMP recv_bufs[pm][mu] = (Float *)ALLOC(half_spinor_words*sizeof(Float)); #ifdef USE_QALLOC // If ALLOC fails try slow memory but with QCOMMS bit still set if( recv_bufs[pm][mu] == 0x0 ) recv_bufs[pm][mu] = (Float *)qalloc(QCOMMS, half_spinor_words*sizeof(Float)); #endif if(recv_bufs[pm][mu] == 0){ if ( isBoss() ) printf("wfm::recv_bufs allocate\n"); exit(-1); } send_bufs[pm][mu]=(Float *)SEND_ALLOC(half_spinor_words*sizeof(Float)); #ifdef USE_QALLOC // If SEND ALLOC macro fails try slow memory but with QNONCACHE bit // still set if( send_bufs[pm][mu] == 0 ) send_bufs[pm][mu]=(Float *)qalloc(QNONCACHE, half_spinor_words*sizeof(Float)); #endif if(send_bufs[pm][mu] == 0){ if ( isBoss() ) printf("wfm::send_bufs allocate\n"); exit(-1); } #else /* QMP memory allocation: A little involved */ /* Must allocate "opaque" QMP_mem_t first and then get aligned pointer out of it. 
It's either what is below or a very complicated send alloc */ /* Peter in the CPS allocs recv_bufs with ALLOC = QCOMMS|FAST */ recv_bufs_mem_t[pm][mu] = QMP_allocate_aligned_memory(half_spinor_words*sizeof(Float), WFM_ALIGN_ARG, (QMP_MEM_COMMS|QMP_MEM_FAST)); if( recv_bufs_mem_t[pm][mu] == 0x0 ) { // If QMP_allocate memory fails with FAST, try SLOW but keep COMMS recv_bufs_mem_t[pm][mu] = QMP_allocate_aligned_memory(half_spinor_words*sizeof(Float), WFM_ALIGN_ARG, QMP_MEM_COMMS); if( recv_bufs_mem_t[pm][mu] == 0x0 ) { if ( isBoss() ) printf("wfm::init recv_bufs_mem_t[%d][%d]: QMP_allocate_aligned_memory returned NULL\n", pm, mu); exit(-1); } } /* Now get the aligned pointer */ recv_bufs[pm][mu] =(Float *)QMP_get_memory_pointer(recv_bufs_mem_t[pm][mu]); if( recv_bufs[pm][mu] == 0x0 ) { if ( isBoss() ) printf("wfm::init recv_bufs[%d][%d]: NULL aligned pointer in non NULL QMP_mem_t struct \n", pm, mu); exit(-1); } /* Now do the same for the send bufs */ /* In CPS Peter allocates as SEND_ALLOC = QNONCACHE | QFAST */ send_bufs_mem_t[pm][mu] = QMP_allocate_aligned_memory(half_spinor_words*sizeof(Float), WFM_ALIGN_ARG, (QMP_MEM_NONCACHE|QMP_MEM_FAST)); if( send_bufs_mem_t[pm][mu] == 0x0 ) { // if allocator fails, try slow but still NONCACHE send_bufs_mem_t[pm][mu] = QMP_allocate_aligned_memory(half_spinor_words*sizeof(Float), WFM_ALIGN_ARG, QMP_MEM_NONCACHE); if( send_bufs_mem_t[pm][mu] == 0x0 ) { if ( isBoss() ) printf("wfm::init: send_bufs_mem_t[%d][%d]: QMP_allocate_aligned_memory returned NULL\n", pm, mu); exit(-1); } } /* Now get the aligned pointer */ send_bufs[pm][mu] =(Float *)QMP_get_memory_pointer(send_bufs_mem_t[pm][mu]); if( send_bufs[pm][mu] == 0x0 ) { if ( isBoss() ) printf("wfm::init send_bufs[%d][%d]: NULL aligned pointer in non NULL QMP_mem_t struct \n", pm, mu); exit(-1); } #endif } } /*----------------------------------------------------------------------*/ /* Build the pointer table */ 
/*----------------------------------------------------------------------*/ pointers_init(); /*----------------------------------------------------------------------*/ /* Initialise the comms */ /*----------------------------------------------------------------------*/ comm_init(); }
void wfm::pointers_init(void) { int mu; int cb, pm; int shift; int x,y,z,t; int lx,ly,lz,lt; int SizeofTwoSpin; int bound_index[Ncb][NMinusPlus][ND]; int local_p,shift_p; int local_addr[ND]; int shift_addr[ND]; int psite,shift_psite; int offset,tab,table_size; void *bit_bucket; lx = local_latt[0]; ly = local_latt[1]; lz = local_latt[2]; lt = local_latt[3]; bit_bucket = ALLOC(32); #ifndef UNIFORM_SEED_TESTING if ( SloppyPrecision ) { if ( isBoss() ) printf("Configuring for MIXED precision kernels: word sizes %d,%d\n",sizeof(Float),sizeof(float)); } else { if ( isBoss() ) printf("Configuring for uniform precision : word size %d\n",sizeof(Float)); } if ( WFM_BGL ) { if ( isBoss() ) printf("Configuring for BG/L kernels\n"); } else { if ( isBoss() ) printf("Configuring for QCDOC kernels\n"); } #endif if ( SloppyPrecision ) { SizeofTwoSpin = sizeof(float); } else { SizeofTwoSpin = sizeof(Float); } #ifndef UNIFORM_SEED_TESTING if ( isBoss() ) printf("Padded 2-spinor size is %d words, %d bytes\n",PAD_HALF_SPINOR_SIZE, PAD_HALF_SPINOR_SIZE*SizeofTwoSpin); #endif for ( cb = 0 ; cb<2 ; cb++) { //ND * plus_minus * hvol * parities table_size = NMinusPlus*(vol+1)*Nmu; shift_table[cb]=(unsigned long *)ALLOC( table_size*sizeof(unsigned long)); #ifdef USE_QALLOC // If using qalloc allocator and FAST alloc fails, try slow alloc if( shift_table[cb] == 0x0 ) shift_table[cb]=(unsigned long *)qalloc(QCOMMS,table_size*sizeof(unsigned long)); #endif if( shift_table[cb] == 0x0 ) { if ( isBoss() ) printf("wfm::pointers_init: shift_tables[%d] alloc failed\n",cb); exit(-1); } } for(cb=0; cb<2; cb++) { for(pm=0; pm<2; pm++) { for(mu=0; mu<4; mu++) { face_table[cb][pm][mu]= (unsigned long *) ALLOC(allbound*sizeof(unsigned long)); #ifdef USE_QALLOC // IF using qalloc allocator and FAST alloc fails try slow // ALLOC automatically sets the QCOMM bit. 
We are not // going to communicate this, but it may help in setting it // in a better place in the memory map if we also set it if( face_table[cb][pm][mu] == 0x0 ) face_table[cb][pm][mu] = (unsigned long *) qalloc(QCOMMS,table_size*sizeof(unsigned long)); #endif // Final Falure if( face_table[cb][pm][mu] == 0x0 ) { if ( isBoss() ) printf("wfm::pointers_init: shift_tables[%d] alloc failed\n",cb); exit(-1); } } } } for( mu = 0 ; mu < Nmu ; mu++ ) { bound_index[0][0][mu] = 0; bound_index[0][1][mu] = 0; bound_index[1][0][mu] = 0; bound_index[1][1][mu] = 0; } /* * For now point send buffers beyond end of body. * wfm::scale_ptr will remap this to the real send buffer, once * it gets assigned with a pointer value. */ /* This bit points to the first element after the end of the body for send_offset[Minus][0] */ send_offset[Minus][0] = 8* vol; /*We interleave the 8 2spinors*/ /* Now we do the rest of the send offsets packing densely. nbound[mu-1] sites after the previous */ send_offset[Minus][1] = send_offset[Minus][0] + nbound[0]; send_offset[Minus][2] = send_offset[Minus][1] + nbound[1]; send_offset[Minus][3] = send_offset[Minus][2] + nbound[2]; /* Now the PLUS directions. 
They are the same as the minus directions pushed by allbound */ send_offset[Plus][ 0 ] = send_offset[Minus][0] + allbound; send_offset[Plus][ 1 ] = send_offset[Minus][1] + allbound; send_offset[Plus][ 2 ] = send_offset[Minus][2] + allbound; send_offset[Plus][ 3 ] = send_offset[Minus][3] + allbound; /* Now work out the shifts */ for ( pm = 0; pm<2; pm++ ) { /* Shift direction */ if ( pm == Plus ) { /*forwards or backwards*/ shift = 1; } else { shift = -1; } /*Loop naively over local lattice*/ for ( t=0 ; t<lt ; t++ ) { local_addr[3] = t; for ( z=0 ; z<lz ; z++ ) { local_addr[2] = z; for ( y=0 ; y<ly ; y++ ) { local_addr[1] = y; for ( x=0 ; x<lx ; x++ ) { local_addr[0] = x; /* local_addr contains the coordinates of the point */ /* get the parity of the point */ local_p = wfm::local_parity(x,y,z,t); /* get the site of the point for that parity */ psite = wfm::local_psite(local_addr,local_latt); for ( mu = 0 ; mu < 4; mu ++) { /* Get the coordinates of the point shifted in direction +/- mu. (+/- is encoded in shift, depending on where we are in the +/- loop) */ shift_addr[0] = x; shift_addr[1] = y; shift_addr[2] = z; shift_addr[3] = t; shift_addr[mu] += shift; /* Parity of shifted site is opposite that of the unshifted one */ shift_p = 1-local_p ; /* * Offset to the source site in the 2 spinor array is common */ /* Get the interleaved site address of the local point (from which we are shifting */ tab = wfm::interleave_site(pm,mu,psite); /* * Get the offset to the destination site in the 2 spinor array * If destination is in interior, trivial. * Also implement periodic wrap if local_comm[mu]. 
*/ if (((shift_addr[mu] >= 0 ) && (shift_addr[mu]<local_latt[mu]) ) || local_comm[mu] ) { /*Local periodicity, does nothing if interior*/ shift_addr[mu]= (shift_addr[mu] + local_latt[mu])%local_latt[mu]; shift_psite = local_psite(shift_addr,local_latt); offset = interleave_site(pm,mu,shift_psite) ; shift_table[local_p][tab] = ((unsigned long)two_spinor + offset * PAD_HALF_SPINOR_SIZE * SizeofTwoSpin); } else { /*non-local and we're on the boundary*/ /*Local periodicity, does nothing if interior*/ shift_addr[mu]= (shift_addr[mu] + local_latt[mu])%local_latt[mu]; shift_psite = local_psite(shift_addr,local_latt); offset = interleave_site(pm,mu,shift_psite) ; /* * The minus face receives something written by the * plus face. Thus the face table should contain the pointer * to the site we would have sent to in the above case. */ face_table[local_p][pm][mu][bound_index[local_p][pm][mu]] = offset; /* * And we use face_table[local_p][pm][mu] in conjunction * with the send buffer[pm][mu] and source parity local_p */ offset = bound_index[local_p][pm][mu]; shift_table[local_p][tab] = (unsigned long) send_bufs[pm][mu] + offset * PAD_HALF_SPINOR_SIZE * SizeofTwoSpin; bound_index[local_p][pm][mu] ++; } } /*mu*/ }/*x*/ }/*y*/ }/*z*/ }/*t*/ }/*pm*/ int site = (lx*ly*lz*lt)/2 ; for(pm=0; pm<2; pm++) { for(mu=0; mu<3; mu++) { tab = interleave_site(pm,mu,site); shift_table[0][tab] = (unsigned long) bit_bucket; shift_table[1][tab] = (unsigned long) bit_bucket; } } // int bound_index[Ncb][NMinusPlus][ND]; for( cb = 0 ; cb < 2 ; cb++ ) { for( pm = 0 ; pm < 2 ; pm++ ) { for( mu = 0 ; mu < Nmu ; mu++ ) { if ( !local_comm[mu] ) { if ( bound_index[cb][pm][mu] != nbound[mu] ) { printf("Boundary size mismatch[%d][%d][mu=%d] : %d != %d \b", cb,pm,mu,bound_index[cb][local_p][mu],nbound[mu]); exit(-1); } } else { if ( bound_index[cb][pm][mu] != 0 ) { printf("Boundary size mismatch[%d][%d] mu=%d\b", cb,pm,mu); exit(-1); } } } } } // scale_ptr(); return; }
void wfm::dslash(Float *chi, Float *u, Float *psi, int cb, int dag) { /* * To a first approximation, we simply * remap the arguments into a form acceptable * to the assembler, then call it */ /* *Pull in the first Psi to cache early */ cache_touch(psi); cache_touch(psi+4); cache_touch(psi+8); cache_touch(psi+12); cache_touch(psi+16); cache_touch(psi+20); decom(psi,u,cb,dag); #ifdef DEBUG_BENCHMARK_COMMS double ndata = 2*2*allbound * 12 * TwoSpinSize() * 1.0E-6 * 100; struct timeval start,stop,delta; gettimeofday(&start,NULL); for(int i=0;i<100;i++) { #endif comm_start(cb); /* * Hackers: you could split here and do something else... * Such as DWF fith dimension, or a clover term etc... * Might as well pull in a few sites worth of pointer table * while we're waiting for the comms */ comm_complete(cb); #ifdef DEBUG_BENCHMARK_COMMS } gettimeofday(&stop,NULL); timersub(&stop,&start,&delta); double seconds = delta.tv_usec * 1.0E-6 + delta.tv_sec; if ( isBoss() ) printf("Comms %le MB in %le seconds = %le MB/s\n",ndata,seconds,ndata/seconds); ndata = 2*2*allbound * 12 * TwoSpinSize() ; if ( isBoss() ) printf("ndata = %d \n",ndata); #endif #ifdef DEBUG_OUTPUT_VECTORS static int file_cnt; { char buf[256]; sprintf(buf,"2spin.%d.%d",UniqueID(),file_cnt); FILE *fp = fopen(buf,"w"); for(int i=0;i<vol;i++) { for(int pm=0;pm<2;pm++){ for(int mu=0;mu<4;mu++){ int offset = interleave_site(pm,mu,i); for(int s=0;s<2;s++){ for(int c=0;c<3;c++){ for(int r=0;r<2;r++){ int scri; if ( WFM_BGL ) scri = r + s*6+c*2; else scri = r + s*2+c*4; int gbl[4]; site_to_global(cb, i, gbl, local_latt ); if ( SloppyPrecision ) { float * pointer = (float *) two_spinor; fprintf(fp,"%d %d %d %d %d %d %d %d %d %e\n",gbl[0],gbl[1],gbl[2],gbl[3], pm,mu,s,c,r,pointer[PAD_HALF_SPINOR_SIZE*offset+scri]); } else { Float * pointer = (Float *) two_spinor; fprintf(fp,"%d %d %d %d %d %d %d %d %d %e\n",gbl[0],gbl[1],gbl[2],gbl[3], pm,mu,s,c,r,pointer[PAD_HALF_SPINOR_SIZE*offset+scri]); } }}} } } } fclose(fp);} #endif 
cache_touch(two_spinor); cache_touch(two_spinor+4); cache_touch(two_spinor+8); recon(chi,u,cb,dag); #ifdef DEBUG_OUTPUT_VECTORS { char buf[256]; sprintf(buf,"recon.%d.%d",UniqueID(),file_cnt++); FILE *fp = fopen(buf,"w"); for(int i=0;i<vol;i++) { for(int pm=0;pm<2;pm++){ for(int mu=0;mu<4;mu++){ int offset = interleave_site(pm,mu,i); for(int s=0;s<4;s++){ for(int c=0;c<3;c++){ for(int r=0;r<2;r++){ int scri; scri = r + s*6+c*2; Float * pointer = (Float *) chi; int gbl[4]; site_to_global(cb, i, gbl, local_latt ); fprintf(fp,"%d %d %d %d %d %d %d %d %d %d %e\n",gbl[0],gbl[1],gbl[2],gbl[3], i,pm,mu,s,c,r,pointer[SPINOR_SIZE*offset+scri]); }}} } } } fclose(fp);} exit(0); #endif return; }
/// Handles this bullet hitting an entity.
///
/// Emits the impact particle and 50 "bits" particles at the bullet's
/// position, applies falloff damage to nearby non-boss enemies when the
/// bullet is in the offensive stance, emits two extra impact particles on
/// the entity that was hit if it is an enemy, and raises the damage
/// modifier of an EnemyBossRed by 0.2.
///
/// @param hit  The entity that was struck.
/// @return Always true.
bool PlayerBullet::onImpact(Entity* hit)
{
	const bool offensive = (bulletStance == PlayerEntity::Stance::Offensive);

	auto impact = GameGlobals::get()->impact;
	auto position = getComponent<sz::Transform>()->getPosition();
	auto color = getComponent<sz::Renderer>()->getColor();

	// Main impact flash: roughly one draw in eleven gets an enlarged maximum scale.
	impact->setColor(color);
	{
		const bool enlarged = thor::random(0, 10) >= 9;
		if(offensive)
			impact->setScale(0.6f, 1.f + (enlarged ? 1.5f : 0.f));
		else
			impact->setScale(0.3f, 0.6f + (enlarged ? 0.6f : 0.f));
	}
	impact->setPosition(position);
	impact->emit(1);

	// Debris: 50 small particles, each either along the bullet angle or
	// offset from it by -PI.
	auto bits = GameGlobals::get()->impactbits;
	bits->setColor(color);
	bits->setScale(0.02f, 0.1f);
	bits->setPosition(position);

	for(int n = 0; n < 50; ++n)
	{
		float speed = thor::random(0.f, 250.f);
		bits->setVelocityCone(speed, m_angle + (thor::random(0, 2) == 0 ? -PI : 0.f), thor::random(0.f, 50.f));
		bits->emit(1);
	}

	// Splash damage around the impact point (offensive stance only).
	if(offensive)
	{
		auto nearby = getComponent<sz::Physics>()->queryRadius(150.f);
		for(auto&& candidate : nearby)
		{
			auto target = dynamic_cast<EnemyEntityBase*>(candidate);

			// Skip non-enemies, the entity that was hit directly, and bosses.
			if(!target)
				continue;
			if(target == hit)
				continue;
			if(target->isBoss())
				continue;

			auto targetPos = target->call(&sz::Transform::getPosition);

			float ratio = sz::distance(position, targetPos) / 22500.f;
			ratio = std::min(1.f, ratio);

			float falloff = (1.f - (ratio * ratio)) * 0.65f;
			float angle = sz::getAngle(position, targetPos);

			target->applyDamage(DamageReport(m_shooter, m_damage * falloff, angle));
		}
	}

	// Extra flashes on the struck enemy, scattered within +/-30 units.
	if(auto struck = dynamic_cast<EnemyEntityBase*>(hit))
	{
		auto pos = struck->getTransform->getPosition();

		if(struck->isBoss())
		{
			impact->setColor(color);
			impact->setScale(1.f, 1.4f + (thor::random(0, 10) >= 9 ? 0.4f : 0.f));
		}
		else
		{
			impact->setColor(sf::Color::White);
			impact->setScale(0.3f, 0.4f + (thor::random(0, 10) >= 9 ? 0.3f : 0.f));
		}

		for(int burst = 0; burst < 2; ++burst)
		{
			impact->setPosition(pos + sf::Vector2f(thor::random(-30.f, 30.f), thor::random(-30.f, 30.f)));
			impact->emit(1);
		}
	}

	// The red boss takes more damage with every hit.
	if(auto boss = dynamic_cast<EnemyBossRed*>(hit))
	{
		boss->increaseDmgMod(0.2f);
	}

	return true;
}