Esempio n. 1
0
/*
 * wfm::recon
 *
 * Drives the recon_internal kernel over this node's checkerboard volume.
 *
 *   chi : output spinor base pointer; each worker receives an offset of
 *         svol*i*SPINOR_SIZE from it.
 *   u   : gauge field base pointer; the half starting at GAUGE_SIZE*vol is
 *         used when the incoming cb is 0, otherwise the half at u itself.
 *   cb  : checkerboard as passed by the caller; it is combined with
 *         base_parity before being handed to the kernel.
 *   dag : forwarded unchanged to the kernel.
 *
 * Side effects: forces nthread to 1 and adds 1320*vol to
 * CPS_NAMESPACE::DiracOp::CGflops.
 */
void wfm::recon(Float *chi, 
		Float *u, 
		int cb,
		int dag)
{
  /* Checkerboard index after folding in this node's base parity. */
  const int node_cb = (cb + base_parity) & 1;

  /* Threading is pinned to a single worker in this routine. */
  nthread = 1;

  /* The gauge half is chosen with the caller's cb, i.e. before the remap. */
  Float *gauge_notpar = ( cb == 0 ) ? (Float *)u + GAUGE_SIZE*vol
                                    : (Float *)u;
  cache_touch(gauge_notpar);

  cb = node_cb;

  /* Every worker must get an equal share of the sites. */
  const int sites_per_thread = vol/nthread;
  if ( (sites_per_thread * nthread) != vol ) {
    if ( isBoss() ) printf("Bagel threading model broke\n");
    exit(-1);
  }

  recon_internal_arg *args = new recon_internal_arg[nthread];

  /* Fill one argument record per worker: shared fields plus its own slice. */
  for ( int t = 0; t < nthread; t++ ) {
    recon_internal_arg &slice = args[t];

    slice.cb     = cb;
    slice.dag    = dag;
    slice.vol    = sites_per_thread;
    slice.bgl    = WFM_BGL;
    slice.sloppy = SloppyPrecision;

    slice.chi = chi          + sites_per_thread*t*SPINOR_SIZE;
    slice.u   = gauge_notpar + sites_per_thread*t*GAUGE_SIZE;
    slice.two_spinor = two_spinor + (sites_per_thread*t*8*PAD_HALF_SPINOR_SIZE*TwoSpinSize())/sizeof(Float);
  }

  /* Workers 1..nthread-1 run on their own threads ... */
  for ( int t = 1; t < nthread; t++ ) {
    thread_create(recon_internal,(void *)&args[t],t);
  }

  /* ... while slice 0 is processed on the calling thread. */
  recon_internal(&args[0]);

  for ( int t = 1; t < nthread; t++ ) {
    thread_join(t);
  }

  delete [] args;

  CPS_NAMESPACE::DiracOp::CGflops += 1320*vol;
}
Esempio n. 2
0
/*
 * wfm::decom
 *
 * Drives the decom_internal kernel over this node's checkerboard volume.
 *
 *   psi : input spinor base pointer; each worker receives an offset of
 *         svol*i*SPINOR_SIZE from it.
 *   u   : gauge field base pointer; the half starting at GAUGE_SIZE*vol is
 *         used when the incoming cb is 1, otherwise the half at u itself.
 *   cb  : checkerboard as passed by the caller; it is combined with
 *         base_parity before selecting the shift table and calling the kernel.
 *   dag : forwarded unchanged to the kernel.
 *
 * Side effect: forces nthread to 1.
 */
void wfm::decom(Float *psi, 
		Float *u, 
		int cb,
		int dag)
{
  /* Checkerboard index after folding in this node's base parity. */
  const int node_cb = (cb + base_parity) & 1;

  /* The gauge half is chosen with the caller's cb, i.e. before the remap. */
  Float *gauge_par = ( cb == 1 ) ? (Float *)u + GAUGE_SIZE*vol
                                 : (Float *)u;

  /* Threading is pinned to a single worker in this routine. */
  nthread = 1;

  /* From here on cb is the checkerboard within this node. */
  cb = node_cb;

  /* Every worker must get an equal share of the sites. */
  const int sites_per_thread = vol/nthread;
  if ( (sites_per_thread * nthread) != vol ) {
    if ( isBoss() ) printf("Bagel threading model broke\n");
    exit(-1);
  }

  decom_internal_arg *args = new decom_internal_arg[nthread];

  /* Fill one argument record per worker: shared fields plus its own slice. */
  for ( int t = 0; t < nthread; t++ ) {
    decom_internal_arg &slice = args[t];

    slice.cb     = cb;
    slice.dag    = dag;
    slice.vol    = sites_per_thread;
    slice.bgl    = WFM_BGL;
    slice.sloppy = SloppyPrecision;

    slice.psi = psi       + sites_per_thread*t*SPINOR_SIZE;
    slice.u   = gauge_par + sites_per_thread*t*GAUGE_SIZE;
    slice.shift_table = &shift_table[cb][t*sites_per_thread*8];
  }

  /* Workers 1..nthread-1 run on their own threads ... */
  for ( int t = 1; t < nthread; t++ ) {
    thread_create(decom_internal,(void *)&args[t],t);
  }

  /* ... while slice 0 is processed on the calling thread. */
  decom_internal(&args[0]);

  for ( int t = 1; t < nthread; t++ ) {
    thread_join(t);
  }

  delete [] args;
}
Esempio n. 3
0
/*
 * wfm::init
 *
 * Sets up the wfm object from a WilsonArg structure:
 *   - copies precision / kernel flags, sublattice sizes and the per-direction
 *     local_comm flags out of *wilson_p;
 *   - derives base_parity, the checkerboard volume and the boundary sizes;
 *   - validates the sublattice shape;
 *   - allocates the temporary spinor, the two-spinor body and, for every
 *     direction with local_comm[mu]==0, the send/receive buffers;
 *   - builds the pointer tables (pointers_init) and starts comms (comm_init).
 *
 * wilson_p->spinor_tmp is written with the address of the temporary spinor.
 * Any validation or allocation failure prints a message (on the boss node)
 * and terminates the process via exit().
 */
void wfm::init(WilsonArg *wilson_p)  /* pointer to Wilson type structure    */
{
  int spinor_words;             /* size of the spinor field on the         */
				/* sublattice checkerboard                 */

  int half_spinor_words;        /* size of the spin-projected "half_spinors*/
                                /* on the sublattice checkerboard including*/
                                /* the communications padding              */

  int slx;                          /* x-direction size of node sublattice */
  int sly;                          /* y-direction size of node sublattice */
  int slz;                          /* z-direction size of node sublattice */
  int slt;                          /* t-direction size of node sublattice */
  int mu;

  SloppyPrecision = wilson_p->SloppyPrecision;
  WFM_BGL             = wilson_p->WFM_BGL;


//  if ( isBoss() ) printf("wfm::init setting up BG/L MMU state\n");
  mmu_optimise();
  mmu_print();

//  CoreCount( wilson_p->CoreCount );
  CoreCount( 1 );

  if ( WFM_BGL ) PAD_HALF_SPINOR_SIZE = 12;
  else  PAD_HALF_SPINOR_SIZE = 16;

  if ( WFM_BGL && (nthread > 1) && SloppyPrecision ) { 
    if ( isBoss() ) printf("Bagel does not maintain L1 coherence in dual core + single precision mode on BlueGene\n");
    if ( isBoss() ) printf("Get on to IBM to give me access to SWOA MMU options, or even better a non-cache image of DRAM\n");
    if ( isBoss() ) printf("If they give me the tools, I'm happy to do the heroics of mainting sfw coherence\n");
    if ( isBoss() ) printf("Bagel insanity check exiting\n");
    exit(-1);
  }

  IR = wilson_p->instruction_reg_num;
/*--------------------------------------------------------------------------*/
/* Set sublattice direction sizes                                           */
/*--------------------------------------------------------------------------*/
  local_latt[0] =  wilson_p->local_latt[0];
  local_latt[1] =  wilson_p->local_latt[1];
  local_latt[2] =  wilson_p->local_latt[2];
  local_latt[3] =  wilson_p->local_latt[3];
  slx = local_latt[0];
  sly = local_latt[1];
  slz = local_latt[2];
  slt = local_latt[3];

/*--------------------------------------------------------------------------*/
/* base_parity: parity of this node's origin, computed from its logical     */
/* coordinates times the sublattice sizes (0 when QMP comms are not used).  */
/*--------------------------------------------------------------------------*/
#if (defined USE_COMMS_QMP) && (!defined UNIFORM_SEED_NO_COMMS)
  QMP_bool_t qmp_inited=QMP_is_initialized();
  if( !qmp_inited ) { 
	if ( isBoss() ) printf("QMP_not_initialized\n");
        exit(-1);
  }
  const int *ncoor = QMP_get_logical_coordinates();
  base_parity =(ncoor[0]*local_latt[0] 
              + ncoor[1]*local_latt[1]
              + ncoor[2]*local_latt[2]
              + ncoor[3]*local_latt[3])&0x1;

#else
  base_parity = 0;
#endif


/*--------------------------------------------------------------------------*/
/* Set periodic wrap back or not                                            */
/*--------------------------------------------------------------------------*/
  local_comm[0] = wilson_p->local_comm[0];
  local_comm[1] = wilson_p->local_comm[1];
  local_comm[2] = wilson_p->local_comm[2];
  local_comm[3] = wilson_p->local_comm[3];
#ifdef  UNIFORM_SEED_NO_COMMS
  /* Without comms every direction must wrap locally: check each direction, */
  /* not just direction 0.                                                  */
  for(int i=0;i<4;i++)
    if(local_comm[i]!=1){
       fprintf(stderr,"wfm::local_comm[%d]=%d!\n",i,local_comm[i]);
       exit(-33);
    }
#endif



/*-----------------------------------------------------------------------*/
/* compute the subgrd volume of each chkbd ... at least two local dims   */
/* must be even for this code to be correct.                             */
/*-----------------------------------------------------------------------*/
  vol = (slx * sly * slz * slt)/2;
  
  nbound[0] = (sly * slz * slt)/2; 
  nbound[1] = (slx * slz * slt)/2;
  nbound[2] = (slx * sly * slt)/2;
  nbound[3] = (slx * sly * slz)/2;


  allbound  = nbound[0]
            + nbound[1]
            + nbound[2]
            + nbound[3];

  /* Each halved face, multiplied back up, must reproduce the full volume;  */
  /* this fails when the integer division above truncated.                  */
  if ( nbound[0] * slx * 2 != (slx*sly*slz*slt) ) {
    if ( isBoss() ) printf("wfm::init Even x logic bomb\n");
    exit(-1);
  }
  if ( nbound[1] * sly * 2 != (slx*sly*slz*slt) ) {
    if ( isBoss() ) printf("wfm::init Even y logic bomb\n");
    exit(-1);
  }
  if ( nbound[2] * slz * 2 != (slx*sly*slz*slt) ) {
    if ( isBoss() ) printf("wfm::init Even z logic bomb\n");
    exit(-1);
  }
  if ( nbound[3] * slt * 2 != (slx*sly*slz*slt) ) {
    if ( isBoss() ) printf("wfm::init Even t logic bomb\n");
    exit(-1);
  }

  /*------------------------------------------------------------------------*/
  /* Check shape                                                            */
  /*------------------------------------------------------------------------*/
  if ( (slx&1)  ) {
    if ( isBoss() ) printf("Bagel is refusing to run as x-sub latt is odd\n");
    exit(-1);
  }
  if ( (sly&1) &&(slz&1)&&(slt&1)  ) {
    if ( isBoss() ) printf("Bagel is refusing to run as y,z,t sub latts are all odd\n");
    exit(-1);
  }


/*--------------------------------------------------------------------------*/
/* Reserve memory for the temporary spinor (needed by mdagm); note that     */
/* twice spinor_words Floats are requested.                                 */
/*--------------------------------------------------------------------------*/
  spinor_words = SPINOR_SIZE * vol;

  spinor_tmp = (Float *)ALLOC(spinor_words*sizeof(Float)*2);
//printf("wfm_init::spinor_tmp=%p\n",spinor_tmp);
//  VRB.Flow(cname,fname,"spinor_tmp=%p\n",spinor_tmp);
#ifdef USE_QALLOC
  // If we used QALLOC, and the ALLOC macro failed we can try 
  // qalloc but without the QFAST flag. Even tho the spinor_tmp is
  // not communicated we leave the QCOMMS bit on in case it puts 
  // spinor tmp into a better place in the memory map
  if(spinor_tmp == 0) {
     if ( isBoss() ) printf("BAGEL: Warning spinor_tmp has spilled out of Edram\n");
     spinor_tmp = (Float *) qalloc(QCOMMS,spinor_words*sizeof(Float)*2); 
  }
#endif  // USE QALLOC

  if(spinor_tmp == 0){
    if ( isBoss() ) printf("wfm::spinor_tmp allocate\n");
    exit(-1);
  }

//~~
//~~ twisted mass fermions:  sets WilsonArg.spinor_tmp to the
//~~ address of temporary spinor in wfm class
//~~    
  wilson_p->spinor_tmp = spinor_tmp;
//~~
/*--------------------------------------------------------------------------*/
/* Reserve memory for the 4 forward and 4 backward spin projected half      */ 
/* spinors.                                                                 */
/*--------------------------------------------------------------------------*/


  /*PAB 10/1/2001 */
  half_spinor_words = NMinusPlus * ND * PAD_HALF_SPINOR_SIZE * vol;

#ifndef USE_COMMS_QMP  
  two_spinor = (Float *)ALLOC(half_spinor_words*sizeof(Float));

#ifdef USE_QALLOC
  // If we are using QALLOC and the ALLOC macro failed we can still 
  // try to get slow memory. Leave on the QCOMMS bit for good memory map
  // placement
  if(two_spinor == 0) {
     if ( isBoss() ) printf("BAGEL : warning two spinors have spilled out of Edram\n");
     two_spinor = (Float *)qalloc(QCOMMS,half_spinor_words*sizeof(Float));
  }
#endif // USE_QALLOC

  if(two_spinor == 0){
    if ( isBoss() ) printf("wfm::two_spinor allocate\n");
    exit(-1);
  }

#else

  // Since two spinor is now communicated because of the Tface 
  // receive I have to allocate it in the style of QMP
  two_spinor_mem_t = QMP_allocate_aligned_memory(
                                             half_spinor_words*sizeof(Float),
	                                     WFM_ALIGN_ARG,
					     (QMP_MEM_COMMS|QMP_MEM_FAST));

  if( two_spinor_mem_t == 0x0 ) { 
    // Try slow allocation
    two_spinor_mem_t = QMP_allocate_aligned_memory(
	                                      half_spinor_words*sizeof(Float),
					      WFM_ALIGN_ARG,
                                              QMP_MEM_COMMS);

    if( two_spinor_mem_t == 0x0 ) { 
      if ( isBoss() ) printf("wfm_init::could not allocate two spinor_mem_t\n");
      exit(-1);
    }
  }
  two_spinor = (Float *)QMP_get_memory_pointer(two_spinor_mem_t);
  if (two_spinor == 0x0) { 
    if ( isBoss() ) printf("wfm::init QMP_get_memory_pointer returned NULL pointer from non NULL QMP_mem_t\n");
    exit(-1);
  } 
#endif

 /*--------------------------------------------------------------------------*/
 /* Reserve memory for the send and receive buffers, one pair per            */
 /* (plus/minus, direction), sized by the boundary nbound[mu].               */
 /*--------------------------------------------------------------------------*/
  for ( int pm = 0;pm<2;pm++ ) {
    for ( mu = 0 ; mu < 4 ; mu++)
    if (local_comm[mu]==0) {

      half_spinor_words = PAD_HALF_SPINOR_SIZE * nbound[mu];

      // These things are (potentially) communicated so need QMP Style 
      // allocation if using QMP
      //
      // Note: buffers are only allocated for directions with
      // local_comm[mu]==0 (see the guard above); the other directions
      // leave recv_bufs/send_bufs untouched here.
#ifndef USE_COMMS_QMP

      // Not using QMP
      recv_bufs[pm][mu] = (Float *)ALLOC(half_spinor_words*sizeof(Float));
#ifdef  USE_QALLOC

      // If ALLOC fails try slow memory but with QCOMMS bit still set
      if( recv_bufs[pm][mu] == 0x0 ) 
	  recv_bufs[pm][mu] = (Float *)qalloc(QCOMMS, half_spinor_words*sizeof(Float));
#endif
      if(recv_bufs[pm][mu] == 0){
	  if ( isBoss() ) printf("wfm::recv_bufs allocate\n");
	  exit(-1);
      }

      send_bufs[pm][mu]=(Float *)SEND_ALLOC(half_spinor_words*sizeof(Float));
#ifdef USE_QALLOC

      // If SEND ALLOC macro fails try slow memory but with QNONCACHE bit
      // still set
      if( send_bufs[pm][mu] == 0 ) 
        send_bufs[pm][mu]=(Float *)qalloc(QNONCACHE, half_spinor_words*sizeof(Float));
#endif

      if(send_bufs[pm][mu] == 0){
        if ( isBoss() ) printf("wfm::send_bufs allocate\n");
        exit(-1);
      }
#else
      /* QMP memory allocation: A little involved */
      /* Must allocate "opaque" QMP_mem_t first and then get 
         aligned pointer out of it. It's either what is below or a 
         very complicated send alloc */

      /* Peter in the CPS allocs recv_bufs with ALLOC = QCOMMS|FAST */
      recv_bufs_mem_t[pm][mu] = QMP_allocate_aligned_memory(half_spinor_words*sizeof(Float),
	                                                      WFM_ALIGN_ARG,
							      (QMP_MEM_COMMS|QMP_MEM_FAST));
      if( recv_bufs_mem_t[pm][mu] == 0x0 ) {
        // If QMP_allocate memory fails with FAST, try SLOW but keep COMMS
        recv_bufs_mem_t[pm][mu] = QMP_allocate_aligned_memory(half_spinor_words*sizeof(Float),
								 WFM_ALIGN_ARG,
								 QMP_MEM_COMMS);
        if( recv_bufs_mem_t[pm][mu] == 0x0 ) { 
	  if ( isBoss() ) printf("wfm::init recv_bufs_mem_t[%d][%d]: QMP_allocate_aligned_memory returned NULL\n", pm, mu);
	  exit(-1);
        }
      }
	
      /* Now get the aligned pointer */
      recv_bufs[pm][mu] =(Float *)QMP_get_memory_pointer(recv_bufs_mem_t[pm][mu]);
	
      if( recv_bufs[pm][mu] == 0x0 ) { 
        if ( isBoss() ) printf("wfm::init recv_bufs[%d][%d]: NULL aligned pointer in non NULL QMP_mem_t struct \n", pm, mu);
	exit(-1);
      }

      /* Now do the same for the send bufs */
      /* In CPS Peter allocates as SEND_ALLOC = QNONCACHE | QFAST */
     send_bufs_mem_t[pm][mu] = QMP_allocate_aligned_memory(half_spinor_words*sizeof(Float),
                                                             WFM_ALIGN_ARG,
                                                             (QMP_MEM_NONCACHE|QMP_MEM_FAST));
     if( send_bufs_mem_t[pm][mu] == 0x0 ) {
       // if allocator fails, try slow but still NONCACHE
       send_bufs_mem_t[pm][mu] = QMP_allocate_aligned_memory(half_spinor_words*sizeof(Float),
                                                               WFM_ALIGN_ARG,
                                                               QMP_MEM_NONCACHE);
       if( send_bufs_mem_t[pm][mu] == 0x0 ) {
         if ( isBoss() ) printf("wfm::init: send_bufs_mem_t[%d][%d]: QMP_allocate_aligned_memory returned NULL\n", pm, mu);
         exit(-1);
       }
     }
            
     /* Now get the aligned pointer */
     send_bufs[pm][mu] =(Float *)QMP_get_memory_pointer(send_bufs_mem_t[pm][mu]);
     if( send_bufs[pm][mu] == 0x0 ) {
       if ( isBoss() ) printf("wfm::init send_bufs[%d][%d]: NULL aligned pointer in non NULL QMP_mem_t struct \n", pm, mu);
	exit(-1);
      }

#endif	

    }
  }



/*----------------------------------------------------------------------*/
/* Build the pointer table                                              */
/*----------------------------------------------------------------------*/
  pointers_init();
  
/*----------------------------------------------------------------------*/
/* Initialise the comms                                                 */
/*----------------------------------------------------------------------*/

  comm_init();

}
Esempio n. 4
0
void wfm::pointers_init(void)
{
    int mu;
    int cb, pm;
    int shift;
    int x,y,z,t;
    int lx,ly,lz,lt;

    int SizeofTwoSpin;

    int bound_index[Ncb][NMinusPlus][ND];
    int local_p,shift_p;
    int local_addr[ND];
    int shift_addr[ND];

    int psite,shift_psite;
    int offset,tab,table_size;
    void *bit_bucket;

    lx = local_latt[0];
    ly = local_latt[1];
    lz = local_latt[2];
    lt = local_latt[3];

    bit_bucket = ALLOC(32);


#ifndef UNIFORM_SEED_TESTING
    if ( SloppyPrecision ) {
        if ( isBoss() ) printf("Configuring for MIXED precision kernels: word sizes %d,%d\n",sizeof(Float),sizeof(float));
    } else {
        if ( isBoss() ) printf("Configuring for uniform precision : word size %d\n",sizeof(Float));
    }
    if ( WFM_BGL ) {
        if ( isBoss() ) printf("Configuring for BG/L kernels\n");
    } else {
        if ( isBoss() ) printf("Configuring for QCDOC kernels\n");
    }
#endif
    if ( SloppyPrecision ) {
        SizeofTwoSpin = sizeof(float);
    } else {
        SizeofTwoSpin = sizeof(Float);
    }
#ifndef UNIFORM_SEED_TESTING
    if ( isBoss() ) printf("Padded 2-spinor size is %d words, %d bytes\n",PAD_HALF_SPINOR_SIZE,
                               PAD_HALF_SPINOR_SIZE*SizeofTwoSpin);
#endif

    for ( cb = 0 ; cb<2 ; cb++) {
        //ND * plus_minus * hvol * parities
        table_size = NMinusPlus*(vol+1)*Nmu;
        shift_table[cb]=(unsigned long *)ALLOC( table_size*sizeof(unsigned long));
#ifdef USE_QALLOC
        // If using qalloc allocator and FAST alloc fails, try slow alloc
        if( shift_table[cb] == 0x0 )
            shift_table[cb]=(unsigned long *)qalloc(QCOMMS,table_size*sizeof(unsigned long));
#endif
        if( shift_table[cb] == 0x0 ) {
            if ( isBoss() ) printf("wfm::pointers_init: shift_tables[%d] alloc failed\n",cb);
            exit(-1);
        }
    }

    for(cb=0; cb<2; cb++) {
        for(pm=0; pm<2; pm++) {
            for(mu=0; mu<4; mu++) {

                face_table[cb][pm][mu]= (unsigned long *)
                                        ALLOC(allbound*sizeof(unsigned long));

#ifdef USE_QALLOC
                // IF using qalloc allocator and FAST alloc fails try slow
                // ALLOC automatically sets the QCOMM bit. We are not
                // going to communicate this, but it may help in setting it
                // in a better place in the memory map if we also set it

                if( face_table[cb][pm][mu] == 0x0 )
                    face_table[cb][pm][mu] = (unsigned long *)
                                             qalloc(QCOMMS,table_size*sizeof(unsigned long));
#endif
                // Final Falure
                if( face_table[cb][pm][mu] == 0x0 ) {
                    if ( isBoss() ) printf("wfm::pointers_init: shift_tables[%d] alloc failed\n",cb);
                    exit(-1);
                }
            }
        }
    }

    for( mu = 0 ; mu < Nmu ; mu++ ) {
        bound_index[0][0][mu] = 0;
        bound_index[0][1][mu] = 0;
        bound_index[1][0][mu] = 0;
        bound_index[1][1][mu] = 0;
    }

    /*
     * For now point send buffers beyond end of body.
     * wfm::scale_ptr will remap this to the real send buffer, once
     * it gets assigned with a pointer value.
     */

    /* This bit points to the first element after the end of the
       body for send_offset[Minus][0] */
    send_offset[Minus][0]  = 8* vol; /*We interleave the 8 2spinors*/

    /* Now we do the rest of the send offsets packing densely.
       nbound[mu-1] sites after the previous */
    send_offset[Minus][1]  = send_offset[Minus][0] + nbound[0];
    send_offset[Minus][2]  = send_offset[Minus][1] + nbound[1];
    send_offset[Minus][3]  = send_offset[Minus][2] + nbound[2];

    /* Now the PLUS directions. They are the same as the minus directions
       pushed by allbound */
    send_offset[Plus][ 0 ] = send_offset[Minus][0] + allbound;
    send_offset[Plus][ 1 ] = send_offset[Minus][1] + allbound;
    send_offset[Plus][ 2 ] = send_offset[Minus][2] + allbound;
    send_offset[Plus][ 3 ] = send_offset[Minus][3] + allbound;


    /* Now work out the shifts */
    for ( pm = 0; pm<2; pm++ ) {

        /* Shift direction */
        if ( pm == Plus ) { /*forwards or backwards*/
            shift = 1;
        } else {
            shift = -1;
        }

        /*Loop naively over local lattice*/
        for ( t=0 ; t<lt ; t++ ) {
            local_addr[3] = t;
            for ( z=0 ; z<lz ; z++ ) {
                local_addr[2] = z;
                for ( y=0 ; y<ly ; y++ ) {
                    local_addr[1] = y;
                    for ( x=0 ; x<lx ; x++ ) {
                        local_addr[0] = x;

                        /* local_addr contains the coordinates of the point */

                        /* get the parity of the point */
                        local_p = wfm::local_parity(x,y,z,t);

                        /* get the site of the point for that parity */
                        psite   = wfm::local_psite(local_addr,local_latt);

                        for ( mu = 0 ; mu < 4; mu ++) {

                            /* Get the coordinates of the point shifted in direction
                               +/- mu. (+/- is encoded in shift, depending on where
                               we are in the +/- loop) */

                            shift_addr[0] = x;
                            shift_addr[1] = y;
                            shift_addr[2] = z;
                            shift_addr[3] = t;

                            shift_addr[mu] += shift;

                            /* Parity of shifted site is opposite that of the unshifted one */
                            shift_p = 1-local_p ;

                            /*
                            * Offset to the source site in the 2 spinor array is common
                            */

                            /* Get the interleaved site address of the local point
                               (from which we are shifting */
                            tab = wfm::interleave_site(pm,mu,psite);

                            /*
                            * Get the offset to the destination site in the 2 spinor array
                            * If destination is in interior, trivial.
                            * Also implement periodic wrap if local_comm[mu].
                            */


                            if (((shift_addr[mu] >= 0 ) && (shift_addr[mu]<local_latt[mu]) )
                                    || local_comm[mu] ) {

                                /*Local periodicity, does nothing if interior*/
                                shift_addr[mu]= (shift_addr[mu] + local_latt[mu])%local_latt[mu];
                                shift_psite   = local_psite(shift_addr,local_latt);
                                offset = interleave_site(pm,mu,shift_psite) ;
                                shift_table[local_p][tab] = ((unsigned long)two_spinor +
                                                             offset * PAD_HALF_SPINOR_SIZE * SizeofTwoSpin);

                            } else { /*non-local and we're on the boundary*/


                                /*Local periodicity, does nothing if interior*/
                                shift_addr[mu]= (shift_addr[mu] + local_latt[mu])%local_latt[mu];
                                shift_psite   = local_psite(shift_addr,local_latt);
                                offset = interleave_site(pm,mu,shift_psite) ;
                                /*
                                 * The minus face receives something written by the
                                 * plus face. Thus the face table should contain the pointer
                                 * to the site we would have sent to in the above case.
                                 */
                                face_table[local_p][pm][mu][bound_index[local_p][pm][mu]]
                                    = offset;

                                /*
                                 * And we use face_table[local_p][pm][mu] in conjunction
                                 * with the send buffer[pm][mu] and source parity local_p
                                 */
                                offset = bound_index[local_p][pm][mu];

                                shift_table[local_p][tab] = (unsigned long) send_bufs[pm][mu]
                                                            + offset * PAD_HALF_SPINOR_SIZE * SizeofTwoSpin;

                                bound_index[local_p][pm][mu] ++;
                            }



                        } /*mu*/
                    }/*x*/
                }/*y*/
            }/*z*/
        }/*t*/
    }/*pm*/

    int site = (lx*ly*lz*lt)/2 ;
    for(pm=0; pm<2; pm++) {
        for(mu=0; mu<3; mu++) {
            tab = interleave_site(pm,mu,site);
            shift_table[0][tab] = (unsigned long) bit_bucket;
            shift_table[1][tab] = (unsigned long) bit_bucket;
        }
    }

    //  int bound_index[Ncb][NMinusPlus][ND];
    for( cb = 0 ; cb < 2 ; cb++ ) {
        for( pm = 0 ; pm < 2 ; pm++ ) {
            for( mu = 0 ; mu < Nmu ; mu++ ) {
                if ( !local_comm[mu] ) {
                    if ( bound_index[cb][pm][mu] != nbound[mu] ) {
                        printf("Boundary size mismatch[%d][%d][mu=%d] : %d != %d \b",
                               cb,pm,mu,bound_index[cb][local_p][mu],nbound[mu]);
                        exit(-1);
                    }
                } else {
                    if ( bound_index[cb][pm][mu] != 0 ) {
                        printf("Boundary size mismatch[%d][%d] mu=%d\b",
                               cb,pm,mu);
                        exit(-1);
                    }
                }
            }
        }
    }
//  scale_ptr();
    return;
}
Esempio n. 5
0
/*
 * wfm::dslash
 *
 * Applies the operator in three stages: decom(psi,u,cb,dag), a
 * comm_start/comm_complete exchange for checkerboard cb, then
 * recon(chi,u,cb,dag).
 *
 *   chi : output spinor, passed to recon
 *   u   : gauge field, passed to both decom and recon
 *   psi : input spinor, passed to decom
 *   cb  : checkerboard, passed through to every stage
 *   dag : passed through to decom and recon
 *
 * Build-time debug options:
 *   DEBUG_BENCHMARK_COMMS : repeats the comms exchange 100 times and prints
 *                           the measured bandwidth.
 *   DEBUG_OUTPUT_VECTORS  : dumps the two-spinor and result data to files
 *                           named "2spin.<id>.<n>" / "recon.<id>.<n>" and then
 *                           calls exit(0).
 */
void wfm::dslash(Float *chi, 
		 Float *u, 
		 Float *psi, 
		 int cb,
		 int dag)
{

  /*
   * To a first approximation, we simply
   * remap the arguments into a form acceptable
   * to the assembler, then call it
   */
  /*
   *Pull in the first Psi to cache early
   */
  cache_touch(psi);
  cache_touch(psi+4);
  cache_touch(psi+8);
  cache_touch(psi+12);
  cache_touch(psi+16);
  cache_touch(psi+20);
  decom(psi,u,cb,dag);

#ifdef DEBUG_BENCHMARK_COMMS
  double ndata = 2*2*allbound * 12 * TwoSpinSize() * 1.0E-6 * 100;
  struct timeval start,stop,delta;
  gettimeofday(&start,NULL);

  for(int i=0;i<100;i++) {
#endif
  comm_start(cb); 
  /*
   * Hackers: you could split here and do something else...
   * Such as DWF fifth dimension, or a clover term etc...
   * Might as well pull in a few sites worth of pointer table 
   * while we're waiting for the comms
   */
  comm_complete(cb);
#ifdef DEBUG_BENCHMARK_COMMS
  }
  gettimeofday(&stop,NULL);
  timersub(&stop,&start,&delta);
  double seconds = delta.tv_usec * 1.0E-6 + delta.tv_sec;
  if ( isBoss() ) printf("Comms %le MB in %le seconds = %le MB/s\n",ndata,seconds,ndata/seconds);
  ndata = 2*2*allbound * 12 * TwoSpinSize() ;
  /* ndata is a double, so it must be printed with a floating conversion. */
  if ( isBoss() ) printf("ndata = %le \n",ndata);
#endif
#ifdef DEBUG_OUTPUT_VECTORS 
  static int file_cnt;
  {
    char buf[256];
  snprintf(buf,sizeof(buf),"2spin.%d.%d",UniqueID(),file_cnt);
  FILE *fp = fopen(buf,"w");
  if ( fp == NULL ) {
    printf("wfm::dslash could not open %s\n",buf);
    exit(-1);
  }
  for(int i=0;i<vol;i++) {
    for(int pm=0;pm<2;pm++){
      for(int mu=0;mu<4;mu++){
        int offset = interleave_site(pm,mu,i);
        for(int s=0;s<2;s++){
        for(int c=0;c<3;c++){
        for(int r=0;r<2;r++){
	  /* Component index inside one padded two-spinor; the layout
	     differs between the two kernel flavours. */
	  int scri;
          if ( WFM_BGL ) scri = r + s*6+c*2;        
	  else scri = r + s*2+c*4;        
          int gbl[4];
          site_to_global(cb, i, gbl, local_latt );
          if ( SloppyPrecision ) {
            float * pointer = (float *) two_spinor;
            fprintf(fp,"%d %d %d %d %d %d %d %d %d %e\n",gbl[0],gbl[1],gbl[2],gbl[3],
              pm,mu,s,c,r,pointer[PAD_HALF_SPINOR_SIZE*offset+scri]);
          } else { 
            Float * pointer = (Float *) two_spinor;
            fprintf(fp,"%d %d %d %d %d %d %d %d %d %e\n",gbl[0],gbl[1],gbl[2],gbl[3],
                    pm,mu,s,c,r,pointer[PAD_HALF_SPINOR_SIZE*offset+scri]);
          }
        }}}
      }
    }
  }
  fclose(fp);}
#endif 

  /* Pull in the start of the two spinor body before reconstructing. */
  cache_touch(two_spinor);
  cache_touch(two_spinor+4);
  cache_touch(two_spinor+8); 
  recon(chi,u,cb,dag);
#ifdef DEBUG_OUTPUT_VECTORS
  {
  char buf[256];
  snprintf(buf,sizeof(buf),"recon.%d.%d",UniqueID(),file_cnt++);
  FILE *fp = fopen(buf,"w");
  if ( fp == NULL ) {
    printf("wfm::dslash could not open %s\n",buf);
    exit(-1);
  }
  /* NOTE(review): chi is indexed with the interleaved (pm,mu,site) offset
     times SPINOR_SIZE, which looks like it reads past a vol-sized spinor
     field; confirm the intended indexing for this debug dump. */
  for(int i=0;i<vol;i++) {
    for(int pm=0;pm<2;pm++){
      for(int mu=0;mu<4;mu++){
        int offset = interleave_site(pm,mu,i);
        for(int s=0;s<4;s++){
        for(int c=0;c<3;c++){
        for(int r=0;r<2;r++){
	  int scri;
          scri = r + s*6+c*2;        
	  Float * pointer = (Float *) chi;
          int gbl[4];
          site_to_global(cb, i, gbl, local_latt );
	  fprintf(fp,"%d %d %d %d %d %d %d %d %d %d %e\n",gbl[0],gbl[1],gbl[2],gbl[3],
                   i,pm,mu,s,c,r,pointer[SPINOR_SIZE*offset+scri]);
        }}}
      }
    }
  }
  fclose(fp);}
  exit(0);
#endif 
  return;
}
Esempio n. 6
0
/// Handles this bullet hitting `hit`.
///
/// Emits the impact flash and 50 debris particles at the bullet's position,
/// applies falloff damage to nearby non-boss enemies when the bullet was
/// fired in the offensive stance, emits two extra flashes around `hit` if it
/// is an enemy, and raises the damage modifier of an EnemyBossRed target.
///
/// @param hit  The entity that was struck.
/// @return Always true.
bool PlayerBullet::onImpact(Entity* hit)
{
	const bool offensive = (bulletStance == PlayerEntity::Stance::Offensive);

	auto flash = GameGlobals::get()->impact;

	auto origin = getComponent<sz::Transform>()->getPosition();

	auto tint = getComponent<sz::Renderer>()->getColor();

	// Main impact flash; the offensive stance produces the larger one.
	flash->setColor(tint);
	if(offensive)
	{
		flash->setScale(0.6f, 1.f + (thor::random(0, 10) >= 9 ? 1.5f : 0.f));
	}
	else
	{
		flash->setScale(0.3f, 0.6f + (thor::random(0, 10) >= 9 ? 0.6f : 0.f));
	}

	flash->setPosition(origin);
	flash->emit(1);

	// Debris: 50 small particles, each with its own random speed and cone,
	// aimed along m_angle or m_angle - PI.
	auto debris = GameGlobals::get()->impactbits;

	debris->setColor(tint);
	debris->setScale(0.02f, 0.1f);
	debris->setPosition(origin);

	for(int particle = 0; particle < 50; ++particle)
	{
		float speed = thor::random(0.f, 250.f);

		debris->setVelocityCone(speed, m_angle + (thor::random(0, 2) == 0 ? -PI : 0.f), thor::random(0.f, 50.f));
		debris->emit(1);
	}

	// Splash damage: only for offensive bullets, never to the entity that
	// was hit directly and never to bosses.
	if(offensive)
	{
		auto nearby = getComponent<sz::Physics>()->queryRadius(150.f);

		for(auto candidate : nearby)
		{
			auto victim = dynamic_cast<EnemyEntityBase*>(candidate);
			if(!victim) continue;
			if(victim == hit) continue;
			if(victim->isBoss()) continue;

			auto victimPos = victim->call(&sz::Transform::getPosition);

			// NOTE(review): 22500 equals the query radius squared (150*150);
			// confirm whether sz::distance returns a squared distance.
			float ratio = sz::distance(origin, victimPos) / 22500.f;
			ratio = std::min(1.f, ratio);
			float falloff = (1.f - (ratio * ratio)) * 0.65f;

			float angle = sz::getAngle(origin, victimPos);

			victim->applyDamage(DamageReport(m_shooter, m_damage * falloff, angle)); 
		}
	}

	// Two extra flashes scattered around a struck enemy.
	if(auto enemy = dynamic_cast<EnemyEntityBase*>(hit))
	{
		auto pos = enemy->getTransform->getPosition();

		if(enemy->isBoss())
		{
			flash->setColor(tint);
			flash->setScale(1.f, 1.4f + (thor::random(0, 10) >= 9 ? 0.4f : 0.f));
		}
		else
		{
			flash->setColor(sf::Color::White);
			flash->setScale(0.3f, 0.4f + (thor::random(0, 10) >= 9 ? 0.3f : 0.f));
		}

		for(int burst = 0; burst < 2; ++burst)
		{
			flash->setPosition(pos + sf::Vector2f(thor::random(-30.f, 30.f), thor::random(-30.f, 30.f)));
			flash->emit(1);
		}
	}

	// The red boss takes increasing damage with every hit.
	if(auto boss = dynamic_cast<EnemyBossRed*>(hit))
	{
		boss->increaseDmgMod(0.2f);
	}

	return true;
}