Пример #1
0
/*
 * Exercise shmem_int_add against slot T of the global `target` array.
 *
 * PE 0 runs pre_op_check() on the current slot value, then every PE zeroes
 * its local slot and joins a barrier.  PE 1 issues `iterations` atomic
 * increments (each followed by shmem_fence()) plus one final increment, all
 * aimed at PE 0.  Every other PE calls wait_until() on its own slot with
 * the expected total of iterations + 1.
 *
 * me         - rank of the calling PE
 * iterations - number of fenced increments PE 1 performs before the last one
 * T          - index into `target` selecting the slot under test
 */
static inline void atomic_add(int me, int iterations, int T)
{
    int sent;

    if (me == 0)
        pre_op_check(__func__, target[T], iterations, 0);

    /* Start from a known value on every PE before anybody adds. */
    target[T] = 0;
    shmem_barrier_all();

    if (me != 1) {
        /* Block until the slot has reached the full expected count. */
        wait_until(&target[T], (iterations+1), 0);
    } else {
        sent = 0;
        while (sent < iterations) {
            shmem_int_add(&target[T], 1, 0);
            shmem_fence();
            sent++;
        }
        /* One extra, unfenced increment brings the total to iterations + 1. */
        shmem_int_add(&target[T], 1, 0);

        if (debug)
            printf("PE 1 done with operation\n");
    }

    if (verbose && me == 1)
        printf("SHMEM %s finished\n", __func__);
}
/*
 * Time `iterations` calls of shmem_int_add and report the aggregate result.
 *
 * PEs with v.me < v.pairs each add a random int into buffer[i].int_type on
 * PE v.nxtpe for i in [0, iterations).  Per-PE rate and latency are summed
 * across all v.npes PEs and handed to print_operation_rate().
 *
 * v          - per-PE benchmark parameters (me, pairs, nxtpe, npes are read)
 * buffer     - array that is both touched locally and used as the remote
 *              target address of the atomic adds
 * iterations - number of atomic adds to time
 *
 * Always returns 0.
 *
 * NOTE(review): the memset below assumes `buffer` holds ITERATIONS elements,
 * while the timed loop indexes up to `iterations`; confirm callers never
 * pass iterations > ITERATIONS.
 */
double
benchmark_add (struct pe_vars v, union data_types *buffer,
                unsigned long iterations)
{
    int64_t begin, end;
    /* Same type as `iterations`: an int counter would overflow (undefined
     * behaviour) for iteration counts above INT_MAX. */
    unsigned long i;
    /* Kept static: these are the source/destination of the reductions below,
     * which presumably must be symmetric objects (see OpenSHMEM spec). */
    static double rate = 0, sum_rate = 0, lat = 0, sum_lat = 0;

    /*
     * Touch memory
     */
    memset(buffer, CHAR_MAX * drand48(), sizeof(union data_types
                [ITERATIONS]));

    shmem_barrier_all();

    if (v.me < v.pairs) {
        int value = INT_MAX * drand48();

        begin = TIME();
        for (i = 0; i < iterations; i++) {
            shmem_int_add(&(buffer[i].int_type), value, v.nxtpe);
        }
        end = TIME();

        /* presumably TIME() is in microseconds, making this ops/second --
         * TODO confirm against the TIME() definition */
        rate = ((double)iterations * 1e6) / (end - begin);
        lat = (end - begin) / (double)iterations;
    }

    /* PEs that did not run the timed loop contribute their initial zeros. */
    shmem_double_sum_to_all(&sum_rate, &rate, 1, 0, 0, v.npes, pwrk1, psync1);
    shmem_double_sum_to_all(&sum_lat, &lat, 1, 0, 0, v.npes, pwrk2, psync2);
    print_operation_rate(v.me, "shmem_int_add", sum_rate/1e6, sum_lat/v.pairs);

    return 0;
}
Пример #3
0
/*
 * Every PE except PE 0 atomically adds its own rank to `counter` on PE 0.
 * After a global barrier, PE 0 prints the accumulated value.
 */
int
main (int argc, char *argv[])
{
  int rank;

  start_pes (0);
  rank = _my_pe ();

  /* PE 0 only collects; all higher ranks contribute their rank number. */
  if (rank >= 1)
    shmem_int_add (&counter, rank, 0);

  shmem_barrier_all ();

  if (rank == 0)
    printf ("counter = %d\n", counter);

  return 0;
}
Пример #4
0
/*
 * Pointer-argument wrapper around shmem_int_add: `value` and `pe` are
 * received by address and dereferenced before being forwarded; `target`
 * is passed through unchanged.
 */
void FORTRANIFY (shmem_int4_add) (int *target, int *value, int *pe)
{
    const int addend = *value;
    const int dest_pe = *pe;

    shmem_int_add (target, addend, dest_pe);
}
Пример #5
0
// Transfer one AtomData record (plus its local id) from rank `from` to rank
// `to` through the p2p_buf staging buffer.
//
// Sender (comm.comm.rank == from):
//   1. serialises local_id and the atom fields into p2p_buf,
//   2. waits until its local sync_send_flag[to] equals 1 (set remotely by
//      the receiver to announce it is ready),
//   3. puts p2p_buf to the receiver, resets its own sync_send_flag[to] and
//      raises sync_recv_flag[from] on the receiver.
// Receiver (comm.comm.rank == to):
//   1. clears its sync_recv_flag[from] and raises sync_send_flag[to] on the
//      sender,
//   2. waits until sync_recv_flag[from] equals 1,
//   3. deserialises p2p_buf in the same field order, resizing the atom's
//      potential/core arrays when the received row counts differ,
//   4. resets sync_recv_flag[from].
//
// comm     - communication context; only comm.comm.rank is read here
// from, to - source and destination ranks (used directly as PE numbers)
// local_id - sent by the source, overwritten on the destination
// atom     - read on the source, overwritten on the destination
// tag      - not used by this implementation
//
// The buffers used in this func are pre-allocated within
// initializeCommunication().
// NOTE(review): the put always moves 1048576 bytes regardless of how many
// bytes were packed; confirm p2p_buf is at least that large on every PE.
void communicateSingleAtomData(LSMSCommunication &comm, int from, int to, int &local_id, AtomData &atom, int tag)
{
  int t,i;
  const int ITER_MAX=1;

  if(comm.comm.rank==from)
  {
    for(i=0;i<ITER_MAX;i++)
    {
      int pos=0;

      // Scalar header fields.
      memcpy(&p2p_buf[pos],&local_id,int_size); pos+=int_size;
      memcpy(&p2p_buf[pos],&atom.jmt,int_size); pos+=int_size;
      memcpy(&p2p_buf[pos],&atom.jws,int_size); pos+=int_size;
      memcpy(&p2p_buf[pos],&atom.xstart,double_size); pos+=double_size;
      memcpy(&p2p_buf[pos],&atom.rmt,double_size); pos+=double_size;
      memcpy(&p2p_buf[pos],atom.header,80*char_size); pos+=80*char_size;
      memcpy(&p2p_buf[pos],&atom.alat,double_size); pos+=double_size;
      memcpy(&p2p_buf[pos],&atom.efermi,double_size); pos+=double_size;
      memcpy(&p2p_buf[pos],&atom.vdif,double_size); pos+=double_size;
      memcpy(&p2p_buf[pos],&atom.ztotss,double_size); pos+=double_size;
      memcpy(&p2p_buf[pos],&atom.zcorss,double_size); pos+=double_size;
      memcpy(&p2p_buf[pos],atom.evec,3*double_size); pos+=3*double_size;
      memcpy(&p2p_buf[pos],&atom.nspin,int_size); pos+=int_size;
      memcpy(&p2p_buf[pos],&atom.numc,int_size); pos+=int_size;

      // Row count of the potential arrays, followed by 2*t values each.
      t=atom.vr.n_row();

      memcpy(&p2p_buf[pos],&t,int_size); pos+=int_size;
      memcpy(&p2p_buf[pos],&atom.vr(0,0),2*t*double_size); pos+=2*t*double_size;
      memcpy(&p2p_buf[pos],&atom.rhotot(0,0),2*t*double_size); pos+=2*t*double_size;
      memcpy(&p2p_buf[pos],&atom.corden(0,0),2*t*double_size); pos+=2*t*double_size;

      // Row count of the core-state arrays, followed by 2*t values each.
      t=atom.ec.n_row();

      memcpy(&p2p_buf[pos],&t,int_size); pos+=int_size;
      memcpy(&p2p_buf[pos],&atom.ec(0,0),2*t*double_size); pos+=2*t*double_size;
      memcpy(&p2p_buf[pos],&atom.nc(0,0),2*t*int_size); pos+=2*t*int_size;
      memcpy(&p2p_buf[pos],&atom.lc(0,0),2*t*int_size); pos+=2*t*int_size;
      memcpy(&p2p_buf[pos],&atom.kc(0,0),2*t*int_size); pos+=2*t*int_size;

      // Wait for the receiver to announce that its buffer may be overwritten.
      shmem_int_wait_until((sync_send_flag+to),_SHMEM_CMP_EQ,1);
      shmem_putmem(p2p_buf, p2p_buf, 1048576, to);
      // Complete the put before raising the receiver's flag: without this
      // the flag update may become visible before the payload, and the
      // receiver would unpack stale data.
      shmem_quiet();
      shmem_int_add((sync_send_flag+to),-1,comm.comm.rank);
      shmem_int_add((sync_recv_flag+comm.comm.rank),1,to);
      shmem_quiet();
    }
  }

  if(comm.comm.rank==to)
  {
    for(i=0;i<ITER_MAX;i++)
    {
      int pos=0;

      // Arm the local "data arrived" flag, then tell the sender to go ahead.
      sync_recv_flag[from]=0;
      shmem_int_add((sync_send_flag+comm.comm.rank),1,from);
      shmem_quiet();
      shmem_int_wait_until((sync_recv_flag+from),_SHMEM_CMP_EQ,1);

      // Unpack in exactly the order used by the sender.
      memcpy(&local_id,&p2p_buf[pos],int_size); pos+=int_size;
      memcpy(&atom.jmt,&p2p_buf[pos],int_size); pos+=int_size;
      memcpy(&atom.jws,&p2p_buf[pos],int_size); pos+=int_size;
      memcpy(&atom.xstart,&p2p_buf[pos],double_size); pos+=double_size;
      memcpy(&atom.rmt,&p2p_buf[pos],double_size); pos+=double_size;
      memcpy(atom.header,&p2p_buf[pos],80*char_size); pos+=80*char_size;
      memcpy(&atom.alat,&p2p_buf[pos],double_size); pos+=double_size;
      memcpy(&atom.efermi,&p2p_buf[pos],double_size); pos+=double_size;
      memcpy(&atom.vdif,&p2p_buf[pos],double_size); pos+=double_size;
      memcpy(&atom.ztotss,&p2p_buf[pos],double_size); pos+=double_size;
      memcpy(&atom.zcorss,&p2p_buf[pos],double_size); pos+=double_size;
      memcpy(atom.evec,&p2p_buf[pos],3*double_size); pos+=3*double_size;
      memcpy(&atom.nspin,&p2p_buf[pos],int_size); pos+=int_size;
      memcpy(&atom.numc,&p2p_buf[pos],int_size); pos+=int_size;

      memcpy(&t,&p2p_buf[pos],int_size); pos+=int_size;

      // Make room for the incoming potential arrays if the sizes differ.
      if(t!=atom.vr.n_row()) atom.resizePotential(t);

      memcpy(&atom.vr(0,0),&p2p_buf[pos],2*t*double_size); pos+=2*t*double_size;
      memcpy(&atom.rhotot(0,0),&p2p_buf[pos],2*t*double_size); pos+=2*t*double_size;
      memcpy(&atom.corden(0,0),&p2p_buf[pos],2*t*double_size); pos+=2*t*double_size;
      memcpy(&t,&p2p_buf[pos],int_size); pos+=int_size;

      // Make room for the incoming core-state arrays if the sizes differ.
      if(t!=atom.nc.n_row()) atom.resizeCore(t);

      memcpy(&atom.ec(0,0),&p2p_buf[pos],2*t*double_size); pos+=2*t*double_size;
      memcpy(&atom.nc(0,0),&p2p_buf[pos],2*t*int_size); pos+=2*t*int_size;
      memcpy(&atom.lc(0,0),&p2p_buf[pos],2*t*int_size); pos+=2*t*int_size;
      memcpy(&atom.kc(0,0),&p2p_buf[pos],2*t*int_size); pos+=2*t*int_size;

      // Return the local flag to 0 for the next transfer from this sender.
      shmem_int_add((sync_recv_flag+from),-1,comm.comm.rank);
      shmem_quiet();
    }
  }
}