コード例 #1
0
ファイル: ddi_armci.c プロジェクト: ryanolson/ddi
/*
 * Set up the ARMCI-managed memory that DDI uses on this process.
 *
 * size: number of bytes requested from ARMCI_Malloc for the main segment.
 *
 * Steps, in order:
 *   1. ARMCI_Malloc `size` bytes into gv(armci_mem_addr) and point
 *      gv(dda_index) at this process's entry (indexed by comm->me).
 *   2. ARMCI_Malloc a block of two armci_counter_t values, store 0 into both
 *      of this process's counters, then call DDI_ARMCI_DLB_addr() and
 *      DDI_ARMCI_GDLB_addr().
 *   3. Create MAX_DD_ARRAYS+1 ARMCI mutexes and record MAX_DD_ARRAYS in
 *      gv(dlb_access).
 *
 * Any ARMCI call that returns a non-zero status is reported through
 * ARMCI_Error() followed by Fatal_error(911).
 */
void DDI_ARMCI_Memory_init(size_t size) {
  int code;
  const DDI_Comm *comm = (const DDI_Comm *) Comm_find(DDI_COMM_WORLD);
  
  // malloc ARMCI memory
  code = ARMCI_Malloc((void*)gv(armci_mem_addr),size);
  if (code != 0) {  // any non-zero status is a failure, not only positive ones
    ARMCI_Error("ARMCI_Malloc failed",code);
    Fatal_error(911);
  }
  gv(dda_index) = (DDA_Index*)gv(armci_mem_addr)[comm->me];

  // malloc ARMCI counter block and set addresses
  code = ARMCI_Malloc((void*)gv(armci_cnt_addr),sizeof(armci_counter_t)*2);
  if (code != 0) {
    ARMCI_Error("ARMCI_Malloc failed",code);
    Fatal_error(911);
  }
  // zero both of this process's counters
  ARMCI_PutValueLong(0, (void*)(gv(armci_cnt_addr)[comm->me]+0), comm->me);
  ARMCI_PutValueLong(0, (void*)(gv(armci_cnt_addr)[comm->me]+1), comm->me);
  DDI_ARMCI_DLB_addr();
  DDI_ARMCI_GDLB_addr();
  
  // create mutexes
  code = ARMCI_Create_mutexes(MAX_DD_ARRAYS+1);
  if (code != 0) {
    ARMCI_Error("ARMCI_Create_mutexes failed",code);
    Fatal_error(911);
  }
  // the extra mutex (index MAX_DD_ARRAYS) is the one recorded as dlb_access
  gv(dlb_access) = MAX_DD_ARRAYS;
}
コード例 #2
0
/*
 * ARMCI integer global-operation test.
 *
 * Every process fills a DATA_SZ-element buffer with (rank+1), negated at odd
 * indices, and reduces it with armci_msg_igop using "absmin" and then
 * "absmax".  The checks below expect 1 in every slot after "absmin" and
 * nproc in every slot after "absmax"; any mismatch is reported and then
 * ARMCI_Error("Fail", 1) is called.
 */
int main(int argc, char ** argv) {
  int    rank, nproc, i;
  int   *buf;

  MPI_Init(&argc, &argv);
  ARMCI_Init();

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nproc);

  if (rank == 0) printf("Starting ARMCI GOP test with %d processes\n", nproc);

  buf = malloc(DATA_SZ * sizeof *buf);
  if (buf == NULL) {
    /* Without this check the fill loop below would dereference NULL. */
    ARMCI_Error("malloc failed", 1);
  }

  if (rank == 0) printf(" - Testing ABSMIN\n");

  for (i = 0; i < DATA_SZ; i++)
    buf[i] = (rank+1) * ((i % 2) ? -1 : 1);

  armci_msg_igop(buf, DATA_SZ, "absmin");

  for (i = 0; i < DATA_SZ; i++)
    if (buf[i] != 1) {
      printf("Err: buf[%d] = %d expected 1\n", i, buf[i]);
      ARMCI_Error("Fail", 1);
    }

  if (rank == 0) printf(" - Testing ABSMAX\n");

  for (i = 0; i < DATA_SZ; i++)
    buf[i] = (rank+1) * ((i % 2) ? -1 : 1);

  armci_msg_igop(buf, DATA_SZ, "absmax");

  for (i = 0; i < DATA_SZ; i++)
    if (buf[i] != nproc) {
      printf("Err: buf[%d] = %d expected %d\n", i, buf[i], nproc);
      ARMCI_Error("Fail", 1);
    }

  free(buf);

  if (rank == 0) printf("Pass.\n");

  ARMCI_Finalize();
  MPI_Finalize();

  return 0;
}
コード例 #3
0
/*
 * Copy elements [start, end] of the distributed vector into vec_local.
 *
 * proc_row_list[p] is used as the exclusive upper row bound owned by process
 * p.  The first loop finds the processes whose bound exceeds `start` and
 * `end`; the second walks that process range, copying the local part directly
 * and issuing a non-blocking ARMCI get for each remote part.  A new aggregate
 * handle (gHandle[++count]) is set up whenever the target process differs
 * from prev_proc.  The gets are not waited on here.
 *
 * n is not used by this function.
 */
static void get_data(int n, int start, int end, double *vec_local,
                     double **vec) {
    int p, k, rc, nbytes, row_base;
    int first_proc = -1, last_proc = -1;
    int lo, hi;

    /* Locate the owning processes of the two end points. */
    for (p = 0; p < nproc; p++) {
        if (first_proc < 0 && proc_row_list[p] > start) first_proc = p;
        if (last_proc < 0 && proc_row_list[p] > end) last_proc = p;
    }
    if (first_proc < 0 || last_proc < 0) ARMCI_Error("Invalid Process Ids", -1);

    for (p = first_proc; p <= last_proc; p++) {
        /* First global row held by process p. */
        row_base = (p == 0) ? 0 : proc_row_list[p-1];

        /* Clip this process's row range to the requested [start, end]. */
        lo = (p == first_proc) ? start : row_base;
        hi = (p == last_proc) ? end : proc_row_list[p] - 1;

        if (p != prev_proc) {
            ++count;
            prev_proc = p;
            ARMCI_INIT_HANDLE(&gHandle[count]);
            ARMCI_SET_AGGREGATE_HANDLE(&gHandle[count]);
        }

        if (p == me) {
            /* Local rows: plain copy. */
            for (k = lo; k <= hi; k++) vec_local[k] = vec[me][k - row_base];
            continue;
        }

        /* Remote rows: mark the first destination slot, then fetch. */
        nbytes = (hi - lo + 1) * sizeof(double);
        vec_local[lo] = -1;
#if 0
        rc = ARMCI_Get(&vec[p][lo - row_base], &vec_local[lo], nbytes, p);
#else
        rc = ARMCI_NbGet(&vec[p][lo - row_base], &vec_local[lo], nbytes, p,
                         &gHandle[count]);
#endif
        if (rc) ARMCI_Error("armci_nbget failed\n", rc);
    }
}
コード例 #4
0
ファイル: perf_nb.c プロジェクト: dmlb2000/nwchem-cml
void verify_results(int op, int *elems) {
    int i, j;
      
    switch(op) {

    case PUT:
      if(!(me==0))
	for(j=0; j<elems[1]; j++) {
	  if( ARMCI_ABS(ddst[me][j]-j*1.001) > 0.1) {
	    ARMCI_Error("put failed...Invalid Value Obtained..1", 0);
	  }
	}
      MP_BARRIER();
      if(DEBUG) if(me==0) printf("  verifying put ..O.K.\n");
      break;
      
    case GET:
      if(me==0) {
	for(i=1; i<nproc; i++) {
	  for(j=0; j<elems[1]; j++) {
	    if( ARMCI_ABS(ddst[me][i*elems[1]+j]-j*1.001*(i+1)) > 0.1) 
	      ARMCI_Error("get failed...Invalid Value Obtained..1", 0);
	  }
	}
      }
      MP_BARRIER();
      if(DEBUG) if(me==0) printf("  verifying get ..O.K.\n\n");
      break;
      
    case ACC: 
      if(me==0)
	for(j=0; j<elems[1]; j++) {
	  /*printf("ddst[%d][%d] = %lf\n", me, j, ddst[me][j]);
	    fflush(stdout); */
	  if( ARMCI_ABS(ddst[me][j]-(double)nproc) > 0.1) {
	    ARMCI_Error("accumulate failed...Invalid Value Obtained..1", 0);
	  }
	}
      MP_BARRIER();
      if(DEBUG)if(me==0) printf("  verifying accumulate ..O.K.\n"); 
      break;
      
    default:
      ARMCI_Error("Invalid Operation", 0);
    }
    fflush(stdout);
}
コード例 #5
0
/*
 * Placeholder: the whole body is compiled out with #if 0, so calling this
 * function does nothing and svec is neither read nor written.
 */
static void gather_solution_vector(double **svec) {
    (void)svec; /* intentionally unused while the body is disabled */
#if 0
    double y[COL];

    rc = ARMCI_Get(&vec[i][idx_start-offset], &vec_local[idx_start], bytes, i);
    if (rc)
        ARMCI_Error("armci_nbget failed\n",rc);
#endif
}
コード例 #6
0
ファイル: test_groups.c プロジェクト: dmlb2000/nwchem-cml
/*
 * Put test inside one ARMCI group.
 *
 * group:    the group to test; all its members must call this function
 *           (it uses group barriers and group allocation).
 * pid_list: not used here.
 *
 * Group rank 0 puts ELEMS doubles into the buffer of the last group rank,
 * which then checks that element j equals j*1.001*(src_proc+1).  ARMCI calls
 * take absolute process ids, hence the ARMCI_Absolute_id() translations.
 */
void test_one_group(ARMCI_Group *group, int *pid_list) {
  int grp_me, grp_size;
  int i,j,src_proc,dst_proc;
  double *ddst_put[MAXPROC];
  double dsrc[ELEMS];
  int bytes, world_me, rc;

  (void)pid_list;

  MP_MYID(&world_me);
  ARMCI_Group_rank(group, &grp_me);
  ARMCI_Group_size(group, &grp_size);
  if(grp_me==0) printf("GROUP SIZE = %d\n", grp_size);
  printf("%d:group rank = %d\n", me, grp_me);

  src_proc = 0; dst_proc = grp_size-1;

  bytes = ELEMS*sizeof(double);
  /* A failed allocation would leave ddst_put unusable below; stop here. */
  rc = ARMCI_Malloc_group((void **)ddst_put, bytes, group);
  if(rc) ARMCI_Error("groups: ARMCI_Malloc_group failed", rc);

  for(i=0; i<ELEMS; i++) dsrc[i]=i*1.001*(grp_me+1);
  for(i=0; i<ELEMS; i++) ddst_put[grp_me][i]=-1.0;

  armci_msg_group_barrier(group);

  if(grp_me==src_proc) {
    /* NOTE: make sure to specify absolute ids in ARMCI calls */
    ARMCI_Put(dsrc, &ddst_put[dst_proc][0], bytes,
              ARMCI_Absolute_id(group,dst_proc));
  }

  armci_msg_group_barrier(group);
  /* NOTE: make sure to specify absolute ids in ARMCI calls */
  ARMCI_Fence(ARMCI_Absolute_id(group,dst_proc));
  sleep(1);

  /* Verify */
  if(grp_me==dst_proc) {
    for(j=0; j<ELEMS; j++) {
      if(ARMCI_ABS(ddst_put[grp_me][j]-j*1.001*(src_proc+1)) > 0.1) {
        printf("\t%d: ddst_put[%d][%d] = %lf and expected value is %lf\n",
               me, grp_me, j, ddst_put[grp_me][j], j*1.001*(src_proc+1));
        ARMCI_Error("groups: armci put failed...1", 0);
      }
    }
    printf("\n%d(%d): Test O.K. Verified\n", dst_proc, world_me);
  }
  armci_msg_group_barrier(group);
  ARMCI_Free_group(ddst_put[grp_me], group);
}
コード例 #7
0
ファイル: test_groups.c プロジェクト: dmlb2000/nwchem-cml
/*
 * Driver for the ARMCI group tests: runs test_groups() and, when built with
 * ARMCI_GROUP, test_groups_noncollective(), printing a success line from
 * process 0 after each.
 */
int main(int argc, char* argv[])
{
  /* Start the message-passing layer and record process count and rank. */
  MP_INIT(argc, argv);
  MP_PROCS(&nproc);
  MP_MYID(&me);

  if (me == 0 && (nproc < MINPROC || nproc > MAXPROC)) {
    ARMCI_Error("Test works for up to %d processors\n",MAXPROC);
  }

  if (me == 0) {
    printf("ARMCI test program (%d processes)\n",nproc);
    fflush(stdout);
    sleep(1);
  }

  ARMCI_Init();

  if (me == 0) {
    printf("\n Testing ARMCI Groups!\n\n");
    fflush(stdout);
  }

  /* Collective group test. */
  test_groups();

  ARMCI_AllFence();
  MP_BARRIER();
  if (me == 0) {
    printf("\n Collective groups: Success!!\n");
    fflush(stdout);
  }
  sleep(2);

#ifdef ARMCI_GROUP
  /* Non-collective group test, only present in ARMCI_GROUP builds. */
  test_groups_noncollective();

  ARMCI_AllFence();
  MP_BARRIER();
  if (me == 0) {
    printf("\n Non-collective groups: Success!!\n");
    fflush(stdout);
  }
  sleep(2);
#endif

  MP_BARRIER();
  ARMCI_Finalize();
  MP_FINALIZE();
  return 0;
}
コード例 #8
0
ファイル: testnotify.c プロジェクト: bcernohous/ga
void compare_patches(double eps, int ndim, double *patch1, int lo1[], int hi1[],
                     int dims1[],double *patch2, int lo2[], int hi2[], 
                     int dims2[])
                               
{
int i,j, elems=1;    
int subscr1[MAXDIMS], subscr2[MAXDIMS];
double diff,max;

    for(i=0;i<ndim;i++){
    int diff = hi1[i]-lo1[i];
      assert(diff == (hi2[i]-lo2[i]));
      assert(diff < dims1[i]);
      assert(diff < dims2[i]);
      elems *= diff+1;
      subscr1[i]= lo1[i];
      subscr2[i]=lo2[i];
    }
    for(j=0; j< elems; j++){ 
    int idx1=0, idx2=0, offset1=0, offset2=0;
      idx1 = Index(ndim, subscr1, dims1);
      idx2 = Index(ndim, subscr2, dims2);
      if(j==0){
        offset1 =idx1;
    offset2 =idx2;
      }
      idx1 -= offset1;
      idx2 -= offset2;
      diff = patch1[idx1] - patch2[idx2];
      max  = ARMCI_MAX(ARMCI_ABS(patch1[idx1]),ARMCI_ABS(patch2[idx2]));
      if(max == 0. || max <eps) max = 1.; 
      if(eps < ARMCI_ABS(diff)/max){
       char msg[48];
         sprintf(msg,"(proc=%d):%f",me,patch1[idx1]);
     print_subscript("ERROR: a",ndim,subscr1,msg);
     sprintf(msg,"%f\n",patch2[idx2]);
     print_subscript(" b",ndim,subscr2,msg);
         fflush(stdout);
         sleep(1);
         ARMCI_Error("Bailing out",0);
      }
      update_subscript(ndim, subscr1, lo1,hi1, dims1);
      update_subscript(ndim, subscr2, lo2,hi2, dims2);
    }
}
コード例 #9
0
/*
 * Driver for the sparse matrix-vector multiplication test: initialises the
 * message layer and ARMCI, runs test_sparse(), and reports success from
 * process 0.
 */
int main(int argc, char* argv[])
{
  /* The message layer comes up first so that rank and size are known. */
  armci_msg_init(&argc, &argv);
  nproc = armci_msg_nproc();
  me = armci_msg_me();

  if (me == 0 && nproc > MAXPROC) {
    ARMCI_Error("Test works for up to %d processors\n",MAXPROC);
  }

  if (me == 0) {
    printf("ARMCI test program (%d processes)\n",nproc);
    fflush(stdout);
    sleep(1);
  }

  ARMCI_Init();

  if (me == 0) {
    printf("\n  Performing Sparse Matrix-Vector Multiplication ...\n\n");
    fflush(stdout);
  }
  test_sparse();

  /* Complete outstanding operations and synchronise before reporting. */
  ARMCI_AllFence();
  armci_msg_barrier();
  if (me == 0) {
    printf("\nSuccess!!\n");
    fflush(stdout);
  }
  sleep(2);

  armci_msg_barrier();
  ARMCI_Finalize();
  armci_msg_finalize();
  return 0;
}
コード例 #10
0
ファイル: perf.c プロジェクト: arnolda/scafacos
/*
 * Compare a strided source region with a strided destination region.
 *
 * src_buf, dst_buf: base addresses of the two regions.
 * stride:           byte strides; stride[l-1] is applied at level l.
 * count:            count[0] is the byte length of one contiguous block,
 *                   count[l] the number of blocks at level l.
 * stride_levels:    number of stride levels.
 *
 * The same byte offset is applied to both buffers for every block.  The
 * first pair of doubles that differs by more than 1e-6 is printed and then
 * ARMCI_Error("failed", 0) is called.
 */
void check_result(double *src_buf, double *dst_buf, int *stride, int *count,
                  int stride_levels)
{
    int blk, lvl, k, ndoubles;
    long byte_off;
    int nblocks;  /* number of contiguous first-dimension blocks */
    int bvalue[ARMCI_MAX_STRIDE_LEVEL], bunit[ARMCI_MAX_STRIDE_LEVEL];

    /* Total number of contiguous blocks over all stride levels. */
    nblocks = 1;
    for (lvl = 1; lvl <= stride_levels; lvl++) {
        nblocks *= count[lvl];
    }

    /* bvalue[l] is the running position at level l; bunit[l] is how many
       blocks pass before that position advances. */
    bvalue[0] = 0;
    bvalue[1] = 0;
    bunit[0] = 1;
    bunit[1] = 1;
    for (lvl = 2; lvl <= stride_levels; lvl++) {
        bvalue[lvl] = 0;
        bunit[lvl] = bunit[lvl-1] * count[lvl-1];
    }

    for (blk = 0; blk < nblocks; blk++) {
        double *src_row, *dst_row;

        /* Byte offset of this block, then advance the per-level positions. */
        byte_off = 0;
        for (lvl = 1; lvl <= stride_levels; lvl++) {
            byte_off += bvalue[lvl] * stride[lvl-1];
            if ((blk+1) % bunit[lvl] == 0) bvalue[lvl]++;
            if (bvalue[lvl] > (count[lvl]-1)) bvalue[lvl] = 0;
        }

        src_row = (double *)((char *)src_buf + byte_off);
        dst_row = (double *)((char *)dst_buf + byte_off);

        ndoubles = count[0] / sizeof(double);
        for (k = 0; k < ndoubles; k++) {
            if (ARMCI_ABS(src_row[k] - dst_row[k]) > 0.000001 ) {
                fprintf(stdout,"Error:%s comparison failed: (%d) (%f :%f) %d\n",
                        check_type, k, src_row[k], dst_row[k], count[0]);
                ARMCI_Error("failed",0);
            }
        }
    }
}
コード例 #11
0
ファイル: perf_aggr.c プロジェクト: dmlb2000/nwchem-cml
/*
 * Driver for the aggregate put/get test.  test_aggregate() is run twice:
 * first with dryrun=1 (cold start), then with dryrun=0 (warm start).
 */
int main(int argc, char *argv[])
{
    ARMCI_Init_args(&argc, &argv);
    nproc = armci_msg_nproc();
    me = armci_msg_me();

    if (me == 0 && nproc > MAXPROC)
        ARMCI_Error("Test works for up to %d processors\n", MAXPROC);

    if (me == 0) {
        printf("ARMCI test program (%d processes)\n", nproc);
        fflush(stdout);
        sleep(1);
    }

    if (me == 0) {
        printf("\nAggregate put/get requests\n\n");
        fflush(stdout);
    }

    test_aggregate(1); /* cold start */
    test_aggregate(0); /* warm start */

    /* Complete outstanding operations and synchronise before reporting. */
    ARMCI_AllFence();
    ARMCI_Barrier();
    if (me == 0) {
        printf("\nSuccess!!\n");
        fflush(stdout);
    }
    sleep(2);

    ARMCI_Barrier();
    ARMCI_Finalize();
    armci_msg_finalize();
    return 0;
}
コード例 #12
0
ファイル: simple.c プロジェクト: dmlb2000/nwchem-cml
/*
 * Driver for the aggregate put/get test using the MP_* message-passing
 * macros.  test_aggregate() is run twice: first with dryrun=1 (cold start),
 * then with dryrun=0 (warm start).
 */
int main(int argc, char* argv[])
{
  /* Start the message-passing layer and record process count and rank. */
  MP_INIT(argc, argv);
  MP_PROCS(&nproc);
  MP_MYID(&me);

  if (me == 0 && nproc > MAXPROC) {
    ARMCI_Error("Test works for up to %d processors\n",MAXPROC);
  }

  if (me == 0) {
    printf("ARMCI test program (%d processes)\n",nproc);
    fflush(stdout);
    sleep(1);
  }

  ARMCI_Init();

  if (me == 0) {
    printf("\nAggregate put/get requests\n\n");
    fflush(stdout);
  }
  test_aggregate(1); /* cold start */
  test_aggregate(0); /* warm start */

  /* Complete outstanding operations and synchronise before reporting. */
  ARMCI_AllFence();
  MP_BARRIER();
  if (me == 0) {
    printf("\nSuccess!!\n");
    fflush(stdout);
  }
  sleep(2);

  MP_BARRIER();
  ARMCI_Finalize();
  MP_FINALIZE();
  return 0;
}
コード例 #13
0
/*
 * Split the n matrix rows among the nproc processes so that each gets about
 * non_zero/nproc non-zero entries.
 *
 * n:           number of rows.
 * non_zero:    total number of non-zero entries.
 * row_ind_tmp: n+1 cumulative entry counts; row i covers
 *              [row_ind_tmp[i], row_ind_tmp[i+1]).
 *
 * On return proc_row_list[p] holds the exclusive upper row bound of process
 * p and proc_nz_list[p] its number of non-zero entries.  If the scan does not
 * produce exactly nproc boundaries, ARMCI_Error is called before either list
 * is used any further.
 */
static void load_balance(int n, int non_zero, int *row_ind_tmp) {

    int proc_id, i, local_nz, local_nz_acc, A, B;

    local_nz = local_nz_acc = non_zero/nproc;

    /* number of rows owned by each process is stored in proc_row_list. This
       is supposed to be well load balanced, so that each process has almost
       same number of non-zero elements */
    proc_id = 0;
    if(me==0) printf("local_nz = %d\n", local_nz);
    for(i=0; i<n; i++) { /* as # of entries in row_ind_tmp = n+1 */
        if(row_ind_tmp[i] < local_nz_acc && row_ind_tmp[i+1] >= local_nz_acc) {
            /* Keep counting boundaries so the check below can report an
               excess, but never store past the nproc entries that the
               assignment to proc_row_list[nproc-1] below relies on. */
            if(proc_id < nproc) proc_row_list[proc_id] = i+1;
            proc_id++;
            local_nz_acc = local_nz*(proc_id+1);
            /* the last process takes whatever remains */
            if(proc_id == nproc-1) local_nz_acc = non_zero;
            if(me==0 && proc_id<nproc) printf("local_nz = %d\n", local_nz_acc);
        }
    }

    /* Validate before proc_row_list is used as an index into row_ind_tmp:
       with too few boundaries some entries would still be unset. */
    if(proc_id != nproc)
        ARMCI_Error("Error while preparing Process Row list", proc_id-1);

    proc_row_list[nproc-1] = n;

    for(i=0; i<nproc; i++) {
        A = (i==0) ? 0: proc_row_list[i-1];/* # of entries in row_ind_tmp is n+1*/
        B = proc_row_list[i];
        proc_nz_list[i] = row_ind_tmp[B]-row_ind_tmp[A];
    }

#if 1
    if(me==0) verify_list(proc_row_list);
#endif

}
コード例 #14
0
ファイル: simple.c プロジェクト: dmlb2000/nwchem-cml
/*
 * Time and verify several ways of moving MAXELEMS individual doubles between
 * process 0 and every other process.
 *
 * dryrun: when non-zero, the timing and status lines are not printed; the
 *         transfers and the verification still run.
 *
 * Process 0 performs, for each remote process: value puts, a vector put,
 * regular non-blocking puts, aggregate puts, then a vector get, regular
 * non-blocking gets and aggregate gets.  Afterwards every non-zero process
 * checks its put buffer and process 0 checks its get buffer.  The function
 * contains barriers, so every process must call it.
 */
void test_aggregate(int dryrun) {

    int i, j, rc, bytes, elems[2] = {MAXPROC, MAXELEMS};
    double *ddst_put[MAXPROC];
    double *ddst_get[MAXPROC];
    double *dsrc[MAXPROC];
    armci_hdl_t aggr_hdl_put[MAXPROC];
    armci_hdl_t aggr_hdl_get[MAXPROC];
    armci_hdl_t hdl_put[MAXELEMS];
    armci_hdl_t hdl_get[MAXELEMS];
    armci_giov_t darr;
    void *src_ptr[MAX_REQUESTS], *dst_ptr[MAX_REQUESTS];
    int start = 0, end = 0;
    double start_time;

    create_array((void**)ddst_put, sizeof(double),2, elems);
    create_array((void**)ddst_get, sizeof(double),2, elems);
    create_array((void**)dsrc, sizeof(double),1, &elems[1]);

    /* Source element i on process `me` is i*1.001*(me+1); targets start at 0. */
    for(i=0; i<elems[1]; i++) dsrc[me][i]=i*1.001*(me+1);
    for(i=0; i<elems[0]*elems[1]; i++) {
      ddst_put[me][i]=0.0;
      ddst_get[me][i]=0.0;
    }

    MP_BARRIER();

    /* only proc 0 does the work */
    if(me == 0) {
      if(!dryrun)printf("Transferring %d doubles (Not an array of %d doubles)\n", MAXELEMS, MAXELEMS);

      /* initializing non-blocking handles */
      for(i=0; i<elems[1]; i++) ARMCI_INIT_HANDLE(&hdl_put[i]);
      for(i=0; i<elems[1]; i++) ARMCI_INIT_HANDLE(&hdl_get[i]);

      /* aggregate handles */
      for(i=0; i<nproc; i++) ARMCI_INIT_HANDLE(&aggr_hdl_put[i]);
      for(i=0; i<nproc; i++) ARMCI_INIT_HANDLE(&aggr_hdl_get[i]);
      for(i=0; i<nproc; i++) ARMCI_SET_AGGREGATE_HANDLE(&aggr_hdl_put[i]);
      for(i=0; i<nproc; i++) ARMCI_SET_AGGREGATE_HANDLE(&aggr_hdl_get[i]);

      /* every request below moves a single double */
      bytes = sizeof(double);

      /* **************** PUT **************** */
      /* register put */
      start_time=MP_TIMER();
      start = 0; end = elems[1];
      for(i=1; i<nproc; i++) {
        for(j=start; j<end; j++) {
          ARMCI_NbPutValueDouble(dsrc[me][j], &ddst_put[i][me*elems[1]+j], i,
                                 &hdl_put[j]);
        }
        for(j=start; j<end; j++) ARMCI_Wait(&hdl_put[j]);
      }
      if(!dryrun)printf("%d: Value Put time      = %.2es\n", me, MP_TIMER()-start_time);

      /* vector put */
      start_time=MP_TIMER();
      for(i=1; i<nproc; i++) {
        for(j=start; j<end; j++) {
          src_ptr[j] = (void *)&dsrc[me][j];
          dst_ptr[j] = (void *)&ddst_put[i][me*elems[1]+j];
        }
        darr.src_ptr_array = src_ptr;
        darr.dst_ptr_array = dst_ptr;
        darr.bytes = sizeof(double);
        darr.ptr_array_len = elems[1];
        /* one handle per target process here, indexed by i */
        if((rc=ARMCI_NbPutV(&darr, 1, i, &hdl_put[i])))
          ARMCI_Error("armci_nbputv failed\n",rc);
      }
      for(i=1; i<nproc; i++) ARMCI_Wait(&hdl_put[i]);
      if(!dryrun)printf("%d: Vector Put time     = %.2es\n", me, MP_TIMER()-start_time);

      /* regular put */
      start_time=MP_TIMER();
      for(i=1; i<nproc; i++) {
        for(j=start; j<end; j++) {
          if((rc=ARMCI_NbPut(&dsrc[me][j], &ddst_put[i][me*elems[1]+j], bytes,
                             i, &hdl_put[j])))
            ARMCI_Error("armci_nbput failed\n",rc);
        }
        for(j=start; j<end; j++) ARMCI_Wait(&hdl_put[j]);
      }
      if(!dryrun)printf("%d: Regular Put time    = %.2es\n", me, MP_TIMER()-start_time);

      /* aggregate put */
      start_time=MP_TIMER();
      for(i=1; i<nproc; i++) {
        for(j=start; j<end; j++) {
          if((rc=ARMCI_NbPut(&dsrc[me][j], &ddst_put[i][me*elems[1]+j], bytes,
                             i,  &aggr_hdl_put[i])))
            ARMCI_Error("armci_nbput failed\n",rc);
        }
      }
      for(i=1; i<nproc; i++) ARMCI_Wait(&aggr_hdl_put[i]);
      if(!dryrun)printf("%d: Aggregate Put time  = %.2es\n\n", me, MP_TIMER()-start_time);


      /* **************** GET **************** */

      /* vector get */
      start_time=MP_TIMER();
      for(i=1; i<nproc; i++) {
        for(j=start; j<end; j++) {
          src_ptr[j] = (void *)&dsrc[i][j];
          dst_ptr[j] = (void *)&ddst_get[me][i*elems[1]+j];
        }
        darr.src_ptr_array = src_ptr;
        darr.dst_ptr_array = dst_ptr;
        darr.bytes = sizeof(double);
        darr.ptr_array_len = elems[1];
        if((rc=ARMCI_NbGetV(&darr, 1, i, &hdl_get[i])))
          ARMCI_Error("armci_nbgetv failed\n",rc);
        ARMCI_Wait(&hdl_get[i]);
      }
      if(!dryrun)printf("%d: Vector Get time     = %.2es\n", me, MP_TIMER()-start_time);

      /* regular get */
      start_time=MP_TIMER();
      for(i=1; i<nproc; i++) {
        for(j=start; j<end; j++) {
          if((rc=ARMCI_NbGet(&dsrc[i][j], &ddst_get[me][i*elems[1]+j], bytes,
                             i, &hdl_get[j])))
            ARMCI_Error("armci_nbget failed\n",rc);
        }
        for(j=start; j<end; j++) ARMCI_Wait(&hdl_get[j]);
      }
      if(!dryrun)printf("%d: Regular Get time    = %.2es\n", me, MP_TIMER()-start_time);

      /* aggregate get */
      start_time=MP_TIMER();
      for(i=1; i<nproc; i++) {
        for(j=start; j<end; j++) {
          /* checked like every other transfer in this function */
          if((rc=ARMCI_NbGet(&dsrc[i][j], &ddst_get[me][i*elems[1]+j], bytes,
                             i, &aggr_hdl_get[i])))
            ARMCI_Error("armci_nbget failed\n",rc);
        }
      }
      for(i=1; i<nproc; i++) ARMCI_Wait(&aggr_hdl_get[i]);
      if(!dryrun)printf("%d: Aggregate Get time  = %.2es\n", me, MP_TIMER()-start_time);
    }

    MP_BARRIER();
    ARMCI_AllFence();
    MP_BARRIER();

    /* Verify: receivers expect element j of their put buffer to be j*1.001. */
    if(me != 0) {
      for(j=0; j<elems[1]; j++) {
        if( ARMCI_ABS(ddst_put[me][j]-j*1.001) > 0.1) {
          ARMCI_Error("aggregate put failed...1", 0);
        }
      }
    }
    MP_BARRIER();
    if(!dryrun && me==0) printf("\n  aggregate put ..O.K.\n");
    fflush(stdout); /* flushed on every process, dry run or not */

    /* Verify: process 0 expects section i of its get buffer to hold
       j*1.001*(i+1). */
    if(me==0) {
      for(i=1; i<nproc; i++) {
        for(j=0; j<elems[1]; j++) {
          if( ARMCI_ABS(ddst_get[me][i*elems[1]+j]-j*1.001*(i+1)) > 0.1) {
            ARMCI_Error("aggregate get failed...1", 0);
          }
        }
      }
    }
    MP_BARRIER();
    if(!dryrun && me==0) printf("  aggregate get ..O.K.\n");
    fflush(stdout); /* flushed on every process, dry run or not */


    ARMCI_AllFence();
    MP_BARRIER();

    if(!dryrun && me==0){printf("O.K.\n"); fflush(stdout);}
    destroy_array((void **)ddst_put);
    destroy_array((void **)ddst_get);
    destroy_array((void **)dsrc);
}
コード例 #15
0
ファイル: perf_nb.c プロジェクト: dmlb2000/nwchem-cml
/*
 * Measure blocking and non-blocking put, get and accumulate for contiguous
 * sections of 1, 2, 4, ... MAXELEMS doubles.
 *
 * dry_run: when non-zero the table header, timing rows and final "O.K." are
 *          not printed; all transfers still run.
 *
 * For put/get, process 0 talks to every other process.  For accumulate,
 * every process accumulates into ddst on process 0.  After each phase the
 * result is checked with verify_results() when VERIFY is set and ddst is
 * zeroed again.  The function contains barriers, so every process must call
 * it.
 *
 * Timers: t1 put, t2 nbput issue, t3 nbput wait, t4 get, t5 nbget issue,
 * t6 nbget wait, t7 acc, t8 nbacc issue, t9 nbacc wait.
 */
void test_perf_nb(int dry_run) {

    int i, j, loop, rc, bytes, elems[2] = {MAXPROC, MAXELEMS};
    int stride, k=0, ntimes;
    double stime, t1, t2, t3, t4, t5, t6, t7, t8, t9;
    double *dsrc[MAXPROC], scale=1.0;
    armci_hdl_t hdl_get, hdl_put, hdl_acc;

    create_array((void**)ddst, sizeof(double),2, elems);
    create_array((void**)dsrc, sizeof(double),1, &elems[1]);

    if(!dry_run && me == 0) {
      printf("\n\t\t\tRemote 1-D Array Section\n");
      printf("section    get      nbget    wait     put     nbput  ");
      printf("   wait     acc     nbacc     wait\n");
      printf("-------  -------- -------- -------- -------- --------");
      printf(" -------- -------- -------- --------\n");
      fflush(stdout);
    }

    for(loop=1; loop<=MAXELEMS; loop*=2, k++) {

      elems[1] = loop;
      /* fewer repetitions for larger sections, but at least one */
      ntimes = (int)sqrt((double)(MAXELEMS/elems[1]));
      if(ntimes <1) ntimes=1;

      /* -------------------------- SETUP --------------------------- */
      /*initializing non-blocking handles,time,src & dst buffers*/
      ARMCI_INIT_HANDLE(&hdl_put);
      ARMCI_INIT_HANDLE(&hdl_get);
      ARMCI_INIT_HANDLE(&hdl_acc);
      t1 = t2 = t3 = t4 = t5 = t6 = t7 = t8 = t9 = 0.0;
      for(i=0; i<elems[1]; i++) dsrc[me][i]=i*1.001*(me+1);
      for(i=0; i<elems[0]*elems[1]; i++) ddst[me][i]=0.0;
      MP_BARRIER();

      /* bytes transfered */
      bytes = sizeof(double)*elems[1];
      MP_BARRIER();

      /* -------------------------- PUT/GET -------------------------- */
      if(me == 0) {
        for(i=1; i<nproc; i++) {
          stime=MP_TIMER();
          for(j=0; j<ntimes; j++)
            if((rc=ARMCI_Put(&dsrc[me][0], &ddst[i][me*elems[1]], bytes,i)))
              ARMCI_Error("armci_put failed\n",rc); /* blocking call, not nbput */
          t1 += MP_TIMER()-stime;
        }
      }
      MP_BARRIER(); ARMCI_AllFence(); MP_BARRIER();
      if(VERIFY) verify_results(PUT, elems);
      for(i=0; i<elems[0]*elems[1]; i++) ddst[me][i]=0.0;
      MP_BARRIER();

      if(me == 0) {
        for(i=1; i<nproc; i++) {
          stime=MP_TIMER();
          for(j=0; j<ntimes; j++)
            if((rc=ARMCI_Get(&dsrc[i][0], &ddst[me][i*elems[1]], bytes,i)))
              ARMCI_Error("armci_get failed\n",rc); /* blocking call, not nbget */
          t4 += MP_TIMER()-stime;
        }
      }
      MP_BARRIER(); ARMCI_AllFence(); MP_BARRIER();
      if(VERIFY) verify_results(GET, elems);
      for(i=0; i<elems[0]*elems[1]; i++) ddst[me][i]=0.0;
      MP_BARRIER();

      /* ------------------------ nb PUT/GET ------------------------- */
      if(me == 0) {
        for(i=1; i<nproc; i++) {
          for(j=0; j<ntimes; j++) {
            /* issue time and wait time are accumulated separately */
            stime=MP_TIMER();
            if((rc=ARMCI_NbPut(&dsrc[me][0], &ddst[i][me*elems[1]], bytes,
                               i, &hdl_put)))
              ARMCI_Error("armci_nbput failed\n",rc);
            t2 += MP_TIMER()-stime;
            stime=MP_TIMER();
            ARMCI_Wait(&hdl_put);
            t3 += MP_TIMER()-stime;
          }
        }
      }
      MP_BARRIER(); ARMCI_AllFence(); MP_BARRIER();
      if(VERIFY) verify_results(PUT, elems);
      for(i=0; i<elems[0]*elems[1]; i++) ddst[me][i]=0.0;
      MP_BARRIER();

      if(me == 0) {
        for(i=1; i<nproc; i++) {
          for(j=0; j<ntimes; j++) {
            stime=MP_TIMER();
            if((rc=ARMCI_NbGet(&dsrc[i][0], &ddst[me][i*elems[1]], bytes,
                               i, &hdl_get)))
              ARMCI_Error("armci_nbget failed\n",rc);
            t5 += MP_TIMER()-stime;
            stime=MP_TIMER();
            ARMCI_Wait(&hdl_get);
            t6 += MP_TIMER()-stime;
          }
        }
      }
      MP_BARRIER(); ARMCI_AllFence(); MP_BARRIER();
      if(VERIFY) verify_results(GET, elems);
      for(i=0; i<elems[0]*elems[1]; i++) ddst[me][i]=0.0;
      MP_BARRIER();


      /* ------------------------ Accumulate ------------------------- */
      /* Every process adds a vector of ones into ddst on process 0, so the
         ACC check expects nproc in each element. */
      for(i=0; i<elems[1]; i++) dsrc[me][i]=1.0;
      MP_BARRIER();
      stride = elems[1]*sizeof(double); scale  = 1.0;
      for(j=0; j<ntimes; j++) {
        stime=MP_TIMER();
        if((rc=ARMCI_AccS(ARMCI_ACC_DBL, &scale, &dsrc[me][0], &stride,
                          &ddst[0][0], &stride, &bytes, 0, 0)))
          ARMCI_Error("armci_acc failed\n",rc);
        t7 += MP_TIMER()-stime;

        MP_BARRIER(); ARMCI_AllFence(); MP_BARRIER();
        if(VERIFY) verify_results(ACC, elems);
        for(i=0; i<elems[0]*elems[1]; i++) ddst[me][i]=0.0;
        MP_BARRIER();
      }

#if 1
      /* This section is currently compiled in (#if 1). */
      /* ---------------------- nb-Accumulate ------------------------ */
      for(i=0; i<elems[1]; i++) dsrc[me][i]=1.0;
      MP_BARRIER();
      stride = elems[1]*sizeof(double); scale  = 1.0;
      for(j=0; j<ntimes; j++) {
        stime=MP_TIMER();
        if((rc=ARMCI_NbAccS(ARMCI_ACC_DBL, &scale, &dsrc[me][0], &stride,
                            &ddst[0][0], &stride, &bytes, 0, 0, &hdl_acc)))
          ARMCI_Error("armci_nbacc failed\n",rc);
        t8 += MP_TIMER()-stime;
        stime=MP_TIMER();
        ARMCI_Wait(&hdl_acc);
        t9 += MP_TIMER()-stime;

        MP_BARRIER(); ARMCI_AllFence(); MP_BARRIER();
        if(VERIFY) verify_results(ACC, elems);
        for(i=0; i<elems[0]*elems[1]; i++) ddst[me][i]=0.0;
        MP_BARRIER();
      }
#endif

      /* print timings, in the column order of the table header */
      if(!dry_run && me==0) printf("%d\t %.2e %.2e %.2e %.2e %.2e %.2e %.2e %.2e %.2e\n",
                       bytes, t4/ntimes, t5/ntimes, t6/ntimes, t1/ntimes,
                       t2/ntimes, t3/ntimes, t7/ntimes, t8/ntimes, t9/ntimes);
    }

    ARMCI_AllFence();
    MP_BARRIER();

    if(!dry_run && me==0){printf("O.K.\n"); fflush(stdout);}
    destroy_array((void **)ddst);
    destroy_array((void **)dsrc);
}
コード例 #16
0
ファイル: testnotify.c プロジェクト: jeffhammond/ga
/*
 * Compare two ndim-dimensional array patches element by element.
 *
 * eps:            relative tolerance; a pair differs when
 *                 |a-b| / max(|a|,|b|) exceeds eps (the divisor is taken as 1
 *                 when that maximum is 0 or below eps).
 * patch1, patch2: pointers to the first element of each patch.
 * lo*, hi*:       inclusive subscript bounds of each patch.
 * dims*:          dimensions of the array each patch lives in.
 *
 * Both patches must have the same extent in every dimension (asserted).  On
 * the first mismatch the two subscripts and values are printed and
 * ARMCI_Error("Bailing out", 0) is called.
 */
void compare_patches(double eps, int ndim, double *patch1, int lo1[], int hi1[],
                     int dims1[], double *patch2, int lo2[], int hi2[],
                     int dims2[])

{
  int i, j, elems = 1;
  int subscr1[MAXDIMS], subscr2[MAXDIMS];
  double diff, max;
  int idx1, idx2;
  /* Linear index of each patch's first element, captured at j == 0. */
  int offset1 = 0, offset2 = 0;

  for (i = 0; i < ndim; i++) { /* count # of elements & verify consistency of both patches */
    int extent = hi1[i] - lo1[i];
    assert(extent == (hi2[i] - lo2[i]));
    assert(extent < dims1[i]);
    assert(extent < dims2[i]);
    elems *= extent + 1;
    subscr1[i] = lo1[i];
    subscr2[i] = lo2[i];
  }


  /* compare element values in both patches */
  for (j = 0; j < elems; j++) {
    idx1 = Index(ndim, subscr1, dims1);  /* calculate element Index from a subscript */
    idx2 = Index(ndim, subscr2, dims2);

    if (j == 0) {
      offset1 = idx1;
      offset2 = idx2;
    }
    /* make the indices relative to the start of each patch */
    idx1 -= offset1;
    idx2 -= offset2;


    diff = patch1[idx1] - patch2[idx2];
    max  = ARMCI_MAX(ARMCI_ABS(patch1[idx1]), ARMCI_ABS(patch2[idx2]));
    if (max == 0. || max < eps) {
      max = 1.;
    }

    if (eps < ARMCI_ABS(diff) / max) {
      char msg[48];
      /* %f of a large double can exceed 48 characters; bound the write. */
      snprintf(msg, sizeof msg, "(proc=%d):%f", me, patch1[idx1]);
      print_subscript("ERROR: a", ndim, subscr1, msg);
      snprintf(msg, sizeof msg, "%f\n", patch2[idx2]);
      print_subscript(" b", ndim, subscr2, msg);
      fflush(stdout);
      sleep(1);
      ARMCI_Error("Bailing out", 0);
    }

    { /* update subscript for the patches */
      update_subscript(ndim, subscr1, lo1, hi1, dims1);
      update_subscript(ndim, subscr2, lo2, hi2, dims2);
    }
  }



  /* make sure we reached upper limit */
  /*for(i=0;i<ndim;i++){
    assert(subscr1[i]==hi1[i]);
    assert(subscr2[i]==hi2[i]);
  }*/
}
コード例 #17
0
ファイル: simple.c プロジェクト: v4m4/armci-mpi
/*
 * Get-latency micro-benchmark.
 *
 * Every process allocates LOOP doubles of ARMCI memory.  Process 0 then
 * fetches single doubles from process 1 (me+1): ten warm-up gets, LOOP timed
 * blocking gets, and LOOP timed non-blocking gets whose issue and wait times
 * are accumulated separately.  The other processes sleep for one second and
 * join the final barrier.  Requires at least two processes.
 */
int main(int argc, char **argv) {
  int i;
  double **myptrs;
  double t0, t1, tnbget=0, tnbwait=0, t2=0;

  MP_INIT(argc,argv);
  ARMCI_Init();

  MP_PROCS(&nprocs);
  MP_MYID(&me);

  if (nprocs < 2)
    ARMCI_Error("This program requires at least to processes", 1);

  /* One base pointer per process, filled in by ARMCI_Malloc. */
  myptrs = malloc(sizeof *myptrs * nprocs);
  if (myptrs == NULL)
    ARMCI_Error("malloc failed", 1);
  if (ARMCI_Malloc((void **)myptrs, LOOP*sizeof(double)))
    ARMCI_Error("ARMCI_Malloc failed", 1);
  
  MP_BARRIER();
  
  if(me == 0) {
    /* Argument order used below: the remote pointer (myptrs[me+1]) is the
       source and the local pointer (myptrs[me]) the destination.  Swapping
       them is a bug. */
    for(i = 0; i < 10; i++) {
      ARMCI_Get(myptrs[me+1]+i, myptrs[me]+i, sizeof(double), me+1);
    }

    t0 = MP_TIMER(); 
    for(i = 0; i < LOOP; i++) {
      /* Read element i, as the warm-up and non-blocking loops do; the
         original read element 1 on every iteration. */
      ARMCI_Get(myptrs[me+1]+i, myptrs[me]+i, sizeof(double), me+1);
    }
    t1 = MP_TIMER(); 

    printf("\nGet Latency=%lf\n", 1e6*(t1-t0)/LOOP);
    fflush(stdout);

    t1 = t0 = 0;

    for(i = 0; i < LOOP; i++) {
      armci_hdl_t nbh;
      ARMCI_INIT_HANDLE(&nbh);

      t0 = MP_TIMER(); 
      ARMCI_NbGet(myptrs[me+1]+i, myptrs[me]+i, sizeof(double), me+1, &nbh);
      t1 = MP_TIMER(); 
      ARMCI_Wait(&nbh);
      t2 = MP_TIMER();

      tnbget  += (t1-t0);
      tnbwait += (t2-t1);
    }

    printf("\nNb Get Latency=%lf Nb Wait=%lf\n",1e6*tnbget/LOOP,1e6*tnbwait/LOOP);fflush(stdout);
  }

  else
    sleep(1);

  MP_BARRIER();

  /* Release the ARMCI segment and the pointer table before shutting down. */
  ARMCI_Free(myptrs[me]);
  free(myptrs);

  ARMCI_Finalize();
  MP_FINALIZE();

  return 0;
}
コード例 #18
0
ファイル: aggregate.c プロジェクト: dmlb2000/nwchem-cml
/*
 * Flush the requests aggregated under nb_handle.
 *
 * nb_handle: aggregate handle; matched against the in-use buffers by tag and
 *            target process.
 * condition: when equal to UNSET the handle is also detached (proc set to -1)
 *            and the buffer lists are updated for the matched buffer.
 *
 * Returns without doing anything if no in-use buffer matches the handle.
 * Otherwise any pending requests are sent as one vector put or get, and the
 * buffer's bookkeeping is reset.
 */
void armci_agg_complete(armci_ihdl_t nb_handle, int condition) {
    int pos, slot = 0, rc;
#ifdef LAPI
    armci_hdl_t usr_hdl;
#endif

    /* Search the in-use list from the end for this handle's buffer. */
    for (pos = ulist.size - 1; pos >= 0; pos--) {
        slot = ulist.index[pos];
        if (aggr[slot]->tag == nb_handle->tag &&
            aggr[slot]->proc == nb_handle->proc) {
            break;
        }
    }
    if (pos < 0) {
        return; /* this handle has no requests at all */
    }

#if 0
    printf("%d: Aggregation Complete to remote process %d (%d:%d requests)\n",
           armci_me, nb_handle->proc, slot, aggr[slot]->request_len);
#endif

    /* Complete the data transfer.  NOTE: in LAPI, non-blocking calls
       followed by a wait perform better than blocking put/get. */
    if (aggr[slot]->request_len) {
        switch (nb_handle->op) {
#ifdef LAPI
        case PUT:
            ARMCI_INIT_HANDLE(&usr_hdl);
            rc = ARMCI_NbPutV(aggr[slot]->darr, aggr[slot]->request_len,
                              nb_handle->proc, (armci_hdl_t*)&usr_hdl);
            if (rc)
                ARMCI_Error("armci_agg_complete: nbputv failed",rc);
            ARMCI_Wait((armci_hdl_t*)&usr_hdl);
            break;
        case GET:
            ARMCI_INIT_HANDLE(&usr_hdl);
            rc = ARMCI_NbGetV(aggr[slot]->darr, aggr[slot]->request_len,
                              nb_handle->proc, (armci_hdl_t*)&usr_hdl);
            if (rc)
                ARMCI_Error("armci_agg_complete: nbgetv failed",rc);
            ARMCI_Wait((armci_hdl_t*)&usr_hdl);
            break;
#else
        case PUT:
            rc = ARMCI_PutV(aggr[slot]->darr, aggr[slot]->request_len,
                            nb_handle->proc);
            if (rc)
                ARMCI_Error("armci_agg_complete: putv failed",rc);
            break;
        case GET:
            rc = ARMCI_GetV(aggr[slot]->darr, aggr[slot]->request_len,
                            nb_handle->proc);
            if (rc)
                ARMCI_Error("armci_agg_complete: getv failed",rc);
            break;
#endif
        }
    }

    /* The requests are complete: reset the buffer's counters. */
    aggr[slot]->request_len   = 0;
    aggr[slot]->ptr_array_len = 0;
    aggr[slot]->buf_pos_end   = _MAX_AGG_BUFSIZE;

    /* Detach the handle when the caller passes UNSET. */
    if (condition == UNSET) {
        nb_handle->proc = -1;
        _armci_agg_update_lists(slot);
    }
}
コード例 #19
0
ファイル: msgcheck.c プロジェクト: arnolda/scafacos
/*
 * Test broadcast and global sum for int, long and double buffers.
 *
 * For each length 1, 2, 4, ... below MAXLENG: one root per type fills its
 * buffer with 0..len-1 and broadcasts it, and every process checks the
 * result; then every process contributes i*me and the "+" reductions are
 * checked against i*nproc*(nproc-1)/2.  Finally time_gop() and time_reduce()
 * are run on the double buffer.  All processes must call this function.
 */
void TestGlobals()
{
/* Parenthesised so the macro stays a single value in any expression. */
#define MAXLENG (256*1024)
  double *dtest;
  int *itest;
  long *ltest;
  int len;
  int ifrom=nproc-1,lfrom=1,dfrom=1;

  if (me == 0) {
    printf("Global test ... broadcast and reduction for int, long, double\n----------\n");
    fflush(stdout);
  }

  /* Sizes are passed as size_t; narrowing them to unsigned could truncate. */
  if (!(dtest = malloc(MAXLENG*sizeof(double))))
    ARMCI_Error("TestGlobals: failed to allocated dtest", MAXLENG);
  if (!(ltest = malloc(MAXLENG*sizeof(long))))
    ARMCI_Error("TestGlobals: failed to allocated ltest", MAXLENG);
  if (!(itest = malloc(MAXLENG*sizeof(int))))
    ARMCI_Error("TestGlobals: failed to allocated itest", MAXLENG);

  for (len=1; len<MAXLENG; len*=2) {
    int ilen = len*sizeof(int);
    int dlen = len*sizeof(double);
    int llen = len*sizeof(long);
    int i;
   
    /* Rotate the broadcast roots each round. */
    ifrom = (ifrom+1)%nproc;
    lfrom = (lfrom+1)%nproc; 
    /* NOTE(review): dfrom is derived from lfrom, not from its own previous
       value - confirm this is intended. */
    dfrom = (lfrom+1)%nproc;

#if 0
    printf("%d:ifrom=%d lfrom=%d dfrom=%d\n",me,ifrom,lfrom,dfrom);fflush(stdout);
#endif

    if (me == 0) {
      printf("Test length = %d ... ", len);
      fflush(stdout);
    }

    /* Only the root of each broadcast holds the expected data beforehand. */
    if(me == ifrom)for (i=0; i<len; i++)itest[i]=i;
    else for (i=0; i<len; i++)itest[i]=0;
    if(me == lfrom)for (i=0; i<len; i++)ltest[i]=(long)i;
    else for (i=0; i<len; i++)ltest[i]=0L;
    if(me == dfrom)for (i=0; i<len; i++)dtest[i]=(double)i;
    else for (i=0; i<len; i++)dtest[i]=0.0;
    
    /* Test broadcast */
    armci_msg_brdcst(itest, ilen, ifrom);
    armci_msg_brdcst(ltest, llen, lfrom);
    armci_msg_brdcst(dtest, dlen, dfrom);
   
    for (i=0; i<len; i++){
      if (itest[i] != i) armci_die2("int broadcast failed", i,itest[i]);
      if (ltest[i] != (long)i) 
                      armci_die2("long broadcast failed", i,(int)ltest[i]);
      if (dtest[i] != (double)i) 
                      armci_die2("double broadcast failed", i,(int)dtest[i]);
    }
      
    if (me == 0) {
      printf("broadcast OK ...");
      fflush(stdout);
    }

    /* Test global sum */
    for (i=0; i<len; i++) {
      itest[i] = i*me;
      ltest[i] = (long) itest[i];
      dtest[i] = (double) itest[i];
    }


    armci_msg_igop(itest, len, "+");
    armci_msg_lgop(ltest, len, "+");
    armci_msg_dgop(dtest, len, "+");
 

    for (i=0; i<len; i++) {
      /* sum over me = 0..nproc-1 of i*me */
      int iresult = i*nproc*(nproc-1)/2;
      if (itest[i] != iresult || ltest[i] != (long)iresult || 
          dtest[i] != (double) iresult)
        ARMCI_Error("TestGlobals: global sum failed", (int) i);
    }


    if (me == 0) {
      printf("global sums OK\n");
      fflush(stdout);
    }
  }


  /* now we get timing data */
  time_gop(dtest,MAXLENG);
  time_reduce(dtest,MAXLENG);
     
  free(itest);
  free(ltest);
  free(dtest);
}
コード例 #20
0
ファイル: test2.c プロジェクト: arnolda/scafacos
/*
 * test_acc_type - accumulate test for one ARMCI accumulate datatype.
 *
 * datatype: one of ARMCI_ACC_INT, ARMCI_ACC_LNG, ARMCI_ACC_FLT,
 *           ARMCI_ACC_DBL, ARMCI_ACC_CPL or ARMCI_ACC_DCP; any other value
 *           makes the function return immediately.
 *
 * Every process fills a local vector a[i] = i + me, zeroes its own slice
 * b[me] of the array obtained from create_array(), and then accumulates
 * scale * a into b[] of every process (itself included).  After fencing,
 * each process checks its own b[me] against the expected sum
 *
 *     sum over p of (i + p)  =  i * nproc + nproc * (nproc - 1) / 2
 *
 * For the complex types scale is (2 + 1j) and a[i] = x + xj, so the
 * accumulated value is x + 3xj: real == sum, imag == 3 * sum.
 * A mismatch is reported with printf and aborts through ARMCI_Error().
 */
void test_acc_type(const int datatype)
{
    int i = 0;
    int datatype_size = 0;
    void * scale = NULL;
    void * a = NULL;
    void *b[MAXPROC];
    int elems = ELEMS;
    int dim = 1;
    int count = 0;
    int strideA = 0;
    int strideB = 0;
    /* 0 + 1 + ... + (nproc-1).  Multiply before dividing: the former
       "nproc / 2 * (nproc - 1)" truncated and was wrong for odd nproc. */
    const int rank_sum = nproc * (nproc - 1) / 2;

    /* Element size for the requested type; unknown types are ignored. */
    switch(datatype)
    {
       case ARMCI_ACC_INT: datatype_size = sizeof(int);     break;
       case ARMCI_ACC_LNG: datatype_size = sizeof(long);    break;
       case ARMCI_ACC_FLT: datatype_size = sizeof(float);   break;
       case ARMCI_ACC_DBL: datatype_size = sizeof(double);  break;
       case ARMCI_ACC_CPL: datatype_size = sizeof(cmpl_t);  break;
       case ARMCI_ACC_DCP: datatype_size = sizeof(dcmpl_t); break;
       default:
          return;
    }

    scale = malloc(datatype_size);
    a = malloc(elems * datatype_size);
    if(scale == NULL || a == NULL)
       ARMCI_Error("test_acc_type: malloc failed\n", elems * datatype_size);
    create_array((void**)b, datatype_size, dim, &elems);

    /* Scale factor, source vector a[i] = i + me, and zeroed local target. */
    switch(datatype)
    {
       case ARMCI_ACC_INT:
          *((int *) scale) = 1;
          for(i = 0; i < elems; i++)
          {
             ((int *) a)[i] = i + me;
             ((int *) b[me])[i] = 0;
          }
          break;
       case ARMCI_ACC_LNG:
          *((long *) scale) = 1;
          for(i = 0; i < elems; i++)
          {
             ((long *) a)[i] = i + me;
             ((long *) b[me])[i] = 0;
          }
          break;
       case ARMCI_ACC_FLT:
          *((float *) scale) = 1.0;
          for(i = 0; i < elems; i++)
          {
             ((float *) a)[i] = (float) i + me;
             ((float *) b[me])[i] = 0.0;
          }
          break;
       case ARMCI_ACC_DBL:
          *((double *) scale) = 1.0;
          for(i = 0; i < elems; i++)
          {
             ((double *) a)[i] = (double) i + me;
             ((double *) b[me])[i] = 0.0;
          }
          break;
       case ARMCI_ACC_CPL:
          ((cmpl_t *) scale)->real = 2.0;
          ((cmpl_t *) scale)->imag = 1.0;
          for(i = 0; i < elems; i++)
          {
             ((cmpl_t *) a)[i].real = ((float) i + me);
             ((cmpl_t *) a)[i].imag = ((float) i + me);
             ((cmpl_t *) b[me])[i].real = 0.0;
             ((cmpl_t *) b[me])[i].imag = 0.0;
          }
          break;
       case ARMCI_ACC_DCP:
          ((dcmpl_t *) scale)->real = 2.0;
          ((dcmpl_t *) scale)->imag = 1.0;
          for(i = 0; i < elems; i++)
          {
             ((dcmpl_t *) a)[i].real = ((double) i + me);
             ((dcmpl_t *) a)[i].imag = ((double) i + me);
             ((dcmpl_t *) b[me])[i].real = 0.0;
             ((dcmpl_t *) b[me])[i].imag = 0.0;
          }
          break;
       default:
          break;
    }

    /* The whole vector is sent as one contiguous chunk (stride level 0). */
    count = elems * datatype_size;
    strideA = elems * datatype_size;
    strideB = elems * datatype_size;

    /* Make sure every target has been zeroed before anyone accumulates. */
    ARMCI_AllFence();
    MP_BARRIER();

    /* Start with ourselves and walk round-robin through all processes. */
    for(i = 0; i < nproc; i++)
       ARMCI_AccS(datatype, scale, a, &strideA, b[(me + i) % nproc], &strideB, &count, 0, (me + i) % nproc);

    ARMCI_AllFence();
    MP_BARRIER();

    switch(datatype)
    {
       case ARMCI_ACC_INT:
          for(i = 0; i < elems; i++)
          {
             int compare = (i * nproc) + rank_sum;
             if(((int *)b[me])[i] != compare) 
             {
                printf("ERROR accumulate ARMCI_ACC_INT [%d] = %d != %d\n", i, ((int *)b[me])[i], compare);
                ARMCI_Error("test_acc_type failed\n",0);
             }
          }
          break;
       case ARMCI_ACC_LNG:
          for(i = 0; i < elems; i++)
          {
             long compare = (i * nproc) + rank_sum;
             if(((long *)b[me])[i] != compare) 
             {
                /* print the value as the long it is, not through an int view */
                printf("ERROR accumulate ARMCI_ACC_LNG [%d] = %ld != %ld\n", i, ((long *)b[me])[i], compare);
                ARMCI_Error("test_acc_type failed\n",0);
             }
          }
          break;
       case ARMCI_ACC_FLT:
          for(i = 0; i < elems; i++)
          {
             float compare = (float) ((i * nproc) + rank_sum);
             if(((float *)b[me])[i] != compare) 
             {
                printf("ERROR accumulate ARMCI_ACC_FLT [%d] = %f != %f\n", i, ((float *)b[me])[i], compare);
                ARMCI_Error("test_acc_type failed\n",0);
             }
          }
          break;
       case ARMCI_ACC_DBL:
          for(i = 0; i < elems; i++)
          {
             double compare = (double) ((i * nproc) + rank_sum);
             if(((double *)b[me])[i] != compare) 
             {
                printf("ERROR accumulate ARMCI_ACC_DBL [%d] = %f != %f \n", i, ((double *)b[me])[i], compare);
                ARMCI_Error("test_acc_type failed\n",0);
             }
          }
          break;
       case ARMCI_ACC_CPL:
          for(i = 0; i < elems; i++)
          {
             float compare = (float) ((i * nproc) + rank_sum);
             /* either component being wrong is a failure */
             if(((cmpl_t *)b[me])[i].real != compare || ((cmpl_t *)b[me])[i].imag != 3 * compare) 
             {
                printf("ERROR accumulate ARMCI_ACC_CPL [%d] = %f + %fj != %f + %fj\n", i, ((cmpl_t *)b[me])[i].real, ((cmpl_t *)b[me])[i].imag, compare, 3 * compare);
                ARMCI_Error("test_acc_type failed\n",0);
             }
          }
          break;
       case ARMCI_ACC_DCP:
          for(i = 0; i < elems; i++)
          {
             double compare = (double) ((i * nproc) + rank_sum);
             /* either component being wrong is a failure */
             if(((dcmpl_t *)b[me])[i].real != compare || ((dcmpl_t *)b[me])[i].imag != 3 * compare) 
             {
                printf("ERROR accumulate ARMCI_ACC_DCP [%d] = %f + %fj != %f + %fj\n", i, ((dcmpl_t *)b[me])[i].real, ((dcmpl_t *)b[me])[i].imag, compare, 3 * compare);
                ARMCI_Error("test_acc_type failed\n",0);
             }
          }
          break;
       default:
          break;
    }

    MP_BARRIER();
    ARMCI_AllFence();
    MP_BARRIER();
    
    if(me==0){printf("O.K.\n\n"); fflush(stdout);}    
    destroy_array((void**)b);
    free(a);
    free(scale);
}
コード例 #21
0
ファイル: test2.c プロジェクト: arnolda/scafacos
/*
 * test_brdcst - broadcast test for one ARMCI datatype.
 *
 * datatype: ARMCI_INT, ARMCI_LONG, ARMCI_FLOAT or ARMCI_DOUBLE.  Any other
 *           value is ignored: the function returns without allocating or
 *           communicating (previously it broadcast and freed uninitialized
 *           pointers in that case).
 *
 * Six buffers of 1, 10, ..., 100000 elements are allocated.  Process 0
 * fills buffer element j with the value j, every other process zeroes its
 * buffers, the buffers are broadcast with root argument 0, and finally
 * every process verifies that element j equals j.  A mismatch is printed
 * and aborts through ARMCI_Error().
 */
void test_brdcst(int datatype)
{
    void *a[6] = {NULL, NULL, NULL, NULL, NULL, NULL};
    int len[6] = {1, 10, 100, 1000, 10000, 100000};
    int datatype_size = 0;
    int i, j;

    switch(datatype)
    {
       case ARMCI_INT:    datatype_size = sizeof(int);    break;
       case ARMCI_LONG:   datatype_size = sizeof(long);   break;
       case ARMCI_FLOAT:  datatype_size = sizeof(float);  break;
       case ARMCI_DOUBLE: datatype_size = sizeof(double); break;
       default:
          /* nothing sensible to test for an unknown element type */
          return;
    }

    for(i = 0; i < 6; i++)
    {
       a[i] = malloc(len[i] * datatype_size);
       if(a[i] == NULL)
          ARMCI_Error("test_brdcst: malloc failed\n", len[i] * datatype_size);
    }

    /* Only the root starts with the pattern; everybody else starts zeroed
       so that a broadcast which does nothing is detected below. */
    for(i = 0; i < 6; i++)
    {
       if(me != 0)
       {
          memset(a[i], 0x0, len[i] * datatype_size);
          continue;
       }
       switch(datatype)
       {
          case ARMCI_INT:
             for(j = 0; j < len[i]; j++)
                ((int *) a[i])[j] = (int) j;
             break;
          case ARMCI_LONG:
             for(j = 0; j < len[i]; j++)
                ((long *) a[i])[j] = (long) j;
             break;
          case ARMCI_FLOAT:
             for(j = 0; j < len[i]; j++)
                ((float *) a[i])[j] = (float) j;
             break;
          case ARMCI_DOUBLE:
             for(j = 0; j < len[i]; j++)
                ((double *) a[i])[j] = (double) j;
             break;
          default:
             break;
       }
    }

    for(i = 0; i < 6; i++)
       armci_msg_brdcst(a[i], len[i] * datatype_size, 0);

    switch(datatype)
    {
       case ARMCI_INT:
          for(i = 0; i < 6; i++)
             for(j = 0; j < len[i]; j++)
                if(((int *) a[i])[j] != (int) j) 
                {
                   printf("ERROR a[%d][%d] = %d != %d\n", i, j, ((int *) a[i])[j], (int) j);
                   ARMCI_Error("armci_brdcst failed (int)\n",0);
                }
          break;
       case ARMCI_LONG:
          for(i = 0; i < 6; i++)
             for(j = 0; j < len[i]; j++)
                if(((long *) a[i])[j] != (long) j)
                {
                   printf("ERROR a[%d][%d] = %ld != %ld\n", i, j, ((long *) a[i])[j], (long) j);
                   ARMCI_Error("armci_brdcst failed (long)\n",0);
                }
          break;
       case ARMCI_FLOAT:
          for(i = 0; i < 6; i++)
             for(j = 0; j < len[i]; j++)
                if(((float *) a[i])[j] != (float) j)
                {
                   printf("ERROR a[%d][%d] = %f != %f\n", i, j, ((float *) a[i])[j], (float) j);
                   ARMCI_Error("armci_brdcst failed (float)\n",0);
                }
          break;
       case ARMCI_DOUBLE:
          for(i = 0; i < 6; i++)
             for(j = 0; j < len[i]; j++)
                if(((double *) a[i])[j] != (double) j)
                {
                   printf("ERROR a[%d][%d] = %f != %f\n", i, j, ((double *) a[i])[j], (double) j);
                   ARMCI_Error("armci_brdcst failed (double)\n",0);
                }
          break;
       default:
          break;
    }

    for(i = 0; i < 6; i++)
       free(a[i]);
}
コード例 #22
0
ファイル: read_input.c プロジェクト: jeffhammond/armci-mpi
/*
 * Allocate `bytes` of ARMCI memory on process 0 and zero bytes on every
 * other process, aborting with `errmsg` through ARMCI_Error() when
 * ARMCI_Malloc() reports failure.  `ptrs` receives the per-process
 * addresses; the address belonging to process 0 is returned.
 */
static void *cg_root_armci_malloc(void **ptrs, size_t bytes, char *errmsg)
{
    if(ARMCI_Malloc(ptrs,((me==0)?bytes:0)))
      ARMCI_Error(errmsg,(int)bytes);
    return ptrs[0];
}

/*
 * read_and_create - read (or generate) the CG input and set up the global
 * data structures.
 *
 * argv[1] = number of rows (na), argv[2] = number of non-zeros (nz),
 * argv[3] = input file name, or a string starting with "random" to have
 * process 0 generate "randominput.dat" (na and nz must then be non-zero).
 * When na or nz is given as 0 it is read from the head of the input file.
 *
 * Process 0 owns the storage of amat, cidx, ridx, xvec, bvec and of the
 * work vectors rvec, dvec, svec, dmvec, qvec and axvec; the other
 * processes request 0 bytes and only keep process 0's address.  Process 0
 * reads the matrix, converts the indices to 0-based, decides which rows
 * each process works on, and the resulting first/last row tables and
 * column map are broadcast.  myfirstrow/mylastrow are set from them.
 *
 * Fatal conditions abort through ARMCI_Error().
 */
void read_and_create(int argc, char **argv)
{
int ri,i,nread;
int tmp1,idealelementsperproc;
void **amatptrs,**xvecptrs;

    /* argv[1], argv[2] and argv[3] are all dereferenced below */
    if(argc<4)
      ARMCI_Error("expected arguments: na nz <input file|random>",argc);

    na = atoi(argv[1]);
    nz = atoi(argv[2]);

    if(strncmp("random",argv[3],6)){
       /* a real input file was named */
       if(me==0){
         fd = fopen(argv[3], "r");
         if(fd==NULL)ARMCI_Error("unable to open given file",0);
       }
    }
    else{
       if(na==0 || nz==0){
         printf("\nERROR:exiting-no input file given and na or nz is 0");
         fflush(stdout);
         ARMCI_Finalize();
         MP_FINALIZE();
         return;
       }
       if(me==0){
         generate_random_file(na,nz);
         fd = fopen("randominput.dat", "r");
         if(fd==NULL)ARMCI_Error("unable to open randominput.dat",0);
       }
    }
    if(me==0){
       if(na==0)
         nread = fread(&na, sizeof(na), 1, fd);
       if(nz==0)
         nread = fread(&nz, sizeof(nz), 1, fd);
       printf("\nReading CG input\n");
       printf("Number of rows: %d\n", na);
       printf("Number of non-zeros: %d\n", nz);
    }

    armci_msg_bcast(&nz,sizeof(int),0);
    armci_msg_bcast(&na,sizeof(int),0);
    MP_BARRIER();

    /* scratch tables for the per-process addresses returned by ARMCI_Malloc */
    amatptrs = (void **)malloc(sizeof(void *)*nproc); 
    xvecptrs = (void **)malloc(sizeof(void *)*nproc);
    if(xvecptrs==NULL || amatptrs==NULL)
      ARMCI_Error("xvecptrs amatptrs malloc failed",sizeof(void *)*nproc);

    amat = (double *)cg_root_armci_malloc(amatptrs,sizeof(double)*nz,
                                          "amat malloc failed");
    cidx = (int *)cg_root_armci_malloc(amatptrs,sizeof(int)*(nz+1),
                                       "icol malloc failed");
    /*+1 for end of last row*/
    ridx = (int *)cg_root_armci_malloc(xvecptrs,sizeof(int)*(na+1),
                                       "irow malloc failed");
    xvec = (double *)cg_root_armci_malloc(xvecptrs,sizeof(double)*(na+1),
                                          "xvec malloc failed");
    bvec = (double *)cg_root_armci_malloc(xvecptrs,sizeof(double)*(na+1),
                                          "bvec malloc failed");

    if(me==0){

      for (i = 0; i < na + 1; i++)
        xvec[i] = 0.0;

      nread = fread(amat, sizeof(double), nz, fd);
      nread = fread(ridx, sizeof(int), (na+1), fd);
      ridx[na]=nz;
      nread = fread(cidx, sizeof(int), (nz+1), fd);
      nread = fread(bvec, sizeof(double), (na+1), fd);

      /* the c adjustment */
      for (i = 0; i < na; i++)
        ridx[i] -= 1;
         
      for (i = 0; i < nz; i++)
        cidx[i] -= 1;
    }
   
    MP_BARRIER();
    /*acg_matvecmul(amat,xvec,bvec,ridx,cidx);*/
    if(0){
    for(i=0;i<nz+1;i++)
      printf("\n%d:amat[%d]=%f icol[%d]=%d",me,i,amat[i],i,cidx[i]);
    for(i=0;i<na+1;i++)
      printf("\n%d:irow[%d]=%d bvec[%d]=%f",me,i,ridx[i],i,bvec[i]);
    }
    allfirstrow = (int *)malloc(sizeof(int)*nproc);
    alllastrow = (int *)malloc(sizeof(int)*nproc);
    columnmap = (int *)malloc(sizeof(int)*nproc);
    if(!allfirstrow || !alllastrow || !columnmap)
      ARMCI_Error("malloc failed allfirstrow ",0);
    MP_BARRIER();
    /* 
     * next decide who works on which rows, this will decide the
     * distribution of a,d,r,q,x,and ax
     */
    /*create the mapping for all vectors, row matrix and column matrix*/
    if(me==0){
       idealelementsperproc = nz/nproc;
       tmp1=0;
       for(i=0;i<nproc;i++){
         int elementsperproc=0;
         allfirstrow[i]=tmp1;
         for(ri=tmp1;ri<na;ri++,tmp1++){
           elementsperproc+=(ridx[ri+1]-ridx[ri]);
           if(elementsperproc>=idealelementsperproc){
             /* end the range before or after row ri, whichever lands
                closer to the ideal element count */
             if((elementsperproc-idealelementsperproc) > 
                idealelementsperproc-(elementsperproc-(ridx[ri+1]-ridx[ri]))){
               alllastrow[i] = ri-1;  
               if((ri-1)<0)ARMCI_Error("run on a smaller processor count",0);
               /*tmp1--;*/
             }
             else{
               alllastrow[i] = ri;  
               if(ri<0)ARMCI_Error("run on a smaller processor count",0);
               tmp1++;
             }
             elementsperproc=0;
             break;
           }
         }
       }
       /* the last process always takes everything up to the final row */
       alllastrow[nproc-1]=na-1;
       for(i=0;i<nproc;i++)columnmap[i]=ridx[allfirstrow[i]];
    }
    armci_msg_bcast(columnmap,nproc*sizeof(int),0);
    armci_msg_bcast(allfirstrow,nproc*sizeof(int),0);
    armci_msg_bcast(alllastrow,nproc*sizeof(int),0);
    myfirstrow = allfirstrow[me];
    mylastrow = alllastrow[me];
    if(me==0)for(i=0;i<nproc;i++){
      printf("\nDISTRIBUTION:first row of process\t%d is %d last row of process\t%d is %d",i,allfirstrow[i],i,alllastrow[i]);
    }

    /* work vectors, again with the storage living on process 0 */
    rvec  = (double *)cg_root_armci_malloc(xvecptrs,sizeof(double)*na,
                                           "rvec malloc failed");
    dvec  = (double *)cg_root_armci_malloc(xvecptrs,sizeof(double)*na,
                                           "dvec malloc failed");
    svec  = (double *)cg_root_armci_malloc(xvecptrs,sizeof(double)*na,
                                           "svec malloc failed");
    dmvec = (double *)cg_root_armci_malloc(xvecptrs,sizeof(double)*na,
                                           "dmvec malloc failed");
    qvec  = (double *)cg_root_armci_malloc(xvecptrs,sizeof(double)*na,
                                           "qvec malloc failed");
    axvec = (double *)cg_root_armci_malloc(xvecptrs,sizeof(double)*na,
                                           "axvec malloc failed");

    if(me==0)fclose(fd);
    /*dont forget to free mallocs*/
    free(allfirstrow);
    free(alllastrow);
    free(columnmap);
    /* the address tables were only needed to fetch process 0's pointers */
    free(amatptrs);
    free(xvecptrs);
}
コード例 #23
0
ファイル: ddi_armci.c プロジェクト: ryanolson/ddi
// DDI-level error entry point: forwards `message` and `code` unchanged to
// ARMCI_Error() and does nothing else.
void DDI_ARMCI_Error(char *message, int code) {
  ARMCI_Error(message,code);
}
コード例 #24
0
/*
 * sparse_initialize - read the sparse matrix on process 0 and distribute it.
 *
 * n, non_zero : out, matrix order and number of non-zeros (broadcast from
 *               process 0).
 * row_ind     : out, receives a malloc'ed array of *n+1 row indices
 *               (0-based when fortran_indexing is set); the caller owns it.
 * col_ind, values, vec, svec : per-process pointer arrays filled in by
 *               create_array(), each with `max` elements per process where
 *               max is the largest entry of proc_nz_list.
 *
 * Process 0 reads "Sparse-MPI/av41092.rua.data", and puts each process's
 * share of column indices and values (proc_nz_list[j] entries for process
 * j, as set up by load_balance()) into that process's arrays.  Every
 * process then initializes its own x-vector slice.
 *
 * Returns 0.  A missing/short input file, a failed allocation or a failed
 * put aborts through ARMCI_Error().
 */
static int sparse_initialize(int *n, int *non_zero, int **row_ind,
                             int **col_ind, double **values, double **vec,
                             double **svec) {

    int i, j, rc, max=-1, *row_ind_tmp=NULL, *tmp_indices=NULL;
    double *tmp_values=NULL;
    unsigned long len;
    FILE *fp=NULL;

    /* Broadcast order of matrix */
    if(me==0) {
        if((fp=fopen("Sparse-MPI/av41092.rua.data", "r")) == NULL)
            ARMCI_Error("Error: Input file not found", me);
        fortran_indexing = 1; /* This is 1 for Harwell-Boeing format matrices */
        if(fscanf(fp, "%d", n) != 1)
            ARMCI_Error("Error: cannot read matrix order", me);
        if(*n%nproc)
            ARMCI_Error("# of rows is not divisible by # of processors", nproc);
        if(*n > ROW)
            ARMCI_Error("order is greater than defined variable ROW", ROW);
    }
    len = sizeof(int);
    armci_msg_brdcst(n, len, 0);

    /* Broad cast number of non_zeros */
    if(me==0) {
        if(fscanf(fp, "%d", non_zero) != 1)
            ARMCI_Error("Error: cannot read number of non-zeros", me);
    }
    armci_msg_brdcst(non_zero, len, 0);

    /* Broadcast row indices */
    len = (*n+1)*sizeof(int);
    row_ind_tmp = (int *)malloc(len);
    if(row_ind_tmp == NULL)
        ARMCI_Error("Error: malloc of row indices failed", (int)len);
    if(me==0)for(i=0; i<*n+1; i++) {
            if(fscanf(fp, "%d", &row_ind_tmp[i]) != 1)
                ARMCI_Error("Error: cannot read row index", i);
            if(fortran_indexing) --row_ind_tmp[i];
        }
    armci_msg_brdcst(row_ind_tmp, len, 0);

    load_balance(*n, *non_zero, row_ind_tmp);

    /* find how much temporary storage is needed at the maximum */
    if(me==0) {
        for(max=-1,j=0; j<nproc; j++) if(max<proc_nz_list[j]) max=proc_nz_list[j];
        if(max<0) ARMCI_Error(" max cannot be negative", max);
    }

    /* Broadcast the maximum number of elements */
    len = sizeof(int);
    armci_msg_brdcst(&max, len, 0);

    /* create the Sparse MAtrix Array */
    if(me==0) printf("  Creating ValueArray (CompressedSparseMatrix) ...\n\n");
    create_array((void**)col_ind, sizeof(int), 1, &max);

    /* create the column subscript array */
    if(me==0) printf("  Creating Column Subscript Array ... \n\n");
    create_array((void**)values, sizeof(double), 1, &max);

    /* create the x-vector and the solution vector */
    if(me==0) printf("  Creating Vectors ... \n\n");
    create_array((void**)vec,  sizeof(double),1, &max);
    create_array((void**)svec, sizeof(double),1, &max);
    armci_msg_barrier();


    /* Process 0 distributes the column indices and non_zero values to
       respective processors*/
    if(me == 0) {
        /* staging buffers, large enough for the biggest share */
        tmp_indices = (int *)malloc(max*sizeof(int));
        tmp_values  = (double *)malloc(max*sizeof(double));
        /* malloc(0) may legitimately return NULL, so only complain if max>0 */
        if(max > 0 && (tmp_indices == NULL || tmp_values == NULL))
            ARMCI_Error("Error: malloc of staging buffers failed", max);

        for(j=0; j<nproc; j++) {
            for(i=0; i<proc_nz_list[j]; i++) {
                if(fscanf(fp, "%d", &tmp_indices[i]) != 1)
                    ARMCI_Error("Error: cannot read column index", i);
                if(fortran_indexing) --tmp_indices[i];
            }
            /* rc = fread(tmp_indices, sizeof(int), proc_nz_list[j], fp); */
            if((rc=ARMCI_Put(tmp_indices, col_ind[j], proc_nz_list[j]*sizeof(int), j)))
                ARMCI_Error("armci_nbput failed\n",rc);
        }
        for(j=0; j<nproc; j++) {
            for(i=0; i<proc_nz_list[j]; i++) {
                if(fscanf(fp, "%lf", &tmp_values[i]) != 1)
                    ARMCI_Error("Error: cannot read matrix value", i);
            }
            if((rc=ARMCI_Put(tmp_values, values[j], proc_nz_list[j]*sizeof(double), j)))
                ARMCI_Error("armci_nbput failed\n",rc);
        }
    }
    ARMCI_AllFence();
    armci_msg_barrier();
    ARMCI_AllFence();

    /* initializing x-vector */
    if(me==0) for(i=0; i<proc_nz_list[me]; i++) vec[me][i] = (i+1);
    else for(i=0; i<proc_nz_list[me]; i++) vec[me][i]=me*proc_nz_list[me-1]+(i+1);

#if 0
    if(me==0) {
        printf("max = %d\n", max);
        for(i=0; i<max; i++)  printf("%.1f ", values[me][i]);
        printf("\n");
    }
#endif

    /* ownership of the row index array passes to the caller */
    *row_ind = row_ind_tmp;
    if(me==0) {
        free(tmp_indices);
        free(tmp_values);
        fclose(fp);
    }
    return 0;
}
コード例 #25
0
ファイル: matmul.c プロジェクト: arnolda/scafacos
/*
 * matmul driver: initializes MPI and ARMCI, allocates three rank x rank
 * matrices of doubles (A, B, C) with ARMCI_Malloc on every process,
 * synchronizes, and releases everything again.
 *
 * argv[1] (optional): matrix rank; defaults to 8.
 *
 * Returns 0; allocation or release failures abort through ARMCI_Error().
 */
int main(int argc, char **argv)
{
    int me,nproc;
    int status;
    int rank;
    size_t matrix_bytes;

    /* initialization */
    MPI_Init(&argc, &argv);
    ARMCI_Init();

#ifdef HPC_PROFILING
    HPM_Init();
#endif

    MPI_Comm_rank(MPI_COMM_WORLD,&me);
    MPI_Comm_size(MPI_COMM_WORLD,&nproc);

#ifdef DEBUG
    if(me == 0){
       printf("The result of MPI_Comm_size is %d\n",nproc);
       fflush(stdout);
    }
#endif

    /* get the matrix parameters */
    if (argc > 1){
        rank = atoi(argv[1]);
    } else {
        rank = 8;
    }
    if (me == 0){
        printf("Running matmul.x with rank = %d\n",rank);
        fflush(stdout);
    }

    /* Square in a wide type first: rank*rank overflows int beyond 46340. */
    matrix_bytes = (size_t)((long long)rank * rank) * sizeof(double);

    /* register remote pointers */
    double** addr_A = (double **) ARMCI_Malloc_local(sizeof(double *) * nproc);
    if (addr_A == NULL) ARMCI_Error("malloc A failed at line",0);

    double** addr_B = (double **) ARMCI_Malloc_local(sizeof(double *) * nproc);
    if (addr_B == NULL) ARMCI_Error("malloc B failed at line",0);

    double** addr_C = (double **) ARMCI_Malloc_local(sizeof(double *) * nproc);
    if (addr_C == NULL) ARMCI_Error("malloc C failed at line",0);

#ifdef DEBUG
    if(me == 0) printf("ARMCI_Malloc A requests %zu bytes\n",matrix_bytes);
    fflush(stdout);
#endif
    status = ARMCI_Malloc((void **) addr_A, matrix_bytes);
    if (status != 0) ARMCI_Error("ARMCI_Malloc A failed",status);

#ifdef DEBUG
    if(me == 0) printf("ARMCI_Malloc B requests %zu bytes\n",matrix_bytes);
    fflush(stdout);
#endif
    status = ARMCI_Malloc((void **) addr_B, matrix_bytes);
    if (status != 0) ARMCI_Error("ARMCI_Malloc B failed",status);

#ifdef DEBUG
    if(me == 0) printf("ARMCI_Malloc C requests %zu bytes\n",matrix_bytes);
    fflush(stdout);
#endif
    status = ARMCI_Malloc((void **) addr_C, matrix_bytes);
    if (status != 0) ARMCI_Error("ARMCI_Malloc C failed",status);

    MPI_Barrier(MPI_COMM_WORLD);

    /* release the matrices themselves; previously only the pointer tables
       were freed and the ARMCI_Malloc'ed segments were leaked */
    status = ARMCI_Free(addr_C[me]);
    if (status != 0) ARMCI_Error("ARMCI_Free C failed",status);
    status = ARMCI_Free(addr_B[me]);
    if (status != 0) ARMCI_Error("ARMCI_Free B failed",status);
    status = ARMCI_Free(addr_A[me]);
    if (status != 0) ARMCI_Error("ARMCI_Free A failed",status);

    /* free ARMCI pointers */
    ARMCI_Free_local(addr_C);
    ARMCI_Free_local(addr_B);
    ARMCI_Free_local(addr_A);

#ifdef HPC_PROFILING
    HPM_Print();
#endif

    /* the end */
    ARMCI_Finalize();
    MPI_Finalize();

    return(0);
}
コード例 #26
0
/* Set each of the first `n` doubles of `buf` to `value`. */
static void puts_lat_fill(double *buf, long n, double value)
{
   long k;

   for(k=0; k<n; k++) {
       buf[k] = value;
   }
}

/*
 * Check that the xdim x ydim corner of `buf` (rows MAX_YDIM doubles apart)
 * holds `expected` everywhere; report the first mismatch and abort through
 * ARMCI_Error().
 */
static void puts_lat_validate(const double *buf, int xdim, int ydim,
                              double expected)
{
   int x, y;

   for(x=0; x<xdim; x++)
   {
     for(y=0; y<ydim; y++)
     {
       double actual = buf[x*MAX_YDIM + y];
       if(actual != expected)
       {
         printf("Data validation failed at X: %d Y: %d Expected : %f Actual : %f \n",
             x, y, expected, actual);
         fflush(stdout);
         ARMCI_Error("Bailing out", 1);
       }
     }
   }
}

/*
 * ARMCI_PutS latency benchmark.
 *
 * Rank 0 repeatedly puts an xdim x ydim block of doubles to rank 1, once
 * timing only the local completion of the puts (a single fence after the
 * timed loop) and once with a fence after every put.  Rank 1 validates the
 * received block after each phase; the barriers keep all ranks in step.
 * Dimensions double from 1 up to MAX_XDIM / MAX_YDIM.
 *
 * Needs at least 2 processes.  Returns 0.
 */
int main(int argc, char *argv[]) {

   int i, rank, nranks;
   int xdim, ydim;
   long bufsize, nelems;
   double **buffer;
   double t_start=0.0, t_stop=0.0;
   int count[2], src_stride, trg_stride, stride_level, peer;
   double expected;
   int provided;

   MPI_Init_thread(&argc, &argv, MPI_THREAD_SINGLE, &provided);
   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    if (nranks < 2) {
        printf("%s: Must be run with at least 2 processes\n", argv[0]);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

   ARMCI_Init_args(&argc, &argv);
   
   bufsize = MAX_XDIM * MAX_YDIM * sizeof(double);
   nelems = bufsize/sizeof(double);
   buffer = (double **) malloc(sizeof(double *) * nranks);
   if (buffer == NULL)
       ARMCI_Error("malloc of buffer pointer table failed", nranks);
   if (ARMCI_Malloc((void **) buffer, bufsize) != 0)
       ARMCI_Error("ARMCI_Malloc failed", 1);

   /* every rank starts with its own signature value 1.0 + rank */
   puts_lat_fill(buffer[rank], nelems, 1.0 + rank);

   if(rank == 0) {
     printf("ARMCI_PutS Latency - local and remote completions - in usec \n");
     printf("%30s %22s %22s\n", "Dimensions(array of doubles)", "Latency-LocalCompeltion", "Latency-RemoteCompletion");
     fflush(stdout);
   }

   src_stride = MAX_YDIM*sizeof(double);
   trg_stride = MAX_YDIM*sizeof(double);
   stride_level = 1;

   ARMCI_Barrier();

   for(xdim=1; xdim<=MAX_XDIM; xdim*=2) {

      count[1] = xdim;

      for(ydim=1; ydim<=MAX_YDIM; ydim*=2) {

        count[0] = ydim*sizeof(double); 
      
        if(rank == 0) 
        {
          peer = 1;          
 
          /* phase 1: time the puts alone; the first SKIP rounds are warm-up */
          for(i=0; i<ITERATIONS+SKIP; i++) { 

             if(i == SKIP)
                 t_start = MPI_Wtime();

             ARMCI_PutS((void *) buffer[rank], &src_stride, (void *) buffer[peer], &trg_stride, count, stride_level, peer); 
 
          }
          t_stop = MPI_Wtime();
          ARMCI_Fence(peer);
          /* sized for two full-width ints; the old 10-byte buffer
             overflowed once the dimensions needed more than 9 characters */
          char temp[32]; 
          snprintf(temp, sizeof temp, "%dX%d", xdim, ydim);
          printf("%30s %20.2f", temp, ((t_stop-t_start)*1000000)/ITERATIONS);
          fflush(stdout);

          ARMCI_Barrier();

          ARMCI_Barrier();

          /* phase 2: fence after every put, so remote completion is timed */
          for(i=0; i<ITERATIONS+SKIP; i++) {
  
             if(i == SKIP)
                t_start = MPI_Wtime();

             ARMCI_PutS((void *) buffer[rank], &src_stride, (void *) buffer[peer], &trg_stride, count, stride_level, peer); 
             ARMCI_Fence(peer);

          }
          t_stop = MPI_Wtime();
          printf("%20.2f \n", ((t_stop-t_start)*1000000)/ITERATIONS);
          fflush(stdout);

          ARMCI_Barrier();

          ARMCI_Barrier();
        }
        else
        {
            peer = 0;

            /* rank 0 puts its own signature value */
            expected = (1.0 + (double) peer);

            ARMCI_Barrier();
            if (rank == 1)
            {
              puts_lat_validate(buffer[rank], xdim, ydim, expected);
            }
            puts_lat_fill(buffer[rank], nelems, 1.0 + rank);

            ARMCI_Barrier();

            ARMCI_Barrier();
            if (rank == 1)
            {
              puts_lat_validate(buffer[rank], xdim, ydim, expected);
              puts_lat_fill(buffer[rank], nelems, 1.0 + rank);
            }
            ARMCI_Barrier();

        }
        
      }

   }

   ARMCI_Barrier();

   ARMCI_Free((void *) buffer[rank]);
   free(buffer);

   ARMCI_Finalize();

   MPI_Finalize();

   return 0;
}
コード例 #27
0
ファイル: test2.c プロジェクト: arnolda/scafacos
/*
 * Exercise one ARMCI global operation (or reduce) for one datatype and
 * verify the result against a locally computed expected value.
 *
 * datatype     one of ARMCI_INT, ARMCI_LONG, ARMCI_FLOAT, ARMCI_DOUBLE;
 *              any other value performs no communication and no checks.
 * op           operation string handed to ARMCI ("+", "*", "min", "max",
 *              "absmax", "absmin", "or"); it is matched by prefix with
 *              strncmp when selecting the expected value.
 * reduce_test  0      -> armci_msg_{i,l,f,d}gop is called and every rank
 *                        verifies its buffer;
 *              non-0  -> armci_msg_reduce is called and only rank 0
 *                        verifies its buffer.
 *
 * Input data: element j of every buffer on rank `me` holds (me + j) with a
 * positive sign when (me + j) is even and a negative sign otherwise; the
 * floating point variants are additionally divided by nproc.  `me` and
 * `nproc` are defined outside this function.
 *
 * A mismatch is reported with printf and followed by ARMCI_Error().
 * The "or" operation is accepted for the integer types but its result is
 * not verified.
 */
void test_gop2_or_reduce(const int datatype, char * op, const int reduce_test)
{
    /* NULL-initialised so the common cleanup loop at the end is safe even
     * when no buffer was allocated (unknown datatype -> default case). */
    void *a[6] = { NULL };
    int len[6] = {1, 10, 100, 1000, 10000, 100000};
    int len_length = 3;   /* only the first len_length message sizes are used */
    int datatype_size = 0;
    int i, j;
    char * test_type;
    int verbose = 0;      /* set non-zero to print progress on rank 0 */
    if(reduce_test == 0)
       test_type = "gop2";
    else
       test_type = "reduce";

    switch(datatype)
    {
       case ARMCI_INT:
          datatype_size = sizeof(int);
          for(i = 0; i < len_length; i++)
             a[i] = malloc(len[i] * datatype_size);
          /* fill: alternating-sign integers depending on parity of (me + j) */
          for(i = 0; i < len_length; i++)
             for(j = 0; j < len[i]; j++)
                ((int *) a[i])[j] = (int) (me + j) * (((me + j) % 2 == 0) ? 1 : -1);
          /* communicate */
          for(i = 0; i < len_length; i++)
          {
             if(me == 0 && verbose != 0)
                printf("testing %s %s message size = %d op = %s\n", test_type, "ARMCI_INT", len[i], op);
             if(reduce_test == 0)
                armci_msg_igop(a[i], len[i], op);
             else
                armci_msg_reduce(a[i], len[i], op, datatype);
          }
          /* verify (all ranks for gop, rank 0 only for reduce) */
          if(me == 0 || reduce_test == 0)
             for(i = 0; i < len_length; i++)
             {
                if(me == 0 && verbose != 0)
                   printf("checking %s %s message size = %d op = %s\n", test_type, "ARMCI_INT", len[i], op);
                for(j = 0; j < len[i]; j++)
                   if(strncmp(op, "+", 1) == 0)
                   {
                      int compare = 0;
                      if(nproc % 2 == 0)
                      {
                         if(j % 2 == 0)
                            compare = -nproc / 2;
                         else
                            compare = nproc / 2;
                      }
                      else
                      {
                         if(j % 2 == 0)
                            compare = j + nproc / 2;
                         else
                            compare = -(j + nproc / 2);
                      }
                      if(((int *) a[i])[j] != compare) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %d != %d\n", test_type, "ARMCI_INT", op, i, j, ((int *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
                   else if(strncmp(op, "*", 1) == 0)
                   {
                      /* expected product is rebuilt from every rank's contribution */
                      int compare = 1;
                      int k = 0;
                      for(k = 0; k < nproc; k++)
                         compare *= (k + j) * (((k + j) % 2 == 0) ? 1 : -1);
                      if(((int *) a[i])[j] != compare) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %d != %d\n", test_type, "ARMCI_INT", op, i, j, ((int *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
                   else if(strncmp(op, "min", 3) == 0)
                   {
                      int compare = -(j + nproc - 1);
                      if(compare % 2 == 0 && nproc > 1)
                         compare = -(j + nproc - 2);
                      if(((int *) a[i])[j] != compare) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %d != %d\n", test_type, "ARMCI_INT", op, i, j, ((int *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
                   else if(strncmp(op, "max", 3) == 0)
                   {
                      int compare = j + nproc - 1;
                      if(compare % 2 != 0 && nproc > 1)
                         compare = j + nproc - 2;
                      if(((int *) a[i])[j] != compare) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %d != %d\n", test_type, "ARMCI_INT", op, i, j, ((int *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
                   else if(strncmp(op, "absmax", 6) == 0)
                   {
                      int compare = j + nproc - 1;
                      if(((int *) a[i])[j] != compare) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %d != %d\n", test_type, "ARMCI_INT", op, i, j, ((int *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
                   else if(strncmp(op, "absmin", 6) == 0)
                   {
                      int compare = j;
                      if(((int *) a[i])[j] != compare) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %d != %d\n", test_type, "ARMCI_INT", op, i, j, ((int *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
                   else if(strncmp(op, "or", 2) == 0)
                   {
                      /* "or" result is intentionally not verified */
                   }
             }
          break;
       case ARMCI_LONG:
          datatype_size = sizeof(long);
          for(i = 0; i < len_length; i++)
             a[i] = malloc(len[i] * datatype_size);
          /* fill: same pattern as ARMCI_INT, stored as long */
          for(i = 0; i < len_length; i++)
             for(j = 0; j < len[i]; j++)
                ((long *) a[i])[j] = (long) (me + j) * (((me + j) % 2 == 0) ? 1 : -1);
          /* communicate */
          for(i = 0; i < len_length; i++)
          {
             if(me == 0 && verbose != 0)
                printf("testing %s %s message size = %d op = %s\n", test_type, "ARMCI_LONG", len[i], op);
             if(reduce_test == 0)
                armci_msg_lgop(a[i], len[i], op);
             else
                armci_msg_reduce(a[i], len[i], op, datatype);
          }
          /* verify (all ranks for gop, rank 0 only for reduce) */
          if(me == 0 || reduce_test == 0)
             for(i = 0; i < len_length; i++)
             {
                if(me == 0 && verbose != 0)
                   printf("checking %s %s message size = %d op = %s\n", test_type, "ARMCI_LONG", len[i], op);
                for(j = 0; j < len[i]; j++)
                   if(strncmp(op, "+", 1) == 0)
                   {
                      int compare = 0;
                      if(nproc % 2 == 0)
                      {
                         if(j % 2 == 0)
                            compare = -nproc / 2;
                         else
                            compare = nproc / 2;
                      }
                      else
                      {
                         if(j % 2 == 0)
                            compare = j + nproc / 2;
                         else
                            compare = -(j + nproc / 2);
                      }
                      if(((long *) a[i])[j] != compare) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %ld != %d\n", test_type, "ARMCI_LONG", op, i, j, ((long *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
                   else if(strncmp(op, "*", 1) == 0)
                   {
                      int compare = 1;
                      int k = 0;
                      for(k = 0; k < nproc; k++)
                         compare *= (k + j) * (((k + j) % 2 == 0) ? 1 : -1);
                      if(((long *) a[i])[j] != compare) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %ld != %d\n", test_type, "ARMCI_LONG", op, i, j, ((long *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
                   else if(strncmp(op, "min", 3) == 0)
                   {
                      int compare = -(j + nproc - 1);
                      if(compare % 2 == 0 && nproc > 1)
                         compare = -(j + nproc - 2);
                      if(((long *) a[i])[j] != compare) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %ld != %d\n", test_type, "ARMCI_LONG", op, i, j, ((long *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
                   else if(strncmp(op, "max", 3) == 0)
                   {
                      int compare = j + nproc - 1;
                      if(compare % 2 != 0 && nproc > 1)
                         compare = j + nproc - 2;
                      if(((long *) a[i])[j] != compare) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %ld != %d\n", test_type, "ARMCI_LONG", op, i, j, ((long *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
                   else if(strncmp(op, "absmax", 6) == 0)
                   {
                      int compare = j + nproc - 1;
                      if(((long *) a[i])[j] != compare) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %ld != %d\n", test_type, "ARMCI_LONG", op, i, j, ((long *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
                   else if(strncmp(op, "absmin", 6) == 0)
                   {
                      int compare = j;
                      if(((long *) a[i])[j] != compare) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %ld != %d\n", test_type, "ARMCI_LONG", op, i, j, ((long *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
                   else if(strncmp(op, "or", 2) == 0)
                   {
                      /* "or" result is intentionally not verified */
                   }
             }
          break;
       case ARMCI_FLOAT:
          datatype_size = sizeof(float);
          for(i = 0; i < len_length; i++)
             a[i] = malloc(len[i] * datatype_size);
          /* fill: alternating-sign values scaled by 1/nproc */
          for(i = 0; i < len_length; i++)
             for(j = 0; j < len[i]; j++)
                ((float *) a[i])[j] = (float) (me + j) * (((me + j) % 2 == 0) ? 1.0 / nproc : -1.0 / nproc);
          /* communicate */
          for(i = 0; i < len_length; i++)
          {
             if(me == 0 && verbose != 0)
                printf("testing %s ARMCI_FLOAT message size = %d op = %s\n", test_type, len[i], op);
             if(reduce_test == 0)
                armci_msg_fgop(a[i], len[i], op);
             else
                armci_msg_reduce(a[i], len[i], op, datatype);
          }
          /* verify (all ranks for gop, rank 0 only for reduce) */
          if(me == 0 || reduce_test == 0)
             for(i = 0; i < len_length; i++)
             {
                if(me == 0 && verbose != 0)
                   printf("checking %s ARMCI_FLOAT message size = %d op = %s\n", test_type, len[i], op);
                for(j = 0; j < len[i]; j++)
                   if(strncmp(op, "+", 1) == 0)
                   {
                      float compare = 0.0;
                      if(nproc % 2 == 0)
                      {
                         if(j % 2 == 0)
                            compare = -(((int)nproc / 2) / (float) nproc);
                         else
                            compare = ((int)nproc / 2) / (float) nproc;
                      }
                      else
                      {
                         if(j % 2 == 0)
                            compare = ((int) j + nproc / 2) / (float) nproc;
                         else
                            compare = -(((int) j + nproc / 2) / (float) nproc);
                      }
                      /* relative tolerance: sums are not exactly representable */
                      if(ARMCI_ABS(((float *) a[i])[j] - compare) > ARMCI_ABS(compare) * FLOAT_EPS)
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %f != %f\n", test_type, "ARMCI_FLOAT", op, i, j, ((float *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
                   else if(strncmp(op, "*", 1) == 0)
                   {
                      float compare = 1.0;
                      int k = 0;
                      for(k = 0; k < nproc; k++)
                         compare *= ((float) k + j) / (float) nproc;
                      if((nproc / 2) % 2 != 0)
                         if(nproc % 2 != 0)
                            if(j % 2 == 0)
                               compare *= -1.0;

                      if(ARMCI_ABS(((float *) a[i])[j] - compare) > ARMCI_ABS(compare) * FLOAT_EPS) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %f != %f\n", test_type, "ARMCI_FLOAT", op, i, j, ((float *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
                   else if(strncmp(op, "min", 3) == 0)
                   {
                      float compare = -((float) j + nproc - 1) / nproc;
                      if((j + nproc - 1)% 2 == 0 && nproc > 1)
                         compare = -((float) j + nproc - 2) / nproc;
                      if(((float *) a[i])[j] != compare) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %f != %f\n", test_type, "ARMCI_FLOAT", op, i, j, ((float *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
                   else if(strncmp(op, "max", 3) == 0)
                   {
                      float compare = ((float) j + nproc - 1) / nproc;
                      if((j + nproc - 1) % 2 != 0 && nproc > 1)
                         compare = ((float) j + nproc - 2) / nproc;
                      if(((float *) a[i])[j] != compare) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %f != %f\n", test_type, "ARMCI_FLOAT", op, i, j, ((float *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
                   else if(strncmp(op, "absmax", 6) == 0)
                   {
                      float compare = ((float) j + nproc - 1) / nproc;
                      if(((float *) a[i])[j] != compare) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %f != %f\n", test_type, "ARMCI_FLOAT", op, i, j, ((float *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
                   else if(strncmp(op, "absmin", 6) == 0)
                   {
                      float compare = (float) j / nproc;
                      if(((float *) a[i])[j] != compare) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %f != %f\n", test_type, "ARMCI_FLOAT", op, i, j, ((float *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
             }
          break;
       case ARMCI_DOUBLE:
          datatype_size = sizeof(double);
          for(i = 0; i < len_length; i++)
             a[i] = malloc(len[i] * datatype_size);
          /* fill: alternating-sign values scaled by 1/nproc */
          for(i = 0; i < len_length; i++)
             for(j = 0; j < len[i]; j++)
                ((double *) a[i])[j] = (double) (me + j) * (((me + j) % 2 == 0) ? 1.0 / nproc : -1.0 / nproc);
          /* communicate */
          for(i = 0; i < len_length; i++)
          {
             if(me == 0 && verbose != 0)
                printf("testing %s ARMCI_DOUBLE message size = %d op = %s\n", test_type, len[i], op);
             if(reduce_test == 0)
                armci_msg_dgop(a[i], len[i], op);
             else
                armci_msg_reduce(a[i], len[i], op, datatype);
          }
          /* verify (all ranks for gop, rank 0 only for reduce) */
          if(me == 0 || reduce_test == 0)
             for(i = 0; i < len_length; i++)
             {
                if(me == 0 && verbose != 0)
                   printf("checking %s ARMCI_DOUBLE message size = %d op = %s\n", test_type, len[i], op);
                for(j = 0; j < len[i]; j++)
                   if(strncmp(op, "+", 1) == 0)
                   {
                      double compare = 0.0;
                      if(nproc % 2 == 0)
                      {
                         if(j % 2 == 0)
                            compare = -(((int)nproc / 2) / (double) nproc);
                         else
                            compare = ((int)nproc / 2) / (double) nproc;
                      }
                      else
                      {
                         if(j % 2 == 0)
                            compare = ((int) j + nproc / 2) / (double) nproc;
                         else
                            compare = -(((int) j + nproc / 2) / (double) nproc);
                      }
                      if(ARMCI_ABS(((double *) a[i])[j] - compare) > ARMCI_ABS(compare) * DOUBLE_EPS) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %f != %f\n", test_type, "ARMCI_DOUBLE", op, i, j, ((double *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
                   else if(strncmp(op, "*", 1) == 0)
                   {
                      double compare = 1.0;
                      int k = 0;
                      /* factors are formed in double precision so the
                       * expected value is as precise as the data it is
                       * compared against under DOUBLE_EPS */
                      for(k = 0; k < nproc; k++)
                         compare *= ((double) k + j) / (double) nproc;
                      if((nproc / 2) % 2 != 0)
                         if(nproc % 2 != 0)
                            if(j % 2 == 0)
                               compare *= -1.0;
                      if(ARMCI_ABS(((double *) a[i])[j] - compare) > ARMCI_ABS(compare) * DOUBLE_EPS) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %f != %f\n", test_type, "ARMCI_DOUBLE", op, i, j, ((double *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
                   else if(strncmp(op, "min", 3) == 0)
                   {
                      double compare = -((double) j + nproc - 1) / nproc;
                      if((j + nproc - 1)% 2 == 0 && nproc > 1)
                         compare = -((double) j + nproc - 2) / nproc;
                      if(ARMCI_ABS(((double *) a[i])[j] - compare) > ARMCI_ABS(compare) * DOUBLE_EPS) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %f != %f\n", test_type, "ARMCI_DOUBLE", op, i, j, ((double *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
                   else if(strncmp(op, "max", 3) == 0)
                   {
                      double compare = ((double) j + nproc - 1) / nproc;
                      if((j + nproc - 1) % 2 != 0 && nproc > 1)
                         compare = ((double) j + nproc - 2) / nproc;
                      if(ARMCI_ABS(((double *) a[i])[j] - compare) > ARMCI_ABS(compare) * DOUBLE_EPS) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %f != %f\n", test_type, "ARMCI_DOUBLE", op, i, j, ((double *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
                   else if(strncmp(op, "absmax", 6) == 0)
                   {
                      double compare = ((double) j + nproc - 1) / nproc;
                      if(ARMCI_ABS(((double *) a[i])[j] - compare) > ARMCI_ABS(compare) * DOUBLE_EPS) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %f != %f\n", test_type, "ARMCI_DOUBLE", op, i, j, ((double *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
                   else if(strncmp(op, "absmin", 6) == 0)
                   {
                      double compare = (double) j / nproc;
                      if(ARMCI_ABS(((double *) a[i])[j] - compare) > ARMCI_ABS(compare) * DOUBLE_EPS) 
                      {
                         printf("ERROR %s %s %s a[%d][%d] = %f != %f\n", test_type, "ARMCI_DOUBLE", op, i, j, ((double *) a[i])[j], compare);
                         ARMCI_Error("test_gop2_or_reduce failed\n",0);
                      }
                   }
             }
          break;
       default:
          /* unknown datatype: nothing allocated, nothing to test */
          break;
    }
    /* free(NULL) is a no-op, so this is safe for the default case too */
    for(i = 0; i < len_length; i++)
       free(a[i]);
}
コード例 #28
0
/*
 * ARMCI_AccS latency benchmark.
 *
 * Rank 0 repeatedly accumulates an xdim-by-ydim strided patch of doubles
 * from a local source buffer into rank 1's buffer, for every power-of-two
 * xdim up to MAX_XDIM and ydim up to MAX_YDIM.  Two timings are printed per
 * patch size:
 *   - "local completion":  the timed loop issues ARMCI_AccS only; the
 *     fence follows after the clock is stopped;
 *   - "remote completion": every ARMCI_AccS is followed by ARMCI_Fence
 *     inside the timed loop.
 * After each phase rank 1 validates the accumulated patch and resets its
 * buffer.  All other ranks (>= 2) only take part in the barriers.
 *
 * Requires at least two processes; aborts via MPI_Abort otherwise.
 * Returns 0 on success; a validation failure ends in ARMCI_Error().
 */
int main(int argc, char **argv)
{

    int i, j, rank, nranks, peer;
    size_t xdim, ydim;
    unsigned long bufsize;
    double **buffer, *src_buf;
    double t_start=0.0, t_stop;
    int count[2], src_stride, trg_stride, stride_level;
    double scaling;
    int provided;

    MPI_Init_thread(&argc, &argv, MPI_THREAD_SINGLE, &provided);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    if (nranks < 2) {
        printf("%s: Must be run with at least 2 processes\n", argv[0]);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    ARMCI_Init_args(&argc, &argv);

    /* one pointer slot per rank, filled in by the ARMCI_Malloc call */
    buffer = (double **) malloc(sizeof(double *) * nranks);

    bufsize = MAX_XDIM * MAX_YDIM * sizeof(double);
    ARMCI_Malloc((void **) buffer, bufsize);
    src_buf = ARMCI_Malloc_local(bufsize);

    if (rank == 0)
    {
        printf("ARMCI_AccS Latency - local and remote completions - in usec \n");
        printf("%30s %22s %22s\n",
               "Dimensions(array of double)",
               "Local Completion",
               "Remote completion");
        fflush(stdout);
    }

    /* every element of both buffers starts out as 1.0 + rank */
    ARMCI_Access_begin(buffer[rank]);
    for (i = 0; i < bufsize / sizeof(double); i++)
    {
      *(buffer[rank] + i) = 1.0 + rank;
      *(src_buf + i) = 1.0 + rank;
    }
    ARMCI_Access_end(buffer[rank]);

    scaling = 2.0;

    /* both source and target are laid out with MAX_YDIM doubles per row */
    src_stride = MAX_YDIM * sizeof(double);
    trg_stride = MAX_YDIM * sizeof(double);
    stride_level = 1;

    ARMCI_Barrier();

    for (xdim = 1; xdim <= MAX_XDIM; xdim *= 2)
    {

        count[1] = xdim;

        for (ydim = 1; ydim <= MAX_YDIM; ydim *= 2)
        {

            count[0] = ydim * sizeof(double);

            if (rank == 0)
            {

                peer = 1;

                /* phase 1: local completion */
                for (i = 0; i < ITERATIONS + SKIP; i++)
                {

                    if (i == SKIP) t_start = MPI_Wtime();

                    ARMCI_AccS(ARMCI_ACC_DBL,
                               (void *) &scaling,
                               /* (void *) buffer[rank] */ src_buf,
                               &src_stride,
                               (void *) buffer[peer],
                               &trg_stride,
                               count,
                               stride_level,
                               1);

                }
                t_stop = MPI_Wtime();
                ARMCI_Fence(1);

                /* sized for two full-width ints plus separator, and
                 * bounded so large dimensions cannot overrun it */
                char temp[32];
                snprintf(temp, sizeof temp, "%dX%d", (int) xdim, (int) ydim);
                printf("%30s %20.2f ", temp, ((t_stop - t_start) * 1000000)
                        / ITERATIONS);
                fflush(stdout);

                ARMCI_Barrier();

                ARMCI_Barrier();

                /* phase 2: remote completion (fence inside the timed loop) */
                for (i = 0; i < ITERATIONS + SKIP; i++)
                {

                    if (i == SKIP) t_start = MPI_Wtime();

                    ARMCI_AccS(ARMCI_ACC_DBL,
                               (void *) &scaling,
                               /* (void *) buffer[rank] */ src_buf,
                               &src_stride,
                               (void *) buffer[peer],
                               &trg_stride,
                               count,
                               stride_level,
                               1);
                    ARMCI_Fence(1);

                }
                t_stop = MPI_Wtime();
                printf("%20.2f \n", ((t_stop - t_start) * 1000000) / ITERATIONS);
                fflush(stdout);

                ARMCI_Barrier();

                ARMCI_Barrier();

            }
            else
            {

                peer = 0;

                /* initial value plus one scaled contribution of the
                 * origin's source value per accumulate call */
                const double expected = (1.0 + rank)
                        + scaling * (1.0 + peer) * (ITERATIONS + SKIP);

                ARMCI_Barrier();

                if (rank == 1) 
                {
                  ARMCI_Access_begin(buffer[rank]);
                  for (i = 0; i < xdim; i++)
                  {
                    for (j = 0; j < ydim; j++)
                    {
                      /* row stride is MAX_YDIM, matching trg_stride */
                      const double actual = *(buffer[rank] + i * MAX_YDIM + j);
                      if (actual != expected)
                      {
                        printf("Data validation failed at X: %d Y: %d Expected : %f Actual : %f \n",
                            i,
                            j,
                            expected,
                            actual);
                        fflush(stdout);
                        ARMCI_Error("Bailing out", 1);
                      }
                    }
                  }

                  /* reset for the next phase */
                  for (i = 0; i < bufsize / sizeof(double); i++)
                  {
                    *(buffer[rank] + i) = 1.0 + rank;
                  }
                  ARMCI_Access_end(buffer[rank]);
                }

                ARMCI_Barrier();

                ARMCI_Barrier();

                if (rank == 1) 
                {
                  ARMCI_Access_begin(buffer[rank]);

                  for (i = 0; i < xdim; i++)
                  {
                    for (j = 0; j < ydim; j++)
                    {
                      /* row stride is MAX_YDIM, matching trg_stride */
                      const double actual = *(buffer[rank] + i * MAX_YDIM + j);
                      if (actual != expected)
                      {
                        printf("Data validation failed at X: %d Y: %d Expected : %f Actual : %f \n",
                            i,
                            j,
                            expected,
                            actual);
                        fflush(stdout);
                        ARMCI_Error("Bailing out", 1);
                      }
                    }
                  }

                  /* reset for the next patch size */
                  for (i = 0; i < bufsize / sizeof(double); i++)
                  {
                    *(buffer[rank] + i) = 1.0 + rank;
                  }

                  ARMCI_Access_end(buffer[rank]);
                }
                ARMCI_Barrier();

            }

        }

    }

    ARMCI_Barrier();

    ARMCI_Free((void *) buffer[rank]);
    ARMCI_Free_local(src_buf);
    free(buffer);

    ARMCI_Finalize();

    MPI_Finalize();

    return 0;
}
コード例 #29
0
ファイル: test2.c プロジェクト: arnolda/scafacos
/*
 * Driver for the ARMCI accumulate-type and collective-type tests.
 *
 * Runs test_acc_type() for each accumulate datatype and test_collective()
 * for each collective datatype, timing each group with gettimeofday().
 * Rank 0 prints progress banners and, at the end, a CSV-style runtime
 * summary.  Always returns 0.
 */
int main(int argc, char* argv[])
{
    /* Per-group wall-clock stamps, indexed by the TEST_* constants below. */
    struct timeval start_time[2];
    struct timeval stop_time[2];
    char * test_name[2] = { "acc_type", "collective" };
    int test_flags[2]   = { 1, 1 };

    /* Accumulate datatypes in execution order, each with its banner. */
    const struct { int type; const char *banner; } acc_cases[] = {
       { ARMCI_ACC_INT, "Test Accumulate ARMCI_ACC_INT\n" },
       { ARMCI_ACC_LNG, "Test Accumulate ARMCI_ACC_LNG\n" },
       { ARMCI_ACC_FLT, "Test Accumulate ARMCI_ACC_FLT\n" },
       { ARMCI_ACC_DBL, "Test Accumulate ARMCI_ACC_DBL\n" },
       { ARMCI_ACC_CPL, "Test Accumulate ARMCI_ACC_CPL\n" },
       { ARMCI_ACC_DCP, "Test Accumulate ARMCI_ACC_DCP\n" }
    };
    /* Collective datatypes in execution order, each with its banner. */
    const struct { int type; const char *banner; } coll_cases[] = {
       { ARMCI_INT,    "Test Collective ARMCI_INT\n" },
       { ARMCI_LONG,   "Test Collective ARMCI_LONG\n" },
       { ARMCI_FLOAT,  "Test Collective ARMCI_FLOAT\n" },
       { ARMCI_DOUBLE, "Test Collective ARMCI_DOUBLE\n" }
    };
    const int n_acc  = (int) (sizeof acc_cases / sizeof acc_cases[0]);
    const int n_coll = (int) (sizeof coll_cases / sizeof coll_cases[0]);
    int idx;

#define TEST_ACC_TYPE   0
#define TEST_COLLECTIVE 1

    MP_INIT(argc, argv);
    ARMCI_Init();
    MP_PROCS(&nproc);
    MP_MYID(&me);

    if(me == 0 && nproc > MAXPROC)
       ARMCI_Error("Test works for up to %d processors\n",MAXPROC);

    if(me == 0)
    {
       printf("ARMCI test program (%d processes)\n",nproc); 
       fflush(stdout);
       sleep(1);
    }    

    /* ---- accumulate types ---- */
    gettimeofday(&start_time[TEST_ACC_TYPE],NULL);
    if(test_flags[TEST_ACC_TYPE] == 1)
    {
       if(me == 0)
       {
          printf("\nTesting Accumulate Types\n");
          fflush(stdout);
       }

       MP_BARRIER();
       for(idx = 0; idx < n_acc; idx++)
       {
          if(me == 0)
          {
             printf("%s", acc_cases[idx].banner);
             fflush(stdout);
          }
          test_acc_type(acc_cases[idx].type);
          /* complete all outstanding accumulates before the next type */
          ARMCI_AllFence();
          MP_BARRIER();
       }
    }
    gettimeofday(&stop_time[TEST_ACC_TYPE],NULL);

    /* ---- collective types ---- */
    gettimeofday(&start_time[TEST_COLLECTIVE],NULL);
    if(test_flags[TEST_COLLECTIVE] == 1)
    {
       if(me == 0)
       {
          printf("\nTesting Collective Types\n");
          fflush(stdout);
       }
       for(idx = 0; idx < n_coll; idx++)
       {
          if(me == 0)
          {
             printf("%s", coll_cases[idx].banner);
             fflush(stdout);
          }
          MP_BARRIER();
          test_collective(coll_cases[idx].type);
          MP_BARRIER();
       }
    }
    gettimeofday(&stop_time[TEST_COLLECTIVE],NULL);

    /* ---- summary (rank 0 only) ---- */
    if(me == 0)
    {
       printf("Accumulate and Collective tests passed\n");
       fflush(stdout);

       printf("Testcase runtime\n");
       printf("Name,Time(seconds)\n");
       for(idx = 0; idx < 2; idx++)
       {
          double micros;
          double time_spent;

          if(test_flags[idx] != 1)
             continue;

          micros = (double) stop_time[idx].tv_usec - start_time[idx].tv_usec;
          time_spent = (stop_time[idx].tv_sec - start_time[idx].tv_sec) + micros / 1E6;
          printf("%s,%.6f\n", test_name[idx], time_spent);
       }
    }

    MP_BARRIER();
    ARMCI_Finalize();
    MP_FINALIZE();
    return(0);
}
コード例 #30
0
ファイル: perf2.c プロジェクト: jeffhammond/ga
/*
 * Benchmark contiguous one-sided transfers issued by process 0 against
 * process 1 and print one result line per message size.
 *
 * buffer_size: number of bytes allocated per process for each of the three
 *              ARMCI buffers; also the largest message size that is timed.
 * op:          PUT, GET or ACC; any other value is reported via ARMCI_Error.
 *
 * Message sizes start at 16 bytes and double until they exceed buffer_size.
 * For each size, process 0 runs WARMUP untimed iterations followed by the
 * timed ones (ITER_SMALL, or ITER_LARGE above MEDIUM_MESSAGE_SIZE), then
 * prints: message size, elapsed time per iteration, and bytes moved per unit
 * of elapsed time (units are whatever dclock() returns).
 *
 * NOTE(review): the target rank is hard-coded to 1, so dst_ptr[1] is read;
 * this presumably requires the job to run with at least two processes --
 * confirm that the caller enforces it.
 */
static void contig_test(size_t buffer_size, int op)
{
    void **dst_ptr;
    void **put_buf;
    void **get_buf;

    /* One pointer slot per process; each table is handed to ARMCI_Malloc and
     * afterwards indexed by process rank (put_buf[me], dst_ptr[dst], ...). */
    dst_ptr = malloc(nproc * sizeof(void*));
    put_buf = malloc(nproc * sizeof(void*));
    get_buf = malloc(nproc * sizeof(void*));
    if (NULL == dst_ptr || NULL == put_buf || NULL == get_buf) {
        ARMCI_Error("contig_test: malloc failed", 1);
    }

    /* Report allocation failures instead of dereferencing garbage later. */
    if (0 != ARMCI_Malloc(dst_ptr, buffer_size)) {
        ARMCI_Error("contig_test: ARMCI_Malloc failed for dst_ptr", 1);
    }
    if (0 != ARMCI_Malloc(put_buf, buffer_size)) {
        ARMCI_Error("contig_test: ARMCI_Malloc failed for put_buf", 1);
    }
    if (0 != ARMCI_Malloc(get_buf, buffer_size)) {
        ARMCI_Error("contig_test: ARMCI_Malloc failed for get_buf", 1);
    }

    /* initialize what we're putting */
    fill_array((double*)put_buf[me], buffer_size/sizeof(double), me);

    size_t msg_size;

    int dst = 1;
    double scale = 1.0;
    for (msg_size = 16; msg_size <= buffer_size; msg_size *= 2) {

        int j;
        int iter = msg_size > MEDIUM_MESSAGE_SIZE ? ITER_LARGE : ITER_SMALL;

        /* Only process 0 assigns and reads t_start; the initializer keeps
         * the variable well-defined on every path. */
        double t_start = 0.0;
        double t_end;
        if (0 == me) {
            for (j = 0; j < iter + WARMUP; ++j) {

                /* Start the clock once the warm-up iterations are done. */
                if (WARMUP == j) {
                    t_start = dclock();
                }

                switch (op) {
                    case PUT:
                        ARMCI_Put(put_buf[me], dst_ptr[dst], msg_size,
                                dst);
                        break;
                    case GET:
                        ARMCI_Get(dst_ptr[dst], get_buf[me], msg_size,
                                dst);
                        break;
                    case ACC:
                        ARMCI_Acc(ARMCI_ACC_DBL, &scale,
                                put_buf[me], dst_ptr[dst], msg_size,
                                dst);
                        break;
                    default:
                        ARMCI_Error("oops", 1);
                        break;
                }

            }
        }
        /* Stop the clock before the barrier so the barrier is not timed. */
        t_end = dclock();
        ARMCI_Barrier();


        if (0 == me) {
            double elapsed = t_end - t_start;

            printf("%8zu\t\t%6.2f\t\t%10.2f\n",
                    msg_size,
                    elapsed/iter,
                    msg_size*iter/elapsed);
        }
    }

    /* Each process releases its own ARMCI segment, then the local tables. */
    ARMCI_Free(dst_ptr[me]);
    ARMCI_Free(put_buf[me]);
    ARMCI_Free(get_buf[me]);
    free(dst_ptr);
    free(put_buf);
    free(get_buf);
}