Ejemplo n.º 1
0
int main( int argc, char **argv)
{   
    
  MP_INIT(argc,argv);
  MP_MYID(&me);
  MP_PROCS(&nproc);
    
    if(nproc < 2) {
        if(me == 0)
            fprintf(stderr,
                    "USAGE: 2 <= processes < %d\n", nproc);
        MP_BARRIER();
        MP_FINALIZE();
        exit(0);
    }

    if(me == 0){
       printf("Test of ARMCI Wrappers to Basic Message Passing Operations\n");
       fflush(stdout);
    }
   
    /* initialize ARMCI */
    ARMCI_Init();

    MP_BARRIER();
   
    TestGlobals();
   
    /* done */
    ARMCI_Finalize();
    MP_FINALIZE();
    return(0);
}   
Ejemplo n.º 2
0
int main(int argc, char **argv)
{

    ARMCI_NetInit();
  MP_INIT(argc,argv);
  MP_MYID(&me);
  MP_PROCS(&nproc);

    if(nproc < 2 || nproc> MAXPROC) {
        if(me == 0)
            fprintf(stderr,
                    "USAGE: 2 <= processes < %d - got %d\n", MAXPROC, nproc);
        MP_BARRIER();
        MP_FINALIZE();
        exit(0);
    }
    
    /* initialize ARMCI */
    ARMCI_Init();

    if(!me)printf("\n             Performance of Basic Blocking Communication Operations\n");
    MP_BARRIER();
    
    CHECK_RESULT=1;
    if(!me)printf("\n\t\t\tContiguous Data Transfer\n");
    test_1D();
    CHECK_RESULT=0;

    /* test 1 dimension array */
    if(!me)printf("\n\t\t\tContiguous Data Transfer\n");
    test_1D();
    
    /* test 2 dimension array */
    if(!me)printf("\n\t\t\tStrided Data Transfer\n");
    test_2D();

    MP_BARRIER();
    if(me == 0){
       if(warn_accuracy) 
          printf("\nWARNING: Your timer does not have sufficient accuracy for this test (%d)\n",warn_accuracy);
       printf("\n\n------------ Testing the same data transfer for correctness ----------\n");
       fflush(stdout);
    }

    MP_BARRIER();
    CHECK_RESULT=1;
    if(!me)printf("\n\t\t\tContiguous Data Transfer\n");
    test_1D();
    if(me == 0) printf("OK\n");
    MP_BARRIER();
    if(!me)printf("\n\t\t\tStrided Data Transfer\n");
    test_2D();
    if(me == 0) printf("OK\n\n\nTests Completed.\n");
    MP_BARRIER();

    /* done */
    ARMCI_Finalize();
    MP_FINALIZE();
    return(0);
}    
Ejemplo n.º 3
0
int main(int argc, char* argv[])
{
    int ndim;

    MP_INIT(argc, argv);
    MP_PROCS(&nproc);
    MP_MYID(&me);

    if(me==0){
       printf("ARMCI test program for lock(%d processes)\n",nproc); 
       fflush(stdout);
       sleep(1);
    }
    
    ARMCI_Init();

    test_lock();

    MP_BARRIER();
    if(me==0){printf("test passed\n"); fflush(stdout);}
    sleep(2);

    MP_BARRIER();
    ARMCI_Finalize();
    MP_FINALIZE();
    return(0);
}
Ejemplo n.º 4
0
void test_one_group(ARMCI_Group *group, int *pid_list) {
  int grp_me, grp_size;
  int i,j,src_proc,dst_proc;
  double *ddst_put[MAXPROC];
  double dsrc[ELEMS];
  int elems[2] = {MAXPROC,ELEMS};
  int value = -1, bytes, world_me;
  
  MP_MYID(&world_me);
  ARMCI_Group_rank(group, &grp_me);
  ARMCI_Group_size(group, &grp_size);
  if(grp_me==0) printf("GROUP SIZE = %d\n", grp_size);
  printf("%d:group rank = %d\n", me, grp_me);

  src_proc = 0; dst_proc = grp_size-1;
       
  bytes = ELEMS*sizeof(double);       
  ARMCI_Malloc_group((void **)ddst_put, bytes, group);
       
  for(i=0; i<ELEMS; i++) dsrc[i]=i*1.001*(grp_me+1); 
  for(i=0; i<ELEMS; i++) ddst_put[grp_me][i]=-1.0;
       
  armci_msg_group_barrier(group);
       
  if(grp_me==src_proc) {
    /* NOTE: make sure to specify absolute ids in ARMCI calls */
    ARMCI_Put(dsrc, &ddst_put[dst_proc][0], bytes,
	      ARMCI_Absolute_id(group,dst_proc));
  }
       
  armci_msg_group_barrier(group);
  /* NOTE: make sure to specify absolute ids in ARMCI calls */
  ARMCI_Fence(ARMCI_Absolute_id(group,dst_proc));
  sleep(1);
       
       
  /* Verify*/
  if(grp_me==dst_proc) {
    for(j=0; j<ELEMS; j++) {
      if(ARMCI_ABS(ddst_put[grp_me][j]-j*1.001*(src_proc+1)) > 0.1) {
	printf("\t%d: ddst_put[%d][%d] = %lf and expected value is %lf\n",
	       me, grp_me, j, ddst_put[grp_me][j], j*1.001*(src_proc+1));
	ARMCI_Error("groups: armci put failed...1", 0);
      }
    }
    printf("\n%d(%d): Test O.K. Verified\n", dst_proc, world_me);
  }
  armci_msg_group_barrier(group);
  ARMCI_Free_group(ddst_put[grp_me], group);
}
Ejemplo n.º 5
0
void test_groups_noncollective() {
  int *pid_lists[MAX_GROUPS];
  int pids[MAXPROC];
  int i, nprocs, world_me;
  ARMCI_Group group;
  int *my_pid_list=NULL, my_grp_size=0;
  int ngrps;

  MP_BARRIER();
  MP_PROCS(&nprocs);
  MP_MYID(&world_me);

  random_permute(pids, nproc);

  ngrps = nprocs/GROUP_SIZE;
  
  for(i=0; i<nprocs/GROUP_SIZE; i++) {
    pid_lists[i] = pids + (i*GROUP_SIZE);
  }

  for(i=0; i<nprocs; i++) {
    if(pids[i] == world_me) {
      int grp_id = ARMCI_MIN(i/GROUP_SIZE, ngrps-1);
      my_pid_list = pid_lists[grp_id];
      if(grp_id == ngrps-1)
	my_grp_size =  GROUP_SIZE + (nprocs%GROUP_SIZE);
      else
	my_grp_size = GROUP_SIZE;
    }
  }

  qsort(my_pid_list, my_grp_size, sizeof(int), int_compare);
  
  MP_BARRIER();
  /*now create all these disjoint groups and test them in parallel*/
  
  ARMCI_Group_create(my_grp_size, my_pid_list, &group);

  test_one_group(&group, my_pid_list);

  ARMCI_Group_free(&group);

  ARMCI_AllFence();
  MP_BARRIER();
  
  if(world_me==0){printf("O.K.\n"); fflush(stdout);}
}
Ejemplo n.º 6
0
int main(int argc, char* argv[])
{

    MP_INIT(argc, argv);
    MP_PROCS(&nproc);
    MP_MYID(&me);

/*    printf("nproc = %d, me = %d\n", nproc, me);*/
    
    if( (nproc<MINPROC || nproc>MAXPROC) && me==0)
       ARMCI_Error("Test works for up to %d processors\n",MAXPROC);

    if(me==0){
       printf("ARMCI test program (%d processes)\n",nproc); 
       fflush(stdout);
       sleep(1);
    }
    
    ARMCI_Init();

    if(me==0){
      printf("\n Testing ARMCI Groups!\n\n");
      fflush(stdout);
    }

    test_groups();
    
    ARMCI_AllFence();
    MP_BARRIER();
    if(me==0){printf("\n Collective groups: Success!!\n"); fflush(stdout);}
    sleep(2);

#ifdef ARMCI_GROUP
    test_groups_noncollective();

    ARMCI_AllFence();
    MP_BARRIER();
    if(me==0){printf("\n Non-collective groups: Success!!\n"); fflush(stdout);}
    sleep(2);
#endif
	
    MP_BARRIER();
    ARMCI_Finalize();
    MP_FINALIZE();
    return(0);
}
Ejemplo n.º 7
0
int main(int argc, char* argv[])
{
ARMCI_NetInit();

    MP_INIT(argc, argv);
    MP_PROCS(&nproc);
    MP_MYID(&me);

    if(nproc < 2 || nproc> MAXPROC) {
      if(me == 0)
	fprintf(stderr,
		"USAGE: 2 <= processes < %d - got %d\n", MAXPROC, nproc);
      MP_BARRIER();
      MP_FINALIZE();
      exit(0);
    }

    if(me==0){
       printf("ARMCI test program (%d processes)\n",nproc); 
       fflush(stdout);
       sleep(1);
    }
    
    ARMCI_Init();

    if(me==0){
      printf("\n put/get/acc requests (Time in secs)\n\n");
      fflush(stdout);
    }

    test_perf_nb(1);
    test_perf_nb(0);
    
    ARMCI_AllFence();
    MP_BARRIER();
    if(me==0){printf("\nSuccess!!\n"); fflush(stdout);}
    sleep(2);
	
    MP_BARRIER();
    ARMCI_Finalize();
    MP_FINALIZE();
    return(0);
}
Ejemplo n.º 8
0
int main(int argc, char* argv[])
{

    MP_INIT(argc, argv);
    MP_PROCS(&nproc);
    MP_MYID(&me);

/*    printf("nproc = %d, me = %d\n", nproc, me);*/
    
    if(nproc>MAXPROC && me==0)
       ARMCI_Error("Test works for up to %d processors\n",MAXPROC);

    if(me==0){
       printf("ARMCI test program (%d processes)\n",nproc); 
       fflush(stdout);
       sleep(1);
    }
    
    ARMCI_Init();

    if(me==0){
      printf("\nAggregate put/get requests\n\n");
      fflush(stdout);
    }
    test_aggregate(1); /* cold start */
    test_aggregate(0); /* warm start */
    
    ARMCI_AllFence();
    MP_BARRIER();
    if(me==0){printf("\nSuccess!!\n"); fflush(stdout);}
    sleep(2);
	
    MP_BARRIER();
    ARMCI_Finalize();
    MP_FINALIZE();
    return(0);
}
Ejemplo n.º 9
0
main(int argc, char *argv[])
{
    int i, j;
    int ch;
    extern char *optarg;
    int edge;
    int size;
    int nloop=5;
    double **ptr_loc;

    MP_INIT(arc,argv);
    MP_PROCS(&nproc);
    MP_MYID(&me);

    while ((ch = getopt(argc, argv, "n:b:p:h")) != -1) {
        switch(ch) {
        case 'n':
            n = atoi(optarg);
            break;
        case 'b':
            block_size = atoi(optarg);
            break;
        case 'p':
            nproc = atoi(optarg);
            break;
        case 'h': {
            printf("Usage: LU, or \n");
            printf("       LU -nMATRIXSIZE -bBLOCKSIZE -pNPROC\n");
            MP_BARRIER();
            MP_FINALIZE();
            exit(0);
        }
        }
    }

    if(me == 0) {
        printf("\n Blocked Dense LU Factorization\n");
        printf("     %d by %d Matrix\n", n, n);
        printf("     %d Processors\n", nproc);
        printf("     %d by %d Element Blocks\n", block_size, block_size);
        printf("\n");
    }

    num_rows = (int) sqrt((double) nproc);
    for (;;) {
        num_cols = nproc/num_rows;
        if (num_rows*num_cols == nproc)
            break;
        num_rows--;
    }

    nblocks = n/block_size;
    if (block_size * nblocks != n) {
        nblocks++;
    }

    edge = n%block_size;
    if (edge == 0) {
        edge = block_size;
    }

#ifdef DEBUG
    if(me == 0)
        for (i=0; i<nblocks; i++) {
            for (j=0; j<nblocks; j++)
                printf("%d ", block_owner(i, j));
            printf("\n");
        }
    MP_BARRIER();
    MP_FINALIZE();
    exit(0);
#endif

    for (i=0; i<nblocks; i++) {
        for (j=0; j<nblocks; j++) {
            if(block_owner(i,j) == me) {
                if ((i == nblocks-1) && (j == nblocks-1)) {
                    size = edge*edge;
                }
                else if ((i == nblocks-1) || (j == nblocks-1)) {
                    size = edge*block_size;
                }
                else {
                    size = block_size*block_size;
                }
                proc_bytes += size*sizeof(double);
            }
        }
    }

    ptr = (void **)malloc(nproc * sizeof(void *));
#ifdef MPI2_ONESIDED
    MPI_Alloc_mem(proc_bytes, MPI_INFO_NULL, &ptr[me]);
    MPI_Win_create((void*)ptr[me], proc_bytes, 1, MPI_INFO_NULL,
                   MPI_COMM_WORLD, &win);
    for(i=0; i<nproc; i++) ptr[i] = (double *)ptr[me];
    MPI_Barrier(MPI_COMM_WORLD);

#else
    /* initialize ARMCI */
    ARMCI_Init();
    ARMCI_Malloc(ptr, proc_bytes);
#endif

    a = (double **)malloc(nblocks*nblocks*sizeof(double *));
    if (a == NULL) {
        fprintf(stderr, "Could not malloc memory for a\n");
        exit(-1);
    }
    ptr_loc = (double **)malloc(nproc*sizeof(double *));
    for(i=0; i<nproc; i++) ptr_loc[i] = (double *)ptr[i];
    for(i=0; i<nblocks; i ++) {
        for(j=0; j<nblocks; j++) {
            a[i+j*nblocks] = ptr_loc[block_owner(i, j)];
            if ((i == nblocks-1) && (j == nblocks-1)) {
                size = edge*edge;
            } else if ((i == nblocks-1) || (j == nblocks-1)) {
                size = edge*block_size;
            } else {
                size = block_size*block_size;
            }
            ptr_loc[block_owner(i, j)] += size;
        }
    }

    /* initialize the array */
    init_array();

    /* barrier to ensure all initialization is done */
    MP_BARRIER();

    /* to remove cold-start misses, all processors touch their own data */
    touch_array(block_size, me);
    MP_BARRIER();

    if(doprint) {
        if(me == 0) {
            printf("Matrix before LU decomposition\n");
            print_array(me);
        }
        MP_BARRIER();
    }

    lu(n, block_size, me); /* cold start */

    /* Starting the timer */

    MP_BARRIER();
    if(me == 0) start_timer();
    for(i=0; i<nloop; i++) lu(n, block_size, me);
    MP_BARRIER();

    /* Timer Stops here */
    if(me == 0)
        printf("\nRunning time = %lf milliseconds.\n\n",  elapsed_time()/nloop);
    printf("%d: (ngets=%d) Communication (get) time = %e milliseconds\n", me, get_cntr, comm_time*1000/nloop);

    if(doprint) {
        if(me == 0) {
            printf("after LU\n");
            print_array(me);
        }
        MP_BARRIER();
    }

    /* done */
#ifdef MPI2_ONESIDED
    MPI_Win_free(&win);
    MPI_Free_mem(ptr[me]);
#else
    ARMCI_Free(ptr[me]);
    ARMCI_Finalize();
#endif
    MP_FINALIZE();
}
Ejemplo n.º 10
0
int main(int argc, char* argv[])
{
    int i;
    struct timeval start_time[14];
    struct timeval stop_time[14];
    /*
      char * test_name[14] = {
      "dim", "nbdim", "vec_small", "acc",
      "vector", "vector_acc", "fetch_add",
      "swap", "rput", "aggregate", "implicit",
      "memlock", "acc_type", "collective"
      };
      int test_flags[14] = {
      1, 1, 1, 1,
      1, 1, 1,
      1, 1, 0, 1,
      1, 1, 1
      };
    */
    char * test_name[2] = { "acc_type", "collective" };
    int test_flags[2]   = { 1, 1 };

#define TEST_ACC_TYPE   0
#define TEST_COLLECTIVE 1

    MP_INIT(argc, argv);
    ARMCI_Init();
    MP_PROCS(&nproc);
    MP_MYID(&me);

    if(nproc > MAXPROC && me == 0)
       ARMCI_Error("Test works for up to %d processors\n",MAXPROC);

    if(me == 0)
    {
       printf("ARMCI test program (%d processes)\n",nproc); 
       fflush(stdout);
       sleep(1);
    }    

    gettimeofday(&start_time[TEST_ACC_TYPE],NULL);
    if(test_flags[TEST_ACC_TYPE] == 1)
    {
       if(me == 0)
       {
          printf("\nTesting Accumulate Types\n");
          fflush(stdout);
       }
       
       MP_BARRIER();
       if(me == 0)
       {
          printf("Test Accumulate ARMCI_ACC_INT\n");
          fflush(stdout);
       }
       test_acc_type(ARMCI_ACC_INT);
       ARMCI_AllFence();
       MP_BARRIER();
       if(me == 0)
       {
          printf("Test Accumulate ARMCI_ACC_LNG\n");
          fflush(stdout);
       }
       test_acc_type(ARMCI_ACC_LNG);
       ARMCI_AllFence();
       MP_BARRIER();
       if(me == 0)
       {
          printf("Test Accumulate ARMCI_ACC_FLT\n");
          fflush(stdout);
       }
       test_acc_type(ARMCI_ACC_FLT);
       ARMCI_AllFence();
       MP_BARRIER();
       if(me == 0)
       {
          printf("Test Accumulate ARMCI_ACC_DBL\n");
          fflush(stdout);
       }
       test_acc_type(ARMCI_ACC_DBL);
       ARMCI_AllFence();
       MP_BARRIER();
       if(me == 0)
       {
          printf("Test Accumulate ARMCI_ACC_CPL\n");
          fflush(stdout);
       }
       test_acc_type(ARMCI_ACC_CPL);
       ARMCI_AllFence();
       MP_BARRIER();
       if(me == 0)
       {
          printf("Test Accumulate ARMCI_ACC_DCP\n");
          fflush(stdout);
       }
       test_acc_type(ARMCI_ACC_DCP);
       ARMCI_AllFence();
       MP_BARRIER();
    }
    gettimeofday(&stop_time[TEST_ACC_TYPE],NULL);

    gettimeofday(&start_time[TEST_COLLECTIVE],NULL);
    if(test_flags[TEST_COLLECTIVE] == 1)
    {
       if(me == 0)
       {
          printf("\nTesting Collective Types\n");
          fflush(stdout);
       }
       if(me == 0)
       {
          printf("Test Collective ARMCI_INT\n");
          fflush(stdout);
       }
       MP_BARRIER();
       test_collective(ARMCI_INT);
       MP_BARRIER();
       if(me == 0)
       {
          printf("Test Collective ARMCI_LONG\n");
          fflush(stdout);
       }
       MP_BARRIER();
       test_collective(ARMCI_LONG);
       MP_BARRIER();
       if(me == 0)
       {
          printf("Test Collective ARMCI_FLOAT\n");
          fflush(stdout);
       }
       MP_BARRIER();
       test_collective(ARMCI_FLOAT);
       MP_BARRIER();
       if(me == 0)
       {
          printf("Test Collective ARMCI_DOUBLE\n");
          fflush(stdout);
       }
       MP_BARRIER();
       test_collective(ARMCI_DOUBLE);
       MP_BARRIER();
    }
    gettimeofday(&stop_time[TEST_COLLECTIVE],NULL);
    
    if(me == 0)
    {
       printf("Accumulate and Collective tests passed\n");
       fflush(stdout);
    }

    if(me == 0)
    {
       printf("Testcase runtime\n");
       printf("Name,Time(seconds)\n");
       for(i = 0; i < 2; i++)
          if(test_flags[i] == 1)
          {
             double time_spent = (stop_time[i].tv_sec - start_time[i].tv_sec) + ((double) stop_time[i].tv_usec - start_time[i].tv_usec) / 1E6;
             printf("%s,%.6f\n", test_name[i], time_spent);
          }
    }

    MP_BARRIER();
    ARMCI_Finalize();
    MP_FINALIZE();
    return(0);
}
Ejemplo n.º 11
0
int main(int argc, char **argv) {
  int i;
  double **myptrs;
  double t0, t1, tnbget=0, tnbwait=0, t2=0;

  MP_INIT(argc,argv);
  ARMCI_Init();

  MP_PROCS(&nprocs);
  MP_MYID(&me);

  if (nprocs < 2)
    ARMCI_Error("This program requires at least to processes", 1);

  myptrs = (double **)malloc(sizeof(double *)*nprocs);
  ARMCI_Malloc((void **)myptrs, LOOP*sizeof(double)); 
  
  MP_BARRIER();
  
  if(me == 0) {
    for(i = 0; i < 10; i++) {
      // This is a bug:
      // ARMCI_Get(myptrs[me]+i,myptrs[me+1]+i,sizeof(double),me+1);
      ARMCI_Get(myptrs[me+1]+i, myptrs[me]+i, sizeof(double), me+1);
    }

    t0 = MP_TIMER(); 
    for(i = 0; i < LOOP; i++) {
      // This is a bug:
      // ARMCI_Get(myptrs[me]+i,myptrs[me+1]+i,sizeof(double),me+1);
      ARMCI_Get(myptrs[me+1]+1, myptrs[me]+i, sizeof(double), me+1);
    }
    t1 = MP_TIMER(); 

    printf("\nGet Latency=%lf\n", 1e6*(t1-t0)/LOOP);
    fflush(stdout);

    t1 = t0 = 0;

    for(i = 0; i < LOOP; i++) {
      armci_hdl_t nbh;
      ARMCI_INIT_HANDLE(&nbh);

      t0 = MP_TIMER(); 
      //ARMCI_NbGet(myptrs[me]+i, myptrs[me+1]+i, sizeof(double), me+1, &nbh);
      ARMCI_NbGet(myptrs[me+1]+i, myptrs[me]+i, sizeof(double), me+1, &nbh);
      t1 = MP_TIMER(); 
      ARMCI_Wait(&nbh);
      t2 = MP_TIMER();

      tnbget  += (t1-t0);
      tnbwait += (t2-t1);
    }

    printf("\nNb Get Latency=%lf Nb Wait=%lf\n",1e6*tnbget/LOOP,1e6*tnbwait/LOOP);fflush(stdout);
  }

  else
    sleep(1);

  MP_BARRIER();

  ARMCI_Finalize();
  MP_FINALIZE();

  return 0;
}
Ejemplo n.º 12
0
int main(int argc, char *argv[])
{
    int i, j;
    int ch;
    extern char *optarg;
    int edge;
    int size;
    
    /* ARMCI */
    void **ptr;
    double **ptr_loc;
    
    MP_INIT(argc,argv);
    MP_PROCS(&nproc);
    MP_MYID(&me);
    
    while ((ch = getopt(argc, argv, "n:b:p:h")) != -1) {
        switch(ch) {
            case 'n': n = atoi(optarg); break;
            case 'b': block_size = atoi(optarg); break;
            case 'p': nproc = atoi(optarg); break;
            case 'h': {
                printf("Usage: LU, or \n");
        printf("       LU -nMATRIXSIZE -bBLOCKSIZE -pNPROC\n");
                MP_BARRIER();
                MP_FINALIZE();
                exit(0);
            }            
        }
    }
    
    if(me == 0) {
        printf("\n Blocked Dense LU Factorization\n");
        printf("     %d by %d Matrix\n", n, n);
        printf("     %d Processors\n", nproc);
        printf("     %d by %d Element Blocks\n", block_size, block_size);
        printf("\n");
    }
    
/*      num_rows = (int) sqrt((double) nproc); */
/*      for (;;) { */
/*          num_cols = nproc/num_rows; */
/*          if (num_rows*num_cols == nproc) */
/*              break; */
/*          num_rows--; */
/*      } */
    
    nblocks = n/block_size;
    if (block_size * nblocks != n) {
        nblocks++;
    }

    nnodes = nproc / 4;
    if((nnodes * 4) != nproc) {
        num_cols = nproc - nnodes * 4;
        nnodes++;
        num_rows = 1;
    }
    else {
        num_cols = 2;
        num_rows = 2;
    }    
    
    num = (nblocks * nblocks)/nnodes;
    if((num * nnodes) != (nblocks * nblocks))
        num++;

#ifdef DEBUG
    if(me == 0)
        for (i=0;i<nblocks;i++) {
            for (j=0;j<nblocks;j++) 
                printf("%d ", block_owner(i, j));
            printf("\n");
        }
    MP_BARRIER();
    MP_FINALIZE();
    exit(0);
#endif
    
    edge = n%block_size;
    if (edge == 0) {
        edge = block_size;
    }
    
    for (i=0;i<nblocks;i++) {
        for (j=0;j<nblocks;j++) {
            if(block_owner(i,j) == me) {
                if ((i == nblocks-1) && (j == nblocks-1)) {
                    size = edge*edge;
                }
                else if ((i == nblocks-1) || (j == nblocks-1)) {
                    size = edge*block_size;
                }
                else {
                    size = block_size*block_size;
                }
                proc_bytes += size*sizeof(double);
            }
        }
    }
    
    /* initialize ARMCI */
    ARMCI_Init();
    ptr = (void **)malloc(nproc * sizeof(void *));
    ARMCI_Malloc(ptr, proc_bytes);
    
    a = (double **)malloc(nblocks*nblocks*sizeof(double *));
    if (a == NULL) {
        fprintf(stderr, "Could not malloc memory for a\n");
        exit(-1);
    } 
    ptr_loc = (double **)malloc(nproc*sizeof(double *));
    for(i=0; i<nproc; i++) ptr_loc[i] = (double *)ptr[i];
    for(i=0; i<nblocks;i ++) {
        for(j=0; j<nblocks; j++) {
            a[i+j*nblocks] = ptr_loc[block_owner(i, j)];
            if ((i == nblocks-1) && (j == nblocks-1)) {
                size = edge*edge;
            } else if ((i == nblocks-1) || (j == nblocks-1)) {
                size = edge*block_size;
            } else {
                size = block_size*block_size;
            }
            ptr_loc[block_owner(i, j)] += size;
        }
    }
    
    /* initialize the array */
    init_array();
    
    /* barrier to ensure all initialization is done */
    MP_BARRIER();

    /* to remove cold-start misses, all processors touch their own data */
    touch_array(block_size, me);
    MP_BARRIER();

    if(doprint) {
        if(me == 0) {
            printf("Matrix before LU decomposition\n");
            print_array(me); 
        }
        MP_BARRIER();
    }
    

    /* Starting the timer */
    if(me == 0) start_timer();

    lu(n, block_size, me);
    
    MP_BARRIER();

    /* Timer Stops here */
    if(me == 0) 
        printf("\nRunning time = %lf milliseconds.\n\n",  elapsed_time());

    if(doprint) {        
        if(me == 0) {
            printf("after LU\n");
            print_array(me);
        }
        MP_BARRIER();
    }
    
    /* done */
    ARMCI_Free(ptr[me]);
    ARMCI_Finalize();
    MP_FINALIZE();

    return 0;
}
Ejemplo n.º 13
0
int main(int argc, char *argv[])
{
    int ch;
    extern char *optarg;
    int i, j, r;
    thread_t threads[MAX_TPP];

    /* init MP */
    MP_INIT(argc,argv);
    MP_PROCS(&size);
    MP_MYID(&rank);

    while ((ch = getopt(argc, argv, "t:s:i:d:h")) != -1) {
        switch(ch) {
            case 't': /* # of threads */
                tpp = atoi(optarg);
                if (tpp < 1 || tpp > MAX_TPP) {
                    PRINTF0("\"%s\" is improper value for -t, should be a "
                            "number between 1 and %d(MAX_TPP)\n",
                            optarg, MAX_TPP);
                    usage();
                }
                break;
            case 'i': /* # of iterations */
                iters = atoi(optarg);
                if (iters < 1) {
                    PRINTF0("\"%s\" is improper value for -t, should be a "
                            "number equal or larger than 1\n", optarg);
                    usage();
                }
                break;
            case 's': /* # of elements in the array */
                asize = atoi(optarg);
                if (iters < 1) {
                    PRINTF0("\"%s\" is improper value for -s, should be a "
                            "number equal or larger than 1\n", optarg);
                    usage();
                }
                break;
            case 'd': delay = atoi(optarg); break; /* delay before start */
            case 'h': usage(); break; /* print usage info */
        }
    }
#ifdef NOTHREADS
    tpp = 1;
    PRINTF0("Warning: NOTHREADS debug symbol is set -- running w/o threads\n");
#endif
    th_size = size * tpp;
    PRINTF0("\nTest of multi-threaded capabilities:\n"
            "%d threads per process (%d threads total),\n"
            "%d array elements of size %d,\n"
            "%d iteration(s)\n\n", tpp, th_size, asize, sizeof(atype_t), iters);
    if (delay) {
        printf("%d: %d\n", rank, getpid());
        fflush(stdout);
        sleep(delay);
        MP_BARRIER();
    }
    TH_INIT(size,tpp);
    for (i = 0; i < tpp; i++) th_rank[i] = rank * tpp + i;

#if defined(DEBUG) && defined(LOG2FILE)
    for (i = 0; i < tpp; i++) {
        fname[10] = '0' + th_rank[i] / 100;
        fname[11] = '0' + th_rank[i] % 100 / 10;
        fname[12] = '0' + th_rank[i] % 10;
        dbg[i] = fopen(fname, "w");
    }
#endif
    for (i = 0; i < tpp; i++)
        prndbg(i, "proc %d, thread %d(%d):\n", rank, i, th_rank[i]);

    /* init ARMCI */
    ARMCI_Init();

    /* set global seed (to ensure same random sequence across procs) */
    time_seed = (unsigned)time(NULL);
    armci_msg_brdcst(&time_seed, sizeof(time_seed), 0);
    srand(time_seed); rand();
    prndbg(0, "seed = %u\n", time_seed);
    /* random pairs */
    pairs = calloc(th_size, sizeof(int));
    for (i = 0; i < th_size; i++) pairs[i] = -1;
    for (i = 0; i < th_size; i++) {
        if (pairs[i] != -1) continue;
        r = RND(0, th_size);
        while (i == r || pairs[r] != -1 ) r = RND(0, th_size);
        pairs[i] = r; pairs[r] = i;
    }
    for (i = 0, cbufl = 0; i < th_size; i++)
        cbufl += sprintf(cbuf + cbufl, " %d->%d|%d->%d",
                         i, pairs[i], pairs[i], pairs[pairs[i]]);
    prndbg(0, "random pairs:%s\n", cbuf);
    /* random targets */
    rnd_tgts = calloc(th_size, sizeof(int));
    for (i = 0, cbufl = 0; i < th_size; i++) {
        rnd_tgts[i] = RND(0, th_size);
        if (rnd_tgts[i] == i) { i--; continue; }
        cbufl += sprintf(cbuf + cbufl, " %d", rnd_tgts[i]);
    }
    prndbg(0, "random targets:%s\n", cbuf);
    /* random one */
    rnd_one = RND(0, th_size);
    prndbg(0, "random one = %d\n", rnd_one);

    assert(ptrs1 = calloc(th_size, sizeof(void *)));
    assert(ptrs2 = calloc(th_size, sizeof(void *)));
#ifdef NOTHREADS
    thread_main((void *)(long)0);
#else
    for (i = 0; i < tpp; i++) THREAD_CREATE(threads + i, thread_main, (void *)(long)i);
    for (i = 0; i < tpp; i++) THREAD_JOIN(threads[i], NULL);
#endif

    MP_BARRIER();
    PRINTF0("Tests Completed\n");

    /* clean up */
#if defined(DEBUG) && defined(LOG2FILE)
    for (i = 0; i < tpp; i++) fclose(dbg[i]);
#endif
    ARMCI_Finalize();
    TH_FINALIZE();
    MP_FINALIZE();

	return 0;
}
Ejemplo n.º 14
0
main(int argc, char *argv[])
{
  int i, j;
  int ch;
  extern char *optarg;
  int edge;
  int size;
    
  /* ARMCI */
  void **ptr;
  double **ptr_loc;
  void **bufr_g, **bufc_g;

  MP_INIT(arc,argv);
  MP_PROCS(&nproc);
  MP_MYID(&me);
    
  while ((ch = getopt(argc, argv, "n:b:p:h")) != -1) {
    switch(ch) {
    case 'n': n = atoi(optarg); break;
    case 'b': block_size = atoi(optarg); break;
    case 'p': nproc = atoi(optarg); break;
    case 'h': {
      printf("Usage: LU, or \n");
      printf("       LU -nMATRIXSIZE -bBLOCKSIZE -pNPROC\n");
      MP_BARRIER();
      MP_FINALIZE();
      exit(0);
    }            
    }
  }
    
  if(me == 0) {
    printf("\nUsing pre-PUTing\n");
    printf("\n Blocked Dense LU Factorization\n");
    printf("     %d by %d Matrix\n", n, n);
    printf("     %d Processors\n", nproc);
    printf("     %d by %d Element Blocks\n", block_size, block_size);
    printf("\n");
  }
    
  num_rows = (int) sqrt((double) nproc);
  for (;;) {
    num_cols = nproc/num_rows;
    if (num_rows*num_cols == nproc)
      break;
    num_rows--;
  }
    
  nblocks = n/block_size;
  if (block_size * nblocks != n) {
    nblocks++;
  }
    
  edge = n%block_size;
  if (edge == 0) {
    edge = block_size;
  }
    
  #ifdef DEBUG
  if(me == 0)
    for (i=0;i<nblocks;i++) {
      for (j=0;j<nblocks;j++) 
	printf("%d ", block_owner(i, j));
      printf("\n");
    }
  MP_BARRIER();
  MP_FINALIZE();
  exit(0);
  #endif
    
  for (i=0;i<nblocks;i++) {
    for (j=0;j<nblocks;j++) {
      if(block_owner(i,j) == me) {
	if ((i == nblocks-1) && (j == nblocks-1)) {
	  size = edge*edge;
	}
	else if ((i == nblocks-1) || (j == nblocks-1)) {
	  size = edge*block_size;
	}
	else {
	  size = block_size*block_size;
	}
	proc_bytes += size*sizeof(double);
      }
    }
  }
    
  /* initialize ARMCI */
  ARMCI_Init();
  ptr = (void **)ARMCI_Malloc_local(nproc * sizeof(void *));
  ARMCI_Malloc(ptr, proc_bytes);
  
  a = (double **)ARMCI_Malloc_local(nblocks*nblocks*sizeof(double *));
  if (a == NULL) {
    fprintf(stderr, "Could not malloc memory for a\n");
    exit(-1);
  } 
  ptr_loc = (double **)ARMCI_Malloc_local(nproc*sizeof(double *));
  for(i=0; i<nproc; i++) ptr_loc[i] = (double *)ptr[i];
  for(i=0; i<nblocks;i ++) {
    for(j=0; j<nblocks; j++) {
      a[i+j*nblocks] = ptr_loc[block_owner(i, j)];
      if ((i == nblocks-1) && (j == nblocks-1)) {
	size = edge*edge;
      } else if ((i == nblocks-1) || (j == nblocks-1)) {
	size = edge*block_size;
      } else {
	size = block_size*block_size;
      }
      ptr_loc[block_owner(i, j)] += size;
    }
  }
    
  /* initialize the array */
  init_array();
  
  bufr = (double **)ARMCI_Malloc_local(nproc*nblocks * sizeof(double *));
  bufc = (double **)ARMCI_Malloc_local(nproc*nblocks * sizeof(double *));

  if (bufr == NULL || bufc == NULL)
    printf("Could not ARMCI_Malloc_local() mem\n");
  /* bufr points to all k-th row blocks */
  /* save all block address in row-major order */
  proc_bytes = nblocks*block_size*block_size * sizeof(double);
  bufr_g = (void **)ARMCI_Malloc_local(nproc * sizeof(void *));
  ARMCI_Malloc(bufr_g, proc_bytes);

  for (i = 0; i < nproc; i++) {
    bufr[i*nblocks] = (double *) bufr_g[i];
    for (j = 1; j < nblocks; j++) {
      bufr[i*nblocks + j]  = bufr[i*nblocks + j-1] + block_size * block_size;
    }
  }

  /* bufc points to all k-th column blocks */
  bufc_g = (void **)ARMCI_Malloc_local(nproc * sizeof(void *));
  ARMCI_Malloc(bufc_g, proc_bytes);

  for (i = 0; i < nproc; i++) {
    bufc[i*nblocks] = (double *) bufc_g[i];
    for (j = 1; j < nblocks; j++) {
      bufc[i*nblocks + j]  = bufc[i*nblocks + j-1] + block_size * block_size;
    }
  }

  /* barrier to ensure all initialization is done */
  MP_BARRIER();

  /* to remove cold-start misses, all processors touch their own data */
  touch_array(block_size, me);
  MP_BARRIER();

  if(doprint) {
    if(me == 0) {
      printf("Matrix before LU decomposition\n");
      print_array(me); 
    }
    MP_BARRIER();
  }  

  /* Starting the timer */
  if(me == 0) start_timer();

  lu(n, block_size, me);
  
  MP_BARRIER();

  /* Timer Stops here */
  if(me == 0) 
  printf("\nRunning time = %lf milliseconds.\n\n",  elapsed_time());

  if(doprint) {        
    if(me == 0) {
      printf("after LU\n");
      print_array(me);
    }
    MP_BARRIER();
  }
    
  /* done */
  ARMCI_Free(ptr[me]);
  ARMCI_Free(bufc_g[me]);
  ARMCI_Free(bufr_g[me]);
  ARMCI_Finalize();
  MP_FINALIZE();
}