Пример #1
0
int coefs_ga_create_ghost(int Nx, int Ny, int Nz, int nsplines) {
  int data[Nx*Ny*Nz];
  int g_a,dims[4]={Nx,Ny,Nz,nsplines},chunk[4]={-1,-1,-1,nsplines};
  int width[4] = {3, 3, 3, 0};
  int type=C_INT;
  //g_a=NGA_Create(type,4,dims,"Coefs",chunk);
  g_a=NGA_Create_ghosts(type, 4, dims, width, "Coefs", chunk);
  int lo[4],hi[4],ld[3];
  //int value=9; GA_Fill(g_a,&value);
  GA_Print_distribution(g_a);
  return g_a;
}
Пример #2
0
PetscErrorCode vizGA2DA()
{
  PetscErrorCode  ierr;
  int rank;
  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);  
  int d1 = 40, d2 = 50;
  
  DA da;
  Vec vec;
  const PetscInt *lx, *ly, *lz;
  PetscInt m,n,p;
  DALocalInfo info;
  ierr = DACreate2d(PETSC_COMM_WORLD,DA_NONPERIODIC,DA_STENCIL_STAR,
            d1,d2,PETSC_DECIDE,PETSC_DECIDE,1,1,0,0, &da); CHKERRQ(ierr);
  ierr = DACreateGlobalVector(da, &vec); CHKERRQ(ierr);
  ierr = DAGetOwnershipRanges(da, &lx, &ly, &lz); CHKERRQ(ierr);
  ierr = DAGetLocalInfo(da,&info); CHKERRQ(ierr);
  ierr = DAGetInfo(da,0,0,0,0,&m,&n,&p,0,0,0,0); CHKERRQ(ierr);
  /**/
  ierr = DAView(da, PETSC_VIEWER_STDOUT_WORLD); CHKERRQ(ierr);
  for (int i = 0; i < m; ++i) {
    PetscPrintf(PETSC_COMM_WORLD,"%d\tlx: %d\n",i,lx[i]);
  }
  for (int i = 0; i < n; ++i) {
    PetscPrintf(PETSC_COMM_WORLD,"%d\tly: %d\n",i,ly[i]);
  }
  /**/
 
  
  int ga = GA_Create_handle();
  int ndim = 2;
  int dims[2] = {d2,d1};
  GA_Set_data(ga,2,dims,MT_DBL);
  int *map;
  PetscMalloc( sizeof(int)*(m+n), &map);
  map[0] = 0;
  for( int i = 1; i < n; i++ )
  {
    map[i] = ly[i-1] + map[i-1];
  }
  map[n] = 0;
  for( int i = n+1; i < m+n; i++ )
  {
    map[i] = lx[i-n-1] + map[i-1];
  }
  /* correct ordering, but nodeid's dont line up with mpi rank for petsc's da
   * DA: +---+---+   GA: +---+---+   
   *     +-2-+-3-+       +-1-+-3-+
   *     +---+---+       +---+---+
   *     +-0-+-1-+       +-0-+-2-+
   *     +---+---+       +---+---+
  int *map;
  PetscMalloc( sizeof(int)*(m+n), &map);
  map[0] = 0;
  for( int i = 1; i < m; i++ )
  {
    map[i] = lx[i] + map[i-1];
  }
  map[m] = 0;
  for( int i = m+1; i < m+n; i++ )
  {
    map[i] = ly[i-m] + map[i-1];
  }
  */
  int block[2] = {n,m};  
  GA_Set_irreg_distr(ga,map,block);
  ierr = GA_Allocate( ga );
  if( !ierr ) GA_Error("\n\n\nga allocaltion failed\n\n",ierr);
  if( !ga ) GA_Error("\n\n\n ga null \n\n",ierr); 
  if( rank != GA_Nodeid() ) GA_Error("MPI rank does not match GA_Nodeid()",1);
  GA_Print_distribution(ga);  
  
  int lo[2], hi[2];
  NGA_Distribution(ga,rank,lo,hi);
  if( lo[1] != info.xs || hi[1] != info.xs+info.xm-1 ||
      lo[0] != info.ys || hi[0] != info.ys+info.ym-1 )
  {
    PetscSynchronizedPrintf(PETSC_COMM_SELF,"[%d] lo:(%2d,%2d)  hi:(%2d,%2d) \t DA: (%2d,%2d), (%2d, %2d)\n",
        rank, lo[1], lo[0], hi[1], hi[0], info.xs, info.ys, info.xs+info.xm-1, info.ys+info.ym-1);
  }
  PetscBarrier(0);
  PetscSynchronizedFlush(PETSC_COMM_WORLD);

  AO ao;
  DAGetAO(da,&ao);
  if( rank == 0 )
  {
    int *idx, len = d1*d2;
    PetscReal *val;
    PetscMalloc(sizeof(PetscReal)*len, &val);
    PetscMalloc(sizeof(int)*len, &idx);
    for (int j = 0; j < d2; ++j)
    {
      for (int i = 0; i < d1; ++i)
      {
        idx[i + d1*j] = i + d1*j;
        val[i + d1*j] = i + d1*j;
      }
    }
    AOApplicationToPetsc(ao,len,idx);
    VecSetValues(vec,len,idx,val,INSERT_VALUES);

    int a[2], b[2],ld[1]={0};
    double c = 0;
    for (int j = 0; j < d2; ++j)
    {
      for (int i = 0; i < d1; ++i)
      {
        a[0] = j;
        a[1] = i;
//        printf("%5.0f ",c);
        NGA_Put(ga,a,a,&c,ld);
        c++;
      }
    }
  }
//  GA_Print(ga);
  VecAssemblyBegin(vec);
  VecAssemblyEnd(vec);
  
  int ld;
  double *ptr;
  NGA_Access(ga,lo,hi,&ptr,&ld);
  PetscReal **d;
  int c=0;
  ierr = DAVecGetArray(da,vec,&d); CHKERRQ(ierr);
  for (int j = info.ys; j < info.ys+info.ym; ++j)
  {
    for (int i = info.xs; i < info.xs+info.xm; ++i)
    {
      if( d[j][i] != ptr[(i-info.xs)+ld*(j-info.ys)] )
        GA_Error("DA array is not equal to GA array",1);
//      printf("%d (%d,%d):\t%3.0f\t%3.0f\n", c, i, j, d[j][i], ptr[(i-info.xs)+ld*(j-info.ys)]);
      c++;
    }
  }
  ierr = DAVecRestoreArray(da,vec,&d); CHKERRQ(ierr);
  
  c=0;
  PetscReal *v;
  int start, end;
  VecGetOwnershipRange(vec, &start, &end);
  VecGetArray( vec, &v );
  for( int i = start; i < end; i++)
  {
//    printf("%d:\t%3.0f\t%3.0f\t%s\n", start, v[i-start], ptr[i-start], (v[i-start]-ptr[i-start]==0?"":"NO") );
  }
  VecRestoreArray( vec, &v );
  
  NGA_Release_update(ga,lo,hi);

  Vec gada;
  VecCreateMPIWithArray(((PetscObject)da)->comm,da->Nlocal,PETSC_DETERMINE,ptr,&gada);
  VecView(gada,PETSC_VIEWER_STDOUT_SELF);
  
  GA_Destroy(ga);
  
  
  
  ierr = VecDestroy(vec); CHKERRQ(ierr);
  ierr = DADestroy(da); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Пример #3
0
int main(int argc, char**argv)
{
  int nprocs, me;
  int i,j;
  MPI_Init(&argc,&argv);
  MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
  MPI_Comm_rank(MPI_COMM_WORLD,&me);
  GA_Initialize();
  const int heap=3000000, stack=300000;
  if(! MA_init(C_INT,stack,heap) ) GA_Error((char *) "MA_init failed",stack+heap /*error code*/);
  int Nx=97; int Ny=97; int Nz = 97;
  Nx+=3; Ny+=3; Nz+=3;
  int data[Nx*Ny*Nz];
  int num_splines = 32;
  int g_a,dims[4]={Nx,Ny,Nz,num_splines},chunk[4]={-1,-1,-1,num_splines};
  int width[4] = {3, 3, 3, 0};
  int type=C_INT;
  //g_a=NGA_Create(type,4,dims,"Coefs",chunk);
  g_a=NGA_Create_ghosts(type, 4, dims, width, "Coefs", chunk);
  int lo[4],hi[4],ld[3];
  //double value=9.0; GA_Fill(g_a,&value);
  GA_Print_distribution(g_a);
  fflush(stdout);
  if(me==0)
  {
      for (i=0; i<num_splines; i++)
      {
          int x, y, z;
          for (x=0; x<Nx; x++)
              for (y=0; y<Ny; y++)
                  for (z=0; z<Nz; z++)
                  {   j=x*(Ny*Nz)+y*Nz+z;
              data[j] = (x*100*100+y*100+z)*100+i;}
          lo[0]=lo[1]=lo[2]=0;
          hi[0]=Nx-1;hi[1]=Ny-1;hi[2]=Nz-1;
          lo[3]=hi[3]=i%num_splines;
          ld[0]=Ny;ld[1]=Nz;ld[2]=1;
          NGA_Put(g_a,lo,hi,data,ld);
      }
  }
  GA_Update_ghosts(g_a);
  GA_Sync();
  printf("done\n"),fflush(stdout);
  ga_coefs_t *ga_coefs = malloc(sizeof(ga_coefs_t));
  ga_coefs->Mx = Nx;
  ga_coefs->My = Ny;
  ga_coefs->Mz = Nz;
  ga_coefs->nsplines = num_splines;
  ga_coefs->g_a=g_a;
  int *coefs1 = (int*)malloc((size_t)1*sizeof(int)*4*4*4*num_splines);
  int ix,iy,iz;
  Nx-=3; Ny-=3; Nz-=3;
  ga_coefs->sumt=ga_coefs->amount=0;
  NGA_Distribution(g_a,me,lo,hi);
  GA_Print_distribution(g_a);
  int low[16][4],high[16][4];
  for(i=0;i<nprocs;i++)
      NGA_Distribution(g_a,i,low[i],high[i]);
  srand ( time(NULL) );
  int k=GA_Nodeid();
  printf("%d: low[k]=%d high[k]=%d\n", GA_Nodeid(), low[k][2], high[k][2]);
  int unequal=0;
  for(i=0;i<1000;i++) {
      ix=rand_index(low[k][0],high[k][0]);
      if(ix+3>=dims[0]) ix=low[k][0];
      iy=rand_index(low[k][1],high[k][1]);
      if(iy+3>=dims[1]) iy=low[k][1];
      iz=rand_index(low[k][2],high[k][2]);
      if(iz+3>=dims[2]) iz=low[k][2];
      coefs_ga_get_3d(ga_coefs,coefs1,ix,iy,iz);
      long get_sum=mini_cube_sum(coefs1, ga_coefs->nsplines);
      long ghost_sum=coefs_ghost_access_3d(ga_coefs->g_a, ix, iy, iz, ga_coefs->nsplines);
      if(get_sum!=ghost_sum) {
      printf("ixyz=\t%d\t%d\t%d\t", ix, iy, iz);
          printf("get_sum=%ld ghost_sum=%ld\n", get_sum, ghost_sum);
          unequal++;
      }
  }
  printf("unequal count=%d\n", unequal);
  free(coefs1);
  GA_Terminate();
  MPI_Finalize();
  return 0;
}
Пример #4
0
static int test(int shape_idx, int type_idx, int dist_idx)
{
    int type = TYPES[type_idx];
    int *dims = SHAPES[shape_idx];
    int ndim = SHAPES_NDIM[shape_idx];
    mock_ga_t *mock_a, *result_a;
    int g_a;
    int buffer[100];
    int lo[GA_MAX_DIM], hi[GA_MAX_DIM], ld[GA_MAX_DIM], shape[GA_MAX_DIM];
    int result=0, error_index=-1, error_proc=-1;

    mock_a = Mock_Create(type, ndim, dims, "mock", NULL);
    result_a = Mock_Create(type, ndim, dims, "mock", NULL);

    g_a = create_function[dist_idx](type, ndim, dims);

    mock_data(mock_a, g_a);

    mock_to_global(mock_a, g_a);

    Mock_Zero(mock_a);

    GA_Zero(g_a);

    global_to_mock(g_a, result_a);

    result = neq_mock(mock_a, result_a, &error_index);
    if (0 != result) {
        error_proc = GA_Nodeid();
    }

    GA_Igop(&result, 1, "+");
    GA_Igop(&error_proc, 1, "max");

    if (error_proc != GA_Nodeid()) {
        error_index = 0;
    }

    GA_Igop(&error_index, 1, "+");
    if (0 != result) {
        if (error_proc == GA_Nodeid()) {
            printf("ERROR: local result failed to compare to global result\n");
            printf("\terror_proc=%d\n", error_proc);
            printf("\terror_index=%d\n", error_index);
            printf("***LOCAL RESULT***\n");
            Mock_Print(mock_a);
            printf("***GLOBAL RESULT***\n");
            Mock_Print(result_a);
            printf("\tprinting array distribution\n");
        }
        GA_Sync();
        GA_Print(g_a);
        GA_Print_distribution(g_a);
        return 1;
    }

    Mock_Destroy(mock_a);
    Mock_Destroy(result_a);
    GA_Destroy(g_a);

    return 0;
}
Пример #5
0
static int test(int shape_idx, int type_idx, int dist_idx)
{
    int type = TYPES[type_idx];
    int *dims = SHAPES[shape_idx];
    int ndim = SHAPES_NDIM[shape_idx];
    mock_ga_t *mock_a, *result_a;
    int g_a;
    void *alpha = NULL;
    int buffer[100];
    int lo[GA_MAX_DIM], hi[GA_MAX_DIM], ld[GA_MAX_DIM], shape[GA_MAX_DIM];
    int result=0, error_index=-1, error_proc=-1;
    int ival = 6;
    long lval = 7;
    long long llval = 8;
    float fval = 9;
    double dval = 10;
    SingleComplex cval = {11,12};
    DoubleComplex zval = {13,14};

    /* create the local array and result array */
    mock_a = Mock_Create(type, ndim, dims, "mock", NULL);
    result_a = Mock_Create(type, ndim, dims, "mock", NULL);

    /* create the global array */
    g_a = create_function[dist_idx](type, ndim, dims);

    /* create meaningful data for local array */
    mock_data(mock_a, g_a);

    /* init global array with same data as local array */
    mock_to_global(mock_a, g_a);

    switch (type) {
        case C_INT:      alpha = &ival; break;
        case C_LONG:     alpha = &lval; break;
        case C_LONGLONG: alpha = &llval; break;
        case C_FLOAT:    alpha = &fval; break;
        case C_DBL:      alpha = &dval; break;
        case C_SCPL:     alpha = &cval; break;
        case C_DCPL:     alpha = &zval; break;
    }

    /* call the local routine */
    Mock_Add_constant(mock_a, alpha);

    /* call the global routine */
    GA_Add_constant(g_a, alpha);
    
    /* get the results from the global array */
    global_to_mock(g_a, result_a);
    
    /* compare the results */
    result = neq_mock(mock_a, result_a, &error_index);
    if (0 != result) {
        error_proc = GA_Nodeid();
    }
    /* make sure all procs get same result so they can die gracefully */
    GA_Igop(&result, 1, "+");
    /* if error occured, find the highest failing node ID */
    GA_Igop(&error_proc, 1, "max");
    /* clear the error index for all but the highest failing node ID */
    if (error_proc != GA_Nodeid()) {
        error_index = 0;
    }
    /* make sure all procs get the error index on the highest failing node ID */
    GA_Igop(&error_index, 1, "+");
    if (0 != result) {
        if (error_proc == GA_Nodeid()) {
            printf("ERROR: local result failed to compare to global result\n");
            printf("\terror_proc=%d\n", error_proc);
            printf("\terror_index=%d\n", error_index);
            printf("***LOCAL RESULT***\n");
            Mock_Print(mock_a);
            printf("***GLOBAL RESULT***\n");
            Mock_Print(result_a);
            printf("\tprinting array distribution\n");
        }
        GA_Sync();
        GA_Print(g_a);
        GA_Print_distribution(g_a);
        return 1;
    }

    /* clean up */
    Mock_Destroy(mock_a);
    Mock_Destroy(result_a);
    GA_Destroy(g_a);

    return 0;
}
Пример #6
0
int main(int argc, char **argv) {
    int me;
    int g_a;
    int status;
    int i,j;
    int dims[] = {n,n};
    int proc_group[PROC_LIST_SIZE],proclist[PROC_LIST_SIZE],inode;
    int sbuf[1],rbuf[1];
    MPI_Comm comm;

    MP_INIT(argc,argv);
    GA_Initialize();
    me = GA_Nodeid();

    status = MA_init(MT_DBL, 100000, 100000);
    if (!status) GA_Error("ma_init failed",-1);
    status = MA_set_auto_verify(1);
    status = MA_set_hard_fail(1);
    status = MA_set_error_print(1);

    inode = GA_Cluster_nodeid();
    if (me == 0) {
        printf("there are %d nodes, node 0 has %d procs\n",
                GA_Cluster_nnodes(), GA_Cluster_nprocs(0));
        fflush(stdout);
    }
    GA_Sync();
    for (i=0; i<GA_Cluster_nnodes(); ++i) {
        for (j=0; j<GA_Cluster_nprocs(i); ++j) {
            proclist[j]=GA_Cluster_procid(i,j);
        }
        proc_group[i]=GA_Pgroup_create(proclist,GA_Cluster_nprocs(i));
    }
    GA_Sync();
    for (i=0; i<GA_Cluster_nnodes(); ++i) {
        if (i == inode) {
            printf("%d joining group %d\n", me, proc_group[inode]);
            GA_Pgroup_set_default(proc_group[inode]);
            g_a = NGA_Create(C_DBL, 2, dims, "a", NULL);
            if (!g_a) GA_Error("NGA_Create failed",-1);
            printf("%d Created array of  group %d as proc no. %d\n",
                    me, proc_group[inode], GA_Nodeid());
            GA_Print_distribution(g_a);
            comm = GA_MPI_Comm_pgroup_default();
            if (comm != MPI_COMM_NULL) {
                sbuf[0] = GA_Nodeid();
                status = MPI_Allreduce(sbuf, rbuf, 1, MPI_INT, MPI_MAX, comm);
                printf("%d max nodeid is %d\n", me, rbuf[0]);
                if ((rbuf[0]+1) != GA_Cluster_nprocs(i)) {
                    GA_Error("MPI_Allreduce failed",1);
                }
            }
            else {
                printf("MPI_Comm was null!\n");
            }
            GA_Pgroup_set_default(GA_Pgroup_get_world());
        }
        GA_Sync();
    }

    GA_Terminate();
    MP_FINALIZE();

    return 0;
}
Пример #7
0
// note: Sayan: brings down memory requirement to about 268 MB
int main(int argc, char **argv)
{
    int me, nproc, g_a = -1, i, j;

#if defined(USE_ELEMENTAL)
    int ndim=2, dims[2]= {N1,N2};
#else
    int ndim=2, type=MT_F_DBL, dims[2]= {N1,N2};
#endif

    double *buf;

    int lo[2], hi[2], ld[1];
    double alpha = 1.0;

#if defined(USE_ELEMENTAL)
    // initialize Elemental (which will initialize MPI)
    ElInitialize( &argc, &argv );
    ElMPICommRank( MPI_COMM_WORLD, &me );
    ElMPICommSize( MPI_COMM_WORLD, &nproc );

    ElGlobalArrays_d eldga;

    // instantiate el::global array
    ElGlobalArraysConstruct_d( &eldga );
    // initialize global arrays
    ElGlobalArraysInitialize_d( eldga );
    printf ("INITIALIZED elemental global array...\n");
#else
    MP_INIT(argc,argv);
    GA_Initialize_ltd(-1);

    me=GA_Nodeid();
    nproc=GA_Nnodes();
#endif

    if(me==0) printf("Using %ld processes\n",(long)nproc);
    if(me==0) printf("memory = %ld bytes\n",((long)N1)*((long)N2)*8);

#if defined(USE_ELEMENTAL)
    // create and allocate a global array
    printf ("ndim = %d\n", ndim);
    printf ("dim[0] = %d and dim[1] = %d\n", dims[0], dims[1]);
    ElGlobalArraysCreate_d( eldga, ndim, dims, "A", &g_a);
    printf ("CREATED elemental global array...\n");
    // print distribution
    ElGlobalArraysPrint_d( eldga, g_a );
#else
    g_a = NGA_Create(type, ndim, dims, "A", NULL);

    GA_Zero(g_a);   /* zero the matrix */

    GA_Print_distribution(g_a);
#endif

    if(me == 0) {
//        buf = (double*)(malloc(N1*1024*sizeof(double)));
        buf = (double*)(malloc(N1*128*sizeof(double)));
//        for(j = 0; j < N1*1024; ++j) buf[j] = 1.0;
//        for(i = 0; i < N2/1024; ++i) {
        for(j = 0; j < N1*128; ++j) buf[j] = 1.0;
        for(i = 0; i < N2/128; ++i) {

            lo[0] = 0;
            hi[0] = lo[0] + N1   -1;
            /*
                lo[1] = i*1024;
                hi[1] = lo[1] + 1024 -1;
                ld[0] = 1024;
            */
            lo[1] = i*128;
            hi[1] = lo[1] + 128 -1;
            ld[0] = 128;
            printf("NGA_Acc.%d:  %d:%d %d:%d\n",i,lo[0],hi[0],lo[1],hi[1]);

#if defined(USE_ELEMENTAL)
            ElGlobalArraysAccumulate_d( eldga, g_a, lo, hi, buf, ld, &alpha );
            // there is an explicit flush in NGA_Acc/Put, so when it returns, the buffer
            // can be reused and data has reached the destination
#else
            NGA_Init_fence();
            NGA_Acc(g_a, lo, hi, buf, ld, &alpha);
            NGA_Fence();
#endif
        }
    }

#if defined(USE_ELEMENTAL)
    ElGlobalArraysSync_d( eldga );
    ElGlobalArraysDestroy_d( eldga, g_a );
    ElGlobalArraysTerminate_d( eldga );
    // call el::global arrays destructor
    ElGlobalArraysDestruct_d( eldga );
    ElFinalize();
#else
    GA_Sync();

    GA_Destroy(g_a);

    GA_Terminate();
    MP_FINALIZE();
#endif

    return 0;
}
Пример #8
0
int main(int argc, char**argv)
{
  int nprocs, me;
  int i,j;
  MPI_Init(&argc,&argv);
  MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
  MPI_Comm_rank(MPI_COMM_WORLD,&me);
  GA_Initialize();
  int flag=1;
  int Nx=108; int Ny=108; int Nz = 108;
  Nx+=3; Ny+=3; Nz+=3;
  int data[Nx*Ny*Nz];
  int num_splines = 32;
  int g_a,dims[4]={Nx,Ny,Nz,num_splines},chunk[4]={-1,-1,-1,num_splines};
  int type=C_INT;
  g_a=NGA_Create(type,4,dims,"Coefs",chunk);
  int lo[4],hi[4],ld[3];
  double value=9.0;
  GA_Fill(g_a,&value);
  GA_Print_distribution(g_a);
  fflush(stdout);
  if(me==0)
  {
      for (i=0; i<num_splines; i++) 
      {
          for (j=0; j<Nx*Ny*Nz; j++) 
              data[j] = rand()%1000;
          lo[0]=lo[1]=lo[2]=0;
          hi[0]=Nx-1;hi[1]=Ny-1;hi[2]=Nz-1;
          lo[3]=hi[3]=i%num_splines;
          ld[0]=Ny;ld[1]=Nz;ld[2]=1;
          NGA_Put(g_a,lo,hi,data,ld);
      }
  }
  printf("done\n"),fflush(stdout);
  GA_Sync();
  ga_coefs_t *ga_coefs = malloc(sizeof(ga_coefs_t));
  ga_coefs->Mx = Nx; 
  ga_coefs->My = Ny;
  ga_coefs->Mz = Nz;
  ga_coefs->nsplines = num_splines;
  ga_coefs->g_a=g_a;
  int *coefs1 = (int*)malloc((size_t)1*sizeof(int)*4*4*4*num_splines);
  int ix,iy,iz;
  Nx-=3; Ny-=3; Nz-=3;
  ga_coefs->sumt=ga_coefs->amount=0;
  NGA_Distribution(g_a,me,lo,hi);
  int low[16][4],high[16][4];
  for(i=0;i<nprocs;i++)
      NGA_Distribution(g_a,i,low[i],high[i]);
  srand ( time(NULL) );
  int k;
  for(k=0;k<nprocs;k++)
  {
      ga_coefs->sumt=ga_coefs->amount=0;
      {
          for(i=0;i<1000;i++)
          {
              ix=rand_index(low[k][0],high[k][0]);
              iy=rand_index(low[k][1],high[k][1]);
              iz=rand_index(low[k][2],high[k][2]);
              coefs_ga_get_3d(ga_coefs,coefs1,ix,iy,iz);
              mini_cube_sum(coefs1, ga_coefs->nsplines);
          }
      }
      printf("<%d,%d>\t %lf \t %d \t %lf\n", me,k, ga_coefs->sumt, ga_coefs->amount, ga_coefs->sumt/ga_coefs->amount),fflush(stdout);
  }
  free(coefs1);
  GA_Terminate();
  MPI_Finalize();
  return 0;
}
Пример #9
0
void do_work()
{
int g_a, g_b;
int me=GA_Nodeid(), nproc=GA_Nnodes(), proc, loop;
int dims[NDIM], lo[NDIM], hi[NDIM], block[NDIM], ld[NDIM-1];
int i,d,*proclist, offset;
int adims[NDIM], ndim,type;
typedef struct {
       int lo[NDIM];
       int hi[NDIM];
} patch_t;
patch_t *regions;
int *map;
double *buf;

     /***** create array A with default distribution  *****/
     if(me==0){printf("Creating array A\n"); fflush(stdout);}
     for(i = 0; i<NDIM; i++)dims[i] = N*(i+1);
#ifdef NEW_API
     g_a = GA_Create_handle();
     GA_Set_data(g_a,NDIM,dims,MT_F_DBL);
     GA_Set_array_name(g_a,"array A");
     (void)GA_Allocate(g_a);
#else
     g_a = NGA_Create(MT_F_DBL, NDIM, dims, "array A", NULL);
#endif
     if(!g_a) GA_Error("create failed: A",0); 
     if(me==0)printf("OK\n\n");

     /* print info about array we got */
     NGA_Inquire(g_a, &type, &ndim, adims);
     GA_Print_distribution(g_a);

     GA_Sync();
     /* duplicate array A with ga_create irreg rather than ga_duplicate
      * -- want to show distribution control 
      * -- with ga_duplicate it would be g_b=GA_Duplicate(g_a,name) 
      */
     if(me==0)printf("\nReconstructing distribution description for A\n");

     /* get memory for arrays describing distribution */
     proclist = (int*)malloc(nproc*sizeof(int));
     if(!proclist)GA_Error("malloc failed for proclist",0);
     regions = (patch_t*)malloc(nproc*sizeof(patch_t));
     if(!regions)GA_Error("malloc failed for regions",0);
     map = (int*)malloc((nproc+ndim)*sizeof(int)); /* ubound= nproc+mdim */
     if(!map)GA_Error("malloc failed for map",0);

     /* first find out how array g_a is distributed */
     for(i=0;i<ndim;i++)lo[i]=BASE;
     for(i=0;i<ndim;i++)hi[i]=adims[i] -1 + BASE;
     proc = NGA_Locate_region(g_a, lo, hi, (int*)regions, proclist);
     if(proc<1) GA_Error("error in NGA_Locate_region",proc);

     /* determine blocking for each dimension */
     for(i=0;i<ndim;i++)block[i]=0;
     for(i=0;i<ndim;i++)adims[i]=0;
     
     offset =0;
     for(d=0; d<ndim; d++)
         for(i=0;i<proc;i++)
             if( regions[i].hi[d]>adims[d] ){
                map[offset] = regions[i].lo[d];
                offset++;
                block[d]++;
                adims[d]= regions[i].hi[d];
             }
            
     if(me==0){
        printf("Distribution map contains %d elements\n",offset); 
        print_subscript("number of blocks for each dimension",ndim,block,"\n");
        print_subscript("distribution map",offset,map,"\n\n");
        fflush(stdout);
     }
     
     if(me==0)printf("Creating array B applying distribution of A\n");

#    ifdef USE_DUPLICATE
       g_b = GA_Duplicate(g_a,"array B");
#    else
       g_b = NGA_Create_irreg(MT_F_DBL, NDIM, dims, "array B", block,map);
#    endif
     if(!g_b) GA_Error("create failed: B",0); 
     if(me==0)printf("OK\n\n");
     free(proclist); free(regions); free(map);
     
     GA_Print_distribution(g_b);

     GA_Sync();

     if(me==0){
       printf("\nCompare distributions of A and B\n");
       if(GA_Compare_distr(g_a,g_b))
          printf("Failure: distributions NOT identical\n");
       else 
          printf("Success: distributions identical\n");
       fflush(stdout);
     }
       

     if(me==0){
        printf("\nAccessing local elements of A: set them to the owner process id\n");
        fflush(stdout);
     }
     GA_Sync();

     NGA_Distribution(g_a,me,lo,hi);

     if(hi[0]>=0){/* -1 means no elements stored on this processor */
         double *ptr;
         int locdim[NDIM];
         NGA_Access(g_a, lo,hi, &ptr, ld);
         for(i=0;i<ndim;i++)locdim[i]=hi[i]-lo[i]+1;
         fill_patch(ptr, locdim, ld, ndim,(double)me);
     }

     for(i=0;i<nproc; i++){
       if(me==i && hi[0]>=0){
         char msg[100];
         sprintf(msg,"%d: leading dimensions",me);
         print_subscript(msg,ndim-1,ld,"\n");
         fflush(stdout);
       }
       GA_Sync();
     }
     
     GA_Sync();
     if(me==0)printf("\nRandomly checking the update using ga_get on array sections\n");
     GA_Sync();

     /* show ga_get working and verify array updates 
      * every process does N random gets
      * for simplicity get only a single row at a time
      */
     srand(me); /* different seed for every process */
     hi[ndim-1]=adims[ndim-1] -1 + BASE;
     for(i=1;i<ndim-1; i++)ld[i]=1; ld[ndim-2]=adims[ndim-1] -1 + BASE;

     /* get buffer memory */
     buf = (double*)malloc(adims[ndim-1]*sizeof(double));
     if(!buf)GA_Error("malloc failed for buf",0);

     /* half of the processes check the result */
     if(me<=nproc/2) 
     for(loop = 0; loop< N; loop++){ /* task parallel loop */
         lo[ndim-1]=BASE;
         for (i= 0; i < ndim -1; i ++){
              lo[i] = hi[i] = rand()%adims[i]+BASE; 
         }

         /* print_subscript("getting",ndim,lo,"\n");*/
         NGA_Get(g_a,lo,hi,buf,ld); 
         
         /* check values */
         for(i=0;i<adims[ndim-1]; i++){
             int p = NGA_Locate(g_a, lo);
             if((double)p != buf[i]) {
                char msg[100];
                sprintf(msg,"%d: wrong value: %d != %lf a",me, p, buf[i]);
                print_subscript(msg,ndim,lo,"\n");
                GA_Error("Error - bye",i);  
             }
             lo[ndim-1]++;
          }
     }
             
     free(buf);
     GA_Sync();
           
     if(me==0)printf("OK\n");
     
     GA_Destroy(g_a);
     GA_Destroy(g_b);
}
Пример #10
0
static int test(int shape_idx, int type_idx, int dist_idx)
{
    int type = TYPES[type_idx];
    int *dims = SHAPES[shape_idx];
    int ndim = SHAPES_NDIM[shape_idx];
    mock_ga_t *mock_a, *mock_b, *mock_c, *mock_r;
    int g_a, g_b, g_c;
    int buffer[100];
    int lo[GA_MAX_DIM], hi[GA_MAX_DIM], ld[GA_MAX_DIM], shape[GA_MAX_DIM];
    int result=0, error_index=-1, error_proc=-1;

    mock_a = Mock_Create(type, ndim, dims, "mock", NULL);
    mock_b = Mock_Create(type, ndim, dims, "mock", NULL);
    mock_c = Mock_Create(type, ndim, dims, "mock", NULL);
    mock_r = Mock_Create(type, ndim, dims, "mock", NULL);

    g_a = create_function[dist_idx](type, ndim, dims);
    g_b = create_function[dist_idx](type, ndim, dims);
    g_c = create_function[dist_idx](type, ndim, dims);

    /* create meaningful data for local array */
    mock_data(mock_a, g_a);
    mock_data(mock_b, g_b);

    /* init global array with same data as local array */
    mock_to_global(mock_a, g_a);
    mock_to_global(mock_b, g_b);

    /* call the local routine */
    Mock_Elem_multiply(mock_a, mock_b, mock_c);

    /* call the global routine */
    GA_Elem_multiply(g_a, g_b, g_c);

    /* get the results from the global array */
    global_to_mock(g_c, mock_r);

    /* compare the results */
    result = neq_mock(mock_c, mock_r, &error_index);
    if (0 != result) {
        error_proc = GA_Nodeid();
    }
    /* make sure all procs get same result so they can die gracefully */
    GA_Igop(&result, 1, "+");
    /* if error occured, find the highest failing node ID */
    GA_Igop(&error_proc, 1, "max");
    /* clear the error index for all but the highest failing node ID */
    if (error_proc != GA_Nodeid()) {
        error_index = 0;
    }
    /* make sure all procs get the error index on the highest failing node ID */
    GA_Igop(&error_index, 1, "+");
    if (0 != result) {
        if (error_proc == GA_Nodeid()) {
            printf("ERROR: local result failed to compare to global result\n");
            printf("\terror_proc=%d\n", error_proc);
            printf("\terror_index=%d\n", error_index);
            printf("***LOCAL RESULT***\n");
            Mock_Print(mock_a);
            printf("***GLOBAL RESULT***\n");
            Mock_Print(mock_r);
            printf("\tprinting array distribution\n");
        }
        GA_Sync();
        GA_Print(g_a);
        GA_Print_distribution(g_a);
        return 1;
    }

    /* clean up */
    Mock_Destroy(mock_a);
    Mock_Destroy(mock_r);
    GA_Destroy(g_a);

    return 0;

}