Esempi in C++ (Cpp) per NGA_Create

Esempio n. 1

0

Mostra file

File: ga_elem_maximumpatch.c Progetto: jeffhammond/ga

main(int argc, char **argv)
{
  int rank, nprocs, i, j;
  int g_A, g_B, g_C, local_C[DIM][DIM], dims[DIM]={5,5};
  int val_A=5, val_B=3, ld=DIM, max; 
  int lo[DIM]={2,2}, hi[DIM]={4,4}, blo[DIM]={0,0}, bhi[DIM]={2,2}, clo[DIM]={1,1}, chi[DIM]={3,3};

  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  MA_init(C_INT, 1000, 1000);

  GA_Initialize();
  
  g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL);
  g_B = NGA_Create(C_INT, DIM, dims, "array_B", NULL);
  g_C = NGA_Create(C_INT, DIM, dims, "array_C", NULL);

  GA_Fill(g_A, &val_A);
  GA_Fill(g_B, &val_B);
  GA_Zero(g_C);
  GA_Elem_maximum_patch(g_A, lo, hi, g_B, blo, bhi, g_C, clo, chi);
  GA_Print(g_C);
  GA_Sync();
  
  NGA_Get(g_C, clo, chi, local_C, &ld);
  if(rank==1)
    {
  
      for(i=0; i<DIM; i++)
	{
	  for(j=0; j<DIM; j++)printf("%d ", local_C[i][j]);
	  printf("\n");
	}
      
      if(val_A>val_B) max=val_A;
      else max=val_B;

      for(i=0; i<DIM; i++)
	{
	  for(j=0; j<DIM; j++)
	    if(local_C[i][j]!=max) printf("GA Error : \n");
	}
      
    }
    
  GA_Sync();
  if(rank == 0)
    printf("Test Completed \n");

  GA_Terminate();
  MPI_Finalize();

}

Esempio n. 2

0

Mostra file

File: ga_solve.c Progetto: ryancoleman/lotsofcoresbook1code

main(int argc, char **argv)
{
    int rank, nprocs, i, j;
    int g_A, g_B;
    int dims[MAX_DIM], val=4, ndim, re;


    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    MA_init(C_INT, 1000, 1000);
    GA_Initialize();

    for(i=1; i<=MAX_DIM; i++)
    {
        ndim=i;
        dims[i]=SIZE;
        //      for(j=0; j<ndim; j++)

        g_A = NGA_Create(C_INT, ndim, dims, "array_A", NULL);
        g_B = NGA_Create(C_INT, ndim, dims, "array_B", NULL);

        if(!g_A)
            GA_Error("GA Error: no global array exists \n", ndim);
        if(!g_B)
            GA_Error("GA Error: no global array exists \n", ndim);
    }

    GA_Sync();

    GA_Fill(g_A, &val);
    re=GA_Solve(g_A, g_B);

    if(re==0)
        printf("Cholesky Fact is Successful \n");
    else if (re >0)
        printf("Cholesky Fact couldn't be completed \n");
    else
        printf("An Error occured\n");
    if(rank == 0)
        GA_PRINT_MSG();

    GA_Destroy(g_A);
    GA_Destroy(g_B);
    GA_Terminate();
    MPI_Finalize();
}

Esempio n. 3

0

Mostra file

File: ga_intialize.c Progetto: jeffhammond/ga

int main(int argc, char **argv)
{
  int rank, nprocs, n=1;
  int g_A, dims[DIM]={5,5};

  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  MA_init(C_INT, 1000, 1000);

  GA_Initialize();
  
  printf("check %d \n", n);  

  g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL);
  if(GA_Ndim(g_A)!=DIM)
    printf("ERROR: GA_Ndim didnt return nDimension after GA_Initialize\n");
  printf("%d : %d \n", rank, GA_Ndim(g_A));

  GA_Terminate();

  if(rank==0)
    printf(" GA: Test Completed \n");
  MPI_Finalize();

  return 0;
}

Esempio n. 4

0

Mostra file

File: ga_create_irreg2.c Progetto: jeffhammond/ga

irregular_array2(int rank)
{

  int g_A, g_B; 
  int dims[DIM]={GSIZE,GSIZE}, dims2[DIM], block[DIM]={3,2}, map[5]={0,2,6,0,4}, val_A=4, val_B=7;
  int n_block[DIM], block_dims[DIM], i;

  g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL);
  g_B = NGA_Create_irreg(C_INT, DIM, dims, "array_B", block, map);


  GA_Fill(g_A, &val_A);
  GA_Print(g_A);

  GA_Fill(g_B, &val_B);
  GA_Print(g_B);
  GA_Sync();

  /*
  GA_Get_block_info(g_B, n_block, block_dims);
  for(i=0; i<DIM; i++)
    printf(" %d:  %d ___ %d --- \n", rank, n_block[i], block_dims[i]);
  */

  GA_Destroy(g_A);
  GA_Destroy(g_B);
}

Esempio n. 5

0

Mostra file

File: ga_create_irreg.c Progetto: jeffhammond/ga

irregular_array1(int rank)
{

  int g_A, g_B; 
  int dims[DIM]={5,10}, dims2[DIM], ndim, type, value=5, block[DIM]={2,3}, map[5]={0,2,0,4,6}, val=7;
  int n_block[DIM], block_dims[DIM], i;

  g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL);
  g_B = NGA_Create_irreg(C_INT, DIM, dims, "array_B", block, map);

  GA_Fill(g_A, &value);
  GA_Print(g_A);

  GA_Fill(g_B, &val);
  GA_Print(g_B);
  GA_Sync();

  NGA_Inquire(g_A, &type, &ndim, dims2);
  //printf(" %d -- %d,,\n", type, ndim);

  /*
  GA_Get_block_info(g_B, n_block, block_dims);
  for(i=0; i<DIM; i++)
    printf(" %d:  %d ___ %d --- \n", rank, n_block[i], block_dims[i]);
  */

  GA_Destroy(g_A);
  GA_Destroy(g_B);
}

Esempio n. 6

0

Mostra file

File: ga_fillpatch.c Progetto: jeffhammond/ga

fillandscale(int rank, int nprocs)
{
  int g_A,  val1=5, val2=5, local_A[SIZE][SIZE], i, j; 
  int dims[DIM]={SIZE,SIZE}, alo[DIM]={1,1}, ahi[DIM]={2,2}, ld=5;

  g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL);
  GA_Zero(g_A);
  NGA_Fill_patch(g_A, alo, ahi, &val1);
  GA_Print(g_A);

  GA_Scale(g_A, &val2);
  GA_Print(g_A);

  NGA_Get(g_A, alo, ahi, local_A, &ld);

  if(rank == 1)
    {
      for(i=0; i<DIM; i++)
	{
	  for(j=0; j<DIM; j++) if(local_A[i][j]!=val1*val2)
	    printf(" GA ERROR: \n");
	}
    }
  GA_Destroy(g_A);
}

Esempio n. 7

0

Mostra file

File: nga_access.c Progetto: niuqingpeng/microbenchmark

int main(int argc, char** argv) 
{ 
    int nprocs,myid,nprocssq; 
    int dims[2],chunk[2]; 
    int i,j,k; 
    int stack = 100000, heap = 100000; 
    MPI_Init(&argc,&argv); 
    GA_Initialize(); 
    MA_init(C_DBL,stack,heap); 
    nprocssq = GA_Nnodes(); 
    nprocs = sqrt(nprocssq); 
    myid = GA_Nodeid(); 
    dims[0] = N; dims[1] = N; 
    chunk[0] = N/nprocs; 
    chunk[1] = N/nprocs; 
    int g_a = NGA_Create(C_DBL,2,dims,"Array A",chunk); 
    int lo[2],hi[2]; 
    NGA_Distribution(g_a,myid,lo,hi); 
    int ld[1] = {N/nprocs}; 
    void *ptr; 
    double *local; 
    printf("Myid = %d, lo = [%d,%d] , hi = [%d,%d] , ld = %d \n",myid,lo[0],lo[1],hi[0],hi[1],ld[0]); 
    NGA_Access(g_a,lo,hi,&ptr,ld); 
    local = (double*) ptr; 
    printf("Myid = %d , local[0][0] = %f\n",*local); 
    GA_Sync(); 
    GA_Destroy(g_a); 
    GA_Terminate(); 
    MPI_Finalize(); 
    return 0; 
}

Esempio n. 8

0

Mostra file

File: util_gnxtval.c Progetto: jnafziger/pdft-nwchem

Integer util_gnxtval_(Integer *val) {

    if(*val > 0) {
       if(!initialized) ga_error("nxtval: not yet initialized", 0L);
       return (Integer) NGA_Read_inc(g_T, &subscript, 1);
    }
    else if(*val==0) {
       int n = 1;
       initialized=1;

       /* create task array */
       g_T = NGA_Create(C_LONG, 1, &n,"Atomic Task", NULL);
       
       /* Initialize the task array */
       if(GA_Nodeid()==0) {
	  int lo=0, hi=0;
	  NGA_Put (g_T, &lo, &hi, &initval, &hi);
	  initval=0;
       }
              GA_Sync();
       return 0;
    }
    else if (*val < 0) { GA_Destroy(g_T); initialized=0; initval=0; return 0;}
    
    ga_error("nxtval: invalid value passed", 0L);
    return -1;
}

Esempio n. 9

0

Mostra file

File: ga_scale_rows.c Progetto: jeffhammond/ga

main(int argc, char **argv)
{
  int rank, nprocs;
  int g_A, g_V,  val1=5, val2=5, local_A[SIZE][SIZE], dims_V=SIZE, local_V[dims_V]; 
  int dims[DIM]={SIZE,SIZE}, dims2[DIM], lo[DIM]={1,1}, hi[DIM]={2,2}, ld=5, i, j;
  int loV=0, hiV=dims_V-1;

  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  MA_init(C_INT, 1000, 1000);

  GA_Initialize();

  g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL);
  g_V = NGA_Create(C_INT, 1, &dims_V, "array_A", NULL);
  GA_Fill(g_A, &val1);
  GA_Print(g_A);
  printf("\n");

  GA_Scale(g_A, &val2);
  GA_Print(g_A);

  GA_Get_diag(g_A, g_V);
  GA_Print(g_V);
  
  NGA_Get(g_A, lo, hi, local_A, &ld);
  NGA_Get(g_V, &loV, &hiV, local_V, &ld);

  if(rank==1)
    {
      for(i=0; i<dims_V; i++)
	if(local_V[i]!=val1*val2) printf(" GA Error: \n");
    }

  if(rank == 0)
    printf("Test Completed \n");

  GA_Terminate();
  MPI_Finalize();
}

Esempio n. 10

0

Mostra file

File: print.c Progetto: jeffhammond/ga

int main(int argc, char **argv)
{
    int size_dst = 15;
    int g_a = 0;
    int I_NEG_ONE = -1;
    long L_NEG_ONE = -1;
    long long LL_NEG_ONE = -1;
    int FIVE = 5;
    int TEN = 10;
    int lo;
    int hi;
    int *ptr;
    int i;

    MP_INIT(argc,argv);

    GA_INIT(argc,argv);

    for (i=0; i<3; ++i) {
        if (0 == i) {
            g_a = NGA_Create(C_INT, 1, &size_dst, "dst", NULL);
            GA_Fill(g_a, &I_NEG_ONE);
        } else if (1 == i) {
            g_a = NGA_Create(C_LONG, 1, &size_dst, "dst", NULL);
            GA_Fill(g_a, &L_NEG_ONE);
        } else if (2 == i) {
            g_a = NGA_Create(C_LONGLONG, 1, &size_dst, "dst", NULL);
            GA_Fill(g_a, &LL_NEG_ONE);
        }
        GA_Sync();
        GA_Print(g_a);
        NGA_Print_patch(g_a, &FIVE, &TEN, 0);
        NGA_Print_patch(g_a, &FIVE, &TEN, 1);
        NGA_Distribution(g_a, GA_Nodeid(), &lo, &hi);
        NGA_Access(g_a, &lo, &hi, &ptr, NULL);
        printf("[%d] (%d)=%d\n", GA_Nodeid(), lo, *ptr);
        NGA_Release(g_a, &lo, &hi);
    }

    GA_Terminate();
    MP_FINALIZE();
    exit(EXIT_SUCCESS);
}

Esempio n. 11

0

Mostra file

File: ga_gather3.c Progetto: jeffhammond/ga

main(int argc, char **argv)
{
  int rank, nprocs;
  int g_A, dims[D]={SIZE,SIZE}, *local_A=NULL, *local_G=NULL, **sub_array=NULL, **s_array=NULL;
  int i, j, value=5;
  
  MPI_Init(&argc, &argv);
  GA_Initialize();
  MA_init(C_INT, 1000, 1000);
  
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  s_array=(int**)malloc(N*sizeof(int*));
  for(i=0; i<N; i++)
    {
      s_array[i]=(int*)malloc(D*sizeof(int));
      for(j=0; j<D; j++) s_array[i][j]=rand()%10;
    }

  sub_array=(int**)malloc(N*sizeof(int*));
  for(i=0; i<N; i++)
    {
      sub_array[i]=(int*)malloc(D*sizeof(int));
      for(j=0; j<D; j++) sub_array[i][j]=rand()%10;
    }

  for(i=0; i<N; i++) local_A=(int*)malloc(N*sizeof(int));

  for(i=0; i<N; i++) local_G=(int*)malloc(N*sizeof(int));
  
  g_A=NGA_Create(C_INT, D, dims, "array_A", NULL);
  GA_Fill(g_A, &value);
  GA_Sync();
                                                   
  NGA_Scatter(g_A, local_A, s_array, N);
  NGA_Gather(g_A, local_G, s_array, N);
  GA_Sync();
  GA_Print(g_A);

  if(rank==0)
    {
      for(i=0; i<N; i++)
	if(local_G[i]!=local_A[i]) printf("GA Error: \n");
    }
  GA_Sync();
  if(rank==0)
    GA_PRINT_MSG();

  GA_Terminate();
  MPI_Finalize();
  return 0;
}

Esempio n. 12

0

Mostra file

File: lennard-jones_ga2.c Progetto: fuentesdt/tao-1.10.1-p3

/**
 * Block Topology (of Force Matrix): 
 * Say for example: If there are 4 block and 100 atoms, the size of 
 * the force matrix is 100x100 and each block size is 50x50. 
 * 
 *  -----------
 * |     |     |
 * | 0,0 | 0,1 |
 *  -----------
 * | 1,0 | 1,1 |
 * |     |     |
 *  -----------
 */
int SetupBlocks(AppCtx *user)
{
  int i,j,k=0;
  int n;
  int zero = 0;
  int x_space, g_space;

  if (user->natoms % user->BlockSize) {
    GA_Error("Number of atoms should be a multiple of block size. Choose a different block size.", 0L);
  }

  n = user->natoms / user->BlockSize;
  user->nBlocks = n*n;

  if (user->nBlocks > MAX_BLOCKS) 
    GA_Error("Number of blocks is greater that MAX_BLOCKS: Solution is either to increase the defined MAX_BLOCKS or increase your block size",0L);

  if (user->nBlocks < user->nproc)
    GA_Error("Number of blocks should be greater than or equal to the number of processors",0L);

  
  for (i=0;i<n;i++)
    for (j=0;j<n;j++,k++) {
      user->btopo[k].x = i;
      user->btopo[k].y = j;
    }
  
  /* Create task array */
  n = 1;
  user->atomicTask = NGA_Create(C_INT, 1, &n, "Atomic Task", NULL);
  if (!user->atomicTask)
    GA_Error("NGA_Create failed for Atomic Task",0);

  if (user->me == 0) 
    NGA_Put(user->atomicTask, &zero, &zero, &user->nproc, &zero);
  
  
  /* space for x values from two processors */
  x_space = 2 * user->BlockSize * user->ndim;
  /* space for ALL gradient value */
  g_space = user->natoms * user->ndim; 
             

  if (MA_push_stack(C_DBL, x_space + g_space+3, "GA LJ bufs", &user->memHandle))
    MA_get_pointer(user->memHandle, &user->x1);
  else
    GA_Error("ma_alloc_get failed",x_space + g_space);
  
  user->x2  = user->x1 + x_space/2 + 1;
  user->grad = user->x2 + x_space/2 + 1;
  GA_Sync();
  return 0;
}

Esempio n. 13

0

Mostra file

File: ga_dot.c Progetto: jeffhammond/ga

integer_dot(int rank, int nprocs)
{
  
  int g_A, g_B;
  int dims[DIM]={SIZE,SIZE}, val_A=5, val_B=10, op;
  
  MA_init(C_INT, 1000, 1000);
  
  g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL);
  g_B = NGA_Create(C_INT, DIM, dims, "array_B", NULL);
  
  GA_Fill(g_A, &val_A);
  GA_Fill(g_B, &val_B);
  
  if(rank==0)
    {
      op=GA_Idot(g_A, g_B);
      
      printf("%d \n", op);
    }
      
}

Esempio n. 14

0

Mostra file

File: ga_ndim2.c Progetto: jeffhammond/ga

verify_ga_dim(int ndim)
{
  int g_A, dims[ndim], i;

  for(i=0; i<ndim; i++) dims[i]=SIZE;

  g_A = NGA_Create(C_INT, ndim, dims, "array_A", NULL);

  if(GA_Ndim(g_A) != ndim)
    printf("ERROR: GA_Ndim -- %d returned wrong \n", ndim);

  GA_Destroy(g_A);
 }

Esempio n. 15

0

Mostra file

File: ga_get.c Progetto: jeffhammond/ga

main(int argc, char **argv)
{
  int rank, nprocs, i, j;
  int g_A, **local_A=NULL, **local_B=NULL; 
  int dims[DIM]={SIZE,SIZE}, dims2[DIM], lo[DIM]={SIZE-SIZE,SIZE-SIZE}, hi[DIM]={SIZE-1,SIZE-1}, ld=5, value=5;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  MA_init(C_INT, 1000, 1000);
  GA_Initialize();

  local_A=(int**)malloc(SIZE*sizeof(int*));
  for(i=0; i<SIZE; i++)
    {
      local_A[i]=(int*)malloc(SIZE*sizeof(int));
      for(j=0; j<SIZE; j++) local_A[i][j]=rand()%10;
    }

  local_B=(int**)malloc(SIZE*sizeof(int*));
  for(i=0; i<SIZE; i++)
    {
      local_B[i]=(int*)malloc(SIZE*sizeof(int));
      for(j=0; j<SIZE; j++) local_B[i][j]=rand()%10;
    }

  g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL);
  GA_Zero(g_A);
  
  if(rank==0)
    {
      NGA_Put(g_A, lo, hi, local_A, &ld);
      NGA_Get(g_A, lo, hi, local_B, &ld);

      for(i=0; i<SIZE; i++)
	{
	  for(j=0; j<SIZE; j++)
	    if(local_A[i][j]!=local_B[i][j]) GA_ERROR_MSG();
	}
    }
  
  GA_Sync();
  GA_Destroy(g_A);
  
  if(rank == 0) GA_PRINT_MSG();

  GA_Terminate();
  MPI_Finalize();
}

Esempio n. 16

0

Mostra file

File: ga_transpose3.c Progetto: jeffhammond/ga

main(int argc, char **argv)
{
  int rank, nprocs, i, j;
  int g_A, g_B, **local_value=NULL;

  int dims[DIM]={SIZE,SIZE}, lo[DIM]={SIZE-SIZE,SIZE-SIZE}, hi[DIM]={SIZE-1,SIZE-1}, ld=SIZE;

  local_value=(int**)malloc(SIZE*sizeof(int*));
  for(i=0; i<SIZE; i++)
    local_value[i]=(int*)malloc(SIZE*sizeof(int));

  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  MA_init(C_INT, 1000, 1000);

  GA_Initialize();

  g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL);
  g_B = NGA_Create(C_INT, DIM, dims, "array_B", NULL);

  for(i=0; i<SIZE; i++)
    for(j=0; j<SIZE; j++)
      local_value[i][j]=rand()%10;
	
  if(rank==0) NGA_Put(g_A, lo, hi, local_value, &ld);
  GA_Transpose(g_A, g_B);
  if(rank==0) validate_transpose(g_A, g_B, lo, hi, ld);

  GA_Sync();
  if(rank == 1) GA_PRINT_MSG();
  
  GA_Terminate();
  MPI_Finalize();
}

Esempio n. 17

0

Mostra file

File: ga_add_patch.c Progetto: jeffhammond/ga

main(int argc, char **argv)
{
  int rank, nprocs, i, j;
  int g_A, g_B, g_C, local_C[DIM][DIM], dims[DIM]={5,5}, val1=5, val2=4, alpha=3, beta=2, ld=5;
  int alo[DIM]={2,2}, ahi[DIM]={3,3}, blo[DIM]={2,2}, bhi[DIM]={3,3}, clo[DIM]={1,1}, chi[DIM]={2,2};

  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  MA_init(C_INT, 1000, 1000);

  GA_Initialize();
  
  g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL);

  g_B = GA_Duplicate(g_A, "array_B");
  g_C = GA_Duplicate(g_A, "array_C");

  GA_Fill(g_A, &val1);
  GA_Fill(g_B, &val2);
  GA_Zero(g_C);

  NGA_Add_patch(&alpha, g_A, clo, chi, &beta, g_B, blo, bhi, g_C, clo, chi);

  GA_Sync();
  GA_Print(g_A);
  GA_Print(g_B);
  GA_Print(g_C);

  NGA_Get(g_C, clo, chi, local_C, &ld);

  //printf("check 1 \n");

  for(i=0; i<DIM; i++)
    {
      for(j=0; j<DIM; j++)printf("%d ", local_C[i][j]);
      printf("\n");
    }
  
  if(rank == 0)
    {
      printf("check 2\n");
    
      for(i=0; i<DIM; i++)
	{
	  for(j=0; j<DIM; j++)
	    if(local_C[i][j]!=(alpha*val1)+(beta*val2)) printf("GA Error : \n");
	}
    }
  
  if(rank==0)
    GA_PRINT_MSG();

  GA_Sync();

  /*
  GA_Destroy(g_A);
  GA_Destroy(g_B);
  GA_Destroy(g_C);
  */

  //*******************************************************************

  /* what would be the possible reason for GA_destroy to get failed .., 
   * solve this before consolidate the whole
   */

  GA_Terminate();
  MPI_Finalize();
 
}

Esempio n. 18

0

Mostra file

File: lennard-jones_ga.c Progetto: fuentesdt/tao-1.10.1-p3

int main (int argc, char **argv)
{
  double startTime;
  int info;			/* used to check for functions returning nonzeros */
  GAVec ga_x;        		/* solution vector */
  TAO_SOLVER tao;		/* TAO_SOLVER solver context */
  TAO_GA_APPLICATION taoapp;	/* TAO application context */
  TaoTerminateReason reason;
  AppCtx user;			/* user-defined application context */

  /*initialize GA and MPI */
  int heap = 4000, stack = 4000;
  MPI_Init (&argc, &argv);	/* initialize MPI */
  GA_Initialize ();		/* initialize GA */
  if (!MA_init(MT_F_DBL, stack, heap))
    GA_Error((char*)"MA_init failed", stack+heap);

  /* Initialize TAO */
  TaoInitialize (&argc, &argv, (char *) 0, help);

  startTime = MPI_Wtime();

  /* Initialize problem parameters */
  user.natoms = NATOMS;
  user.ndim = NDIM;
  user.n = user.natoms*user.ndim;

  /* Create working space */
  if (MA_push_stack(C_DBL, 2*user.n, "Vector buffers", &user.memHandle) == MA_FALSE)
    GA_Error((char*)"MAIN::ma_alloc_get failed",2*user.n);

  /* Allocate Global Array vector for the solution */
  int dims[2];
  dims[0] = user.n;
  ga_x = NGA_Create (C_DBL, 1, dims, (char*)"GA_X", NULL);
  if (!ga_x) GA_Error ((char*)"lennard-jones.main::NGA_Create ga_x", ga_x);

  /* The TAO code begins here */
  /* Create TAO solver with desired solution method */
  info = TaoCreate (MPI_COMM_WORLD, "tao_cg", &tao); CHKERRQ(info);
  info = TaoGAApplicationCreate (MPI_COMM_WORLD, &taoapp); CHKERRQ(info);

  /* Set initial vector */
  info = InitializeVariables(ga_x, &user); CHKERRQ(info);
  info = TaoGAAppSetInitialSolutionVec(taoapp, ga_x); CHKERRQ(info);

  /* Set routines for function, gradient */
  info = TaoGAAppSetObjectiveAndGradientRoutine (taoapp, FormFunctionGradient, (void *) &user); 
  CHKERRQ(info);

  /* Check for TAO command line options */
  info = TaoSetFromOptions (tao); CHKERRQ(info);

  /* SOLVE THE APPLICATION */
  info = TaoSolveGAApplication (taoapp, tao); CHKERRQ(info);

  /*  To View TAO solver information use */
  info = TaoView(tao); CHKERRQ(info);

  /* Get termination information */
  info = TaoGetTerminationReason (tao, &reason); CHKERRQ(info);

  if (reason <= 0)
    printf("Try a different TAO method, adjust some parameters, or check the function evaluation routines\n");

  printf("TIME TAKEN = %lf\n", MPI_Wtime()-startTime);

  /*output the solutions */ 
  printf ("The solution is :\n");
  GA_Print (ga_x);

  /* Free TAO data structures */
  info = TaoDestroy (tao); CHKERRQ(info);
  info = TaoGAAppDestroy (taoapp); CHKERRQ(info);

  /* Free GA data structures */
  GA_Destroy (ga_x);
  if (!MA_pop_stack(user.memHandle))
    GA_Error((char*)"Main::MA_pop_stack failed",0);

  /* Finalize TAO, GA, and MPI */
  TaoFinalize ();
  GA_Terminate ();
  MPI_Finalize ();

  return 0;
}

Esempio n. 19

0

Mostra file

File: simple_groups_commc.c Progetto: jeffhammond/ga

int main(int argc, char **argv) {
    int me;
    int g_a;
    int status;
    int i,j;
    int dims[] = {n,n};
    int proc_group[PROC_LIST_SIZE],proclist[PROC_LIST_SIZE],inode;
    int sbuf[1],rbuf[1];
    MPI_Comm comm;

    MP_INIT(argc,argv);
    GA_Initialize();
    me = GA_Nodeid();

    status = MA_init(MT_DBL, 100000, 100000);
    if (!status) GA_Error("ma_init failed",-1);
    status = MA_set_auto_verify(1);
    status = MA_set_hard_fail(1);
    status = MA_set_error_print(1);

    inode = GA_Cluster_nodeid();
    if (me == 0) {
        printf("there are %d nodes, node 0 has %d procs\n",
                GA_Cluster_nnodes(), GA_Cluster_nprocs(0));
        fflush(stdout);
    }
    GA_Sync();
    for (i=0; i<GA_Cluster_nnodes(); ++i) {
        for (j=0; j<GA_Cluster_nprocs(i); ++j) {
            proclist[j]=GA_Cluster_procid(i,j);
        }
        proc_group[i]=GA_Pgroup_create(proclist,GA_Cluster_nprocs(i));
    }
    GA_Sync();
    for (i=0; i<GA_Cluster_nnodes(); ++i) {
        if (i == inode) {
            printf("%d joining group %d\n", me, proc_group[inode]);
            GA_Pgroup_set_default(proc_group[inode]);
            g_a = NGA_Create(C_DBL, 2, dims, "a", NULL);
            if (!g_a) GA_Error("NGA_Create failed",-1);
            printf("%d Created array of  group %d as proc no. %d\n",
                    me, proc_group[inode], GA_Nodeid());
            GA_Print_distribution(g_a);
            comm = GA_MPI_Comm_pgroup_default();
            if (comm != MPI_COMM_NULL) {
                sbuf[0] = GA_Nodeid();
                status = MPI_Allreduce(sbuf, rbuf, 1, MPI_INT, MPI_MAX, comm);
                printf("%d max nodeid is %d\n", me, rbuf[0]);
                if ((rbuf[0]+1) != GA_Cluster_nprocs(i)) {
                    GA_Error("MPI_Allreduce failed",1);
                }
            }
            else {
                printf("MPI_Comm was null!\n");
            }
            GA_Pgroup_set_default(GA_Pgroup_get_world());
        }
        GA_Sync();
    }

    GA_Terminate();
    MP_FINALIZE();

    return 0;
}

Esempio n. 20

0

Mostra file

File: ga_gather2.c Progetto: jeffhammond/ga

main(int argc, char **argv)
{
  int rank, nprocs;
  int g_A, dims[D]={5,10}, local_A[N], local_G[N], **sub_array=NULL, **s_array=NULL;
  int i, j, value=5;
  
  MPI_Init(&argc, &argv);
  GA_Initialize();
  MA_init(C_INT, 1000, 1000);
  
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  s_array=(int**)malloc(N*sizeof(int*));
  for(i=0; i<N; i++)
    {
      s_array[i]=(int*)malloc(D*sizeof(int));
      for(j=0; j<D; j++) s_array[i][j]=rand()%5;
    }

  sub_array=(int**)malloc(N*sizeof(int*));
  for(i=0; i<N; i++)
    {
      sub_array[i]=(int*)malloc(D*sizeof(int));
      for(j=0; j<D; j++) sub_array[i][j]=rand()%5;
    }


  for(i=0; i<N; i++)
    //local_A=(int*)malloc(N*sizeof(int));
  
  /*
   * depends on the value of array ..we can generate the location values in randon 
   * we can also use the if-condition
   */
  
  // PRINTing all the genrated array for reference 

  for(i=0; i<N; i++)
    {
      for(j=0; j<D; j++)printf("%d ",s_array[i][j]);
      printf("\n");
    } 

  printf("\n");
  for(i=0; i<N; i++)
    {
      for(j=0; j<D; j++)printf("%d ",sub_array[i][j]);
      printf("\n");
    } 

  printf("\n");
  for(i=0; i<N; i++)printf("%d \n",local_A[i]=rand()%5+1);

  //   PRINT done - now creating array 
  g_A=NGA_Create(C_INT, D, dims, "array_A", NULL);
  GA_Fill(g_A, &value);
  GA_Sync();
                                                   
  NGA_Scatter(g_A, local_A, s_array, N);
  NGA_Gather(g_A, local_G, s_array, N);
  GA_Sync();
  GA_Print(g_A);

  for(i=0; i<N; i++)printf("%d \n",local_G[i]);
  printf("\n");

  if(rank==0)
    {
      for(i=0; i<N; i++)
	if(local_G[i]!=local_A[i]) printf("GA Error: \n");
    }
  GA_Sync();
  if(rank==0)
    GA_PRINT_MSG();

  GA_Terminate();
  MPI_Finalize();
  return 0;
}

Esempio n. 21

0

Mostra file

File: ga-mpi.c Progetto: jeffhammond/ga

void do_work()
{
int ZERO=0;   /* useful constants */
int g_a, g_b;
int n=N, ndim=2,type=MT_F_DBL,dims[2]={N,N},coord[2];
int me=GA_Nodeid(), nproc=GA_Nnodes();
int row, i, j;
 int lo[2], hi[2];

/* Note: on all current platforms DoublePrecision = double */
DoublePrecision buf[N], *max_row=NULL;

MPI_Comm WORLD_COMM;
MPI_Comm ROW_COMM;
int ilo,ihi, jlo,jhi, ld, prow, pcol;
int root=0, grp_me=-1;

     WORLD_COMM = GA_MPI_Comm_pgroup_default();

     if(me==0)printf("Creating matrix A\n");
     dims[0]=n; dims[1]=n;
     g_a = NGA_Create(type, ndim, dims, "A", NULL);
     if(!g_a) GA_Error("create failed: A",n); 
     if(me==0)printf("OK\n");
     
     if(me==0)printf("Creating matrix B\n");
     dims[0]=n;
     g_b = NGA_Create(type, 1, dims, "B", NULL);
     if(!g_b) GA_Error("create failed: B",n); 
     if(me==0)printf("OK\n");
     
     GA_Zero(g_a);   /* zero the matrix */
     
     if(me==0)printf("Initializing matrix A\n");
     /* fill in matrix A with values: A(i,j) = (i+j) */ 
     for(row=me; row<n; row+= nproc){
    /**
     * simple load balancing: 
     * each process works on a different row in MIMD style 
     */ 
    for(i=0; i<n; i++) buf[i]=(DoublePrecision)(i+row+1); 
    lo[0]=hi[0]=row;
    lo[1]=ZERO;  hi[1]=n-1; 
    NGA_Put(g_a, lo, hi, buf, &n); 
     }
     
     /* GA_print(&g_a);*/
     NGA_Distribution(g_a, me, lo, hi);
     ilo=lo[0]; ihi=hi[0];
     jlo=lo[1]; jhi=hi[1];
     
     GA_Sync(); 
     if(ihi-ilo+1 >0){
        max_row=(DoublePrecision*)malloc(sizeof(DoublePrecision)*(ihi-ilo+1));
        if (!max_row) GA_Error("malloc 3 failed",(ihi-ilo+1));
        for (i=0; i<(ihi-ilo+1); i++) {
            max_row[i] = 0.0;
        }
     }
     NGA_Proc_topology(g_a, me, coord);  /* block coordinates */
     prow = coord[0];
     pcol = coord[1];

     if(me==0)printf("Splitting comm according to distribution of A\n");
     
     /* GA on SP1 requires synchronization before & after message-passing !!*/
     GA_Sync(); 
     
     if(me==0)printf("Computing max row elements\n");
     /* create communicator for processes that 'own' A[:,jlo:jhi] */
     MPI_Barrier(WORLD_COMM);
     if(pcol < 0 || prow <0)
    MPI_Comm_split(WORLD_COMM,MPI_UNDEFINED,MPI_UNDEFINED, &ROW_COMM);
     else
    MPI_Comm_split(WORLD_COMM, (int)pcol, (int)prow, &ROW_COMM);
     
     if(ROW_COMM != MPI_COMM_NULL){
    double *ptr;
    MPI_Comm_rank(ROW_COMM, &grp_me);
    
    /* each process computes max elements in the block it 'owns' */
    lo[0]=ilo; hi[0]=ihi;
    lo[1]=jlo; hi[1]=jhi;
    NGA_Access(g_a, lo, hi, &ptr, &ld);
    for(i=0; i<ihi-ilo+1; i++){
       for(j=0; j<jhi-jlo+1; j++)
          if(max_row[i] < ptr[i*ld + j]){
         max_row[i] = ptr[i*ld + j];
          }
    }
    MPI_Reduce(max_row, buf, ihi-ilo+1, MPI_DOUBLE, MPI_MAX,
           root, ROW_COMM);
    
     }else fprintf(stderr,"process %d not participating\n",me);
     GA_Sync(); 
     
     /* processes with rank=root in ROW_COMM put results into g_b */
     ld = 1;
     if(grp_me == root) {
    lo[0]=ilo;  hi[0]=ihi;
    NGA_Put(g_b, lo, hi, buf, &ld); 
     }
        
     GA_Sync();

     if(me==0)printf("Checking the result\n");
     if(me==0){
    lo[0]=ZERO; hi[0]=n-1;
        NGA_Get(g_b, lo, hi, buf, &n); 
        for(i=0; i< n; i++)if(buf[i] != (double)n+i){
            fprintf(stderr,"error:%d max=%f should be:%d\n",i,buf[i],n+i);
            GA_Error("terminating...",1);
        }
     }
     
     if(me==0)printf("OK\n");

     GA_Destroy(g_a);
     GA_Destroy(g_b);
}

Esempio n. 22

0

Mostra file

File: ga_lu.c Progetto: dmlb2000/nwchem-cml

/* input is matrix size */
void ga_lu(double *A, int matrix_size) 
{
    int g_a, g_b, dims[2], type=C_DBL;
    int lo[2], hi[2], ld;
    int block_size[2], proc_grid[2];
    double time, gflops;
    
    /* create a 2-d GA (global matrix) */
    dims[0] = matrix_size;
    dims[1] = matrix_size;
    block_size[0] = BLOCK_SIZE;
    block_size[1] = BLOCK_SIZE;
#ifdef USE_SCALAPACK_DISTR
    proc_grid[0] = 2;
    proc_grid[1] = nprocs/2;
    if(nprocs%2) GA_Error("For ScaLAPACK stle distribution, nprocs must be "
                         " divisible by 2", 0);
#endif
    
    
#ifndef BLOCK_CYCLIC
    g_a = NGA_Create(type, 2, dims, "A", NULL);
    g_b = GA_Duplicate(g_a, "transposed array B");
#else
    g_a = GA_Create_handle();
    GA_Set_data(g_a, 2, dims, type);
    GA_Set_array_name(g_a,"A");
#  ifdef USE_SCALAPACK_DISTR
    GA_Set_block_cyclic_proc_grid(g_a, block_size, proc_grid);
#  else
    GA_Set_block_cyclic(g_a, block_size);    
#  endif
    GA_Allocate(g_a);
    
    g_b = GA_Create_handle();
    GA_Set_data(g_b, 2, dims, type);
    GA_Set_array_name(g_b,"B");
#  ifdef USE_SCALAPACK_DISTR
    GA_Set_block_cyclic_proc_grid(g_b, block_size, proc_grid);
#  else
    GA_Set_block_cyclic(g_b, block_size);
#  endif
    GA_Allocate(g_b);
    
#endif
    
    /* copy the local matrix into GA */
    if(me==0) 
    {
       lo[0] = 0;
       hi[0] = matrix_size - 1;
       lo[1] = 0;
       hi[1] = matrix_size - 1;
       ld    = matrix_size;
       
       NGA_Put(g_a, lo, hi, A, &ld);
    }
    GA_Sync();

    GA_Transpose(g_a, g_b);
    time = CLOCK_();
    GA_Lu('n', g_b);
    time = CLOCK_() - time;

    /* 2/3 N^3 - 1/2 N^2 flops for LU and 2*N^2 for solver */
    gflops = ( (((double)matrix_size) * matrix_size)/(time*1.0e+9) *
               (2.0/3.0 * (double)matrix_size - 0.5) );
    if(me==0) printf("\nGA_Lu: N=%d flops=%2.5e Gflops, time=%2.5e secs\n\n",
                     matrix_size, gflops, time);

#if DEBUG
    GA_Print(g_a);
    GA_Print(g_b);
#endif
    /* if(me==0) lu(A, matrix_size);     */

    GA_Destroy(g_a);
    GA_Destroy(g_b);
}

Esempio n. 23

0

Mostra file

File: lennard-jones_ga2.c Progetto: fuentesdt/tao-1.10.1-p3

int main (int argc, char **argv)
{
  double startTime;
  int info;			/* used to check for functions returning nonzeros */
  GAVec ga_x;        		/* solution vector */
  TAO_SOLVER tao;		/* TAO_SOLVER solver context */
  TAO_GA_APPLICATION taoapp;	/* TAO application context */
  TaoTerminateReason reason;
  AppCtx user;			/* user-defined application context */

  /*initialize GA and MPI */
  int heap = 400000, stack = 400000;
  MPI_Init (&argc, &argv);	/* initialize MPI */
  GA_Initialize ();		/* initialize GA */
  user.me = GA_Nodeid ();
  user.nproc = GA_Nnodes ();
  startTime = MPI_Wtime();
  
  if (user.me == 0) {
    if (GA_Uses_fapi ())
      GA_Error ("Program runs with C array API only", 0);
    printf ("Using %ld processes\n", (long) user.nproc);
    fflush (stdout);
  }
  heap /= user.nproc;
  stack /= user.nproc;
  if (!MA_init (MT_F_DBL, stack, heap))
    GA_Error ("MA_init failed", stack + heap);	/* initialize memory allocator */
  
  /* Initialize TAO */
  TaoInitialize (&argc, &argv, (char *) 0, help);

  /* Initialize problem parameters */
  user.ndim = NDIM;
  user.natoms = NATOMS;
  user.BlockSize = BLOCKSIZE;


  /* Allocate vectors for the solution and gradient */
  int dims[2];
  dims[0] = user.ndim*user.natoms;
  ga_x = NGA_Create (C_DBL, 1, dims, "GA_X", NULL);
  if (!ga_x) GA_Error ("lennard-jones.main::NGA_Create ga_x", ga_x);

  /* Set up structures for data distribution */
  info = SetupBlocks(&user); CHKERRQ(info);


  /* The TAO code begins here */
  /* Create TAO solver with desired solution method */
  info = TaoCreate (MPI_COMM_WORLD, "tao_lmvm", &tao); CHKERRQ(info);
  info = TaoGAApplicationCreate (MPI_COMM_WORLD, &taoapp); CHKERRQ(info);

  /* Set the initial solution */
  info = InitializeVariables(ga_x, &user); CHKERRQ(info);
  info = TaoGAAppSetInitialSolutionVec(taoapp, ga_x); CHKERRQ(info);

  /* Set routines for function, gradient */
  info = TaoGAAppSetObjectiveAndGradientRoutine (taoapp, FormFunctionGradient, 
					      (void *) &user); CHKERRQ(info);

  /* Check for TAO command line options */
  info = TaoSetFromOptions (tao); CHKERRQ(info);


  /* SOLVE THE APPLICATION */
  info = TaoSolveGAApplication (taoapp, tao); CHKERRQ(info);

  /*  To View TAO solver information use */
  info = TaoView(tao); CHKERRQ(info);

  /* Get termination information */
  info = TaoGetTerminationReason (tao, &reason);
  if(info) GA_Error("lennard-jones.main.TaoGetTerminationReason",info);
  if (user.me == 0) {
    if (reason <= 0)
      printf("Try a different TAO method, adjust some parameters, or check the function evaluation routines\n");
    
    printf("WALL TIME TAKEN = %lf\n", MPI_Wtime()-startTime);
    /*output the solutions */ 

    printf ("The solution is :\n");
  }
  GA_Print (ga_x);




  /* Free TAO data structures */
  info = TaoDestroy (tao); CHKERRQ(info);
  info = TaoGAAppDestroy (taoapp); CHKERRQ(info);

  /* Free GA data structures */
  GA_Destroy (ga_x);
  if (!MA_pop_stack(user.memHandle)) 
    ga_error("Main::MA_pop_stack for memHandle failed",0);

  /* Finalize TAO, GA, and MPI */
  TaoFinalize ();
  GA_Terminate ();
  MPI_Finalize ();

  return 0;
}

Esempio n. 24

0

Mostra file

File: gatscat.c Progetto: jeffhammond/ga

int main( int argc, char **argv ) {
  int g_a, g_b, i, j, size, size_me;
  int icnt, idx, jdx, ld;
  int n=N, type=MT_C_INT, one;
  int *values, *ptr;
  int **indices;
  int dims[2]={N,N};
  int lo[2], hi[2];

  int heap=3000000, stack=2000000;
  int me, nproc;

  int datatype, elements;
  double *prealloc_mem;
  MP_INIT(argc,argv);

#if 1
  GA_INIT(argc,argv);                            /* initialize GA */
  me=GA_Nodeid(); 
  nproc=GA_Nnodes();
  if(me==0) {
    if(GA_Uses_fapi())GA_Error("Program runs with C array API only",1);
    printf("\nUsing %ld processes\n",(long)nproc);
    fflush(stdout);
  }

  heap /= nproc;
  stack /= nproc;
  if(! MA_init(MT_F_DBL, stack, heap)) 
    GA_Error("MA_init failed",stack+heap);  /* initialize memory allocator*/ 

  /* Create a regular matrix. */
  if(me==0)printf("\nCreating matrix A of size %d x %d\n",N,N);
  g_a = NGA_Create(type, 2, dims, "A", NULL);
  if(!g_a) GA_Error("create failed: A",n); 

  /* Fill matrix using scatter routines */
  size = N*N;
  if (size%nproc == 0) {
    size_me = size/nproc;
  } else {
    i = size - size%nproc;
    size_me = i/nproc;
    if (me < size%nproc) size_me++;
  }

  /* Check that sizes are all okay */
  i = size_me;
  GA_Igop(&i,1,"+");
  if (i != size) {
    GA_Error("Sizes don't add up correctly: ",i);
  } else if (me==0) {
    printf("\nSizes add up correctly\n");
  }

  /* Allocate index and value arrays */
  indices = (int**)malloc(size_me*sizeof(int*));
  values = (int*)malloc(size_me*sizeof(int));
  icnt = me;
  for (i=0; i<size_me; i++) {
    values[i] = icnt;
    idx = icnt%N; 
    jdx = (icnt-idx)/N;
    if (idx >= N || idx < 0) {
      printf("p[%d] Bogus index i: %d\n",me,idx);
    }
    if (jdx >= N || jdx < 0) {
      printf("p[%d] Bogus index j: %d\n",me,jdx);
    }
    indices[i] = (int*)malloc(2*sizeof(int));
    (indices[i])[0] = idx;
    (indices[i])[1] = jdx;
    icnt += nproc;
  }

  /* Scatter values into g_a */
  NGA_Scatter(g_a, values, indices, size_me);
  GA_Sync();

  /* Check to see if contents of g_a are correct */
  NGA_Distribution( g_a, me, lo, hi );
  NGA_Access(g_a, lo, hi, &ptr, &ld);
  for (i=lo[0]; i<hi[0]; i++) {
    idx = i-lo[0];
    for (j=lo[1]; j<hi[1]; j++) {
      jdx = j-lo[1];
      if (ptr[idx*ld+jdx] != j*N+i) {
        printf("p[%d] (Scatter) expected: %d actual: %d\n",me,j*N+i,ptr[idx*ld+jdx]);
      }
    }
  }
  if (me==0) printf("\nCompleted test of NGA_Scatter\n");

  for (i=0; i<size_me; i++) {
    values[i] = 0;
  }
  GA_Sync();
  NGA_Gather(g_a, values, indices, size_me);
  icnt = me;
  for (i=0; i<size_me; i++) {
    if (icnt != values[i]) {
      printf("p[%d] (Gather) expected: %d actual: %d\n",me,icnt,values[i]);
    }
    icnt += nproc;
  }
  if (me==0) printf("\nCompleted test of NGA_Gather\n");
  GA_Sync();

  /* Scatter-accumulate values back into GA*/
  one = 1;
  NGA_Scatter_acc(g_a, values, indices, size_me, &one);
  GA_Sync();

  /* Check to see if contents of g_a are correct */
  for (i=lo[0]; i<hi[0]; i++) {
    idx = i-lo[0];
    for (j=lo[1]; j<hi[1]; j++) {
      jdx = j-lo[1];
      if (ptr[idx*ld+jdx] != 2*(j*N+i)) {
        printf("p[%d] (Scatter_acc) expected: %d actual: %d\n",me,2*(j*N+i),ptr[idx*ld+jdx]);
      }
    }
  }
  if (me==0) printf("\nCompleted test of NGA_Scatter_acc\n");
  NGA_Release(g_a, lo, hi);

  /* Test fixed buffer size */
  NGA_Alloc_gatscat_buf(size_me);

  /* Scatter-accumulate values back into GA*/
  GA_Sync();
  NGA_Scatter_acc(g_a, values, indices, size_me, &one);
  GA_Sync();

  /* Check to see if contents of g_a are correct */
  for (i=lo[0]; i<hi[0]; i++) {
    idx = i-lo[0];
    for (j=lo[1]; j<hi[1]; j++) {
      jdx = j-lo[1];
      if (ptr[idx*ld+jdx] != 3*(j*N+i)) {
        printf("p[%d] (Scatter_acc) expected: %d actual: %d\n",me,3*(j*N+i),ptr[idx*ld+jdx]);
      }
    }
  }
  if (me==0) printf("\nCompleted test of NGA_Scatter_acc using fixed buffers\n");
  NGA_Release(g_a, lo, hi);
  NGA_Free_gatscat_buf();

  GA_Destroy(g_a);
  if(me==0)printf("\nSuccess\n");
  GA_Terminate();
#endif

  MP_FINALIZE();

 return 0;
}

Esempio n. 25

0

Mostra file

File: testc.c Progetto: jeffhammond/ga

void do_work()
{
int ONE=1 ;   /* useful constants */
int g_a, g_b;
int n=N, type=MT_F_DBL;
int me=GA_Nodeid(), nproc=GA_Nnodes();
int i, row;
int dims[2]={N,N};
int lo[2], hi[2], ld;

/* Note: on all current platforms DoublePrecision == double */
double buf[N], err, alpha, beta;

     if(me==0)printf("Creating matrix A\n");
     g_a = NGA_Create(type, 2, dims, "A", NULL);
     if(!g_a) GA_Error("create failed: A",n); 
     if(me==0)printf("OK\n");

     if(me==0)printf("Creating matrix B\n");
     /* create matrix B  so that it has dims and distribution of A*/
     g_b = GA_Duplicate(g_a, "B");
     if(! g_b) GA_Error("duplicate failed",n); 
     if(me==0)printf("OK\n");

     GA_Zero(g_a);   /* zero the matrix */

     if(me==0)printf("Initializing matrix A\n");
     /* fill in matrix A with random values in range 0.. 1 */ 
     lo[1]=0; hi[1]=n-1;
     for(row=me; row<n; row+= nproc){
         /* each process works on a different row in MIMD style */
         lo[0]=hi[0]=row;   
         for(i=0; i<n; i++) buf[i]=sin((double)i + 0.1*(row+1));
         NGA_Put(g_a, lo, hi, buf, &n);
     }


     if(me==0)printf("Symmetrizing matrix A\n");
     GA_Symmetrize(g_a);   /* symmetrize the matrix A = 0.5*(A+A') */
   

     /* check if A is symmetric */ 
     if(me==0)printf("Checking if matrix A is symmetric\n");
     GA_Transpose(g_a, g_b); /* B=A' */
     alpha=1.; beta=-1.;
     GA_Add(&alpha, g_a, &beta, g_b, g_b);  /* B= A - B */
     err= GA_Ddot(g_b, g_b);
     
     if(me==0)printf("Error=%f\n",(double)err);
     
     if(me==0)printf("\nChecking atomic accumulate \n");

     GA_Zero(g_a);   /* zero the matrix */
     for(i=0; i<n; i++) buf[i]=(double)i;

     /* everybody accumulates to the same location/row */
     alpha = 1.0;
     row = n/2;
     lo[0]=hi[0]=row;
     lo[1]=0; hi[1]=n-1;
     ld = hi[1]-lo[1]+1;
     NGA_Acc(g_a, lo, hi, buf, &ld, &alpha );
     GA_Sync();

     if(me==0){ /* node 0 is checking the result */

        NGA_Get(g_a, lo, hi, buf,&ld);
        for(i=0; i<n; i++) if(buf[i] != (double)nproc*i)
           GA_Error("failed: column=",i);
        printf("OK\n\n");

     }
     
     GA_Destroy(g_a);
     GA_Destroy(g_b);
}

Esempio n. 26

0

Mostra file

File: big.c Progetto: sg0/Elemental

// note: Sayan: brings down memory requirement to about 268 MB
int main(int argc, char **argv)
{
    int me, nproc, g_a = -1, i, j;

#if defined(USE_ELEMENTAL)
    int ndim=2, dims[2]= {N1,N2};
#else
    int ndim=2, type=MT_F_DBL, dims[2]= {N1,N2};
#endif

    double *buf;

    int lo[2], hi[2], ld[1];
    double alpha = 1.0;

#if defined(USE_ELEMENTAL)
    // initialize Elemental (which will initialize MPI)
    ElInitialize( &argc, &argv );
    ElMPICommRank( MPI_COMM_WORLD, &me );
    ElMPICommSize( MPI_COMM_WORLD, &nproc );

    ElGlobalArrays_d eldga;

    // instantiate el::global array
    ElGlobalArraysConstruct_d( &eldga );
    // initialize global arrays
    ElGlobalArraysInitialize_d( eldga );
    printf ("INITIALIZED elemental global array...\n");
#else
    MP_INIT(argc,argv);
    GA_Initialize_ltd(-1);

    me=GA_Nodeid();
    nproc=GA_Nnodes();
#endif

    if(me==0) printf("Using %ld processes\n",(long)nproc);
    if(me==0) printf("memory = %ld bytes\n",((long)N1)*((long)N2)*8);

#if defined(USE_ELEMENTAL)
    // create and allocate a global array
    printf ("ndim = %d\n", ndim);
    printf ("dim[0] = %d and dim[1] = %d\n", dims[0], dims[1]);
    ElGlobalArraysCreate_d( eldga, ndim, dims, "A", &g_a);
    printf ("CREATED elemental global array...\n");
    // print distribution
    ElGlobalArraysPrint_d( eldga, g_a );
#else
    g_a = NGA_Create(type, ndim, dims, "A", NULL);

    GA_Zero(g_a);   /* zero the matrix */

    GA_Print_distribution(g_a);
#endif

    if(me == 0) {
//        buf = (double*)(malloc(N1*1024*sizeof(double)));
        buf = (double*)(malloc(N1*128*sizeof(double)));
//        for(j = 0; j < N1*1024; ++j) buf[j] = 1.0;
//        for(i = 0; i < N2/1024; ++i) {
        for(j = 0; j < N1*128; ++j) buf[j] = 1.0;
        for(i = 0; i < N2/128; ++i) {

            lo[0] = 0;
            hi[0] = lo[0] + N1   -1;
            /*
                lo[1] = i*1024;
                hi[1] = lo[1] + 1024 -1;
                ld[0] = 1024;
            */
            lo[1] = i*128;
            hi[1] = lo[1] + 128 -1;
            ld[0] = 128;
            printf("NGA_Acc.%d:  %d:%d %d:%d\n",i,lo[0],hi[0],lo[1],hi[1]);

#if defined(USE_ELEMENTAL)
            ElGlobalArraysAccumulate_d( eldga, g_a, lo, hi, buf, ld, &alpha );
            // there is an explicit flush in NGA_Acc/Put, so when it returns, the buffer
            // can be reused and data has reached the destination
#else
            NGA_Init_fence();
            NGA_Acc(g_a, lo, hi, buf, ld, &alpha);
            NGA_Fence();
#endif
        }
    }

#if defined(USE_ELEMENTAL)
    ElGlobalArraysSync_d( eldga );
    ElGlobalArraysDestroy_d( eldga, g_a );
    ElGlobalArraysTerminate_d( eldga );
    // call el::global arrays destructor
    ElGlobalArraysDestruct_d( eldga );
    ElFinalize();
#else
    GA_Sync();

    GA_Destroy(g_a);

    GA_Terminate();
    MP_FINALIZE();
#endif

    return 0;
}

Esempio n. 27

0

Mostra file

File: ga_nbacc.c Progetto: sg0/Elemental

int main(int argc, char **argv)
{
  int rank, nprocs;
  int g_A;
  int *local_A=NULL, *local_B=NULL, *output_A=NULL;
  int dims[DIM]={SIZE,SIZE}, dims2[DIM], lo[DIM]={SIZE-SIZE,SIZE-SIZE}, hi[DIM]={SIZE-1,SIZE-1}, ld=SIZE;
  int value=SIZE;

#if defined(USE_ELEMENTAL)
  // initialize Elemental (which will initialize MPI)
  ElInitialize( &argc, &argv );
  ElMPICommRank( MPI_COMM_WORLD, &rank );
  ElMPICommSize( MPI_COMM_WORLD, &nprocs );
  // instantiate el::global array
  ElGlobalArraysConstruct_i( &eliga );
  // initialize global arrays
  ElGlobalArraysInitialize_i( eliga );
#else
  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  MA_init(C_INT, 1000, 1000);

  GA_Initialize();
#endif

  local_A=(int*)malloc(SIZE*SIZE*sizeof(int));
  output_A=(int*)malloc(SIZE*SIZE*sizeof(int));
  memset (output_A, 0, SIZE*SIZE*sizeof(int));
  for(int j=0; j<SIZE; j++)
      for(int i=0; i<SIZE; i++) local_A[i+j*ld]=(i + j);
      //for(int i=0; i<SIZE; i++) local_A[i+j*ld]=(rand()%10);

  local_B=(int*)malloc(SIZE*SIZE*sizeof(int));
  memset (local_B, 0, SIZE*SIZE*sizeof(int));

  // nb handle
#if defined(USE_ELEMENTAL)
  typedef ElInt ga_nbhdl_t;
#endif
  ga_nbhdl_t nbnb;

#if defined(USE_ELEMENTAL)
  ElGlobalArraysCreate_i( eliga, DIM, dims, "array_A", NULL, &g_A );
  ElGlobalArraysFill_i( eliga, g_A, &value );
#else
  g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL);
  GA_Fill(g_A, &value);
#endif

  if (rank == 0) printf ("Initial global array:\n");
#if defined(USE_ELEMENTAL)
  ElGlobalArraysPrint_i( eliga, g_A );
#else
  GA_Print(g_A);
#endif

  for (int i = 0; i < NITERS; i++)
  {
      // acc data
#if defined(USE_ELEMENTAL)
      ElGlobalArraysNBAccumulate_i( eliga, g_A, lo, hi, local_A, &ld, &value, &nbnb );
#else
      NGA_NbAcc(g_A, lo, hi, local_A, &ld, &value, &nbnb);
#endif
      
      // updated output
      MPI_Reduce (local_A, output_A, SIZE*SIZE, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

#if defined(USE_ELEMENTAL)
      ElGlobalArraysNBWait_i( eliga, &nbnb );
#else
      NGA_NbWait (&nbnb);
#endif 

      // get
      if (rank == 0) printf ("Get in iter #%d\n", i);
#if defined(USE_ELEMENTAL)
      ElGlobalArraysSync_i( eliga );
      ElGlobalArraysGet_i( eliga, g_A, lo, hi, local_B, &ld );
      ElGlobalArraysPrint_i( eliga, g_A );
#else
      GA_Sync();
      NGA_Get(g_A, lo, hi, local_B, &ld);
      GA_Print(g_A);
#endif
  } // end of iters

  if(rank==0)
    {
      printf(" Alpha (multiplier): %d\n", value);
      printf(" Original local buffer (before accumulation): \n");

      for(int i=0; i<SIZE; i++)
	{
	  for(int j=0; j<SIZE; j++)
	    printf("%d ", local_A[i*ld+j]);
	  printf("\n");
	}
      printf("\n");
      printf(" Get returns: \n");
      for(int i=0; i<SIZE; i++)
	{
	  for(int j=0; j<SIZE; j++)
	    printf("%d ", local_B[i*ld + j]);
	  printf("\n");
	}

      printf("\n");
      for(int i=0; i<SIZE; i++)
	{
	  for(int j=0; j<SIZE; j++)
	    {
	      if(local_B[i*ld+j]!=(value + (NITERS * value * (output_A[i*ld+j]))))
		  GA_Error("ERROR", -99);
	    }
	}
    }
#if defined(USE_ELEMENTAL)
  ElGlobalArraysDestroy_i( eliga, g_A );
#else
  GA_Destroy(g_A);
#endif
  if(rank == 0)
    printf ("OK. Test passed\n");

    free (local_A);
    free (local_B);
    free (output_A);

#if defined(USE_ELEMENTAL)
    ElGlobalArraysTerminate_i( eliga );
    // call el::global arrays destructor
    ElGlobalArraysDestruct_i( eliga );
    ElFinalize();
#else
    GA_Terminate();
    MPI_Finalize();
#endif
}

Esempio n. 28

0

Mostra file

File: rate.c Progetto: jeffhammond/ga

void test_io_dbl()
{
    int n, ndim = NDIM;
    double err, tt0, tt1, mbytes;
    int g_a, g_b, d_a;
    int i, itmp, j, req, loop;
    int glo[MAXDIM],ghi[MAXDIM];
    dra_size_t dlo[MAXDIM],dhi[MAXDIM];
    dra_size_t ddims[MAXDIM],reqdims[MAXDIM];
    dra_size_t m;
    int index[MAXDIM], dims[MAXDIM];
    int me, nproc, isize;
    double *ptr;
    double plus, minus;
    int ld[MAXDIM], chunk[MAXDIM];
    char filename[80];
    FILE *fd;

    n = SIZE;
    m = ((dra_size_t)NFACTOR)*((dra_size_t)SIZE);

    loop  = 1;
    for (i=0; i<ndim; i++) loop *= NFACTOR;
    req = -1;
    nproc = GA_Nnodes();
    me    = GA_Nodeid();

    if (me == 0) {
        printf("Creating temporary global arrays %d",n);
        for (i=1; i<ndim; i++) {
            printf(" x %d",n);
        }
        printf("\n");
    }
    if (me == 0) fflush(stdout);
    GA_Sync();
    for (i=0; i<ndim; i++) {
        dims[i] = n;
        chunk[i] = 1;
    }

    g_a = NGA_Create(MT_DBL, ndim, dims, "a", chunk);
    if (!g_a) GA_Error("NGA_Create failed: a", 0);
    g_b = NGA_Create(MT_DBL, ndim, dims, "b", chunk);
    if (!g_b) GA_Error("NGA_Create failed: b", 0);
    if (me == 0) printf("done\n");
    if (me == 0) fflush(stdout);

    /*     initialize g_a, g_b with random values
           ... use ga_access to avoid allocating local buffers for ga_put */

    GA_Sync();
    NGA_Distribution(g_a, me, glo, ghi);
    NGA_Access(g_a, glo, ghi, &ptr, ld);
    isize = 1;
    for (i=0; i<ndim; i++) isize *= (ghi[i]-glo[i]+1);
    fill_random(ptr, isize);
    GA_Sync();
    GA_Zero(g_b);


    /*.......................................................................*/
    if (me == 0) {
        printf("Creating Disk array %ld",m);
        for (i=1; i<ndim; i++) {
            printf(" x %ld",m);
        }
        printf("\n");
    }
    if (me == 0) fflush(stdout);
    for (i=0; i<ndim; i++) {
        ddims[i] = m;
        reqdims[i] = (dra_size_t)n;
    }
    GA_Sync();
    strcpy(filename,FNAME);
    if (! (fd = fopen(filename, "w"))) {
        strcpy(filename,FNAME_ALT);
        if (! (fd = fopen(filename, "w"))) {
            GA_Error("open failed",0);
        }
    }
    fclose(fd);
    if (NDRA_Create(MT_DBL, ndim, ddims, "A", filename, DRA_RW,
                reqdims, &d_a) != 0) {
        GA_Error("NDRA_Create failed(d_a): ",0);
    }
    if (me == 0) printf("testing write\n");
    fflush(stdout);
    tt1 = 0.0;
    for (i=0; i<loop; i++) {
        itmp=i;
        for (j=0; j<ndim; j++) {
            index[j] = itmp%NFACTOR;
            itmp = (itmp - index[j])/NFACTOR;
        }
        for (j=0; j<ndim; j++) {
            glo[j] = 0;
            ghi[j] = SIZE - 1;
            dlo[j] = ((dra_size_t)index[j])*((dra_size_t)SIZE);
            dhi[j] = (((dra_size_t)index[j])+(dra_size_t)1)
                * ((dra_size_t)SIZE) - (dra_size_t)1;
        }
        tt0 = MP_TIMER();
        if (NDRA_Write_section(FALSE, g_a, glo, ghi,
                    d_a, dlo, dhi, &req) != 0) {
            GA_Error("ndra_write_section failed:",0);
        }
        if (DRA_Wait(req) != 0) {
            GA_Error("DRA_Wait failed(d_a): ",req);
        }
        tt1 += (MP_TIMER() - tt0);
    }
    GA_Dgop(&tt1,1,"+");
    tt1 = tt1/((double)nproc);
    mbytes = 1.e-6 * (double)(pow(m,ndim)*sizeof(double));
    if (me == 0) {
        printf("%11.2f MB  time = %11.2f rate = %11.3f MB/s\n",
                mbytes,tt1,mbytes/tt1);
    }

    if (DRA_Close(d_a) != 0) {
        GA_Error("DRA_Close failed(d_a): ",d_a);
    }

    if (me == 0) printf("\n");
    if (me == 0) printf("disk array closed\n");
    if (me == 0) fflush(stdout);

    /*..........................................................*/

    if (me == 0) printf("\n");
    if (me == 0) printf("opening disk array\n");
    if (DRA_Open(filename, DRA_R, &d_a) != 0) {
        GA_Error("DRA_Open failed",0);
    }
    if (me == 0) printf("testing read\n");
    /*  printf("testing read on proc %d\n",me); */
    if (me == 0) fflush(stdout);
    tt1 = 0.0;
    for (i=0; i<loop; i++) {
        itmp=i;
        for (j=0; j<ndim; j++) {
            index[j] = itmp%NFACTOR;
            itmp = (itmp - index[j])/NFACTOR;
        }
        for (j=0; j<ndim; j++) {
            glo[j] = 0;
            ghi[j] = SIZE - 1;
            dlo[j] = ((dra_size_t)index[j])*((dra_size_t)SIZE);
            dhi[j] = (((dra_size_t)index[j])+(dra_size_t)1)
                * ((dra_size_t)SIZE) - (dra_size_t)1;
        }
        tt0 = MP_TIMER();
        if (NDRA_Read_section(FALSE, g_b, glo, ghi,
                    d_a, dlo, dhi, &req) != 0) {
            GA_Error("ndra_read_section failed:",0);
        }
        if (DRA_Wait(req) != 0) {
            GA_Error("DRA_Wait failed(d_a): ",req);
        }
        tt1 += (MP_TIMER() - tt0);
        plus = 1.0;
        minus = -1.0;
        GA_Add(&plus, g_a, &minus, g_b, g_b);
        err = GA_Ddot(g_b, g_b);
        if (err != 0) {
            if (me == 0) {
                printf("BTW, we have error = %f on loop value %d\n",
                        err,i);
            }
            GA_Error(" bye",0);
        }
    }
    GA_Dgop(&tt1,1,"+");
    tt1 = tt1/((double)nproc);
    if (me == 0) {
        printf("%11.2f MB  time = %11.2f rate = %11.3f MB/s\n",
                mbytes,tt1,mbytes/tt1);
    }
    if (DRA_Delete(d_a) != 0) GA_Error("DRA_Delete failed",0);
    /*.......................................................................*/
    GA_Destroy(g_a);
    GA_Destroy(g_b);
}

Esempio n. 29

0

Mostra file

File: gemmtest.c Progetto: jeffhammond/ga

void
test(int data_type) {
  int me=GA_Nodeid();
  int nproc = GA_Nnodes();
  int g_a, g_b, g_c;
  int ndim = 2;
  int dims[2]={N,N};
  int lo[2]={0,0};
  int hi[2]={N-1,N-1};
  int block_size[2]={NB,NB-1};
  int proc_grid[2];
  int i,j,l,k,m,n, ld;

  double alpha_dbl = 1.0, beta_dbl = 0.0;
  double dzero = 0.0;
  double ddiff;

  float alpha_flt = 1.0, beta_flt = 0.0;
  float fzero = 0.0;
  float fdiff;
  float ftmp;
  double dtmp;
  SingleComplex ctmp;
  DoubleComplex ztmp;

  DoubleComplex alpha_dcpl = {1.0, 0.0} , beta_dcpl = {0.0, 0.0}; 
  DoubleComplex zzero = {0.0,0.0};
  DoubleComplex zdiff;

  SingleComplex alpha_scpl = {1.0, 0.0} , beta_scpl = {0.0, 0.0}; 
  SingleComplex czero = {0.0,0.0};
  SingleComplex cdiff;

  void *alpha=NULL, *beta=NULL;
  void *abuf=NULL, *bbuf=NULL, *cbuf=NULL, *c_ptr=NULL;

  switch (data_type) {
  case C_FLOAT:
    alpha  = (void *)&alpha_flt;
    beta   = (void *)&beta_flt;
    abuf = (void*)malloc(N*N*sizeof(float));
    bbuf = (void*)malloc(N*N*sizeof(float));
    cbuf = (void*)malloc(N*N*sizeof(float));
    if(me==0) printf("Single Precision: Testing GA_Sgemm,NGA_Matmul_patch for %d-Dimension", ndim);
    break;      
  case C_DBL:
    alpha  = (void *)&alpha_dbl;
    beta   = (void *)&beta_dbl;
    abuf = (void*)malloc(N*N*sizeof(double));
    bbuf = (void*)malloc(N*N*sizeof(double));
    cbuf = (void*)malloc(N*N*sizeof(double));
    if(me==0) printf("Double Precision: Testing GA_Dgemm,NGA_Matmul_patch for %d-Dimension", ndim); 
    break;    
  case C_DCPL:
    alpha  = (void *)&alpha_dcpl;
    beta   = (void *)&beta_dcpl;
    abuf = (void*)malloc(N*N*sizeof(DoubleComplex));
    bbuf = (void*)malloc(N*N*sizeof(DoubleComplex));
    cbuf = (void*)malloc(N*N*sizeof(DoubleComplex));
    if(me==0) printf("Double Complex:   Testing GA_Zgemm,NGA_Matmul_patch for %d-Dimension", ndim);
    break;
  case C_SCPL:
    alpha  = (void *)&alpha_scpl;
    beta   = (void *)&beta_scpl;
    abuf = (void*)malloc(N*N*sizeof(SingleComplex));
    bbuf = (void*)malloc(N*N*sizeof(SingleComplex));
    cbuf = (void*)malloc(N*N*sizeof(SingleComplex));
    if(me==0) printf("Single Complex:   Testing GA_Cgemm,NGA_Matmul_patch for %d-Dimension", ndim);
    break;
  default:
    GA_Error("wrong data type", data_type);
  }

  if (me==0) printf("\nCreate A, B, C\n");
#ifdef USE_REGULAR
  g_a = NGA_Create(data_type, ndim, dims, "array A", NULL);
#endif
#ifdef USE_SIMPLE_CYCLIC
  g_a = NGA_Create_handle();
  NGA_Set_data(g_a,ndim,dims,data_type);
  NGA_Set_array_name(g_a,"array A");
  NGA_Set_block_cyclic(g_a,block_size);
  if (!GA_Allocate(g_a)) {
    GA_Error("Failed: create: g_a",40);
  }
#endif
#ifdef USE_SCALAPACK
  g_a = NGA_Create_handle();
  NGA_Set_data(g_a,ndim,dims,data_type);
  NGA_Set_array_name(g_a,"array A");
  grid_factor(nproc,&i,&j);
  proc_grid[0] = i;
  proc_grid[1] = j;
  NGA_Set_block_cyclic_proc_grid(g_a,block_size,proc_grid);
  if (!GA_Allocate(g_a)) {
    GA_Error("Failed: create: g_a",40);
  }
#endif
#ifdef USE_TILED
  g_a = NGA_Create_handle();
  NGA_Set_data(g_a,ndim,dims,data_type);
  NGA_Set_array_name(g_a,"array A");
  grid_factor(nproc,&i,&j);
  proc_grid[0] = i;
  proc_grid[1] = j;
  NGA_Set_tiled_proc_grid(g_a,block_size,proc_grid);
  if (!GA_Allocate(g_a)) {
    GA_Error("Failed: create: g_a",40);
  }
#endif
  g_b = GA_Duplicate(g_a, "array B");  
  g_c = GA_Duplicate(g_a, "array C");
  if(!g_a || !g_b || !g_c) GA_Error("Create failed: a, b or c",1);

  ld = N;
  if (me==0) printf("\nInitialize A\n");
  /* Set up matrix A */
  if (me == 0) {
    for (i=0; i<N; i++) {
      for (j=0; j<N; j++) {
        switch (data_type) {
          case C_FLOAT:
            ((float*)abuf)[i*N+j] = (float)(i*N+j);
            break;
          case C_DBL:
            ((double*)abuf)[i*N+j] = (double)(i*N+j);
            break;
          case C_DCPL:
            ((DoubleComplex*)abuf)[i*N+j].real = (double)(i*N+j);
            ((DoubleComplex*)abuf)[i*N+j].imag = 1.0;
            break;
          case C_SCPL:
            ((SingleComplex*)abuf)[i*N+j].real = (float)(i*N+j);
            ((SingleComplex*)abuf)[i*N+j].imag = 1.0;
            break;
          default:
            GA_Error("wrong data type", data_type);
        }
      }
    }
    NGA_Put(g_a,lo,hi,abuf,&ld);
  }
  GA_Sync();

  if (me==0) printf("\nInitialize B\n");
  /* Set up matrix B */
  if (me == 0) {
    for (i=0; i<N; i++) {
      for (j=0; j<N; j++) {
        switch (data_type) {
          case C_FLOAT:
            ((float*)bbuf)[i*N+j] = (float)(j*N+i);
            break;
          case C_DBL:
            ((double*)bbuf)[i*N+j] = (double)(j*N+i);
            break;
          case C_DCPL:
            ((DoubleComplex*)bbuf)[i*N+j].real = (double)(j*N+i);
            ((DoubleComplex*)bbuf)[i*N+j].imag = 1.0;
            break;
          case C_SCPL:
            ((SingleComplex*)bbuf)[i*N+j].real = (float)(j*N+i);
            ((SingleComplex*)bbuf)[i*N+j].imag = 1.0;
            break;
          default:
            GA_Error("wrong data type", data_type);
        }
      }
    }
    NGA_Put(g_b,lo,hi,bbuf,&ld);
  }
  GA_Sync();

  if (me==0) printf("\nPerform matrix multiply\n");
  switch (data_type) {
    case C_FLOAT:
      NGA_Matmul_patch('N','N',&alpha_flt,&beta_flt,g_a,lo,hi,
        g_b,lo,hi,g_c,lo,hi);
      break;
    case C_DBL:
      NGA_Matmul_patch('N','N',&alpha_dbl,&beta_dbl,g_a,lo,hi,
        g_b,lo,hi,g_c,lo,hi);
      break;
    case C_SCPL:
      NGA_Matmul_patch('N','N',&alpha_scpl,&beta_scpl,g_a,lo,hi,
        g_b,lo,hi,g_c,lo,hi);
      break;
    case C_DCPL:
      NGA_Matmul_patch('N','N',&alpha_dcpl,&beta_dcpl,g_a,lo,hi,
        g_b,lo,hi,g_c,lo,hi);
      break;
    default:
      GA_Error("wrong data type", data_type);
  }
  GA_Sync();
#if 0
  if (me==0) printf("\nCheck answer\n");
  /*
  GA_Print(g_a);
  if (me == 0) printf("\n\n\n\n");
  GA_Print(g_b);
  if (me == 0) printf("\n\n\n\n");
  GA_Print(g_c); 
  */

  /* Check answer */
  NGA_Get(g_a,lo,hi,abuf,&ld);
  NGA_Get(g_b,lo,hi,bbuf,&ld);
  for (i=0; i<N; i++) {
    for (j=0; j<N; j++) {
      switch (data_type) {
        case C_FLOAT:
          ((float*)cbuf)[i*N+j] = fzero;
          break;
        case C_DBL:
          ((double*)cbuf)[i*N+j] = dzero;
          break;
        case C_DCPL:
          ((DoubleComplex*)cbuf)[i*N+j] = zzero;
          break;
        case C_SCPL:
          ((SingleComplex*)cbuf)[i*N+j] = czero;
          break;
        default:
          GA_Error("wrong data type", data_type);
      }
      for (k=0; k<N; k++) {
        switch (data_type) {
          case C_FLOAT:
            ((float*)cbuf)[i*N+j] += ((float*)abuf)[i*N+k]
              *((float*)bbuf)[k*N+j];
            break;
          case C_DBL:
            ((double*)cbuf)[i*N+j] += ((double*)abuf)[i*N+k]
              *((double*)bbuf)[k*N+j];
            break;
          case C_DCPL:
            ((DoubleComplex*)cbuf)[i*N+j].real +=
              (((DoubleComplex*)abuf)[i*N+k].real
               *((DoubleComplex*)bbuf)[k*N+j].real
               -(((DoubleComplex*)abuf)[i*N+k].imag
                 *((DoubleComplex*)bbuf)[k*N+j].imag));
            ((DoubleComplex*)cbuf)[i*N+j].imag +=
              (((DoubleComplex*)abuf)[i*N+k].real
               *((DoubleComplex*)bbuf)[k*N+j].imag
               +(((DoubleComplex*)abuf)[i*N+k].imag
                 *((DoubleComplex*)bbuf)[k*N+j].real));
            break;
          case C_SCPL:
            ((SingleComplex*)cbuf)[i*N+j].real +=
              (((SingleComplex*)abuf)[i*N+k].real
               *((SingleComplex*)bbuf)[k*N+j].real
               -(((SingleComplex*)abuf)[i*N+k].imag
                 *((SingleComplex*)bbuf)[k*N+j].imag));
            ((SingleComplex*)cbuf)[i*N+j].imag +=
              (((SingleComplex*)abuf)[i*N+k].real
               *((SingleComplex*)bbuf)[k*N+j].imag
               +(((SingleComplex*)abuf)[i*N+k].imag
                 *((SingleComplex*)bbuf)[k*N+j].real));
            break;
          default:
            GA_Error("wrong data type", data_type);
        }
      }
    }
  }
  GA_Sync();
  if (me == 0) {
    NGA_Get(g_c,lo,hi,abuf,&ld);
    for (i=0; i<N; i++) {
      for (j=0; j<N; j++) {
        switch (data_type) {
          case C_FLOAT:
            fdiff = ((float*)abuf)[i*N+j]-((float*)cbuf)[i*N+j];
            if (((float*)abuf)[i*N+j] != 0.0) {
              fdiff /= ((float*)abuf)[i*N+j];
            }
            if (fabs(fdiff) > TOLERANCE) {
              printf("p[%d] [%d,%d] Actual: %f Expected: %f\n",me,i,j,
                  ((float*)abuf)[i*N+j],((float*)cbuf)[i*N+j]);
            }
            break;
          case C_DBL:
            ddiff = ((double*)abuf)[i*N+j]-((double*)cbuf)[i*N+j];
            if (((double*)abuf)[i*N+j] != 0.0) {
              ddiff /= ((double*)abuf)[i*N+j];
            }
            if (fabs(ddiff) > TOLERANCE) {
              printf("p[%d] [%d,%d] Actual: %f Expected: %f\n",me,i,j,
                  ((double*)abuf)[i*N+j],((double*)cbuf)[i*N+j]);
            }
            break;
          case C_DCPL:
            zdiff.real = ((DoubleComplex*)abuf)[i*N+j].real
              -((DoubleComplex*)cbuf)[i*N+j].real;
            zdiff.imag = ((DoubleComplex*)abuf)[i*N+j].imag
              -((DoubleComplex*)cbuf)[i*N+j].imag;
            if (((DoubleComplex*)abuf)[i*N+j].real != 0.0 ||
                ((DoubleComplex*)abuf)[i*N+j].imag != 0.0) {
              ztmp = ((DoubleComplex*)abuf)[i*N+j];
              ddiff = sqrt((zdiff.real*zdiff.real+zdiff.imag*zdiff.imag)
                  /(ztmp.real*ztmp.real+ztmp.imag*ztmp.imag));
            } else {
              ddiff = sqrt(zdiff.real*zdiff.real+zdiff.imag*zdiff.imag);
            }
            if (fabs(ddiff) > TOLERANCE) {
              printf("p[%d] [%d,%d] Actual: (%f,%f) Expected: (%f,%f)\n",me,i,j,
                  ((DoubleComplex*)abuf)[i*N+j].real,
                  ((DoubleComplex*)abuf)[i*N+j].imag,
                  ((DoubleComplex*)cbuf)[i*N+j].real,
                  ((DoubleComplex*)cbuf)[i*N+j].imag);
            }
            break;
          case C_SCPL:
            cdiff.real = ((SingleComplex*)abuf)[i*N+j].real
              -((SingleComplex*)cbuf)[i*N+j].real;
            cdiff.imag = ((SingleComplex*)abuf)[i*N+j].imag
              -((SingleComplex*)cbuf)[i*N+j].imag;
            if (((SingleComplex*)abuf)[i*N+j].real != 0.0 ||
                ((SingleComplex*)abuf)[i*N+j].imag != 0.0) {
              ctmp = ((SingleComplex*)abuf)[i*N+j];
              fdiff = sqrt((cdiff.real*cdiff.real+cdiff.imag*cdiff.imag)
                  /(ctmp.real*ctmp.real+ctmp.imag*ctmp.imag));
            } else {
              fdiff = sqrt(cdiff.real*cdiff.real+cdiff.imag*cdiff.imag);
            }
            if (fabs(fdiff) > TOLERANCE) {
              printf("p[%d] [%d,%d] Actual: (%f,%f) Expected: (%f,%f)\n",me,i,j,
                  ((SingleComplex*)abuf)[i*N+j].real,
                  ((SingleComplex*)abuf)[i*N+j].imag,
                  ((SingleComplex*)cbuf)[i*N+j].real,
                  ((SingleComplex*)cbuf)[i*N+j].imag);
            }
            break;
          default:
            GA_Error("wrong data type", data_type);
        }
      }
    }
  }
  GA_Sync();

  /* copy cbuf back to g_a */
  if (me == 0) {
    NGA_Put(g_a,lo,hi,cbuf,&ld);
  }
  GA_Sync();

  /* Get norm of g_a */
  switch (data_type) {
    case C_FLOAT:
      ftmp = GA_Fdot(g_a,g_a);
      break;
    case C_DBL:
      dtmp = GA_Ddot(g_a,g_a);
      break;
    case C_DCPL:
      ztmp = GA_Zdot(g_a,g_a);
      break;
    case C_SCPL:
      ctmp = GA_Cdot(g_a,g_a);
      break;
    default:
      GA_Error("wrong data type", data_type);
  }
  /* subtract C from A and put the results in B */
  beta_flt = -1.0;
  beta_dbl = -1.0;
  beta_scpl.real = -1.0;
  beta_dcpl.real = -1.0;
  GA_Zero(g_b);
  GA_Add(alpha,g_a,beta,g_c,g_b);
  /* evaluate the norm of the difference between the two matrices */
  switch (data_type) {
    case C_FLOAT:
      fdiff = GA_Fdot(g_b, g_b);
      if (ftmp != 0.0) {
        fdiff /= ftmp;
      }
      if(fabs(fdiff) > TOLERANCE) {
        printf("\nabs(result) = %f > %f\n", fabsf(fdiff), TOLERANCE);
        GA_Error("GA_Sgemm Failed", 1);
      } else if (me == 0) {
        printf("\nGA_Sgemm OK\n\n");
      }
      break;
    case C_DBL:
      ddiff = GA_Ddot(g_b, g_b);
      if (dtmp != 0.0) {
        ddiff /= dtmp;
      }
      if(fabs(ddiff) > TOLERANCE) {
        printf("\nabs(result) = %f > %f\n", fabsf(ddiff), TOLERANCE);
        GA_Error("GA_Dgemm Failed", 1);
      } else if (me == 0) {
        printf("\nGA_Dgemm OK\n\n");
      }
      break;
    case C_DCPL:
      zdiff = GA_Zdot(g_b, g_b);
      if (ztmp.real != 0.0 || ztmp.imag != 0.0) {
        ddiff = sqrt((zdiff.real*zdiff.real+zdiff.imag*zdiff.imag)
            /(ztmp.real*ztmp.real+ztmp.imag*ztmp.imag));
      } else {
        ddiff = sqrt(zdiff.real*zdiff.real+zdiff.imag*zdiff.imag);
      }
      if(fabs(ddiff) > TOLERANCE) {
        printf("\nabs(result) = %f > %f\n", fabsf(zdiff.real), TOLERANCE);
        GA_Error("GA_Zgemm Failed", 1);
      } else if (me == 0) {
        printf("\nGA_Zgemm OK\n\n");
      }
      break;
    case C_SCPL:
      cdiff = GA_Cdot(g_b, g_b);
      if (ctmp.real != 0.0 || ctmp.imag != 0.0) {
        fdiff = sqrt((cdiff.real*cdiff.real+cdiff.imag*cdiff.imag)
            /(ctmp.real*ctmp.real+ctmp.imag*ctmp.imag));
      } else {
        fdiff = sqrt(cdiff.real*cdiff.real+cdiff.imag*cdiff.imag);
      }
      if(fabs(fdiff) > TOLERANCE) {
        printf("\nabs(result) = %f > %f\n", fabsf(cdiff.real), TOLERANCE);
        GA_Error("GA_Cgemm Failed", 1);
      } else if (me == 0) {
        printf("\nGA_Cgemm OK\n\n");
      }
      break;
    default:
      GA_Error("wrong data type", data_type);
  }
#endif

  free(abuf);
  free(bbuf);
  free(cbuf);

  switch (data_type) {
  case C_FLOAT:
    abuf = (void*)malloc(N*N*sizeof(float)/4);
    bbuf = (void*)malloc(N*N*sizeof(float)/4);
    cbuf = (void*)malloc(N*N*sizeof(float)/4);
    break;      
  case C_DBL:
    abuf = (void*)malloc(N*N*sizeof(double)/4);
    bbuf = (void*)malloc(N*N*sizeof(double)/4);
    cbuf = (void*)malloc(N*N*sizeof(double)/4);
    break;    
  case C_DCPL:
    abuf = (void*)malloc(N*N*sizeof(DoubleComplex)/4);
    bbuf = (void*)malloc(N*N*sizeof(DoubleComplex)/4);
    cbuf = (void*)malloc(N*N*sizeof(DoubleComplex)/4);
    break;
  case C_SCPL:
    abuf = (void*)malloc(N*N*sizeof(SingleComplex)/4);
    bbuf = (void*)malloc(N*N*sizeof(SingleComplex)/4);
    cbuf = (void*)malloc(N*N*sizeof(SingleComplex)/4);
    break;
  default:
    GA_Error("wrong data type", data_type);
  }

  /* Test multiply on a fraction of matrix. Start by reinitializing
   * A and B */
  GA_Zero(g_a);
  GA_Zero(g_b);
  GA_Zero(g_c);

  if (me==0) printf("\nTest patch multiply\n");

  lo[0] = N/4;
  lo[1] = N/4;
  hi[0] = 3*N/4-1;
  hi[1] = 3*N/4-1;
  ld = N/2;

  /* Set up matrix A */
  if (me==0) printf("\nInitialize A\n");
  if (me == 0) {
    for (i=N/4; i<3*N/4; i++) {
      for (j=N/4; j<3*N/4; j++) {
        switch (data_type) {
          case C_FLOAT:
            ((float*)abuf)[(i-N/4)*N/2+(j-N/4)] = (float)(i*N+j);
            break;
          case C_DBL:
            ((double*)abuf)[(i-N/4)*N/2+(j-N/4)] = (double)(i*N+j);
            break;
          case C_DCPL:
            ((DoubleComplex*)abuf)[(i-N/4)*N/2+(j-N/4)].real = (double)(i*N+j);
            ((DoubleComplex*)abuf)[(i-N/4)*N/2+(j-N/4)].imag = 1.0;
            break;
          case C_SCPL:
            ((SingleComplex*)abuf)[(i-N/4)*N/2+(j-N/4)].real = (float)(i*N+j);
            ((SingleComplex*)abuf)[(i-N/4)*N/2+(j-N/4)].imag = 1.0;
            break;
          default:
            GA_Error("wrong data type", data_type);
        }
      }
    }
    NGA_Put(g_a,lo,hi,abuf,&ld);
  }
  GA_Sync();

  if (me==0) printf("\nInitialize B\n");
  /* Set up matrix B */
  if (me == 0) {
    for (i=N/4; i<3*N/4; i++) {
      for (j=N/4; j<3*N/4; j++) {
        switch (data_type) {
          case C_FLOAT:
            ((float*)bbuf)[(i-N/4)*N/2+(j-N/4)] = (float)(j*N+i);
            break;
          case C_DBL:
            ((double*)bbuf)[(i-N/4)*N/2+(j-N/4)] = (double)(j*N+i);
            break;
          case C_DCPL:
            ((DoubleComplex*)bbuf)[(i-N/4)*N/2+(j-N/4)].real = (double)(j*N+i);
            ((DoubleComplex*)bbuf)[(i-N/4)*N/2+(j-N/4)].imag = 1.0;
            break;
          case C_SCPL:
            ((SingleComplex*)bbuf)[(i-N/4)*N/2+(j-N/4)].real = (float)(j*N+i);
            ((SingleComplex*)bbuf)[(i-N/4)*N/2+(j-N/4)].imag = 1.0;
            break;
          default:
            GA_Error("wrong data type", data_type);
        }
      }
    }
    NGA_Put(g_b,lo,hi,bbuf,&ld);
  }
  GA_Sync();

  beta_flt = 0.0;
  beta_dbl = 0.0;
  beta_scpl.real = 0.0;
  beta_dcpl.real = 0.0;
  if (me==0) printf("\nPerform matrix multiply on sub-blocks\n");
  switch (data_type) {
    case C_FLOAT:
      NGA_Matmul_patch('N','N',&alpha_flt,&beta_flt,g_a,lo,hi,
        g_b,lo,hi,g_c,lo,hi);
      break;
    case C_DBL:
      NGA_Matmul_patch('N','N',&alpha_dbl,&beta_dbl,g_a,lo,hi,
        g_b,lo,hi,g_c,lo,hi);
      break;
    case C_SCPL:
      NGA_Matmul_patch('N','N',&alpha_scpl,&beta_scpl,g_a,lo,hi,
        g_b,lo,hi,g_c,lo,hi);
      break;
    case C_DCPL:
      NGA_Matmul_patch('N','N',&alpha_dcpl,&beta_dcpl,g_a,lo,hi,
        g_b,lo,hi,g_c,lo,hi);
      break;
    default:
      GA_Error("wrong data type", data_type);
  }
  GA_Sync();
#if 0
  if (0) {
  /*
  if (data_type != C_SCPL && data_type != C_DCPL) {
  */

  if (me==0) printf("\nCheck answer\n");

  /* Multiply buffers by hand */
  if (me == 0) {
    for (i=0; i<N/2; i++) {
      for (j=0; j<N/2; j++) {
        switch (data_type) {
          case C_FLOAT:
            ((float*)cbuf)[i*N/2+j] = fzero;
            break;
          case C_DBL:
            ((double*)cbuf)[i*N/2+j] = dzero;
            break;
          case C_DCPL:
            ((DoubleComplex*)cbuf)[i*N/2+j] = zzero;
            break;
          case C_SCPL:
            ((SingleComplex*)cbuf)[i*N/2+j] = czero;
            break;
          default:
            GA_Error("wrong data type", data_type);
        }
        for (k=0; k<N/2; k++) {
          switch (data_type) {
            case C_FLOAT:
              ((float*)cbuf)[i*N/2+j] += ((float*)abuf)[i*N/2+k]
                *((float*)bbuf)[k*N/2+j];
              break;
            case C_DBL:
              ((double*)cbuf)[i*N/2+j] += ((double*)abuf)[i*N/2+k]
                *((double*)bbuf)[k*N/2+j];
              break;
            case C_DCPL:
              ((DoubleComplex*)cbuf)[i*N/2+j].real +=
                (((DoubleComplex*)abuf)[i*N/2+k].real
                 *((DoubleComplex*)bbuf)[k*N/2+j].real
                 -(((DoubleComplex*)abuf)[i*N/2+k].imag
                   *((DoubleComplex*)bbuf)[k*N/2+j].imag));
              ((DoubleComplex*)cbuf)[i*N/2+j].imag +=
                (((DoubleComplex*)abuf)[i*N/2+k].real
                 *((DoubleComplex*)bbuf)[k*N/2+j].imag
                 +(((DoubleComplex*)abuf)[i*N/2+k].imag
                   *((DoubleComplex*)bbuf)[k*N/2+j].real));
              break;
            case C_SCPL:
              ((SingleComplex*)cbuf)[i*N/2+j].real +=
                (((SingleComplex*)abuf)[i*N/2+k].real
                 *((SingleComplex*)bbuf)[k*N/2+j].real
                 -(((SingleComplex*)abuf)[i*N/2+k].imag
                   *((SingleComplex*)bbuf)[k*N/2+j].imag));
              ((SingleComplex*)cbuf)[i*N/2+j].imag +=
                (((SingleComplex*)abuf)[i*N/2+k].real
                 *((SingleComplex*)bbuf)[k*N/2+j].imag
                 +(((SingleComplex*)abuf)[i*N/2+k].imag
                   *((SingleComplex*)bbuf)[k*N/2+j].real));
              break;
            default:
              GA_Error("wrong data type", data_type);
          }
        }
      }
    }
    NGA_Put(g_a,lo,hi,cbuf,&ld);
  }
  if (me == 0) printf("\n\n\n\n");

  /* Get norm of g_a */
  switch (data_type) {
    case C_FLOAT:
      ftmp = NGA_Fdot_patch(g_a,'N',lo,hi,g_a,'N',lo,hi);
      break;
    case C_DBL:
      dtmp = NGA_Ddot_patch(g_a,'N',lo,hi,g_a,'N',lo,hi);
      break;
    case C_DCPL:
      ztmp = NGA_Zdot_patch(g_a,'N',lo,hi,g_a,'N',lo,hi);
      break;
    case C_SCPL:
      ctmp = NGA_Cdot_patch(g_a,'N',lo,hi,g_a,'N',lo,hi);
      break;
    default:
      GA_Error("wrong data type", data_type);
  }
  /* subtract C from A and put the results in B */
  beta_flt = -1.0;
  beta_dbl = -1.0;
  beta_scpl.real = -1.0;
  beta_dcpl.real = -1.0;
  NGA_Zero_patch(g_b,lo,hi);
  NGA_Add_patch(alpha,g_a,lo,hi,beta,g_c,lo,hi,g_b,lo,hi);
  /* evaluate the norm of the difference between the two matrices */
  switch (data_type) {
    case C_FLOAT:
      fdiff = NGA_Fdot_patch(g_b,'N',lo,hi,g_b,'N',lo,hi);
      if (ftmp != 0.0) {
        fdiff /= ftmp;
      }
      if(fabs(fdiff) > TOLERANCE) {
        printf("\nabs(result) = %f > %f\n", fabsf(fdiff), TOLERANCE);
        GA_Error("GA_Sgemm Failed", 1);
      } else if (me == 0) {
        printf("\nGA_Sgemm OK\n\n");
      }
      break;
    case C_DBL:
      ddiff = NGA_Ddot_patch(g_b,'N',lo,hi,g_b,'N',lo,hi);
      if (dtmp != 0.0) {
        ddiff /= dtmp;
      }
      if(fabs(ddiff) > TOLERANCE) {
        printf("\nabs(result) = %f > %f\n", fabsf(ddiff), TOLERANCE);
        GA_Error("GA_Dgemm Failed", 1);
      } else if (me == 0) {
        printf("\nGA_Dgemm OK\n\n");
      }
      break;
    case C_DCPL:
      zdiff = NGA_Zdot_patch(g_b,'N',lo,hi,g_b,'N',lo,hi);
      if (ztmp.real != 0.0 || ztmp.imag != 0.0) {
        ddiff = sqrt((zdiff.real*zdiff.real+zdiff.imag*zdiff.imag)
            /(ztmp.real*ztmp.real+ztmp.imag*ztmp.imag));
      } else {
        ddiff = sqrt(zdiff.real*zdiff.real+zdiff.imag*zdiff.imag);
      }
      if(fabs(ddiff) > TOLERANCE) {
        printf("\nabs(result) = %f > %f\n", fabsf(zdiff.real), TOLERANCE);
        GA_Error("GA_Zgemm Failed", 1);
      } else if (me == 0) {
        printf("\nGA_Zgemm OK\n\n");
      }
      break;
    case C_SCPL:
      cdiff = NGA_Cdot_patch(g_b,'N',lo,hi,g_b,'N',lo,hi);
      if (ctmp.real != 0.0 || ctmp.imag != 0.0) {
        fdiff = sqrt((cdiff.real*cdiff.real+cdiff.imag*cdiff.imag)
            /(ctmp.real*ctmp.real+ctmp.imag*ctmp.imag));
      } else {
        fdiff = sqrt(cdiff.real*cdiff.real+cdiff.imag*cdiff.imag);
      }
      if(fabs(fdiff) > TOLERANCE) {
        printf("\nabs(result) = %f > %f\n", fabsf(cdiff.real), TOLERANCE);
        GA_Error("GA_Cgemm Failed", 1);
      } else if (me == 0) {
        printf("\nGA_Cgemm OK\n\n");
      }
      break;
    default:
      GA_Error("wrong data type", data_type);
  }

  }
#endif
  free(abuf);
  free(bbuf);
  free(cbuf);

  GA_Destroy(g_a);
  GA_Destroy(g_b);
  GA_Destroy(g_c);
}

Esempio n. 30

0

Mostra file

File: ga_add.c Progetto: jeffhammond/ga

main(int argc, char **argv)
{
  int rank, nprocs, i, j;
  int g_A, g_B, g_C, **local_C=NULL, dims[DIM]={SIZE,SIZE}, val1=5, val2=4, alpha=3, beta=2;
  int clo[DIM]={SIZE-SIZE,SIZE-SIZE}, chi[DIM]={SIZE-1,SIZE-1}, ld=SIZE;
  int **local_tm=NULL;

  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  MA_init(C_INT, 1000, 1000);

  GA_Initialize();

  local_C=(int**)malloc(SIZE*sizeof(int*));
  for(i=0; i<SIZE; i++)
    local_C[i]=(int*)malloc(SIZE*sizeof(int));

  local_tm=(int**)malloc(SIZE*sizeof(int*));
  for(i=0; i<SIZE; i++)
    local_tm[i]=(int*)malloc(SIZE*sizeof(int));
  
  g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL);

  g_B = GA_Duplicate(g_A, "array_B");
  g_C = GA_Duplicate(g_A, "array_C");

  GA_Fill(g_A, &val1);
  GA_Fill(g_B, &val2);
  
  GA_Add(&alpha, g_A, &beta, g_B, g_C);

  GA_Sync();

  GA_Print(g_A);
  GA_Print(g_B);
  GA_Print(g_C);

  //printf("check 1\n");
  NGA_Get(g_C, clo, chi, local_tm, &ld);
  //printf("check 2\n");
  // GA_Sync();
  if(rank==0)
    {
      for(i=0; i<SIZE; i++)
	{
	  for(j=0; j<SIZE; j++)printf("%d ", local_tm[i][j]);
	  printf("\n");
	}
    }
  /*
  if(rank==0)
    {
      NGA_Get(g_C, clo, chi, local_C, &ld);
      
      printf("check 1 \n");
      
      for(i=0; i<SIZE; i++)
	{
	  for(j=0; j<SIZE; j++)printf("%d ", local_C[i][j]);
	  printf("\n");
	}
      
      printf("check 2\n");
      
      for(i=0; i<SIZE; i++)
	{
	  for(j=0; j<SIZE; j++)
	    if(local_C[i][j]!=(alpha*val1)+(beta*val2)) printf("GA Error : \n");
	}
    }

  */
  //GA_Sync();
  if(rank==0)
    printf("Test Completed \n");

  //  GA_Sync();

  /*
  GA_Destroy(g_A);
  GA_Destroy(g_B);
  GA_Destroy(g_C);
  */

  //*******************************************************************

  /* what would be the possible reason for GA_destroy to get failed .., 
   * solve this before consolidate the whole
   */

  GA_Terminate();
  MPI_Finalize();
 
}