Ejemplo n.º 1
0
int main(int argc, char **argv) {
    int heap=300000, stack=300000;
    int me, nprocs, i;
    double start, end;
	
    /* Initialize Message Passing library */
    MPI_Init(&argc, &argv);

    /* Initialize GA */
    GA_Initialize();
    
    /* Initialize Memory Allocator (MA) */
    if(! MA_init(C_DBL, stack, heap) ) GA_Error("MA_init failed",stack+heap);

    me     = GA_Nodeid();
    nprocs = GA_Nnodes();
    if(me==0) {
       printf("\nUsing %d processes\n\n", nprocs); fflush(stdout);
    }
     
	start = MPI_Wtime();
	for(i =0; i< 1000; i++) {
		TRANSPOSE1D();
	}
	end = MPI_Wtime();
	if(me==0) printf("  Time=%2.5e secs\n\n",end-start); 
    
    if(me==0)printf("\nTerminating ..\n");
    GA_Terminate();
    MPI_Finalize();    
}
Ejemplo n.º 2
0
int main(int argc, char** argv) 
{ 
    int nprocs,myid,nprocssq; 
    int dims[2],chunk[2]; 
    int i,j,k; 
    int stack = 100000, heap = 100000; 
    MPI_Init(&argc,&argv); 
    GA_Initialize(); 
    MA_init(C_DBL,stack,heap); 
    nprocssq = GA_Nnodes(); 
    nprocs = sqrt(nprocssq); 
    myid = GA_Nodeid(); 
    dims[0] = N; dims[1] = N; 
    chunk[0] = N/nprocs; 
    chunk[1] = N/nprocs; 
    int g_a = NGA_Create(C_DBL,2,dims,"Array A",chunk); 
    int lo[2],hi[2]; 
    NGA_Distribution(g_a,myid,lo,hi); 
    int ld[1] = {N/nprocs}; 
    void *ptr; 
    double *local; 
    printf("Myid = %d, lo = [%d,%d] , hi = [%d,%d] , ld = %d \n",myid,lo[0],lo[1],hi[0],hi[1],ld[0]); 
    NGA_Access(g_a,lo,hi,&ptr,ld); 
    local = (double*) ptr; 
    printf("Myid = %d , local[0][0] = %f\n",*local); 
    GA_Sync(); 
    GA_Destroy(g_a); 
    GA_Terminate(); 
    MPI_Finalize(); 
    return 0; 
} 
Ejemplo n.º 3
0
int main(int argc, char **argv)
{
  int rank, nprocs, n=1;
  int g_A, dims[DIM]={5,5};

  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  MA_init(C_INT, 1000, 1000);

  GA_Initialize();
  
  printf("check %d \n", n);  

  g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL);
  if(GA_Ndim(g_A)!=DIM)
    printf("ERROR: GA_Ndim didnt return nDimension after GA_Initialize\n");
  printf("%d : %d \n", rank, GA_Ndim(g_A));

  GA_Terminate();

  if(rank==0)
    printf(" GA: Test Completed \n");
  MPI_Finalize();

  return 0;
}
Ejemplo n.º 4
0
int main(int argc, char **argv)
{

  int rank, nprocs, n=10;

  MPI_Init(&argc, &argv);
  GA_Initialize();
  MA_init(C_INT, 1000, 1000);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
  
  /*
    addition_operator(rank, nprocs, n);
    multiply_operator(rank, nprocs, n);
    max_operator(rank, nprocs, n);
    min_operator(rank, nprocs, n);
    absmax_operator(rank, nprocs, n);
    absmin_operator(rank, nprocs, n);
  */

  checking_operator (rank, nprocs);
  
  if(rank==0)
    GA_PRINT_MSG();

  GA_Terminate();

  MPI_Finalize();

  return 0;
}
Ejemplo n.º 5
0
main(int argc, char **argv)
{
  int rank, nprocs;

  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  MA_init(C_INT, 1000, 1000);

  GA_Initialize();

  //irregular_array1(rank);  
  //  irregular_array2(rank);  
 
  //  auto_number1(rank, nprocs);
  auto_number2(rank, nprocs);

  if(rank == 0)
    printf("Test Completed \n");

  GA_Terminate();
  MPI_Finalize();
  
}
Ejemplo n.º 6
0
int main(int argc, char **argv)
{
  int rank, nprocs, i;

  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  MA_init(C_INT, 1000, 1000);

  GA_Initialize();

  for(i=1; i<=NDIM; i++)
    {
      verify_ga_dim(i);
    }

  GA_Sync();
  if(rank == 0)
    printf("Test Completed \n");

  GA_Terminate();
  MPI_Finalize();

  return 0;
}
Ejemplo n.º 7
0
// -------------------------------------------------------------
// Environment:: constructors / destructor
// -------------------------------------------------------------
Environment::Environment(int& argc, char **argv,
                         const long int& ma_stack, 
                         const long int& ma_heap)
  : p_boostEnv(argc, argv)
{
  GA_Initialize();
  MA_init(C_DBL, ma_stack, ma_heap);
}
Ejemplo n.º 8
0
extern "C" void
gridpack_initialize_parallel(int ma_stack, int ma_heap)
{
  int argc(0);
  
  int ierr = MPI_Init(&argc, NULL);
  GA_Initialize();
  MA_init(C_DBL, ma_stack, ma_heap);
}
Ejemplo n.º 9
0
main(int argc, char **argv)
{
  int rank, nprocs, i, j;
  int p_Geven, p_Godd, p_size, mod, p_size_mod, *list_even=NULL, *list_odd=NULL;
  
  MPI_Init(&argc, &argv);
  
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  MA_init(C_INT, 1000, 1000);

  GA_Initialize();
  
  p_size=nprocs/2;
  mod=nprocs%2;
  p_size_mod=p_size+mod;
  list_even = (int*)malloc(p_size*sizeof(int));
  list_odd = (int*)malloc(p_size*sizeof(int));
  
  j=0;
  for(i=0; i<nprocs; i++)
    {
      if(i%2==0)
	list_even[j]=i;
      else if(i%2==1)
	list_odd[j]=i;
      else
	break;
      j++;
    }
  j=0;
  /*
  for(i=0; i<nprocs; i++)
    {
      if(i%2==1)
	{

	  j++;
	}
    }
  */
  p_Geven=GA_Pgroup_create(list_even, p_size_mod);
  p_Godd=GA_Pgroup_create(list_odd, p_size);

  if(rank%2==0)
    printf("%d: My ID is %d :: %d -- even \n", rank, GA_Pgroup_nodeid(p_Geven), GA_Pgroup_nnodes(p_Geven));
  else
    printf("%d: My ID is %d :: %d --- odd\n", rank, GA_Pgroup_nodeid(p_Godd), GA_Pgroup_nnodes(p_Godd));
  
  GA_Sync();
  if(rank==0)
    GA_PRINT_MSG();
  
  GA_Terminate();
  MPI_Finalize();
}
Ejemplo n.º 10
0
int main(int argc, char **argv)
{
    int status, me;
    int max_arrays = 10;
    double max_sz = 1e8, max_disk = 1e10, max_mem = 1e6;
    int stack = 120000, heap = 3200000;
    int numfiles, numioprocs;
    int total, nproc;

    MP_INIT(argc,argv);
    GA_Initialize();
    me = GA_Nodeid();
    nproc = GA_Nnodes();
    total = pow(SIZE,NDIM)*sizeof(double);
    if (!GA_Uses_ma()) {
        if (GA_Nodeid() == 0) {
            printf("GA is not using MA\n");
        }
        stack = 100000;
        heap  = (int)(2.2*(float)(total));
    }

    if (MA_init(MT_F_DBL, stack, heap) ) {
        if (DRA_Init(max_arrays, max_sz, max_disk, max_mem) != 0)
            GA_Error("DRA_Init failed: ",0);
        if (USER_CONFIG == 0) {
            numfiles = -1;
            numioprocs = -1;
        } else if (USER_CONFIG == 1) {
            numfiles = 1;
            numioprocs = GA_Cluster_nnodes();
        } else if (USER_CONFIG == 2) {
            numfiles = GA_Cluster_nnodes();
            numioprocs = GA_Cluster_nnodes();
        } else {
            numfiles = 1;
            numioprocs = 1;
        }
        if (me==0) {
            printf("Disk resident arrays configured as:\n");
            printf("    Number of files: %d\n",numfiles);
            printf("    Number of I/O processors: %d\n",numioprocs);
        }
        DRA_Set_default_config(numfiles,numioprocs);
        if (me == 0) printf("\n");
        if (me == 0) printf("TESTING PERFORMANCE OF DISK ARRAYS\n");
        if (me == 0) printf("\n");
        test_io_dbl();
        status = DRA_Terminate();
        GA_Terminate();
    } else {
        printf("MA_init failed\n");
    }
    if(me == 0) printf("all done ...\n");
    MP_FINALIZE();
    return 0;
}
Ejemplo n.º 11
0
main(int argc, char **argv)
{
  int rank, nprocs, i, j;
  int g_A, g_B, g_C, local_C[DIM][DIM], dims[DIM]={5,5};
  int val_A=5, val_B=3, ld=DIM, max; 
  int lo[DIM]={2,2}, hi[DIM]={4,4}, blo[DIM]={0,0}, bhi[DIM]={2,2}, clo[DIM]={1,1}, chi[DIM]={3,3};

  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  MA_init(C_INT, 1000, 1000);

  GA_Initialize();
  
  g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL);
  g_B = NGA_Create(C_INT, DIM, dims, "array_B", NULL);
  g_C = NGA_Create(C_INT, DIM, dims, "array_C", NULL);

  GA_Fill(g_A, &val_A);
  GA_Fill(g_B, &val_B);
  GA_Zero(g_C);
  GA_Elem_maximum_patch(g_A, lo, hi, g_B, blo, bhi, g_C, clo, chi);
  GA_Print(g_C);
  GA_Sync();
  
  NGA_Get(g_C, clo, chi, local_C, &ld);
  if(rank==1)
    {
  
      for(i=0; i<DIM; i++)
	{
	  for(j=0; j<DIM; j++)printf("%d ", local_C[i][j]);
	  printf("\n");
	}
      
      if(val_A>val_B) max=val_A;
      else max=val_B;

      for(i=0; i<DIM; i++)
	{
	  for(j=0; j<DIM; j++)
	    if(local_C[i][j]!=max) printf("GA Error : \n");
	}
      
    }
    
  GA_Sync();
  if(rank == 0)
    printf("Test Completed \n");

  GA_Terminate();
  MPI_Finalize();

}
Ejemplo n.º 12
0
main(int argc, char **argv)
{
  int rank, nprocs;
  int g_A, dims[D]={SIZE,SIZE}, *local_A=NULL, *local_G=NULL, **sub_array=NULL, **s_array=NULL;
  int i, j, value=5;
  
  MPI_Init(&argc, &argv);
  GA_Initialize();
  MA_init(C_INT, 1000, 1000);
  
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  s_array=(int**)malloc(N*sizeof(int*));
  for(i=0; i<N; i++)
    {
      s_array[i]=(int*)malloc(D*sizeof(int));
      for(j=0; j<D; j++) s_array[i][j]=rand()%10;
    }

  sub_array=(int**)malloc(N*sizeof(int*));
  for(i=0; i<N; i++)
    {
      sub_array[i]=(int*)malloc(D*sizeof(int));
      for(j=0; j<D; j++) sub_array[i][j]=rand()%10;
    }

  for(i=0; i<N; i++) local_A=(int*)malloc(N*sizeof(int));

  for(i=0; i<N; i++) local_G=(int*)malloc(N*sizeof(int));
  
  g_A=NGA_Create(C_INT, D, dims, "array_A", NULL);
  GA_Fill(g_A, &value);
  GA_Sync();
                                                   
  NGA_Scatter(g_A, local_A, s_array, N);
  NGA_Gather(g_A, local_G, s_array, N);
  GA_Sync();
  GA_Print(g_A);

  if(rank==0)
    {
      for(i=0; i<N; i++)
	if(local_G[i]!=local_A[i]) printf("GA Error: \n");
    }
  GA_Sync();
  if(rank==0)
    GA_PRINT_MSG();

  GA_Terminate();
  MPI_Finalize();
  return 0;
}
Ejemplo n.º 13
0
main(int argc, char **argv)
{
  int rank, nprocs, i, j;
  int g_A, **local_A=NULL, **local_B=NULL; 
  int dims[DIM]={SIZE,SIZE}, dims2[DIM], lo[DIM]={SIZE-SIZE,SIZE-SIZE}, hi[DIM]={SIZE-1,SIZE-1}, ld=5, value=5;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  MA_init(C_INT, 1000, 1000);
  GA_Initialize();

  local_A=(int**)malloc(SIZE*sizeof(int*));
  for(i=0; i<SIZE; i++)
    {
      local_A[i]=(int*)malloc(SIZE*sizeof(int));
      for(j=0; j<SIZE; j++) local_A[i][j]=rand()%10;
    }

  local_B=(int**)malloc(SIZE*sizeof(int*));
  for(i=0; i<SIZE; i++)
    {
      local_B[i]=(int*)malloc(SIZE*sizeof(int));
      for(j=0; j<SIZE; j++) local_B[i][j]=rand()%10;
    }

  g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL);
  GA_Zero(g_A);
  
  if(rank==0)
    {
      NGA_Put(g_A, lo, hi, local_A, &ld);
      NGA_Get(g_A, lo, hi, local_B, &ld);

      for(i=0; i<SIZE; i++)
	{
	  for(j=0; j<SIZE; j++)
	    if(local_A[i][j]!=local_B[i][j]) GA_ERROR_MSG();
	}
    }
  
  GA_Sync();
  GA_Destroy(g_A);
  
  if(rank == 0) GA_PRINT_MSG();

  GA_Terminate();
  MPI_Finalize();
}
Ejemplo n.º 14
0
main(int argc, char **argv)
{
    int rank, nprocs, i, j;
    int g_A, g_B;
    int dims[MAX_DIM], val=4, ndim, re;


    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    MA_init(C_INT, 1000, 1000);
    GA_Initialize();

    for(i=1; i<=MAX_DIM; i++)
    {
        ndim=i;
        dims[i]=SIZE;
        //      for(j=0; j<ndim; j++)

        g_A = NGA_Create(C_INT, ndim, dims, "array_A", NULL);
        g_B = NGA_Create(C_INT, ndim, dims, "array_B", NULL);

        if(!g_A)
            GA_Error("GA Error: no global array exists \n", ndim);
        if(!g_B)
            GA_Error("GA Error: no global array exists \n", ndim);
    }

    GA_Sync();

    GA_Fill(g_A, &val);
    re=GA_Solve(g_A, g_B);

    if(re==0)
        printf("Cholesky Fact is Successful \n");
    else if (re >0)
        printf("Cholesky Fact couldn't be completed \n");
    else
        printf("An Error occured\n");
    if(rank == 0)
        GA_PRINT_MSG();

    GA_Destroy(g_A);
    GA_Destroy(g_B);
    GA_Terminate();
    MPI_Finalize();
}
int main(int argc, char **argv) 
{
	int proc, nprocs;
	int M, N, K; /* */
	int blockX_len, blockY_len;
	int heap=3000000, stack=3000000;

	if (argc == 6) {
		M = atoi(argv[1]);
		N = atoi(argv[2]);
		K = atoi(argv[3]);
		blockX_len = atoi(argv[4]);
		blockY_len = atoi(argv[5]);
	}
	else {
		printf("Please enter ./a.out <M> <N> <K> <BLOCK-X-LEN> <BLOCK-Y-LEN>");
		exit(-1);
	}

	MPI_Init(&argc, &argv);

	GA_Initialize();

	if(! MA_init(C_DBL, stack, heap) ) GA_Error("MA_init failed",stack+heap);

	proc   = GA_Nodeid();
	nprocs = GA_Nnodes();

	if(proc == 0) {
		printf("Using %d processes\n", nprocs); 
		fflush(stdout);
	}

	matrix_multiply(M, N, K, blockX_len, blockY_len);

	if(proc == 0)
		printf("\nTerminating ..\n");

	GA_Terminate();

	MPI_Finalize();    

	return 0;
}
Ejemplo n.º 16
0
main(int argc, char **argv)
{
  int rank, nprocs;
  int g_A, g_V,  val1=5, val2=5, local_A[SIZE][SIZE], dims_V=SIZE, local_V[dims_V]; 
  int dims[DIM]={SIZE,SIZE}, dims2[DIM], lo[DIM]={1,1}, hi[DIM]={2,2}, ld=5, i, j;
  int loV=0, hiV=dims_V-1;

  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  MA_init(C_INT, 1000, 1000);

  GA_Initialize();

  g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL);
  g_V = NGA_Create(C_INT, 1, &dims_V, "array_A", NULL);
  GA_Fill(g_A, &val1);
  GA_Print(g_A);
  printf("\n");

  GA_Scale(g_A, &val2);
  GA_Print(g_A);

  GA_Get_diag(g_A, g_V);
  GA_Print(g_V);
  
  NGA_Get(g_A, lo, hi, local_A, &ld);
  NGA_Get(g_V, &loV, &hiV, local_V, &ld);

  if(rank==1)
    {
      for(i=0; i<dims_V; i++)
	if(local_V[i]!=val1*val2) printf(" GA Error: \n");
    }

  if(rank == 0)
    printf("Test Completed \n");

  GA_Terminate();
  MPI_Finalize();
}
Ejemplo n.º 17
0
main(int argc, char **argv)
{
  int rank, nprocs;

  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  MA_init(C_INT, 1000, 1000);
  //  MA_init(C_DBL, 1000, 1000);
  
  GA_Initialize();
  
  integer_dot(rank, nprocs);
  //float_dot(rank, nprocs);
  
  GA_Terminate();
  MPI_Finalize();
}
Ejemplo n.º 18
0
Archivo: gada.c Proyecto: adrielb/DCell
int main(int argc, char **args)
{
  PetscErrorCode  ierr;
  ierr = PetscInitialize(&argc, &args, (char *) 0, ""); CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD, "Start %s\n", __FILE__); CHKERRQ(ierr);
  GA_Initialize();
  ierr = MA_init(MT_DBL, 1000, 1000 );
  if( !ierr ) GA_Error("\n\n\nga allocaltion failed\n\n",ierr);
  
  size_t len = 1024;
  char name[1024];
  PetscGetHostName(name,len);
  PetscPrintf(PETSC_COMM_SELF, "Hello from %s\n", name);
  
  ierr = testCreate3D(); CHKERRQ(ierr);
  
  GA_Terminate();
  ierr = PetscPrintf(PETSC_COMM_WORLD, "End %s\n", __FILE__); CHKERRQ(ierr);
  ierr = PetscFinalize(); CHKERRQ(ierr);
}
Ejemplo n.º 19
0
int main(int argc, char **argv) {
    int heap=300000, stack=300000;
    int me, nprocs;
    
    /* Step1: Initialize Message Passing library */
#ifdef MPI
    MPI_Init(&argc, &argv);   /* initialize MPI */
#else
    PBEGIN_(argc, argv);      /* initialize TCGMSG */
#endif

    /* Step2: Initialize GA */
    /* ### intialize the GA library */
    GA_Initialize();

    
    /* Step3: Initialize Memory Allocator (MA) */
    if(! MA_init(C_DBL, stack, heap) ) GA_Error("MA_init failed",stack+heap);

    /* ### assign the local processor ID to the int variable "me"
     * ### and the total number of processors to the int variable
     * ### "nprocs" */
    me     = GA_Nodeid();
    nprocs = GA_Nnodes();
    if(me==0) {
       printf("\nUsing %d processes\n\n", nprocs); fflush(stdout);
    }
    
       
    TRANSPOSE1D();
    
    if(me==0)printf("\nTerminating ..\n");
    /* ### terminate the GA library */
    GA_Terminate();

#ifdef MPI
    MPI_Finalize();    
#else
    PEND_();
#endif
}
Ejemplo n.º 20
0
main(int argc, char **argv)
{
  // Initialize MPI libraries
  int ierr = MPI_Init(&argc, &argv);
  // Initialize Math libraries
  gridpack::math::Initialize();

  GA_Initialize();
  int stack = 200000, heap = 200000;
  MA_init(C_DBL, stack, heap);

  gridpack::resistor_grid::RGApp app;
  app.execute(argc, argv);

  GA_Terminate();

  // Terminate Math libraries
  gridpack::math::Finalize();
  // Clean up MPI libraries
  ierr = MPI_Finalize();
}
Ejemplo n.º 21
0
main(int argc, char **argv)
{
  int rank, nprocs;
  
  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  MA_init(C_INT, 1000, 1000);

  GA_Initialize();
  fillonly(rank, nprocs);
  fillandscale(rank, nprocs);

  GA_Sync();
  if(rank == 0)
    printf("Test Completed \n");

  GA_Terminate();
  MPI_Finalize();
}
Ejemplo n.º 22
0
main(int argc, char **argv)
{
  int rank, nprocs, i, j;
  int g_A, g_B, **local_value=NULL;

  int dims[DIM]={SIZE,SIZE}, lo[DIM]={SIZE-SIZE,SIZE-SIZE}, hi[DIM]={SIZE-1,SIZE-1}, ld=SIZE;

  local_value=(int**)malloc(SIZE*sizeof(int*));
  for(i=0; i<SIZE; i++)
    local_value[i]=(int*)malloc(SIZE*sizeof(int));

  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  MA_init(C_INT, 1000, 1000);

  GA_Initialize();

  g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL);
  g_B = NGA_Create(C_INT, DIM, dims, "array_B", NULL);

  for(i=0; i<SIZE; i++)
    for(j=0; j<SIZE; j++)
      local_value[i][j]=rand()%10;
	
  if(rank==0) NGA_Put(g_A, lo, hi, local_value, &ld);
  GA_Transpose(g_A, g_B);
  if(rank==0) validate_transpose(g_A, g_B, lo, hi, ld);

  GA_Sync();
  if(rank == 1) GA_PRINT_MSG();
  
  GA_Terminate();
  MPI_Finalize();
}
Ejemplo n.º 23
0
int main(int argc, char **argv)
{
  int rank, nprocs;
  int g_A;
  int *local_A=NULL, *local_B=NULL, *output_A=NULL;
  int dims[DIM]={SIZE,SIZE}, dims2[DIM], lo[DIM]={SIZE-SIZE,SIZE-SIZE}, hi[DIM]={SIZE-1,SIZE-1}, ld=SIZE;
  int value=SIZE;

#if defined(USE_ELEMENTAL)
  // initialize Elemental (which will initialize MPI)
  ElInitialize( &argc, &argv );
  ElMPICommRank( MPI_COMM_WORLD, &rank );
  ElMPICommSize( MPI_COMM_WORLD, &nprocs );
  // instantiate el::global array
  ElGlobalArraysConstruct_i( &eliga );
  // initialize global arrays
  ElGlobalArraysInitialize_i( eliga );
#else
  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  MA_init(C_INT, 1000, 1000);

  GA_Initialize();
#endif

  local_A=(int*)malloc(SIZE*SIZE*sizeof(int));
  output_A=(int*)malloc(SIZE*SIZE*sizeof(int));
  memset (output_A, 0, SIZE*SIZE*sizeof(int));
  for(int j=0; j<SIZE; j++)
      for(int i=0; i<SIZE; i++) local_A[i+j*ld]=(i + j);
      //for(int i=0; i<SIZE; i++) local_A[i+j*ld]=(rand()%10);

  local_B=(int*)malloc(SIZE*SIZE*sizeof(int));
  memset (local_B, 0, SIZE*SIZE*sizeof(int));

  // nb handle
#if defined(USE_ELEMENTAL)
  typedef ElInt ga_nbhdl_t;
#endif
  ga_nbhdl_t nbnb;

#if defined(USE_ELEMENTAL)
  ElGlobalArraysCreate_i( eliga, DIM, dims, "array_A", NULL, &g_A );
  ElGlobalArraysFill_i( eliga, g_A, &value );
#else
  g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL);
  GA_Fill(g_A, &value);
#endif

  if (rank == 0) printf ("Initial global array:\n");
#if defined(USE_ELEMENTAL)
  ElGlobalArraysPrint_i( eliga, g_A );
#else
  GA_Print(g_A);
#endif

  for (int i = 0; i < NITERS; i++)
  {
      // acc data
#if defined(USE_ELEMENTAL)
      ElGlobalArraysNBAccumulate_i( eliga, g_A, lo, hi, local_A, &ld, &value, &nbnb );
#else
      NGA_NbAcc(g_A, lo, hi, local_A, &ld, &value, &nbnb);
#endif
      
      // updated output
      MPI_Reduce (local_A, output_A, SIZE*SIZE, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

#if defined(USE_ELEMENTAL)
      ElGlobalArraysNBWait_i( eliga, &nbnb );
#else
      NGA_NbWait (&nbnb);
#endif 

      // get
      if (rank == 0) printf ("Get in iter #%d\n", i);
#if defined(USE_ELEMENTAL)
      ElGlobalArraysSync_i( eliga );
      ElGlobalArraysGet_i( eliga, g_A, lo, hi, local_B, &ld );
      ElGlobalArraysPrint_i( eliga, g_A );
#else
      GA_Sync();
      NGA_Get(g_A, lo, hi, local_B, &ld);
      GA_Print(g_A);
#endif
  } // end of iters

  if(rank==0)
    {
      printf(" Alpha (multiplier): %d\n", value);
      printf(" Original local buffer (before accumulation): \n");

      for(int i=0; i<SIZE; i++)
	{
	  for(int j=0; j<SIZE; j++)
	    printf("%d ", local_A[i*ld+j]);
	  printf("\n");
	}
      printf("\n");
      printf(" Get returns: \n");
      for(int i=0; i<SIZE; i++)
	{
	  for(int j=0; j<SIZE; j++)
	    printf("%d ", local_B[i*ld + j]);
	  printf("\n");
	}

      printf("\n");
      for(int i=0; i<SIZE; i++)
	{
	  for(int j=0; j<SIZE; j++)
	    {
	      if(local_B[i*ld+j]!=(value + (NITERS * value * (output_A[i*ld+j]))))
		  GA_Error("ERROR", -99);
	    }
	}
    }
#if defined(USE_ELEMENTAL)
  ElGlobalArraysDestroy_i( eliga, g_A );
#else
  GA_Destroy(g_A);
#endif
  if(rank == 0)
    printf ("OK. Test passed\n");

    free (local_A);
    free (local_B);
    free (output_A);

#if defined(USE_ELEMENTAL)
    ElGlobalArraysTerminate_i( eliga );
    // call el::global arrays destructor
    ElGlobalArraysDestruct_i( eliga );
    ElFinalize();
#else
    GA_Terminate();
    MPI_Finalize();
#endif
}
Ejemplo n.º 24
0
int main(int argc, char **argv) {
    int me;
    int g_a;
    int status;
    int i,j;
    int dims[] = {n,n};
    int proc_group[PROC_LIST_SIZE],proclist[PROC_LIST_SIZE],inode;
    int sbuf[1],rbuf[1];
    MPI_Comm comm;

    MP_INIT(argc,argv);
    GA_Initialize();
    me = GA_Nodeid();

    status = MA_init(MT_DBL, 100000, 100000);
    if (!status) GA_Error("ma_init failed",-1);
    status = MA_set_auto_verify(1);
    status = MA_set_hard_fail(1);
    status = MA_set_error_print(1);

    inode = GA_Cluster_nodeid();
    if (me == 0) {
        printf("there are %d nodes, node 0 has %d procs\n",
                GA_Cluster_nnodes(), GA_Cluster_nprocs(0));
        fflush(stdout);
    }
    GA_Sync();
    for (i=0; i<GA_Cluster_nnodes(); ++i) {
        for (j=0; j<GA_Cluster_nprocs(i); ++j) {
            proclist[j]=GA_Cluster_procid(i,j);
        }
        proc_group[i]=GA_Pgroup_create(proclist,GA_Cluster_nprocs(i));
    }
    GA_Sync();
    for (i=0; i<GA_Cluster_nnodes(); ++i) {
        if (i == inode) {
            printf("%d joining group %d\n", me, proc_group[inode]);
            GA_Pgroup_set_default(proc_group[inode]);
            g_a = NGA_Create(C_DBL, 2, dims, "a", NULL);
            if (!g_a) GA_Error("NGA_Create failed",-1);
            printf("%d Created array of  group %d as proc no. %d\n",
                    me, proc_group[inode], GA_Nodeid());
            GA_Print_distribution(g_a);
            comm = GA_MPI_Comm_pgroup_default();
            if (comm != MPI_COMM_NULL) {
                sbuf[0] = GA_Nodeid();
                status = MPI_Allreduce(sbuf, rbuf, 1, MPI_INT, MPI_MAX, comm);
                printf("%d max nodeid is %d\n", me, rbuf[0]);
                if ((rbuf[0]+1) != GA_Cluster_nprocs(i)) {
                    GA_Error("MPI_Allreduce failed",1);
                }
            }
            else {
                printf("MPI_Comm was null!\n");
            }
            GA_Pgroup_set_default(GA_Pgroup_get_world());
        }
        GA_Sync();
    }

    GA_Terminate();
    MP_FINALIZE();

    return 0;
}
Ejemplo n.º 25
0
main(int argc, char **argv)
{
  int rank, nprocs, i, j;
  int g_A, g_B, g_C, local_C[DIM][DIM], dims[DIM]={5,5}, val1=5, val2=4, alpha=3, beta=2, ld=5;
  int alo[DIM]={2,2}, ahi[DIM]={3,3}, blo[DIM]={2,2}, bhi[DIM]={3,3}, clo[DIM]={1,1}, chi[DIM]={2,2};

  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  MA_init(C_INT, 1000, 1000);

  GA_Initialize();
  
  g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL);

  g_B = GA_Duplicate(g_A, "array_B");
  g_C = GA_Duplicate(g_A, "array_C");

  GA_Fill(g_A, &val1);
  GA_Fill(g_B, &val2);
  GA_Zero(g_C);

  NGA_Add_patch(&alpha, g_A, clo, chi, &beta, g_B, blo, bhi, g_C, clo, chi);

  GA_Sync();
  GA_Print(g_A);
  GA_Print(g_B);
  GA_Print(g_C);

  NGA_Get(g_C, clo, chi, local_C, &ld);

  //printf("check 1 \n");

  for(i=0; i<DIM; i++)
    {
      for(j=0; j<DIM; j++)printf("%d ", local_C[i][j]);
      printf("\n");
    }
  
  if(rank == 0)
    {
      printf("check 2\n");
    
      for(i=0; i<DIM; i++)
	{
	  for(j=0; j<DIM; j++)
	    if(local_C[i][j]!=(alpha*val1)+(beta*val2)) printf("GA Error : \n");
	}
    }
  
  if(rank==0)
    GA_PRINT_MSG();

  GA_Sync();

  /*
  GA_Destroy(g_A);
  GA_Destroy(g_B);
  GA_Destroy(g_C);
  */

  //*******************************************************************

  /* what would be the possible reason for GA_destroy to get failed .., 
   * solve this before consolidate the whole
   */

  GA_Terminate();
  MPI_Finalize();
 
}
Ejemplo n.º 26
0
int main(int argc, char**argv)
{
  int nprocs, me;
  int i,j;
  MPI_Init(&argc,&argv);
  MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
  MPI_Comm_rank(MPI_COMM_WORLD,&me);
  GA_Initialize();
  const int heap=3000000, stack=300000;
  if(! MA_init(C_INT,stack,heap) ) GA_Error((char *) "MA_init failed",stack+heap /*error code*/);
  int Nx=97; int Ny=97; int Nz = 97;
  Nx+=3; Ny+=3; Nz+=3;
  int data[Nx*Ny*Nz];
  int num_splines = 32;
  int g_a,dims[4]={Nx,Ny,Nz,num_splines},chunk[4]={-1,-1,-1,num_splines};
  int width[4] = {3, 3, 3, 0};
  int type=C_INT;
  //g_a=NGA_Create(type,4,dims,"Coefs",chunk);
  g_a=NGA_Create_ghosts(type, 4, dims, width, "Coefs", chunk);
  int lo[4],hi[4],ld[3];
  //double value=9.0; GA_Fill(g_a,&value);
  GA_Print_distribution(g_a);
  fflush(stdout);
  if(me==0)
  {
      for (i=0; i<num_splines; i++)
      {
          int x, y, z;
          for (x=0; x<Nx; x++)
              for (y=0; y<Ny; y++)
                  for (z=0; z<Nz; z++)
                  {   j=x*(Ny*Nz)+y*Nz+z;
              data[j] = (x*100*100+y*100+z)*100+i;}
          lo[0]=lo[1]=lo[2]=0;
          hi[0]=Nx-1;hi[1]=Ny-1;hi[2]=Nz-1;
          lo[3]=hi[3]=i%num_splines;
          ld[0]=Ny;ld[1]=Nz;ld[2]=1;
          NGA_Put(g_a,lo,hi,data,ld);
      }
  }
  GA_Update_ghosts(g_a);
  GA_Sync();
  printf("done\n"),fflush(stdout);
  ga_coefs_t *ga_coefs = malloc(sizeof(ga_coefs_t));
  ga_coefs->Mx = Nx;
  ga_coefs->My = Ny;
  ga_coefs->Mz = Nz;
  ga_coefs->nsplines = num_splines;
  ga_coefs->g_a=g_a;
  int *coefs1 = (int*)malloc((size_t)1*sizeof(int)*4*4*4*num_splines);
  int ix,iy,iz;
  Nx-=3; Ny-=3; Nz-=3;
  ga_coefs->sumt=ga_coefs->amount=0;
  NGA_Distribution(g_a,me,lo,hi);
  GA_Print_distribution(g_a);
  int low[16][4],high[16][4];
  for(i=0;i<nprocs;i++)
      NGA_Distribution(g_a,i,low[i],high[i]);
  srand ( time(NULL) );
  int k=GA_Nodeid();
  printf("%d: low[k]=%d high[k]=%d\n", GA_Nodeid(), low[k][2], high[k][2]);
  int unequal=0;
  for(i=0;i<1000;i++) {
      ix=rand_index(low[k][0],high[k][0]);
      if(ix+3>=dims[0]) ix=low[k][0];
      iy=rand_index(low[k][1],high[k][1]);
      if(iy+3>=dims[1]) iy=low[k][1];
      iz=rand_index(low[k][2],high[k][2]);
      if(iz+3>=dims[2]) iz=low[k][2];
      coefs_ga_get_3d(ga_coefs,coefs1,ix,iy,iz);
      long get_sum=mini_cube_sum(coefs1, ga_coefs->nsplines);
      long ghost_sum=coefs_ghost_access_3d(ga_coefs->g_a, ix, iy, iz, ga_coefs->nsplines);
      if(get_sum!=ghost_sum) {
      printf("ixyz=\t%d\t%d\t%d\t", ix, iy, iz);
          printf("get_sum=%ld ghost_sum=%ld\n", get_sum, ghost_sum);
          unequal++;
      }
  }
  printf("unequal count=%d\n", unequal);
  free(coefs1);
  GA_Terminate();
  MPI_Finalize();
  return 0;
}
Ejemplo n.º 27
0
int main (int argc, char **argv)
{
  double startTime;
  int info;			/* used to check for functions returning nonzeros */
  GAVec ga_x;        		/* solution vector */
  TAO_SOLVER tao;		/* TAO_SOLVER solver context */
  TAO_GA_APPLICATION taoapp;	/* TAO application context */
  TaoTerminateReason reason;
  AppCtx user;			/* user-defined application context */

  /*initialize GA and MPI */
  int heap = 400000, stack = 400000;
  MPI_Init (&argc, &argv);	/* initialize MPI */
  GA_Initialize ();		/* initialize GA */
  user.me = GA_Nodeid ();
  user.nproc = GA_Nnodes ();
  startTime = MPI_Wtime();
  
  if (user.me == 0) {
    if (GA_Uses_fapi ())
      GA_Error ("Program runs with C array API only", 0);
    printf ("Using %ld processes\n", (long) user.nproc);
    fflush (stdout);
  }
  heap /= user.nproc;
  stack /= user.nproc;
  if (!MA_init (MT_F_DBL, stack, heap))
    GA_Error ("MA_init failed", stack + heap);	/* initialize memory allocator */
  
  /* Initialize TAO */
  TaoInitialize (&argc, &argv, (char *) 0, help);

  /* Initialize problem parameters */
  user.ndim = NDIM;
  user.natoms = NATOMS;
  user.BlockSize = BLOCKSIZE;


  /* Allocate vectors for the solution and gradient */
  int dims[2];
  dims[0] = user.ndim*user.natoms;
  ga_x = NGA_Create (C_DBL, 1, dims, "GA_X", NULL);
  if (!ga_x) GA_Error ("lennard-jones.main::NGA_Create ga_x", ga_x);

  /* Set up structures for data distribution */
  info = SetupBlocks(&user); CHKERRQ(info);


  /* The TAO code begins here */
  /* Create TAO solver with desired solution method */
  info = TaoCreate (MPI_COMM_WORLD, "tao_lmvm", &tao); CHKERRQ(info);
  info = TaoGAApplicationCreate (MPI_COMM_WORLD, &taoapp); CHKERRQ(info);

  /* Set the initial solution */
  info = InitializeVariables(ga_x, &user); CHKERRQ(info);
  info = TaoGAAppSetInitialSolutionVec(taoapp, ga_x); CHKERRQ(info);

  /* Set routines for function, gradient */
  info = TaoGAAppSetObjectiveAndGradientRoutine (taoapp, FormFunctionGradient, 
					      (void *) &user); CHKERRQ(info);

  /* Check for TAO command line options */
  info = TaoSetFromOptions (tao); CHKERRQ(info);


  /* SOLVE THE APPLICATION */
  info = TaoSolveGAApplication (taoapp, tao); CHKERRQ(info);

  /*  To View TAO solver information use */
  info = TaoView(tao); CHKERRQ(info);

  /* Get termination information */
  info = TaoGetTerminationReason (tao, &reason);
  if(info) GA_Error("lennard-jones.main.TaoGetTerminationReason",info);
  if (user.me == 0) {
    if (reason <= 0)
      printf("Try a different TAO method, adjust some parameters, or check the function evaluation routines\n");
    
    printf("WALL TIME TAKEN = %lf\n", MPI_Wtime()-startTime);
    /*output the solutions */ 

    printf ("The solution is :\n");
  }
  GA_Print (ga_x);




  /* Free TAO data structures */
  info = TaoDestroy (tao); CHKERRQ(info);
  info = TaoGAAppDestroy (taoapp); CHKERRQ(info);

  /* Free GA data structures */
  GA_Destroy (ga_x);
  if (!MA_pop_stack(user.memHandle)) 
    ga_error("Main::MA_pop_stack for memHandle failed",0);

  /* Finalize TAO, GA, and MPI */
  TaoFinalize ();
  GA_Terminate ();
  MPI_Finalize ();

  return 0;
}
Ejemplo n.º 28
0
int main(int argc, char **argv) {
    int i, matrix_size;
    int heap=20000000, stack=200000000;
    double *A=NULL;
    
    if(argc != 2) 
    {
       printf("Usage Error\n\t Usage: <program> <matrix_size>\n");
       exit(0);
    }

    matrix_size = atoi(argv[1]);
    
    if(matrix_size <= 0) 
    {
       printf("Error: matrix size (%d) should be > 0\n",
              matrix_size);
       GA_Error("matrix size should be >0", 1);
    }
    
    /* *****************************************************************
     * Initialize MPI/TCGMSG-MPI, GA and MA
     * *****************************************************************/
#ifdef MPI
#ifdef DCMF
    int desired = MPI_THREAD_MULTIPLE;
    int provided;
    MPI_Init_thread(&argc, &argv, desired, &provided);
    if ( provided != MPI_THREAD_MULTIPLE ) printf("provided != MPI_THREAD_MULTIPLE\n");
#else
  MPI_Init (&argc, &argv);	/* initialize MPI */
#endif
#else
    PBEGIN_(argc, argv);    /* initialize TCGMSG-MPI */
#endif
    
    GA_Initialize();        /* initialize GA */

    me     = GA_Nodeid();
    nprocs = GA_Nnodes();

    heap /= nprocs;
    stack /= nprocs;

    if(! MA_init(MT_F_DBL, stack, heap)) /* initialize MA */
    {
       GA_Error("MA_init failed",stack+heap);
    }

    /* create/initialize the matrix */    
    if((A = (double*)malloc(matrix_size*matrix_size*sizeof(double))) == NULL) 
    {
       GA_Error("malloc failed", matrix_size*matrix_size*sizeof(double));
    }

    for(i=0; i<2; i++) /* 5 runs */
    {
       init_array(A, matrix_size);
#if DEBUG
       if(me==0) print_array(A, matrix_size);
#endif
       /* *****************************************************************
        * Perform LU Factorization
        * *****************************************************************/
       ga_lu(A, matrix_size);
    }
    
    free(A);
    
    /* *****************************************************************
     * Terminate MPI/TCGMSG-MPI, GA and MA
     * *****************************************************************/
    if(me==0)printf("Terminating ..\n");
    GA_Terminate();

#ifdef MPI
    MPI_Finalize();
#else
    PEND_();
#endif
    
    return 0;
}
Ejemplo n.º 29
0
main(int argc, char **argv)
{
  int rank, nprocs;
  int g_A, dims[D]={5,10}, local_A[N], local_G[N], **sub_array=NULL, **s_array=NULL;
  int i, j, value=5;
  
  MPI_Init(&argc, &argv);
  GA_Initialize();
  MA_init(C_INT, 1000, 1000);
  
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  s_array=(int**)malloc(N*sizeof(int*));
  for(i=0; i<N; i++)
    {
      s_array[i]=(int*)malloc(D*sizeof(int));
      for(j=0; j<D; j++) s_array[i][j]=rand()%5;
    }

  sub_array=(int**)malloc(N*sizeof(int*));
  for(i=0; i<N; i++)
    {
      sub_array[i]=(int*)malloc(D*sizeof(int));
      for(j=0; j<D; j++) sub_array[i][j]=rand()%5;
    }


  for(i=0; i<N; i++)
    //local_A=(int*)malloc(N*sizeof(int));
  
  /*
   * depends on the value of array ..we can generate the location values in randon 
   * we can also use the if-condition
   */
  
  // PRINTing all the genrated array for reference 

  for(i=0; i<N; i++)
    {
      for(j=0; j<D; j++)printf("%d ",s_array[i][j]);
      printf("\n");
    } 

  printf("\n");
  for(i=0; i<N; i++)
    {
      for(j=0; j<D; j++)printf("%d ",sub_array[i][j]);
      printf("\n");
    } 

  printf("\n");
  for(i=0; i<N; i++)printf("%d \n",local_A[i]=rand()%5+1);

  //   PRINT done - now creating array 
  g_A=NGA_Create(C_INT, D, dims, "array_A", NULL);
  GA_Fill(g_A, &value);
  GA_Sync();
                                                   
  NGA_Scatter(g_A, local_A, s_array, N);
  NGA_Gather(g_A, local_G, s_array, N);
  GA_Sync();
  GA_Print(g_A);

  for(i=0; i<N; i++)printf("%d \n",local_G[i]);
  printf("\n");

  if(rank==0)
    {
      for(i=0; i<N; i++)
	if(local_G[i]!=local_A[i]) printf("GA Error: \n");
    }
  GA_Sync();
  if(rank==0)
    GA_PRINT_MSG();

  GA_Terminate();
  MPI_Finalize();
  return 0;
}
Ejemplo n.º 30
0
main(int argc, char **argv)
{
  int rank, nprocs, i, j;
  int g_A, g_B, g_C, **local_C=NULL, dims[DIM]={SIZE,SIZE}, val1=5, val2=4, alpha=3, beta=2;
  int clo[DIM]={SIZE-SIZE,SIZE-SIZE}, chi[DIM]={SIZE-1,SIZE-1}, ld=SIZE;
  int **local_tm=NULL;

  MPI_Init(&argc, &argv);

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  MA_init(C_INT, 1000, 1000);

  GA_Initialize();

  local_C=(int**)malloc(SIZE*sizeof(int*));
  for(i=0; i<SIZE; i++)
    local_C[i]=(int*)malloc(SIZE*sizeof(int));

  local_tm=(int**)malloc(SIZE*sizeof(int*));
  for(i=0; i<SIZE; i++)
    local_tm[i]=(int*)malloc(SIZE*sizeof(int));
  
  g_A = NGA_Create(C_INT, DIM, dims, "array_A", NULL);

  g_B = GA_Duplicate(g_A, "array_B");
  g_C = GA_Duplicate(g_A, "array_C");

  GA_Fill(g_A, &val1);
  GA_Fill(g_B, &val2);
  
  GA_Add(&alpha, g_A, &beta, g_B, g_C);

  GA_Sync();

  GA_Print(g_A);
  GA_Print(g_B);
  GA_Print(g_C);

  //printf("check 1\n");
  NGA_Get(g_C, clo, chi, local_tm, &ld);
  //printf("check 2\n");
  // GA_Sync();
  if(rank==0)
    {
      for(i=0; i<SIZE; i++)
	{
	  for(j=0; j<SIZE; j++)printf("%d ", local_tm[i][j]);
	  printf("\n");
	}
    }
  /*
  if(rank==0)
    {
      NGA_Get(g_C, clo, chi, local_C, &ld);
      
      printf("check 1 \n");
      
      for(i=0; i<SIZE; i++)
	{
	  for(j=0; j<SIZE; j++)printf("%d ", local_C[i][j]);
	  printf("\n");
	}
      
      printf("check 2\n");
      
      for(i=0; i<SIZE; i++)
	{
	  for(j=0; j<SIZE; j++)
	    if(local_C[i][j]!=(alpha*val1)+(beta*val2)) printf("GA Error : \n");
	}
    }

  */
  //GA_Sync();
  if(rank==0)
    printf("Test Completed \n");

  //  GA_Sync();

  /*
  GA_Destroy(g_A);
  GA_Destroy(g_B);
  GA_Destroy(g_C);
  */

  //*******************************************************************

  /* what would be the possible reason for GA_destroy to get failed .., 
   * solve this before consolidate the whole
   */

  GA_Terminate();
  MPI_Finalize();
 
}