Ejemplo n.º 1
0
// -------------------------------------------------------------
// MatSetValues_DenseGA
// -------------------------------------------------------------
static 
PetscErrorCode
MatSetValues_DenseGA(Mat mat, 
                   PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], 
                   const PetscScalar v[],InsertMode addv)
{
  PetscErrorCode ierr = 0;
  struct MatGACtx *ctx;  
  int i, j, idx;
  PetscScalar vij, one(1.0);
  int lo[2], hi[2], ld[2] = {1, 1};
  ierr = MatShellGetContext(mat, (void *)&ctx); CHKERRQ(ierr);

  idx = 0;
  for (i = 0; i < m; ++i) {
    for (j = 0; j < n; ++j, ++idx) {
      lo[0] = idxm[i];
      hi[0] = idxm[i];
      lo[1] = idxn[j];
      hi[1] = idxn[j];
      vij = v[idx];
      switch (addv) {
      case INSERT_VALUES:
        NGA_Put(ctx->ga, lo, hi, (void *)&vij, ld);
        break;
      case ADD_VALUES:
        NGA_Acc(ctx->ga, lo, hi, (void *)&vij, ld, &one);
        break;
      default:
        BOOST_ASSERT_MSG(false, "Unknown set operation");
      }
    }
  }
  return ierr;
}
Ejemplo n.º 2
0
void do_work()
{
int ONE=1 ;   /* useful constants */
int g_a, g_b;
int n=N, type=MT_F_DBL;
int me=GA_Nodeid(), nproc=GA_Nnodes();
int i, row;
int dims[2]={N,N};
int lo[2], hi[2], ld;

/* Note: on all current platforms DoublePrecision == double */
double buf[N], err, alpha, beta;

     if(me==0)printf("Creating matrix A\n");
     g_a = NGA_Create(type, 2, dims, "A", NULL);
     if(!g_a) GA_Error("create failed: A",n); 
     if(me==0)printf("OK\n");

     if(me==0)printf("Creating matrix B\n");
     /* create matrix B  so that it has dims and distribution of A*/
     g_b = GA_Duplicate(g_a, "B");
     if(! g_b) GA_Error("duplicate failed",n); 
     if(me==0)printf("OK\n");

     GA_Zero(g_a);   /* zero the matrix */

     if(me==0)printf("Initializing matrix A\n");
     /* fill in matrix A with random values in range 0.. 1 */ 
     lo[1]=0; hi[1]=n-1;
     for(row=me; row<n; row+= nproc){
         /* each process works on a different row in MIMD style */
         lo[0]=hi[0]=row;   
         for(i=0; i<n; i++) buf[i]=sin((double)i + 0.1*(row+1));
         NGA_Put(g_a, lo, hi, buf, &n);
     }


     if(me==0)printf("Symmetrizing matrix A\n");
     GA_Symmetrize(g_a);   /* symmetrize the matrix A = 0.5*(A+A') */
   

     /* check if A is symmetric */ 
     if(me==0)printf("Checking if matrix A is symmetric\n");
     GA_Transpose(g_a, g_b); /* B=A' */
     alpha=1.; beta=-1.;
     GA_Add(&alpha, g_a, &beta, g_b, g_b);  /* B= A - B */
     err= GA_Ddot(g_b, g_b);
     
     if(me==0)printf("Error=%f\n",(double)err);
     
     if(me==0)printf("\nChecking atomic accumulate \n");

     GA_Zero(g_a);   /* zero the matrix */
     for(i=0; i<n; i++) buf[i]=(double)i;

     /* everybody accumulates to the same location/row */
     alpha = 1.0;
     row = n/2;
     lo[0]=hi[0]=row;
     lo[1]=0; hi[1]=n-1;
     ld = hi[1]-lo[1]+1;
     NGA_Acc(g_a, lo, hi, buf, &ld, &alpha );
     GA_Sync();

     if(me==0){ /* node 0 is checking the result */

        NGA_Get(g_a, lo, hi, buf,&ld);
        for(i=0; i<n; i++) if(buf[i] != (double)nproc*i)
           GA_Error("failed: column=",i);
        printf("OK\n\n");

     }
     
     GA_Destroy(g_a);
     GA_Destroy(g_b);
}
Ejemplo n.º 3
0
Archivo: big.c Proyecto: sg0/Elemental
// note: Sayan: brings down memory requirement to about 268 MB
int main(int argc, char **argv)
{
    int me, nproc, g_a = -1, i, j;

#if defined(USE_ELEMENTAL)
    int ndim=2, dims[2]= {N1,N2};
#else
    int ndim=2, type=MT_F_DBL, dims[2]= {N1,N2};
#endif

    double *buf;

    int lo[2], hi[2], ld[1];
    double alpha = 1.0;

#if defined(USE_ELEMENTAL)
    // initialize Elemental (which will initialize MPI)
    ElInitialize( &argc, &argv );
    ElMPICommRank( MPI_COMM_WORLD, &me );
    ElMPICommSize( MPI_COMM_WORLD, &nproc );

    ElGlobalArrays_d eldga;

    // instantiate el::global array
    ElGlobalArraysConstruct_d( &eldga );
    // initialize global arrays
    ElGlobalArraysInitialize_d( eldga );
    printf ("INITIALIZED elemental global array...\n");
#else
    MP_INIT(argc,argv);
    GA_Initialize_ltd(-1);

    me=GA_Nodeid();
    nproc=GA_Nnodes();
#endif

    if(me==0) printf("Using %ld processes\n",(long)nproc);
    if(me==0) printf("memory = %ld bytes\n",((long)N1)*((long)N2)*8);

#if defined(USE_ELEMENTAL)
    // create and allocate a global array
    printf ("ndim = %d\n", ndim);
    printf ("dim[0] = %d and dim[1] = %d\n", dims[0], dims[1]);
    ElGlobalArraysCreate_d( eldga, ndim, dims, "A", &g_a);
    printf ("CREATED elemental global array...\n");
    // print distribution
    ElGlobalArraysPrint_d( eldga, g_a );
#else
    g_a = NGA_Create(type, ndim, dims, "A", NULL);

    GA_Zero(g_a);   /* zero the matrix */

    GA_Print_distribution(g_a);
#endif

    if(me == 0) {
//        buf = (double*)(malloc(N1*1024*sizeof(double)));
        buf = (double*)(malloc(N1*128*sizeof(double)));
//        for(j = 0; j < N1*1024; ++j) buf[j] = 1.0;
//        for(i = 0; i < N2/1024; ++i) {
        for(j = 0; j < N1*128; ++j) buf[j] = 1.0;
        for(i = 0; i < N2/128; ++i) {

            lo[0] = 0;
            hi[0] = lo[0] + N1   -1;
            /*
                lo[1] = i*1024;
                hi[1] = lo[1] + 1024 -1;
                ld[0] = 1024;
            */
            lo[1] = i*128;
            hi[1] = lo[1] + 128 -1;
            ld[0] = 128;
            printf("NGA_Acc.%d:  %d:%d %d:%d\n",i,lo[0],hi[0],lo[1],hi[1]);

#if defined(USE_ELEMENTAL)
            ElGlobalArraysAccumulate_d( eldga, g_a, lo, hi, buf, ld, &alpha );
            // there is an explicit flush in NGA_Acc/Put, so when it returns, the buffer
            // can be reused and data has reached the destination
#else
            NGA_Init_fence();
            NGA_Acc(g_a, lo, hi, buf, ld, &alpha);
            NGA_Fence();
#endif
        }
    }

#if defined(USE_ELEMENTAL)
    ElGlobalArraysSync_d( eldga );
    ElGlobalArraysDestroy_d( eldga, g_a );
    ElGlobalArraysTerminate_d( eldga );
    // call el::global arrays destructor
    ElGlobalArraysDestruct_d( eldga );
    ElFinalize();
#else
    GA_Sync();

    GA_Destroy(g_a);

    GA_Terminate();
    MP_FINALIZE();
#endif

    return 0;
}
Ejemplo n.º 4
0
double time_op(int g_a, double *buf_, int chunk, int loop, int proc,
               int ndim, int op)
{
  double start_time = 0;
  double stop_time = 0;
  double total_time = 0;
  int lo[2] = {-1,-1};
  int hi[2] = {-1,-1};
  int ld = -1;
  int i = 0;
  int bal = 0;
  double *buf = buf_;
  double alpha = 1;

  /* get the location within the g_a for the given proc */
#if defined(USE_ELEMENTAL)
  ElGlobalArraysDistribution_d( eldga, g_a, proc, lo, hi );
#else
  NGA_Distribution(g_a, proc, lo, hi);
#endif

  /* determine how much data to grab based on the chunk and dimensionality */
  if (ndim == 1) {
    hi[0] = lo[0] + chunk*chunk - 1;
  }
  else if (ndim == 2) {
    hi[0] = lo[0] + chunk - 1;
    hi[1] = lo[1] + chunk - 1;
    ld = chunk;
  }
  else {
    GA_Error("invalid ndim for time_op", ndim);
  }

  start_time = TIMER();
  for (i=0; i<loop; ++i) {
    switch (op) {
      case OP_GET:
#if defined(USE_ELEMENTAL)
        ElGlobalArraysGet_d( eldga, g_a, lo, hi, buf, &ld );
#else
        NGA_Get(g_a, lo, hi, buf, &ld);
#endif
        break;
      case OP_PUT:
#if defined(USE_ELEMENTAL)
        ElGlobalArraysPut_d( eldga, g_a, lo, hi, buf, &ld );
#else
        NGA_Put(g_a, lo, hi, buf, &ld);
#endif
        break;
      case OP_ACC:
#if defined(USE_ELEMENTAL)
        ElGlobalArraysPut_d( eldga, g_a, lo, hi, buf, &ld );
#else
        NGA_Acc(g_a, lo, hi, buf, &ld, &alpha);
#endif
        break;
      default:
        GA_Error("bad case value for op", op);
    }
    /* prepare next src location and dst ptr: avoid cache locality */
    if (bal == 0) {
      lo[0] += 128;
      lo[1] += 128;
      hi[0] += 128;
      hi[1] += 128;
      buf += 128;
      bal = 1;
    }
    else {
      lo[0] -= 128;
      lo[1] -= 128;
      hi[0] -= 128;
      hi[1] -= 128;
      buf -= 128;
      bal = 0;
    }
  }
  stop_time = TIMER();
  total_time = (stop_time - start_time);
  if (total_time == 0.0) {
    total_time = 0.000001; /* workaround for inaccurate timers */
    warn_accuracy++;
  }

  return(total_time / loop);
}