Exemple #1
0
/*
 * Fetch block (I, J) from the process that owns it into buf, and add the
 * elapsed transfer time to comm_time and one to get_cntr.
 *
 * buf  - destination buffer; must be able to hold the block (see below)
 * I, J - block coordinates; the block's address is taken from a[I+J*nblocks]
 *
 * The number of bytes transferred depends on the block's position: blocks in
 * the last block row and/or column use the remainder n % block_size as their
 * extent (or block_size when n divides evenly), all other blocks are
 * block_size x block_size doubles.
 *
 * With MPI2_ONESIDED defined the transfer is an MPI_Get inside an exclusive
 * lock epoch on the owner; otherwise it is a blocking ARMCI_Get.
 */
void get_remote(double *buf, int I, int J)
{
    int proc_owner;
    int edge, size;
    double t1;

    proc_owner = block_owner(I, J);

    /* Extent of the trailing blocks; a full block when n divides evenly. */
    edge = n%block_size;
    if (edge == 0) {
        edge = block_size;
    }

    if ((I == nblocks-1) && (J == nblocks-1)) {
        size = edge*edge;               /* corner block */
    }
    else if ((I == nblocks-1) || (J == nblocks-1)) {
        size = edge*block_size;         /* last block row or column */
    }
    else {
        size = block_size*block_size;   /* interior block */
    }
    size = size * sizeof(double);       /* element count -> byte count */

    t1 = armci_timer();
#ifdef MPI2_ONESIDED
    {
       /* Byte offset of the block from ptr[proc_owner].  Kept in MPI_Aint
          (the type MPI_Get takes for target_disp) so that the pointer
          difference is not truncated to int on 64-bit systems. */
       MPI_Aint target_disp = (MPI_Aint)( ((char*)(a[I+J*nblocks])) -
                                          ((char*)(ptr[proc_owner])) );
       if(target_disp<0) {
          printf("ERROR!: target disp is < 0, target_disp= %lld\n",
                 (long long)target_disp);
          MPI_Abort(MPI_COMM_WORLD, 1);
       }
       /* The data is moved as raw bytes, hence MPI_CHAR with a byte count. */
       MPI_Win_lock(MPI_LOCK_EXCLUSIVE, proc_owner, 0, win);
       MPI_Get(buf, size, MPI_CHAR, proc_owner, target_disp, size,
               MPI_CHAR, win);
       MPI_Win_unlock(proc_owner, win);
    }
#else
    ARMCI_Get(a[I+J*nblocks], buf, size, proc_owner);
#endif
    comm_time += armci_timer() - t1;
    get_cntr++;
}
/*
 * Multiply the rows of a sparse matrix owned by this process with a
 * distributed vector and store the result in svec[me].
 *
 * n        - passed through to get_data()
 * non_zero - not used by the active code path
 * row_ind  - row start offsets into the non-zero arrays; row r spans
 *            [row_ind[r], row_ind[r+1])
 * col_ind  - col_ind[me][] holds the column index of each local non-zero
 * values   - values[me][] holds the value of each local non-zero
 * vec      - distributed input vector, passed through to get_data()
 * svec     - svec[me][i] receives the result for the i-th locally owned row
 *
 * Steps:
 *   1. Sort a copy of the local column indices and call get_data() once per
 *      run of consecutive indices, so only the needed parts of the vector are
 *      fetched into vec_local.
 *   2. Wait on gHandle[0..count].  count and gHandle are defined outside this
 *      block; presumably get_data() records its non-blocking handles there --
 *      confirm against get_data().
 *   3. Compute the product for the locally owned rows and print the
 *      communication and computation times.
 *
 * NOTE(review): vec_local is indexed by global column index, so this assumes
 * every column index is < COL -- confirm.
 */
static void sparse_multiply(int n, int non_zero, int *row_ind, int **col_ind,
                            double **values, double **vec, double **svec) {

    int i, j, k, num_elements, offset, *tmp_indices;
    double start_time, comm_time, comp_time, v, vec_local[COL];
    int start, end, prev, nrows, idx;

#if 0
    /* ---- Sequential Case ----  */
    for(i=0; i<n; i++) {
        svec[me][i] = 0;
        for(k=row_ind[i]; k<row_ind[i+1]; k++) {
            j = col_ind[me][k];
            v = values[me][k];
            svec[me][i] += v*vec[me][j];
            printf("%.1f %.1f\n", v, vec[me][j]);
        }
    }
    for(i=0; i<n; i++) printf("%.1f ", svec[me][i]);
    printf("\n");
#else

    num_elements = proc_nz_list[me];
    printf("num_elements = %d\n", num_elements);

    /* Sorted scratch copy of the local column indices.  Skipped entirely
       when there are no local non-zeros, so tmp_indices[0] is never read
       from an empty allocation. */
    tmp_indices = NULL;
    if(num_elements > 0) {
        tmp_indices = malloc((size_t)num_elements * sizeof *tmp_indices);
        if(tmp_indices == NULL) {
            /* Fail fast: continuing would dereference NULL below. */
            fprintf(stderr, "%d: sparse_multiply: cannot allocate %d indices\n",
                    me, num_elements);
            abort();
        }
        for(i=0; i<num_elements; i++) tmp_indices[i] = col_ind[me][i];
        qsort(tmp_indices, num_elements, sizeof(int), compare);
    }

    start_time = armci_timer();

    /* get the required portion of vector you need to local array */
    if(num_elements > 0) {
        start = prev = tmp_indices[0];
        for(i=1; i<num_elements; i++) {
            if(tmp_indices[i]>prev+1) {
                /* gap in the sorted indices: fetch the run that just ended */
                end = prev;
                get_data(n, start, end, vec_local, vec);
                start = prev = tmp_indices[i];
            }
            else prev = tmp_indices[i];
        }
        get_data(n, start, prev, vec_local, vec);   /* final run */
    }

#if 1
    if(count>=0) for(i=0; i<=count; i++) ARMCI_Wait(&gHandle[i]);
#endif

    comm_time = armci_timer() - start_time;

    /* The scratch copy is no longer needed; release it between the two
       timed sections so neither measurement includes the free. */
    free(tmp_indices);
    tmp_indices = NULL;

    start_time = armci_timer();

    /* Perform Matrix-Vector multiply and store the result in
       solution vector - "svec[]" */

    /* proc_row_list[] is used as a running total of rows per process:
       this process owns rows [proc_row_list[me-1], proc_row_list[me]).
       offset turns global non-zero positions into local ones. */
    if(me==0) {
        nrows = proc_row_list[me];
        offset = row_ind[0];
    }
    else {
        nrows = proc_row_list[me]-proc_row_list[me-1];
        offset = row_ind[proc_row_list[me-1]];
    }
    /* printf("%d: My total Work = %d\n", me, nrows); */

    for(i=0; i<nrows; i++) { /* loop over rows owned by me */
        svec[me][i] = 0;
        if(me==0) idx = i;
        else idx = proc_row_list[me-1] + i;   /* global row index */
        for(k=row_ind[idx]; k<row_ind[idx+1]; k++) {
            j = col_ind[me][k-offset];
            v = values[me][k-offset];
            svec[me][i] += v*vec_local[j];
        }
    }
    comp_time = armci_timer()-start_time;
    printf("%d: %f + %f = %f  (count = %d)\n", me, comm_time, comp_time,
           comm_time+comp_time, count+1);
#endif
}
Exemple #3
0
/*
 * Time and verify four ways of moving MAXELEMS individual doubles between
 * process 0 and every other process: value put, vector put/get, regular
 * non-blocking put/get, and non-blocking put/get on aggregate handles.
 *
 * dryrun - when non-zero, all transfers and checks still run but the timing
 *          and "O.K." messages are not printed.
 *
 * Only process 0 issues transfers.  Afterwards every other process checks the
 * data process 0 put into it, and process 0 checks the data it got from each
 * other process; a mismatch is reported through ARMCI_Error.
 *
 * NOTE(review): array capacities are assumed, not checked, by this code --
 * aggr_hdl_put/aggr_hdl_get need nproc <= MAXPROC, hdl_put[i]/hdl_get[i] in
 * the vector paths need nproc <= MAXELEMS, and src_ptr/dst_ptr need
 * MAXELEMS <= MAX_REQUESTS.  Confirm against the definitions.
 */
void test_aggregate(int dryrun)
{

  int i, j, rc, bytes, elems[2] = {MAXPROC, MAXELEMS};
  double *ddst_put[MAXPROC];
  double *ddst_get[MAXPROC];
  double *dsrc[MAXPROC];
  armci_hdl_t aggr_hdl_put[MAXPROC];
  armci_hdl_t aggr_hdl_get[MAXPROC];
  armci_hdl_t hdl_put[MAXELEMS];
  armci_hdl_t hdl_get[MAXELEMS];
  armci_giov_t darr;
  void *src_ptr[MAX_REQUESTS], *dst_ptr[MAX_REQUESTS];
  int start = 0, end = 0;
  double start_time;

  /* Destination arrays hold elems[0]*elems[1] doubles (one slot of
     elems[1] per peer); the source array holds elems[1] doubles. */
  create_array(ddst_put, 2, elems);
  create_array(ddst_get, 2, elems);
  create_array(dsrc, 1, &elems[1]);

  /* Source pattern depends on the rank so verification can tell peers apart */
  for (i = 0; i < elems[1]; i++) {
    dsrc[me][i] = i * 1.001 * (me + 1);
  }
  for (i = 0; i < elems[0]*elems[1]; i++) {
    ddst_put[me][i] = 0.0;
    ddst_get[me][i] = 0.0;
  }

  ARMCI_Barrier();

  /* only proc 0 does the work */
  if (me == 0) {
    if (!dryrun) {
      printf("Transferring %d doubles (Not an array of %d doubles)\n", MAXELEMS, MAXELEMS);
    }

    /* initializing non-blocking handles */
    for (i = 0; i < elems[1]; i++) {
      ARMCI_INIT_HANDLE(&hdl_put[i]);
    }
    for (i = 0; i < elems[1]; i++) {
      ARMCI_INIT_HANDLE(&hdl_get[i]);
    }

    /* aggregate handles */
    for (i = 0; i < nproc; i++) {
      ARMCI_INIT_HANDLE(&aggr_hdl_put[i]);
    }
    for (i = 0; i < nproc; i++) {
      ARMCI_INIT_HANDLE(&aggr_hdl_get[i]);
    }
    for (i = 0; i < nproc; i++) {
      ARMCI_SET_AGGREGATE_HANDLE(&aggr_hdl_put[i]);
    }
    for (i = 0; i < nproc; i++) {
      ARMCI_SET_AGGREGATE_HANDLE(&aggr_hdl_get[i]);
    }

    bytes = sizeof(double);

    /* **************** PUT **************** */
    /* register put: one value put per element, waited on per target */
    start_time = armci_timer();
    start = 0;
    end = elems[1];
    for (i = 1; i < nproc; i++) {
      for (j = start; j < end; j++) {
        ARMCI_NbPutValueDouble(dsrc[me][j], &ddst_put[i][me*elems[1] + j], i,
                               &hdl_put[j]);
      }
      for (j = start; j < end; j++) {
        ARMCI_Wait(&hdl_put[j]);
      }
    }
    if (!dryrun) {
      printf("%d: Value Put time      = %.2es\n", me, armci_timer() - start_time);
    }

    /* vector put: one descriptor of elems[1] single-double segments per
       target, handle indexed by target rank */
    /* NOTE(review): src_ptr/dst_ptr are refilled for the next target before
       the previous ARMCI_NbPutV has been waited on.  That is only safe if
       ARMCI copies the pointer arrays at call time -- confirm. */
    start_time = armci_timer();
    for (i = 1; i < nproc; i++) {
      for (j = start; j < end; j++) {
        src_ptr[j] = (void *)&dsrc[me][j];
        dst_ptr[j] = (void *)&ddst_put[i][me*elems[1] + j];
      }
      darr.src_ptr_array = src_ptr;
      darr.dst_ptr_array = dst_ptr;
      darr.bytes = sizeof(double);
      darr.ptr_array_len = elems[1];
      if ((rc = ARMCI_NbPutV(&darr, 1, i, &hdl_put[i]))) {
        ARMCI_Error("armci_nbputv failed\n", rc);
      }
    }
    for (i = 1; i < nproc; i++) {
      ARMCI_Wait(&hdl_put[i]);
    }
    if (!dryrun) {
      printf("%d: Vector Put time     = %.2es\n", me, armci_timer() - start_time);
    }

    /* regular put: one non-blocking put and one handle per element */
    start_time = armci_timer();
    for (i = 1; i < nproc; i++) {
      for (j = start; j < end; j++) {
        if ((rc = ARMCI_NbPut(&dsrc[me][j], &ddst_put[i][me*elems[1] + j], bytes,
                              i, &hdl_put[j]))) {
          ARMCI_Error("armci_nbput failed\n", rc);
        }
      }
      for (j = start; j < end; j++) {
        ARMCI_Wait(&hdl_put[j]);
      }
    }
    if (!dryrun) {
      printf("%d: Regular Put time    = %.2es\n", me, armci_timer() - start_time);
    }

    /* aggregate put: all puts to one target share that target's aggregate
       handle, and the handles are waited on after everything is issued */
    start_time = armci_timer();
    for (i = 1; i < nproc; i++) {
      for (j = start; j < end; j++) {
        if ((rc = ARMCI_NbPut(&dsrc[me][j], &ddst_put[i][me*elems[1] + j], bytes,
                              i,  &aggr_hdl_put[i]))) {
          ARMCI_Error("armci_nbput failed\n", rc);
        }
      }
    }
    for (i = 1; i < nproc; i++) {
      ARMCI_Wait(&aggr_hdl_put[i]);
    }
    if (!dryrun) {
      printf("%d: Aggregate Put time  = %.2es\n\n", me, armci_timer() - start_time);
    }


    /* **************** GET **************** */

    /* vector get: unlike the vector put, each target is waited on before
       the descriptor arrays are reused */
    start_time = armci_timer();
    for (i = 1; i < nproc; i++) {
      for (j = start; j < end; j++) {
        src_ptr[j] = (void *)&dsrc[i][j];
        dst_ptr[j] = (void *)&ddst_get[me][i*elems[1] + j];
      }
      darr.src_ptr_array = src_ptr;
      darr.dst_ptr_array = dst_ptr;
      darr.bytes = sizeof(double);
      darr.ptr_array_len = elems[1];
      if ((rc = ARMCI_NbGetV(&darr, 1, i, &hdl_get[i]))) {
        ARMCI_Error("armci_nbgetv failed\n", rc);
      }
      ARMCI_Wait(&hdl_get[i]);
    }
    if (!dryrun) {
      printf("%d: Vector Get time     = %.2es\n", me, armci_timer() - start_time);
    }

    /* regular get: one non-blocking get and one handle per element */
    start_time = armci_timer();
    for (i = 1; i < nproc; i++) {
      for (j = start; j < end; j++) {
        if ((rc = ARMCI_NbGet(&dsrc[i][j], &ddst_get[me][i*elems[1] + j], bytes,
                              i, &hdl_get[j]))) {
          ARMCI_Error("armci_nbget failed\n", rc);
        }
      }
      for (j = start; j < end; j++) {
        ARMCI_Wait(&hdl_get[j]);
      }
    }
    if (!dryrun) {
      printf("%d: Regular Get time    = %.2es\n", me, armci_timer() - start_time);
    }

    /* aggregate get: the return code is checked like every other
       non-blocking call in this test, so a failed request is reported
       instead of surfacing later as a verification mismatch */
    start_time = armci_timer();
    for (i = 1; i < nproc; i++) {
      for (j = start; j < end; j++) {
        if ((rc = ARMCI_NbGet(&dsrc[i][j], &ddst_get[me][i*elems[1] + j], bytes,
                              i, &aggr_hdl_get[i]))) {
          ARMCI_Error("armci_nbget failed\n", rc);
        }
      }
    }
    for (i = 1; i < nproc; i++) {
      ARMCI_Wait(&aggr_hdl_get[i]);
    }
    if (!dryrun) {
      printf("%d: Aggregate Get time  = %.2es\n", me, armci_timer() - start_time);
    }
  }

  ARMCI_Barrier();
  ARMCI_AllFence();
  ARMCI_Barrier();

  /* Verify */
  /* Every process other than 0 checks the slot process 0 wrote into
     (offset 0*elems[1]) against process 0's source pattern j * 1.001. */
  if (me != 0) {
    for (j = 0; j < elems[1]; j++) {
      if (ARMCI_ABS(ddst_put[me][j] - j * 1.001) > 0.1) {
        ARMCI_Error("aggregate put failed...1", 0);
      }
    }
  }
  ARMCI_Barrier();
  if (!dryrun && me == 0) {
    printf("\n  aggregate put ..O.K.\n");
  }
  fflush(stdout);

  /* Process 0 checks what it fetched from each peer i against that peer's
     source pattern j * 1.001 * (i + 1). */
  if (me == 0) {
    for (i = 1; i < nproc; i++) {
      for (j = 0; j < elems[1]; j++) {
        if (ARMCI_ABS(ddst_get[me][i*elems[1] + j] - j * 1.001 *(i + 1)) > 0.1) {
          ARMCI_Error("aggregate get failed...1", 0);
        }
      }
    }
  }
  ARMCI_Barrier();
  if (!dryrun && me == 0) {
    printf("  aggregate get ..O.K.\n");
  }
  fflush(stdout);


  ARMCI_AllFence();
  ARMCI_Barrier();

  if (!dryrun && me == 0) {
    printf("O.K.\n");
    fflush(stdout);
  }
  destroy_array(ddst_put);
  destroy_array(ddst_get);
  destroy_array(dsrc);
}