Example #1
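Two ranks attach a local array to a dynamically created window, exchange base addresses via MPI_Allgather, and use MPI_Aint arithmetic to compute the target displacement for an MPI_Get.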
int main(int argc, char **argv)
{
    int rank, nproc;
    int errs = 0;
    int array[1024];
    int val = 0;
    int target_rank;
    MPI_Aint bases[2];
    MPI_Aint disp, offset;
    MPI_Win  win;

    MTest_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    if (rank == 0 && nproc != 2) {
        MTestError("Must run with 2 ranks\n");
    }

    /* Get the base address in the middle of the array */
    if (rank == 0) {
        target_rank = 1;
        array[0] = 1234;
        MPI_Get_address(&array[512], &bases[0]);
    } else if (rank == 1) {
        target_rank = 0;
        array[1023] = 1234;
        MPI_Get_address(&array[512], &bases[1]);
    }

    /* Exchange bases */
    MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, bases, 1, MPI_AINT, MPI_COMM_WORLD);

    MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &win);
    MPI_Win_attach(win, array, sizeof(int)*1024);
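    /* With a dynamically created window, target displacements are absolute
     * addresses, as produced by MPI_Get_address on the target side. */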

    /* Do MPI_Aint addressing arithmetic */
    if (rank == 0) {
        disp = sizeof(int)*511;
        offset = MPI_Aint_add(bases[1], disp); /* offset points to array[1023] */
    } else if (rank == 1) {
        disp = sizeof(int)*512;
        offset = MPI_Aint_diff(bases[0], disp); /* offset points to array[0] */
    }

    /* Get val and verify it */
    MPI_Win_fence(MPI_MODE_NOPRECEDE, win);
    MPI_Get(&val, 1, MPI_INT, target_rank, offset, 1, MPI_INT, win);
    MPI_Win_fence(MPI_MODE_NOSUCCEED, win);

    if (val != 1234) {
        errs++;
        printf("%d -- Got %d, expected 1234\n", rank, val);
    }

    MPI_Win_detach(win, array);
    MPI_Win_free(&win);

    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;
}
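Example #2
A dynamic-window read test: every rank attaches a buffer allocated with MPI_Alloc_mem, base addresses are exchanged with MPI_Allgather, rank 0 initializes its buffer, and a designated rank (mem_rank) reads it back under MPI_Win_lock_all.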
void run_rma_test(int nprocs_per_node)
{
  int myrank, nprocs;
  int mem_rank;
  MPI_Win win;
  int *baseptr;

  MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  if (nprocs < nprocs_per_node * 2)
  {
    if (!myrank) printf("this program must be started with at least %d processes\n", nprocs_per_node * 2);
    MPI_Finalize();
    exit(EXIT_FAILURE);
  }


  mem_rank = nprocs_per_node + nprocs_per_node / 2;

  MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &win);

  MPI_Win_lock_all(0, win);



  int type_size;
  MPI_Type_size(MPI_INT, &type_size);

  size_t nbytes = COUNT * type_size;

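  /* Note: these MPI calls are wrapped in assert(), so they are compiled out
   * entirely when NDEBUG is defined. */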
  assert(MPI_Alloc_mem(nbytes, MPI_INFO_NULL, &baseptr) == MPI_SUCCESS);
  assert(MPI_Win_attach(win, baseptr, nbytes) == MPI_SUCCESS);

  MPI_Aint ldisp;
  MPI_Aint *disps = malloc(nprocs * sizeof(MPI_Aint));

  assert(MPI_Get_address(baseptr, &ldisp) == MPI_SUCCESS);

  assert(MPI_Allgather(&ldisp, 1, MPI_AINT, disps, 1, MPI_AINT, MPI_COMM_WORLD) == MPI_SUCCESS);

  if (myrank == 0)
  {
    for (size_t idx = 0; idx < COUNT; ++idx) {
      baseptr[idx] = idx * COUNT + 1;
    }
  }

  MPI_Barrier(MPI_COMM_WORLD);

  if (myrank == mem_rank) {
    assert(MPI_Get(baseptr, COUNT, MPI_INT, 0, disps[0], COUNT, MPI_INT, win) == MPI_SUCCESS);
    assert(MPI_Win_flush(0, win) == MPI_SUCCESS);

    for (size_t idx = 0; idx < COUNT; ++idx) {
      assert(baseptr[idx] == idx * COUNT + 1);
    }
  }

  MPI_Barrier(MPI_COMM_WORLD);

  MPI_Win_unlock_all(win);

  MPI_Barrier(MPI_COMM_WORLD);

  MPI_Win_free(&win);

  MPI_Free_mem(baseptr);
  free(disps);

  printf("Test finished\n");
}
Example #3
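An RMA strided-put test: each rank describes its source with an hindexed type over absolute addresses, describes the destination with an indexed-block type of byte offsets, puts a SUB_XDIM x SUB_YDIM patch into its peer's window, then validates the whole buffer.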
int main(int argc, char **argv) {
    int i, j, rank, nranks, peer, bufsize, errors;
    double *win_buf, *src_buf, *dst_buf;
    MPI_Win buf_win;

    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    bufsize = XDIM * YDIM * sizeof(double);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &win_buf);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &src_buf);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &dst_buf);

    if (rank == 0)
        if (verbose) printf("MPI RMA Strided Put Test:\n");

    for (i = 0; i < XDIM*YDIM; i++) {
        *(win_buf  + i) = 1.0 + rank;
        *(src_buf + i) = 1.0 + rank;
    }

    MPI_Win_create(win_buf, bufsize, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &buf_win);

    peer = (rank+1) % nranks;

    /* Perform ITERATIONS strided put operations */

    for (i = 0; i < ITERATIONS; i++) {
      MPI_Aint idx_loc[SUB_YDIM];
      int idx_rem[SUB_YDIM];
      int blk_len[SUB_YDIM];
      MPI_Datatype src_type, dst_type;

      if (rank == 0)
        if (verbose) printf(" + iteration %d\n", i);

      for (j = 0; j < SUB_YDIM; j++) {
        MPI_Get_address(&src_buf[j*XDIM], &idx_loc[j]);
        idx_rem[j] = j*XDIM*sizeof(double);
        blk_len[j] = SUB_XDIM*sizeof(double);
      }

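      /* The origin type is built from absolute addresses and used with
       * MPI_BOTTOM; the target type holds byte offsets into the peer's window. */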
      MPI_Type_create_hindexed(SUB_YDIM, blk_len, idx_loc, MPI_BYTE, &src_type);
      MPI_Type_create_indexed_block(SUB_YDIM, SUB_XDIM*sizeof(double), idx_rem, MPI_BYTE, &dst_type);

      MPI_Type_commit(&src_type);
      MPI_Type_commit(&dst_type);

      MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win);
      MPI_Put(MPI_BOTTOM, 1, src_type, peer, 0, 1, dst_type, buf_win);
      MPI_Win_unlock(peer, buf_win);

      MPI_Type_free(&src_type);
      MPI_Type_free(&dst_type);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    /* Verify that the results are correct */

    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, buf_win);
    errors = 0;
    for (i = 0; i < SUB_XDIM; i++) {
      for (j = 0; j < SUB_YDIM; j++) {
        const double actual   = *(win_buf + i + j*XDIM);
        const double expected = (1.0 + ((rank+nranks-1)%nranks));
        if (fabs(actual - expected) > 1e-10) {
          printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
              rank, j, i, expected, actual);
          errors++;
          fflush(stdout);
        }
      }
    }
    for (i = SUB_XDIM; i < XDIM; i++) {
      for (j = 0; j < SUB_YDIM; j++) {
        const double actual   = *(win_buf + i + j*XDIM);
        const double expected = 1.0 + rank;
        if (fabs(actual - expected) > 1e-10) {
          printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
              rank, j, i, expected, actual);
          errors++;
          fflush(stdout);
        }
      }
    }
    for (i = 0; i < XDIM; i++) {
      for (j = SUB_YDIM; j < YDIM; j++) {
        const double actual   = *(win_buf + i + j*XDIM);
        const double expected = 1.0 + rank;
        if (fabs(actual - expected) > 1e-10) {
          printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
              rank, j, i, expected, actual);
          errors++;
          fflush(stdout);
        }
      }
    }
    MPI_Win_unlock(rank, buf_win);

    MPI_Win_free(&buf_win);
    MPI_Free_mem(win_buf);
    MPI_Free_mem(src_buf);
    MPI_Free_mem(dst_buf);

    MPI_Finalize();

    if (errors == 0) {
      if (rank == 0)
        printf(" No Errors\n");
      return 0;
    } else {
      printf("%d: Fail\n", rank);
      return 1;
    }
}
Example #4
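An ARMCI-MPI helper that converts an I/O vector into one origin datatype and one target datatype, so an entire gather/scatter runs as a single one-sided operation.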
/** Optimized implementation of the ARMCI IOV operation that uses an MPI
  * datatype to achieve a one-sided gather/scatter.  Uses MPI_BOTTOM only for
  * the remote side; the origin datatype is anchored at a concrete base pointer.
  */
int ARMCII_Iov_op_datatype_no_bottom(enum ARMCII_Op_e op, void **src, void **dst, int count, int elem_count,
    MPI_Datatype type, int proc) {

    gmr_t *mreg;
    MPI_Datatype  type_loc, type_rem;
    MPI_Aint      disp_loc[count];
    int           disp_rem[count];
    int           block_len[count];
    void         *dst_win_base;
    int           dst_win_size, i, type_size;
    void        **buf_rem, **buf_loc;
    MPI_Aint      base_rem;
    MPI_Aint      base_loc;
    void         *base_loc_ptr;

    switch(op) {
      case ARMCII_OP_ACC:
      case ARMCII_OP_PUT:
        buf_rem = dst;
        buf_loc = src;
        break;
      case ARMCII_OP_GET:
        buf_rem = src;
        buf_loc = dst;
        break;
      default:
        ARMCII_Error("unknown operation (%d)", op);
        return 1;
    }

    MPI_Type_size(type, &type_size);

    mreg = gmr_lookup(buf_rem[0], proc);
    ARMCII_Assert_msg(mreg != NULL, "Invalid remote pointer");

    dst_win_base = mreg->slices[proc].base;
    dst_win_size = mreg->slices[proc].size;

    MPI_Get_address(dst_win_base, &base_rem);

    /* Pick a base address for the start of the origin's datatype */
    base_loc_ptr = buf_loc[0];
    MPI_Get_address(base_loc_ptr, &base_loc);

    for (i = 0; i < count; i++) {
      MPI_Aint target_rem, target_loc;
      MPI_Get_address(buf_loc[i], &target_loc);
      MPI_Get_address(buf_rem[i], &target_rem);
      disp_loc[i]  =  target_loc - base_loc;
      disp_rem[i]  = (target_rem - base_rem)/type_size;
      block_len[i] = elem_count;

      ARMCII_Assert_msg((target_rem - base_rem) % type_size == 0, "Transfer size is not a multiple of type size");
      ARMCII_Assert_msg(disp_rem[i] >= 0 && disp_rem[i]*type_size < dst_win_size, "Invalid remote pointer");
      ARMCII_Assert_msg(((uint8_t*)buf_rem[i]) + block_len[i]*type_size <= ((uint8_t*)dst_win_base) + dst_win_size, "Transfer exceeds buffer length");
    }

    MPI_Type_create_hindexed(count, block_len, disp_loc, type, &type_loc);
    MPI_Type_create_indexed_block(count, elem_count, disp_rem, type, &type_rem);
    //MPI_Type_indexed(count, block_len, disp_rem, type, &type_rem);

    MPI_Type_commit(&type_loc);
    MPI_Type_commit(&type_rem);

    gmr_lock(mreg, proc);

    switch(op) {
      case ARMCII_OP_ACC:
        gmr_accumulate_typed(mreg, base_loc_ptr, 1, type_loc, MPI_BOTTOM, 1, type_rem, proc);
        break;
      case ARMCII_OP_PUT:
        gmr_put_typed(mreg, base_loc_ptr, 1, type_loc, MPI_BOTTOM, 1, type_rem, proc);
        break;
      case ARMCII_OP_GET:
        gmr_get_typed(mreg, MPI_BOTTOM, 1, type_rem, base_loc_ptr, 1, type_loc, proc);
        break;
      default:
        ARMCII_Error("unknown operation (%d)", op);
        return 1;
    }

    gmr_unlock(mreg, proc);

    MPI_Type_free(&type_loc);
    MPI_Type_free(&type_rem);

    return 0;
}
Example #5
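A Barnes-Hut style N-body code: a custom MPI struct type describes each leaf, rank 0 reads the input and broadcasts the bodies, every rank rebuilds the tree and computes forces for its own slice, and the updated leaves are recombined with MPI_Allgather each step.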
int main(int argc, char* argv[]) {

	int* bodies_off;
	int* n_bodies_split;
	int n_local_bodies;
	const MPI_Comm comm = MPI_COMM_WORLD;
	FILE *inputf;
	FILE *outputf;
	double clockStart, clockEnd;
	int rc, n_proc, rank;

	rc = MPI_Init(&argc, &argv);
	if (rc != MPI_SUCCESS) {
		puts("MPI_Init failed");
		exit(-1);
	}

	MPI_Comm_size(comm, &n_proc);
	MPI_Comm_rank(comm, &rank);

	// build the MPI datatype describing a leaf_t
	MPI_Datatype bodytype, bodytype_tmp;
	MPI_Datatype type[4] = { MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE };
	int block_len[4] = { 1, 3, 3, 3 };
	MPI_Aint disp[4], base, extent;
	leaf_t example[2];

	MPI_Get_address(&example[0], &base);
	MPI_Get_address(&(example[0].mass), &disp[0]);
	MPI_Get_address(&(example[0].pos), &disp[1]);
	MPI_Get_address(&(example[0].vel), &disp[2]);
	MPI_Get_address(&(example[0].acc), &disp[3]);
	MPI_Get_address(&example[1], &extent);

	disp[0] -= base;
	disp[1] -= base;
	disp[2] -= base;
	disp[3] -= base;
	extent -= base;

	// MPI_LB/MPI_UB were removed in MPI-3; set the extent with a resized type instead
	MPI_Type_create_struct(4, block_len, disp, type, &bodytype_tmp);
	MPI_Type_create_resized(bodytype_tmp, 0, extent, &bodytype);
	MPI_Type_free(&bodytype_tmp);

	MPI_Type_commit(&bodytype);
	if (argc < 2) {
		fprintf(stderr, "usage: %s <inputfile>\n", argv[0]);
		exit(1);
	}
	char* inputfile = argv[1];
	inputf = fopen(inputfile, "r");

	if (inputf == NULL) {
		printf("unable to read input file\n");
		exit(1);
	}

	fscanf(inputf, "%d", &nbodies);
	fscanf(inputf, "%d", &steps);
	fscanf(inputf, "%lf", &dt);
	fscanf(inputf, "%lf", &eps);
	fscanf(inputf, "%lf", &tol);

	fclose(inputf);

	// allocate only after nbodies has been read from the input file
	bodies_off = malloc((n_proc + 1) * sizeof(int));
	n_bodies_split = malloc(n_proc * sizeof(int));
	bodies = malloc(nbodies * sizeof(node_t*));
	leafs = malloc(nbodies * sizeof(leaf_t));

	if (rank == 0) {
		int i;

		create_bodies();

		quicksort(0, nbodies - 1);

		//	bublesort();
		//	int i = 0;
		//	for (i = 0; i < nbodies; i++) {
		//		printf("%lf, %lf, %lf \n", bodies[i]->pos[0], bodies[i]->pos[1],
		//				bodies[i]->pos[2]);
		//	}
		n_local_bodies = nbodies / n_proc;

		//split the particles following the shark & fish scheme
		//		split_bodies(n_proc, bodies_off, n_bodies_split);
		//		n_local_bodies = n_bodies_split[rank];
		//
		//		MPI_Bcast(n_bodies_split, n_proc, MPI_INT, 0, comm);

		MPI_Bcast(leafs, nbodies, bodytype, 0, comm);

		dthf = 0.5 * dt;
		epssq = eps * eps;
		itolsq = 1.0 / (tol * tol);

		clockStart = MPI_Wtime();
		int step = 0;
		root = NULL;
		for (step = 0; step < steps; step++) {
			compute_center_and_diameter();

			root = malloc(sizeof(struct node_t)); // "new" is like "malloc"
			double mass_root = 0.0;

			root->type = 1;
			root->mass = &mass_root;
			root->pos = center;
			root->cell.childs[0] = NULL;
			root->cell.childs[1] = NULL;
			root->cell.childs[2] = NULL;
			root->cell.childs[3] = NULL;
			root->cell.childs[4] = NULL;
			root->cell.childs[5] = NULL;
			root->cell.childs[6] = NULL;
			root->cell.childs[7] = NULL;

			double radius = diameter * 0.5;

			int i = 0;
			for (i = 0; i < nbodies; i++) {
				insert(root, bodies[i], radius); // pass by reference: hand over the address of the struct the pointer refers to
			}
			curr = 0;
			compute_center_of_mass(&(*root));

			for (i = 0; i < n_local_bodies; i++) {
				compute_force(&(*root), &(*bodies[i]), diameter, step);
			}
			//		for (i = 0; i < nbodies; i++) {
			//		}

			deallocate_tree(root);

			// gather every rank's updated leaves in place
			MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, leafs,
					n_local_bodies, bodytype, comm);

			for (i = 0; i < nbodies; i++) {
				advance(&(*bodies[i]));
			}

			//		int p = 0;
			//		for (p = 0; p < nbodies; p++)
			//			printf("%lf, %lf, %lf \n", bodies[p]->pos[0], bodies[p]->pos[1],
			//					bodies[p]->pos[2]);
			//		printf("*************************************** \n");
		}
		//	int i = 0;
		// after the run
		//		int proc_rec = 1;
		//		while (proc_rec < n_proc) {
		//			MPI_Status status;
		//			int proc_rank;
		//			int cap = nbodies / n_proc;
		//			node_t temp[cap];
		//			MPI_Recv(temp, cap, bodytype, MPI_ANY_SOURCE, MPI_ANY_TAG, comm,
		//					&status);
		//			proc_rank = status.MPI_SOURCE;
		//
		//			int idx = 0;
		//			for (idx = proc_rec * (cap); idx < cap; idx++)
		//				*bodies[idx] = temp[idx];
		//			proc_rec++;
		//		}
		clockEnd = MPI_Wtime();
		// choose the output file from the problem size and append the results
		char outname[32];
		if (nbodies == 16384 || nbodies == 32768 || nbodies == 65536)
			snprintf(outname, sizeof(outname), "output%d", nbodies);
		else
			snprintf(outname, sizeof(outname), "output");

		char cmd[64];
		snprintf(cmd, sizeof(cmd), "echo 'Host:' `hostname` >> %s", outname);
		system(cmd);

		outputf = fopen(outname, "a");
		fprintf(outputf, "Execution time: %lf \n", clockEnd - clockStart);
		for (i = 0; i < nbodies; i++) {
			fprintf(outputf, "%lf, %lf, %lf \n", bodies[i]->pos[0],
					bodies[i]->pos[1], bodies[i]->pos[2]);
		}

		fflush(outputf);
		fclose(outputf);
		printf("Esecuzione completata\n");

	} else {

		int low = 0, up = 0; // rank r works on the block starting at r * n_local_bodies
		int i;
		dthf = 0.5 * dt;
		epssq = eps * eps;
		itolsq = 1.0 / (tol * tol);

		//	if (PAPI_library_init(PAPI_VER_CURRENT) != PAPI_VER_CURRENT) {
		//		printf("PAPI library initialization failed \n");
		//		exit(1);
		//	}
		//
		//	if (PAPI_create_eventset(&event_set) != PAPI_OK) {
		//		printf("creation of the PAPI event set failed \n");
		//		exit(1);
		//	}
		//
		//	if (PAPI_add_events(event_set, events, 2) != PAPI_OK) {
		//		printf("adding the PAPI events failed \n");
		//		exit(1);
		//	}

		n_local_bodies = nbodies / n_proc;
		MPI_Bcast(leafs, nbodies, bodytype, 0, comm);
		int step = 0;
		root = NULL;

		low += (rank * n_local_bodies);

		up = low + n_local_bodies;

		//	PAPI_start(event_set);
		//	clockStart = PAPI_get_real_usec();
		for (step = 0; step < steps; step++) {
			compute_center_and_diameter();

			root = malloc(sizeof(struct node_t)); // "new" is like "malloc"
			double mass_root = 0.0;

			root->type = 1;
			root->mass = &mass_root;
			root->pos = center;
			root->cell.childs[0] = NULL;
			root->cell.childs[1] = NULL;
			root->cell.childs[2] = NULL;
			root->cell.childs[3] = NULL;
			root->cell.childs[4] = NULL;
			root->cell.childs[5] = NULL;
			root->cell.childs[6] = NULL;
			root->cell.childs[7] = NULL;

			double radius = diameter * 0.5;

			for (i = 0; i < nbodies; i++) {
				bodies[i] = malloc(sizeof(node_t));
				bodies[i]->cell.leaf = &leafs[i];
				bodies[i]->mass = &leafs[i].mass;
				bodies[i]->pos = leafs[i].pos;
				insert(&(*root), &(*bodies[i]), radius); // pass by reference: hand over the address of the struct the pointer refers to
			}
			curr = 0;
			compute_center_of_mass(&(*root));

			for (i = low; i < up; i++) {
				compute_force(&(*root), &(*bodies[i]), diameter, step);
			}
			//		for (i = 0; i < nbodies; i++) {
			//		}

			deallocate_tree(root);

			local_leafs = &leafs[low];
			// gather every rank's updated leaves in place
			MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, leafs,
					up - low, bodytype, comm);

			for (i = 0; i < nbodies; i++) {
				advance(&(*bodies[i]));
			}
			//		int p = 0;
			//		for (p = 0; p < nbodies; p++)
			//			printf("%lf, %lf, %lf \n", bodies[p]->pos[0], bodies[p]->pos[1],
			//					bodies[p]->pos[2]);
			//		printf("*************************************** \n");
		}
		//	clockEnd = PAPI_get_real_usec();
		//	PAPI_stop(event_set, values);
		//	int i = 0;
		//		MPI_Send(bodies[low], up - low + 1, bodytype, 0, MPI_ANY_TAG, comm);

	}

	MPI_Finalize();
	return 0;
}
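Example #6
The strided-get counterpart of Example #3: because the local and the remote buffer share the same row layout, a single MPI_Type_indexed description serves both sides, and one MPI_Get under an exclusive lock fetches the whole submatrix.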
int main(int argc, char **argv) {
    int i, j, rank, nranks, peer, bufsize, errors;
    double *win_buf, *loc_buf;
    MPI_Win buf_win;

    int idx_rem[SUB_YDIM];
    int blk_len[SUB_YDIM];
    MPI_Datatype loc_type, rem_type;

    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    bufsize = XDIM * YDIM * sizeof(double);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &win_buf);
    MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &loc_buf);

    if (rank == 0)
        if (verbose) printf("MPI RMA Strided Get Test:\n");

    for (i = 0; i < XDIM*YDIM; i++)
        *(win_buf + i) = 1.0 + rank;

    MPI_Win_create(win_buf, bufsize, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &buf_win);

    peer = (rank+1) % nranks;

    /* Build the datatype */

    for (i = 0; i < SUB_YDIM; i++) {
      idx_rem[i] = i*XDIM;
      blk_len[i] = SUB_XDIM;
    }
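    /* loc_buf and win_buf share the same XDIM-row layout, so the same element
     * offsets describe both the local and the remote side. */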

    MPI_Type_indexed(SUB_YDIM, blk_len, idx_rem, MPI_DOUBLE, &loc_type);
    MPI_Type_indexed(SUB_YDIM, blk_len, idx_rem, MPI_DOUBLE, &rem_type);

    MPI_Type_commit(&loc_type);
    MPI_Type_commit(&rem_type);

    /* Perform get operation */

    MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win);

    MPI_Get(loc_buf, 1, loc_type, peer, 0, 1, rem_type, buf_win);

    /* Use the datatype only on the remote side (must have SUB_XDIM == XDIM) */
    /* MPI_Get(loc_buf, SUB_XDIM*SUB_YDIM, MPI_DOUBLE, peer, 0, 1, rem_type, buf_win); */

    MPI_Win_unlock(peer, buf_win);

    MPI_Type_free(&loc_type);
    MPI_Type_free(&rem_type);

    MPI_Barrier(MPI_COMM_WORLD);

    /* Verify that the results are correct */

    errors = 0;
    for (i = 0; i < SUB_XDIM; i++) {
      for (j = 0; j < SUB_YDIM; j++) {
        const double actual   = *(loc_buf + i + j*XDIM);
        const double expected = (1.0 + peer);
        if (fabs(actual - expected) > 1e-10) {
          printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
              rank, j, i, expected, actual);
          errors++;
          fflush(stdout);
        }
      }
    }
    for (i = SUB_XDIM; i < XDIM; i++) {
      for (j = 0; j < SUB_YDIM; j++) {
        const double actual   = *(loc_buf + i + j*XDIM);
        const double expected = 1.0 + rank;
        if (fabs(actual - expected) > 1e-10) {
          printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
              rank, j, i, expected, actual);
          errors++;
          fflush(stdout);
        }
      }
    }
    for (i = 0; i < XDIM; i++) {
      for (j = SUB_YDIM; j < YDIM; j++) {
        const double actual   = *(loc_buf + i + j*XDIM);
        const double expected = 1.0 + rank;
        if (fabs(actual - expected) > 1e-10) {
          printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n",
              rank, j, i, expected, actual);
          errors++;
          fflush(stdout);
        }
      }
    }

    MPI_Win_free(&buf_win);
    MPI_Free_mem(win_buf);
    MPI_Free_mem(loc_buf);

    MPI_Finalize();

    if (errors == 0) {
      if (rank == 0)
        printf(" No Errors\n");
      return 0;
    } else {
      printf("%d: Fail\n", rank);
      return 1;
    }
}
Example #7
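A packing benchmark: it compares MPI_Pack of a struct-with-vector datatype anchored at absolute addresses (via MPI_BOTTOM), packing the two pieces separately, and a hand-written copy loop, and flags results where the datatype path is more than four times slower.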
int main(int argc, char **argv)
{
    int vcount, vstride;
    int32_t counts[2];
    int packsize, i, position, errs = 0;
    double *outbuf, *outbuf2;
    double *vsource;
    MPI_Datatype vtype, stype;
    double t0, t1;
    double tspack, tvpack, tmanual;
    int ntry;
    int blocklengths[2];
    MPI_Aint displacements[2];
    MPI_Datatype typesArray[2];

    MPI_Init(&argc, &argv);

    /* Create a struct consisting of two 32-bit ints, followed by a
     * vector with stride 3 and count 128k (less than a few MB of data area) */
    vcount = 128000;
    vstride = 3;
    MPI_Type_vector(vcount, 1, vstride, MPI_DOUBLE, &vtype);

    vsource = (double *) malloc((vcount + 1) * (vstride + 1) * sizeof(double));
    if (!vsource) {
        fprintf(stderr, "Unable to allocate vsource\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    for (i = 0; i < vcount * vstride; i++) {
        vsource[i] = i;
    }
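    /* The struct's displacements are absolute addresses, so the pack calls
     * below pass MPI_BOTTOM as the input buffer. */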
    blocklengths[0] = 2;
    MPI_Get_address(&counts[0], &displacements[0]);
    blocklengths[1] = 1;
    MPI_Get_address(vsource, &displacements[1]);
    if (verbose) {
        printf("%p = %p?\n", vsource, (void *) displacements[1]);
    }
    typesArray[0] = MPI_INT32_T;
    typesArray[1] = vtype;
    MPI_Type_create_struct(2, blocklengths, displacements, typesArray, &stype);
    MPI_Type_commit(&stype);
    MPI_Type_commit(&vtype);

#if defined(MPICH) && defined(PRINT_DATATYPE_INTERNALS)
    /* To use MPIDU_Datatype_debug to print the datatype internals,
     * you must configure MPICH with --enable-g=log */
    if (verbose) {
        printf("Original struct datatype:\n");
        MPIDU_Datatype_debug(stype, 10);
    }
#endif

    MPI_Pack_size(1, stype, MPI_COMM_WORLD, &packsize);
    outbuf = (double *) malloc(packsize);
    outbuf2 = (double *) malloc(packsize);
    if (!outbuf) {
        fprintf(stderr, "Unable to allocate %ld for outbuf\n", (long) packsize);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    if (!outbuf2) {
        fprintf(stderr, "Unable to allocate %ld for outbuf2\n", (long) packsize);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    position = 0;
    /* Warm up the code and data */
    MPI_Pack(MPI_BOTTOM, 1, stype, outbuf, packsize, &position, MPI_COMM_WORLD);

    tspack = 1e12;
    for (ntry = 0; ntry < 5; ntry++) {
        position = 0;
        t0 = MPI_Wtime();
        MPI_Pack(MPI_BOTTOM, 1, stype, outbuf, packsize, &position, MPI_COMM_WORLD);
        t1 = MPI_Wtime() - t0;
        if (t1 < tspack)
            tspack = t1;
    }
    MPI_Type_free(&stype);

    /* An equivalent packing, using the 2 ints and the vector separately */
    tvpack = 1e12;
    for (ntry = 0; ntry < 5; ntry++) {
        position = 0;
        t0 = MPI_Wtime();
        MPI_Pack(counts, 2, MPI_INT32_T, outbuf, packsize, &position, MPI_COMM_WORLD);
        MPI_Pack(vsource, 1, vtype, outbuf, packsize, &position, MPI_COMM_WORLD);
        t1 = MPI_Wtime() - t0;
        if (t1 < tvpack)
            tvpack = t1;
    }
    MPI_Type_free(&vtype);

    /* The manual packing below exploits the vector's regular structure: it
     * copies one double per block, stepping through vsource with stride
     * vstride, unrolled four elements per iteration (vcount is a multiple of 4).
     */
    tmanual = 1e12;
    for (ntry = 0; ntry < 5; ntry++) {
        const double *restrict ppe = (const double *) vsource;
        double *restrict ppo = outbuf2;
        int j;
        t0 = MPI_Wtime();
        position = 0;
        *(int32_t *) ppo = counts[0];
        *(((int32_t *) ppo) + 1) = counts[1];
        ppo++;
        /* Some hand optimization because this file is not normally
         * compiled with optimization by the test suite */
        j = vcount;
        while (j) {
            *ppo++ = *ppe;
            ppe += vstride;
            *ppo++ = *ppe;
            ppe += vstride;
            *ppo++ = *ppe;
            ppe += vstride;
            *ppo++ = *ppe;
            ppe += vstride;
            j -= 4;
        }
        position += (1 + vcount);
        position *= sizeof(double);
        t1 = MPI_Wtime() - t0;
        if (t1 < tmanual)
            tmanual = t1;

        /* Check on correctness */
#ifdef PACK_IS_NATIVE
        if (memcmp(outbuf, outbuf2, position) != 0) {
            printf("Panic(manual) - pack buffers differ\n");
            for (j = 0; j < 8; j++) {
                printf("%d: %llx\t%llx\n", j, (long long unsigned) outbuf[j],
                       (long long unsigned) outbuf2[j]);
            }
        }
#endif
    }

    if (verbose) {
        printf("Bytes packed = %d\n", position);
        printf("MPI_Pack time = %e (struct), = %e (vector), manual pack time = %e\n",
               tspack, tvpack, tmanual);
    }

    if (4 * tmanual < tspack) {
        errs++;
        printf("MPI_Pack time using struct with vector = %e, manual pack time = %e\n", tspack,
               tmanual);
        printf("MPI_Pack time should be less than 4 times the manual time\n");
        printf("For most informative results, be sure to compile this test with optimization\n");
    }
    if (4 * tmanual < tvpack) {
        errs++;
        printf("MPI_Pack using vector = %e, manual pack time = %e\n", tvpack, tmanual);
        printf("MPI_Pack time should be less than 4 times the manual time\n");
        printf("For most informative results, be sure to compile this test with optimization\n");
    }
    if (4 * tvpack < tspack) {
        errs++;
        printf("MPI_Pack using a vector = %e, using a struct with vector = %e\n", tvpack, tspack);
        printf
            ("MPI_Pack time using vector should be about the same as the struct containing the vector\n");
        printf("For most informative results, be sure to compile this test with optimization\n");
    }

    if (errs) {
        printf(" Found %d errors\n", errs);
    }
    else {
        printf(" No Errors\n");
    }

    free(vsource);
    free(outbuf);
    free(outbuf2);

    MPI_Finalize();
    return 0;
}