Exemple #1
0
/* Permute the npes-entry communication schedule sched so that it lines
   up with an ascending (or, when !ascending, descending) schedule on
   process sortpe.

   In-place transposes need this when the problem size is not evenly
   divisible among the processes: one process then has incoming blocks
   that are bigger/smaller than its outgoing blocks, and it must
   receive them in descending/ascending order, respectively, so that
   no data is overwritten before it has been sent.

   The entry at step i of sched is moved to the position given by
   sortpe's own schedule at step i (mirrored for the descending case). */
static void sort1_comm_sched(int *sched, int npes, int sortpe, int ascending)
{
     /* One allocation, two halves: the first npes ints hold the
	schedule of sortpe, the second npes ints receive the permuted
	copy of sched. */
     int *scratch = (int *) MALLOC(npes * sizeof(int) * 2, OTHER);
     int *permuted = scratch + npes;
     int step;

     fill1_comm_sched(scratch, sortpe, npes);

     for (step = 0; step < npes; ++step) {
	  int dest = ascending ? scratch[step] : npes - 1 - scratch[step];
	  permuted[dest] = sched[step];
     }

     /* copy the permuted schedule back over the caller's array */
     for (step = 0; step < npes; ++step)
	  sched[step] = permuted[step];

     X(ifree)(scratch);
}
Exemple #2
0
/* Planner entry point for this MPI transpose solver.

   ego_  - the solver (cast to S; its preserve_input field is read).
   p_    - the problem (cast to problem_mpi_transpose once applicable()
           has accepted it).
   plnr  - the planner used to create the child plans.

   Returns a newly created plan, or a null plan if the solver is not
   applicable or if creation of a child plan failed (the `nada' exit,
   which destroys whatever child plans were already created).

   The plan consists of up to four child plans (cld1, cld2, cld2rest,
   cld3), a duplicated communicator, per-process send/receive block
   sizes and offsets, and a communication schedule for this process. */
static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
{
     const S *ego = (const S *) ego_;
     const problem_mpi_transpose *p;
     P *pln;
     plan *cld1 = 0, *cld2 = 0, *cld2rest = 0, *cld3 = 0;
     INT b, bt, vn, rest_Ioff, rest_Ooff;
     INT *sbs, *sbo, *rbs, *rbo;
     int pe, my_pe, n_pes, sort_pe = -1, ascending = 1;
     R *I, *O;
     static const plan_adt padt = {
          XM(transpose_solve), awake, print, destroy
     };

     /* NOTE(review): ego is in fact read below (applicable(),
        preserve_input); UNUSED presumably only silences a warning in
        some build configuration -- confirm. */
     UNUSED(ego);

     if (!applicable(ego, p_, plnr))
          return (plan *) 0;

     p = (const problem_mpi_transpose *) p_;
     vn = p->vn;
     I = p->I; O = p->O;

     MPI_Comm_rank(p->comm, &my_pe);
     /* this n_pes value is only provisional: it is overwritten with
        the number of blocks further down, before its first use */
     MPI_Comm_size(p->comm, &n_pes);

     /* block size of this process along the nx dimension */
     b = XM(block)(p->nx, p->block, my_pe);
     
     /* cld1: local rearrangement from I to O, only needed when the
        input is not already in TRANSPOSED_IN layout */
     if (!(p->flags & TRANSPOSED_IN)) { /* b x ny x vn -> ny x b x vn */
	  cld1 = X(mkplan_f_d)(plnr, 
			       X(mkproblem_rdft_0_d)(X(mktensor_3d)
						     (b, p->ny * vn, vn,
						      p->ny, vn, b * vn,
						      vn, 1, 1),
						     I, O),
			       0, 0, NO_SLOW);
	  /* NOTE(review): XM(any_true) takes the communicator, so it
	     presumably combines the flag across all processes so that
	     they all give up together -- confirm in its definition. */
	  if (XM(any_true)(!cld1, p->comm)) goto nada;
     }
     /* When the input must be preserved, I is aliased to O from here
        on, so the remaining child plans are created with O as both
        input and output. */
     if (ego->preserve_input || NO_DESTROY_INPUTP(plnr)) I = O;

     /* cld2, cld2rest, cld3 and the two "rest" offsets are all
        produced by mkplans_posttranspose; a false return on any
        process aborts planning (see note on any_true above). */
     if (XM(any_true)(!XM(mkplans_posttranspose)(p, plnr, I, O, my_pe,
						 &cld2, &cld2rest, &cld3,
						 &rest_Ioff, &rest_Ooff),
		      p->comm)) goto nada;

     pln = MKPLAN_MPI_TRANSPOSE(P, &padt, apply);

     pln->cld1 = cld1;
     pln->cld2 = cld2;
     pln->cld2rest = cld2rest;
     pln->rest_Ioff = rest_Ioff;
     pln->rest_Ooff = rest_Ooff;
     pln->cld3 = cld3;
     /* 2 when the solver itself demands input preservation, otherwise
        whatever NO_DESTROY_INPUTP reports for the planner */
     pln->preserve_input = ego->preserve_input ? 2 : NO_DESTROY_INPUTP(plnr);

     /* the plan keeps its own duplicate of the problem's communicator */
     MPI_Comm_dup(p->comm, &pln->comm);

     /* From here on n_pes is the larger of the block counts of the
        two dimensions, not the communicator size obtained above. */
     n_pes = (int) X(imax)(XM(num_blocks)(p->nx, p->block),
			   XM(num_blocks)(p->ny, p->tblock));

     /* Compute sizes/offsets of blocks to exchange between processors */
     /* One allocation holds all four arrays (n_pes entries each), so
        sbs is the only pointer that owns memory. */
     sbs = (INT *) MALLOC(4 * n_pes * sizeof(INT), PLANS);
     sbo = sbs + n_pes;
     rbs = sbo + n_pes;
     rbo = rbs + n_pes;
     b = XM(block)(p->nx, p->block, my_pe);
     bt = XM(block)(p->ny, p->tblock, my_pe);
     for (pe = 0; pe < n_pes; ++pe) {
	  INT db, dbt; /* destination block sizes */
	  db = XM(block)(p->nx, p->block, pe);
	  dbt = XM(block)(p->ny, p->tblock, pe);

	  /* sizes use the actual block sizes of the two processes
	     involved; offsets use the nominal sizes p->block and
	     p->tblock */
	  sbs[pe] = b * dbt * vn;
	  sbo[pe] = pe * (b * p->tblock) * vn;
	  rbs[pe] = db * bt * vn;
	  rbo[pe] = pe * (p->block * bt) * vn;

	  /* A process with nonempty blocks for which db * tblock
	     differs from block * dbt is remembered as sort_pe, and
	     the direction of the inequality selects ascending vs.
	     descending ordering for sort1_comm_sched below. */
	  if (db * dbt > 0 && db * p->tblock != p->block * dbt) {
	       A(sort_pe == -1); /* only one process should need sorting */
	       sort_pe = pe;
	       ascending = db * p->tblock > p->block * dbt;
	  }
     }
     pln->n_pes = n_pes;
     pln->my_pe = my_pe;
     pln->send_block_sizes = sbs;
     pln->send_block_offsets = sbo;
     pln->recv_block_sizes = rbs;
     pln->recv_block_offsets = rbo;

     if (my_pe >= n_pes) {
	  pln->sched = 0; /* this process is not doing anything */
     }
     else {
	  /* schedule for this process, reordered only if some process
	     was flagged as needing sorted communication */
	  pln->sched = (int *) MALLOC(n_pes * sizeof(int), PLANS);
	  fill1_comm_sched(pln->sched, my_pe, n_pes);
	  if (sort_pe >= 0)
	       sort1_comm_sched(pln->sched, n_pes, sort_pe, ascending);
     }

     /* the plan's operation count is the sum over the child plans
        that exist */
     X(ops_zero)(&pln->super.super.ops);
     if (cld1) X(ops_add2)(&cld1->ops, &pln->super.super.ops);
     if (cld2) X(ops_add2)(&cld2->ops, &pln->super.super.ops);
     if (cld2rest) X(ops_add2)(&cld2rest->ops, &pln->super.super.ops);
     if (cld3) X(ops_add2)(&cld3->ops, &pln->super.super.ops);
     /* FIXME: should MPI operations be counted in "other" somehow? */

     return &(pln->super.super);

 nada:
     /* Failure exit: destroy the child plans in reverse order of
        creation.  Some of them may still be null here, so
        X(plan_destroy_internal) presumably accepts a null plan --
        confirm. */
     X(plan_destroy_internal)(cld3);
     X(plan_destroy_internal)(cld2rest);
     X(plan_destroy_internal)(cld2);
     X(plan_destroy_internal)(cld1);
     return (plan *) 0;
}
Exemple #3
0
/* Test driver for the communication-schedule routines.

   Usage:  prog [npes [sortpe]]

   With npes given, a schedule for npes processes is created, checked
   and printed.  If sortpe is also given, the single-process schedule
   for sortpe is re-created with fill1_comm_sched and printed, and the
   full schedule is then sorted for sortpe, checked, printed, inverted,
   checked and printed again.

   Without arguments, the same fill/sort/invert checks are run for
   npes = 1, 2, 3, ... and every sortpe, until an error occurs or the
   program is interrupted.

   Exit status: 0 on success, 1 for bad arguments, 2/3/4 for a
   fill/sort/invert check failure in the endless test, 5 for an
   allocation failure in the endless test, 6 for an allocation failure
   in the single-run mode. */
int main(int argc, char **argv)
{
     int **sched;
     int *sched1;
     int npes = -1, sortpe = -1, steps, i;

     if (argc >= 2) {
          npes = atoi(argv[1]);
          if (npes <= 0) {
               fprintf(stderr,"npes must be positive!\n");
               return 1;
          }
     }
     if (argc >= 3) {
          sortpe = atoi(argv[2]);
          if (sortpe < 0 || sortpe >= npes) {
               fprintf(stderr,"sortpe must be between 0 and npes-1.\n");
               return 1;
          }
     }

     if (npes != -1) {
          printf("Computing schedule for npes = %d:\n",npes);
          sched = make_comm_schedule(npes);
          if (!sched) {
               fprintf(stderr,"Out of memory!\n");
               return 6;
          }

          steps = check_comm_schedule(sched,npes);
          if (steps)
               printf("schedule OK (takes %d steps to complete).\n", steps);
          else
               printf("schedule not OK.\n");

          print_comm_schedule(sched, npes);

          if (sortpe != -1) {
               printf("\nRe-creating schedule for pe = %d...\n", sortpe);
               sched1 = (int*) malloc(sizeof(int) * npes);
               if (!sched1) {
                    fprintf(stderr,"Out of memory!\n");
                    free_comm_schedule(sched,npes);
                    return 6;
               }
               /* pre-fill with -1 so entries that fill1_comm_sched
                  fails to set are visible in the output */
               for (i = 0; i < npes; ++i) sched1[i] = -1;
               fill1_comm_sched(sched1, sortpe, npes);
               printf("  =");
               for (i = 0; i < npes; ++i)
                    printf("  %*d", npes < 10 ? 1 : (npes < 100 ? 2 : 3),
                           sched1[i]);
               printf("\n");

               printf("\nSorting schedule for sortpe = %d...\n", sortpe);
               sort_comm_schedule(sched,npes,sortpe);

               steps = check_comm_schedule(sched,npes);
               if (steps)
                    printf("schedule OK (takes %d steps to complete).\n",
                           steps);
               else
                    printf("schedule not OK.\n");

               print_comm_schedule(sched, npes);

               printf("\nInverting schedule...\n");
               invert_comm_schedule(sched,npes);

               steps = check_comm_schedule(sched,npes);
               if (steps)
                    printf("schedule OK (takes %d steps to complete).\n",
                           steps);
               else
                    printf("schedule not OK.\n");

               print_comm_schedule(sched, npes);

               free(sched1);
          }

          /* release the schedule whether or not sortpe was given */
          free_comm_schedule(sched,npes);
     }
     else {
          printf("Doing infinite tests...\n");
          for (npes = 1; ; ++npes) {
               sched1 = (int*) malloc(sizeof(int) * npes);
               if (!sched1) {
                    fprintf(stderr,"Out of memory!\n");
                    return 5;
               }
               printf("npes = %d...",npes);
               sched = make_comm_schedule(npes);
               if (!sched) {
                    fprintf(stderr,"Out of memory!\n");
                    free(sched1);
                    return 5;
               }
               for (sortpe = 0; sortpe < npes; ++sortpe) {
                    empty_comm_schedule(sched,npes);
                    fill_comm_schedule(sched,npes);
                    if (!check_comm_schedule(sched,npes)) {
                         fprintf(stderr,
                                 "\n -- fill error for sortpe = %d!\n",sortpe);
                         return 2;
                    }

                    /* the single-process schedule must agree with the
                       corresponding row of the full schedule; a
                       mismatch is reported but does not stop the run */
                    for (i = 0; i < npes; ++i) sched1[i] = -1;
                    fill1_comm_sched(sched1, sortpe, npes);
                    for (i = 0; i < npes; ++i)
                         if (sched1[i] != sched[sortpe][i])
                              fprintf(stderr,
                                      "\n -- fill1 error for pe = %d!\n",
                                      sortpe);

                    sort_comm_schedule(sched,npes,sortpe);
                    if (!check_comm_schedule(sched,npes)) {
                         fprintf(stderr,
                                 "\n -- sort error for sortpe = %d!\n",sortpe);
                         return 3;
                    }
                    invert_comm_schedule(sched,npes);
                    if (!check_comm_schedule(sched,npes)) {
                         fprintf(stderr,
                                 "\n -- invert error for sortpe = %d!\n",
                                 sortpe);
                         return 4;
                    }
               }
               free_comm_schedule(sched,npes);
               printf("OK\n");
               if (npes % 50 == 0)
                    printf("(...Hit Ctrl-C to stop...)\n");
               free(sched1);
          }
     }

     return 0;
}