Example #1
void transpose (Real **b, int size, int *len, int *disp, int rank, int m){
  int *sendcounts, *rdispls;
  Real  *sendbuf, *recvbuf;
  sendbuf = createRealArray (m * len[rank]);
  recvbuf = createRealArray (m * len[rank]);
  sendcounts = calloc(size,sizeof(int));
  rdispls = calloc(size,sizeof(int));
  matrixToVector(b,sendbuf,len,disp, size, rank);

  // Counts are symmetric (len[rank]*len[i] doubles per pair of ranks), so the
  // same count/displacement arrays serve both the send and the receive side.
  int index = 0;
  for (int i = 0; i < size; ++i)
  {
    sendcounts[i]= len[rank]*len[i];
    rdispls[i]=index;
    index=index+sendcounts[i];
  }
  MPI_Alltoallv(sendbuf, sendcounts, rdispls, MPI_DOUBLE, recvbuf, sendcounts, rdispls, MPI_DOUBLE, MPI_COMM_WORLD);
  vectorToMatrix(b, recvbuf, len, disp, size, rank);

  free(sendbuf);
  free(recvbuf);
  free(sendcounts);
  free(rdispls);
}
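Most of the MPI examples on this page treat createRealArray as a plain allocator: the buffers it returns are passed straight to MPI_DOUBLE operations, and the 2D arrays from createReal2DArray are released with free(b[0]) followed by free(b), which implies one contiguous data block plus a row-pointer table. A minimal sketch of what these helpers likely look like under those assumptions (the zero-initialization and the exact signatures are guesses, not taken from the original sources):

#include <stdlib.h>

typedef double Real;   /* the examples pair these buffers with MPI_DOUBLE */

Real *createRealArray (int n)
{
  // Zero-initialized 1D buffer of n Reals (assumed behaviour).
  return (Real *) calloc(n, sizeof(Real));
}

Real **createReal2DArray (int rows, int cols)
{
  // Row-pointer table over one contiguous block, so free(a[0]); free(a); works.
  Real **a = (Real **) malloc(rows * sizeof(Real *));
  a[0] = (Real *) calloc((size_t) rows * cols, sizeof(Real));
  for (int i = 1; i < rows; i++)
    a[i] = a[0] + (size_t) i * cols;
  return a;
}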
Example #2
void Tableau::allocate(size_t constraintCount) {
  m_slackCount      = 0;
  m_artificialCount = 0;

  m_table.clear();
  for(UINT row = 0; row <= constraintCount; row++) {
    m_table.add(TableauRow(getMaxColumnCount(constraintCount)));
  }

  m_costFactor = createRealArray(getXCount()+1);
}
Example #3
void transpose (Real **A, int m, int n, int size, int bb, int bre)
{
  int se[size], sd[size], re[size], rd[size];
  Real *V = createRealArray (n*m);
  Real *Vt = createRealArray (n*m);

  // Every rank exchanges bb doubles with each peer; the block shared with the
  // last rank (which holds the remainder rows) has bre doubles instead.
  for (int i = 0; i < size; ++i) {
    se[i] = bb;
    sd[i] = bb*i;
    re[i] = bb;
    rd[i] = bb*i;
  }
  se[size-1] = bre;
  re[size-1] = bre;

  for(int i = 0; i < n; i++) {
    for(int j = 0; j < m; j++) {
      V[j + i*m] = A[j][i];
    }
  }

  MPI_Alltoallv(V, se, sd, MPI_DOUBLE, Vt, re, rd, MPI_DOUBLE, MPI_COMM_WORLD);
  fillA(A, Vt, re, rd, m, n, size);

  free(V);
  free(Vt);
}
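For context, Example #11 further down calls a transpose with this exact parameter list and shows how the bb and bre arguments are computed. A quick numeric check of that arithmetic, assuming m = 7 rows split over size = 3 ranks (these numbers are illustrative only):

/*
 * b = m/size = 2 rows per rank, re = m - (size-1)*b = 3 rows on the last rank.
 * A non-last rank (l = b = 2) passes bb = b*b = 4 and bre = b*re = 6, so it
 * exchanges 4 doubles with each non-last rank and 6 with the last one.
 * The last rank (l = re = 3) passes bb = b*re = 6 and bre = re*re = 9.
 */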
Example #4
// Used to gather solution to one matrix on root process
void gatherMatrix(Real** Matrix, int matrixSize, Real* gatherRecvBuf, int* len, int* disp, int root){
  int size, rank, *sendcounts, *rdispls, index;
  Real *gatherSendBuf;
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  gatherSendBuf = createRealArray (matrixSize * len[rank]);
  for (int i = 0; i < len[rank]; ++i)
  {
    for (int j = 0; j < matrixSize; ++j)
    {
      gatherSendBuf[i*matrixSize+j]=Matrix[i][j];
    }
  }
  sendcounts = calloc(size,sizeof(int));
  rdispls = calloc(size,sizeof(int));
  index=0;
  for (int i = 0; i < size; ++i)
  {
    sendcounts[i]= len[i]*matrixSize;
    rdispls[i]=index;
    index=index+sendcounts[i];
  }
  // Gather all local row blocks onto the requested root rank.
  MPI_Gatherv(gatherSendBuf, matrixSize * len[rank], MPI_DOUBLE, gatherRecvBuf, sendcounts, rdispls, MPI_DOUBLE, root, MPI_COMM_WORLD);
  free(gatherSendBuf);
  free(sendcounts);
  free(rdispls);
}
Example #5
int main(int argc, char **argv)
{
  double wall_start = MPI_Wtime();
  Real *diag, **b, **bt, **z;
  Real pi, h, omp_local_max, local_max, global_max;
  int i, j, omp_id;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
  MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);

  omp_tot_threads = omp_get_max_threads();

  /* the total number of grid points in each spatial direction is (n+1) */
  /* the total number of degrees-of-freedom in each spatial direction is (n-1) */
  /* this version requires n to be a power of 2 */

  if (argc < 2) {
    if (mpi_rank == 0){
      printf("need a problem size\n");
    }
    MPI_Finalize();
    return 0;
  }

  n  = atoi(argv[1]);
  m  = n-1;
  // mpi_work is the number of rows each MPI rank is responsible for. The last
  // rank may end up with slightly less work than the others, but this is the
  // closest we get to an even load balance.
  mpi_work = 1 + ((m - 1) / mpi_size);
  nn = 4*n;

  diag = createRealArray (m);
  b    = createReal2DArray (mpi_work, mpi_size*mpi_work);
  bt   = createReal2DArray (mpi_work, mpi_size*mpi_work);
  z    = createReal2DArray (omp_tot_threads, nn);

  h    = 1./(Real)n;
  pi   = 4.*atan(1.);
  
  #pragma omp parallel for private(i)
  for (i=0; i < m; i++) {
    diag[i] = 2.*(1.-cos((i+1)*pi/(Real)n));
  }

  #pragma omp parallel for private(j, i)
  for (j=0; j < mpi_work; j++) { // MPI
    for (i=0; j + mpi_work * mpi_rank < m && i < m; i++) { // OMP
      int k = j + mpi_work * mpi_rank;
      b[j][i] = exp((Real) k) * sin(2.0 * pi * k) * sin(2.0 * i);
    }
  }

  #pragma omp parallel for private(omp_id, i)
  for (j=0; j < mpi_work; j++) { // MPI cut + OMP
    omp_id = omp_get_thread_num();
    fst_(b[j], &n, z[omp_id], &nn);
  }
  
  transpose (bt,b);

  #pragma omp parallel for private(i, omp_id) schedule(static)
  for (i=0; i < mpi_work; i++) { // MPI cut + OMP
    omp_id = omp_get_thread_num();
    fstinv_(bt[i], &n, z[omp_id], &nn);
  }

  #pragma omp parallel for private(j, i)
  for (j=0; j < mpi_work; j++) { // MPI
    for (i=0; i < m; i++) {
      bt[j][i] = bt[j][i]/(diag[i]+diag[j + mpi_work * mpi_rank]);
    }
  }

  #pragma omp parallel for private(i, omp_id) schedule(static)
  for (i=0; i < mpi_work; i++) { // MPI cut + OMP
    omp_id = omp_get_thread_num();
    fst_(bt[i], &n, z[omp_id], &nn);
  }

  transpose (b,bt);

  #pragma omp parallel for private(j, omp_id)
  for (j=0; j < mpi_work; j++) { // MPI cut + OMP
    omp_id = omp_get_thread_num();
    fstinv_(b[j], &n, z[omp_id], &nn);
  }

  local_max = 0.0;
  omp_local_max = 0.0;

  #pragma omp parallel shared(local_max) private(j,i) firstprivate(omp_local_max)
  {
    // MPI, work in range (and handle last node overflow)
    #pragma omp for nowait
    for (j=0; j < mpi_work; j++) {
      for (i=0; j + mpi_work * mpi_rank < m && i < m; i++) {
        if (b[j][i] > omp_local_max) omp_local_max = b[j][i];
      }
    }
    #pragma omp critical
    {
      if (omp_local_max > local_max) {
        local_max = omp_local_max;
      }
    }
  }

  MPI_Reduce(&local_max, &global_max, 1,
             MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
               
  free(diag);
  free(b[0]);
  free(b);
  free(bt[0]);
  free(bt);
  free(z[0]);
  free(z);

  // MPI_Wtime must be called before MPI_Finalize.
  double wall_end = MPI_Wtime();

  if (mpi_rank == 0) {
    printf (" umax = %e, time = %.3fs \n", global_max, wall_end-wall_start);
    printf(" mpi_size = %d, omp_max_threads = %d, n = %d\n", mpi_size, omp_tot_threads, n);
  }

  MPI_Finalize();
  return 0;
}
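Example #5 uses mpi_size, mpi_rank, omp_tot_threads, n, m, nn and mpi_work without declaring them locally, so they must be file-scope variables in the original source. A guess at the missing declarations, added only so the snippet reads as self-contained:

/* Assumed file-scope declarations (not part of the original excerpt). */
int mpi_size, mpi_rank, omp_tot_threads;
int n, m, nn, mpi_work;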
Example #6
void runPoisson(int rank, int size, int n){
  double time=MPI_Wtime();
  Real **b, *diag, *RecvBuf, h;
  int i, j, m, nn, *len, *disp;

  m  = n-1;
  nn = 4*n;
  splitVector(m, size, &len, &disp);
  diag = createRealArray (m);
  b    = createReal2DArray (len[rank],m);
  h    = 1./(Real)n;

  #pragma omp parallel for schedule(static)
  for (i=0; i < m; i++) {
    diag[i] = 2.*(1.-cos((i+1)*M_PI/(Real)n));
  }

  #pragma omp parallel for schedule(static) private(i)
  for (j=0; j < len[rank]; j++) {
    for (i=0; i < m; i++) {
      Real x = (Real)(j+1+disp[rank])/n;
      Real y = (Real)(i+1)/n;
      b[j][i] = h*h * funcf(x,y);
    }
  }

  #pragma omp parallel for schedule(static)
  for (j=0; j < len[rank]; j++) {
    Real* zt = createRealArray (nn);
    fst_(b[j], &n, zt, &nn);
    free(zt);
  }

  transpose(b, size, len, disp, rank, m);

  #pragma omp parallel for schedule(static)
  for (i=0; i < len[rank]; i++) {
    Real* zt  = createRealArray (nn);
    fstinv_(b[i], &n, zt, &nn);
    free(zt);
  }

  #pragma omp parallel for schedule(static) private(i)
  for (j=0; j < len[rank]; j++) {
    for (i=0; i < m; i++) {
      b[j][i] = b[j][i]/(diag[i]+diag[j+disp[rank]]);
    }
  }

  #pragma omp parallel for schedule(static)
  for (i=0; i < len[rank]; i++) {
    Real* zt  = createRealArray (nn);
    fst_(b[i], &n, zt, &nn);
    free(zt);
  }

  transpose(b, size, len, disp, rank, m);

  #pragma omp parallel for schedule(static)
  for (j=0; j < len[rank]; j++) {
    Real* zt  = createRealArray (nn);
    fstinv_(b[j], &n, zt, &nn);
    free(zt);
  }




  if (rank==0)
  {
    RecvBuf = createRealArray (m*m);
  } else {
    RecvBuf = NULL;  // only the root dereferences the receive buffer
  }
  gatherMatrix(b, m, RecvBuf, len, disp, 0);

  if (rank==0)
  {
    for (int j=0; j < m; j++) {
      for (int i=0; i < m; i++) {
        printf("%e %e %e \n",(Real)i/m,(Real)j/m,RecvBuf[j*m+i] );
      }
    }
    free(RecvBuf);
  }

  free(diag);
  free(b[0]);
  free(b);
}
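Example #6 relies on a splitVector helper to build the per-rank row counts len and offsets disp. A hedged sketch of that helper, mirroring the equivalent gsize/ssize computation in Example #8 (the real implementation may distribute the remainder differently):

#include <stdlib.h>

void splitVector (int m, int size, int **len, int **disp)
{
  *len  = (int *) malloc(size * sizeof(int));
  *disp = (int *) malloc(size * sizeof(int));
  int remain = m % size;   // leftover rows spread over the first ranks
  (*disp)[0] = 0;
  for (int q = 0; q < size; ++q) {
    (*len)[q] = m / size + (q < remain ? 1 : 0);
    if (q < size - 1)
      (*disp)[q + 1] = (*disp)[q] + (*len)[q];
  }
}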
Example #7
int main(int argc, char **argv )
{
	Real *diag, **b, **bt, *z;
	Real pi, h, umax;
	int i, j, n, m, nn;

	/* the total number of grid points in each spatial direction is (n+1) */
	/* the total number of degrees-of-freedom in each spatial direction is (n-1) */
	/* this version requires n to be a power of 2 */

	if( argc < 2 ) {
		printf("need a problem size\n");
		return 1;
	}

	n  = atoi(argv[1]);
	m  = n-1;
	nn = 4*n;

	diag = createRealArray (m);
	b    = createReal2DArray (m,m);
	bt   = createReal2DArray (m,m);
	z    = createRealArray (nn);

	h    = 1./(Real)n;
	pi   = 4.*atan(1.);

	for (i=0; i < m; i++) {
		diag[i] = 2.*(1.-cos((i+1)*pi/(Real)n));
	}
	for (j=0; j < m; j++) {
		for (i=0; i < m; i++) {
			b[j][i] = h*h;
		}
	}
	for (j=0; j < m; j++) {
		fst_(b[j], &n, z, &nn);
	}

	transpose (bt,b,m);

	for (i=0; i < m; i++) {
		fstinv_(bt[i], &n, z, &nn);
	}

	for (j=0; j < m; j++) {
		for (i=0; i < m; i++) {
			bt[j][i] = bt[j][i]/(diag[i]+diag[j]);
		}
	}

	for (i=0; i < m; i++) {
		fst_(bt[i], &n, z, &nn);
	}

	transpose (b,bt,m);

	for (j=0; j < m; j++) {
		fstinv_(b[j], &n, z, &nn);
	}

	umax = 0.0;
	for (j=0; j < m; j++) {
		for (i=0; i < m; i++) {
			if (b[j][i] > umax) umax = b[j][i];
		}
	}
	printf (" umax = %e \n",umax);
}
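Example #7 calls a three-argument transpose(bt, b, m) that is not shown. A minimal sketch of what such a serial helper could look like, inferred only from how the result is used (bt ends up holding the transpose of the m-by-m matrix b):

typedef double Real;   /* as in the other examples, assumed to be double */

void transpose (Real **bt, Real **b, int m)
{
  // bt[i][j] = b[j][i] for the full m x m matrix.
  for (int j = 0; j < m; j++)
    for (int i = 0; i < m; i++)
      bt[i][j] = b[j][i];
}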
Example #8
int main(int argc, char **argv )
{
  Real *diag, **b, **bt, **z;
  Real pi, h, umax, f;
  int i,j,n, m, nn, numthreads, currentthread;

  

  /* the total number of grid points in each spatial direction is (n+1) */
  /* the total number of degrees-of-freedom in each spatial direction is (n-1) */
  /* this version requires n to be a power of 2 */

 if( argc < 2 ) {
    printf("need a problem size\n");
    return 1;
  }
  
 Real starttime, endtime, runtime, maxtime, mintime, temptime, avgruntime;
 Real timer1, timer2, timer3, timer4,timer5,timer6,timer7,timer8;
  int size, rank;
  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD,&size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);


  n  = atoi(argv[1]);
  m  = n-1;
  nn = 4*n;
#ifdef HAVE_OPENMP
  numthreads = omp_get_max_threads();
#else
  numthreads = 1;
#endif
  
  
  // --- Distribute Work ---
  int *gsize;
  int *ssize;
  gsize = (int *)malloc(size*sizeof(int));
  ssize = (int *)malloc(size*sizeof(int));
  int remain = m % size;
  ssize[0] = 0;
  for (int q = 0;q<size;++q)
  {
    gsize[q] = m/size;
    if (q<remain) gsize[q]++;
    if (q<(size-1)) ssize[q+1] = ssize[q]+gsize[q];
  }
  // -----------------------
  

  diag = createRealArray (m);
  b    = createReal2DArray (gsize[rank],m);
  bt   = createReal2DArray (gsize[rank],m);
  z    = createReal2DArray (numthreads,nn); // one FST work buffer per OpenMP thread

  h    = 1./(Real)n;
  pi   = 4.*atan(1.);

  starttime = MPI_Wtime();

  #pragma omp parallel for schedule(static)
  for ( i=0; i < m; i++) {
    diag[i] = 2.*(1.-cos((i+1)*pi/(Real)n));
  }
  
  #pragma omp parallel for private(i,f) schedule(static)
  for ( j=0; j < gsize[rank]; j++) {
    for ( i=0; i < m; i++) {

      f = 5*pi*pi*sin(pi*h*(j+ssize[rank]+1))*sin(2*pi*h*(i+1));
      //f = 1.0;
      //f =2*( ((j+1+ssize[rank])*h)*((j+1+ssize[rank])*h) + (i+1)*(i+1)*h*h  );
      
      b[j][i] = h*h*f;
      //b[j][i] = (j+ssize[rank])*m+i; transpose test
    }
  }
  timer1 = MPI_Wtime();

  
  
  // ---------- DEBUG PART ---------------
    /*if (rank == 0)
  {
    printVector(b[0],m*gsize[rank]);
  }
  */
  /*
  if(rank ==0)
  {
    printf("\n\n\n");
    printMatrix(b,m,gsize[rank]);
    printf("\n\n\n");
  }
  
    transpose (bt,b,&m,m,size,rank,gsize,ssize);
  
  
  if(rank ==0)
  {
    printMatrix(bt,m,gsize[rank]);
    printf("\n\n\n");
  }
  */
  // ----------- DEBUG END ---------------
  
  // Alternatively, put the #ifdef outside the loop and use two separate loops
  #pragma omp parallel for private(currentthread) schedule(static)
  for ( j=0; j < gsize[rank]; j++) {
    currentthread = 0;
    #ifdef HAVE_OPENMP
      currentthread = omp_get_thread_num();
    #endif
    fst_(b[j], &n, z[currentthread], &nn);
  }
  timer2 = MPI_Wtime();
/*
  if (rank == 0)
  {
    printf("\n\n\n");
    printMatrix(b,m);
  }
*/  

  transpose (bt,b,&m,m,size,rank,gsize,ssize);
  timer3 = MPI_Wtime();
/*
  if (rank == 0)
    {
      printf("\n\n\n");
      printMatrix(bt,m);
      printf("\n\n\n");
  }
*/

  

  #pragma omp parallel for private(currentthread) schedule(static)
  for ( i=0; i < gsize[rank]; i++) {
    currentthread = 0;
    #ifdef HAVE_OPENMP
      currentthread = omp_get_thread_num();
    #endif
    fstinv_(bt[i], &n, z[currentthread], &nn);
  }
  timer4 = MPI_Wtime();
  #pragma omp parallel for private(i) schedule(static)
  for ( j=0; j < gsize[rank]; j++) {
    for ( i=0; i < m; i++) {
      bt[j][i] = bt[j][i]/(diag[i]+diag[j+ssize[rank]]); //offset implemented
    }
  }
  timer5 = MPI_Wtime();
  #pragma omp parallel for private(currentthread) schedule(static)
  for ( i=0; i < gsize[rank]; i++) {
    currentthread = 0;
    #ifdef HAVE_OPENMP
      currentthread = omp_get_thread_num();
    #endif
    fst_(bt[i], &n, z[currentthread], &nn);
  }
  timer6 = MPI_Wtime();
  
  transpose (b,bt,&m,m,size,rank,gsize,ssize);
  timer7 = MPI_Wtime();
  #pragma omp parallel for private(currentthread) schedule(static)
  for ( j=0; j < gsize[rank]; j++) {
    currentthread = 0;
    #ifdef HAVE_OPENMP
      currentthread = omp_get_thread_num();
    #endif
    fstinv_(b[j], &n, z[currentthread], &nn);
  }
  timer8 = MPI_Wtime();
  endtime = MPI_Wtime();
  
  runtime = endtime-starttime;
  timer8 -=timer7;timer7 -=timer6;timer6 -=timer5;timer5 -=timer4;
  timer4 -=timer3;timer3 -=timer2;timer2 -=timer1;timer1 -=starttime;
  
  MPI_Allreduce(&runtime, &maxtime, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
  MPI_Allreduce(&runtime, &mintime, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
  MPI_Allreduce(&runtime, &avgruntime, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  avgruntime /= (Real) size;

  MPI_Allreduce(&timer1, &timer1, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Allreduce(&timer2, &timer2, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Allreduce(&timer3, &timer3, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Allreduce(&timer4, &timer4, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Allreduce(&timer5, &timer5, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Allreduce(&timer6, &timer6, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Allreduce(&timer7, &timer7, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  MPI_Allreduce(&timer8, &timer8, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  timer1/=(Real)size;timer2/=(Real)size;timer3/=(Real)size;timer4/=(Real)size;
  timer5/=(Real)size;timer6/=(Real)size;timer7/=(Real)size;timer8/=(Real)size;

  if (rank == 0) printf("Slowest runtime: %e s\n", maxtime);
  if (rank == 0) printf("Fastest runtime: %e s\n", mintime);
  if (rank == 0) printf("Average runtime: %e s\n", avgruntime);
  
  if (rank == 0) printf("Init: \t\t %f p.c.\n", timer1/avgruntime);
  if (rank == 0) printf("FST1: \t\t %f p.c.\n", timer2/avgruntime);
  if (rank == 0) printf("Transpose1: \t %f p.c.\n", timer3/avgruntime);
  if (rank == 0) printf("FSTINV1: \t %f p.c.\n", timer4/avgruntime);
  if (rank == 0) printf("Diag: \t\t %f p.c.\n", timer5/avgruntime);
  if (rank == 0) printf("FST2: \t\t %f p.c.\n", timer6/avgruntime);
  if (rank == 0) printf("Transpose2: \t %f p.c.\n", timer7/avgruntime);
  if (rank == 0) printf("FSTINV2: \t %f p.c.\n", timer8/avgruntime);
  if (rank == 0) printf("\nFST TOTAL: \t\t %f p.c.\t%es\n",(timer2+timer4+timer6+timer8)/avgruntime,timer2+timer4+timer6+timer8);
  if (rank == 0) printf("\nTranspose TOTAL: \t %f p.c.\t%es\n",(timer3+timer7)/avgruntime,timer3+timer7);





  umax = 0.0;
  for ( j=0; j < gsize[rank]; j++) {
    for ( i=0; i < m; i++) {
      if (b[j][i] > umax) umax = b[j][i];
    }
  }

  // In-place reduction of the local maximum.
  MPI_Allreduce(MPI_IN_PLACE, &umax, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);

  if (rank == 0) printf (" umax = %e \n",umax);

  //printVector(b[m/4],m);
  
  
  Real locerr, err;
  
  locerr = checkError( b,m, h,size, rank, gsize, ssize);
  
  MPI_Allreduce(&locerr ,&err , 1 ,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);
  
  if (rank == 0) printf("\nMaximal error: %e\n", err);
  
  
  //printMatrix(b,m,m);
  
  /***********************************************************/
  /***********************************************************/
  /*                       FREE MEMORY                       */
  /***********************************************************/
  /***********************************************************/
  
  free(b[0]);
  free(bt[0]);
  free(b);
  free(bt);
  free(z[0]);
  free(z);
  free(diag);
  free(gsize);
  free(ssize);
  /***********************************************************/
  /***********************************************************/
  /***********************************************************/
  MPI_Finalize();

  return 0;
}
Example #9
void transpose (Real **bt, Real **b, int *sizemap, int m, int size, int rank, int *gsize, int *ssize)
{

  // gsize = number of rows owned by each rank (e.g. [3,3,2])
  // ssize = starting row of each rank's block (e.g. [0,3,6]); can be derived from gsize

  int l = gsize[rank];  // number of local rows (the "small" block dimension)
  int n = m;            // side length of the full matrix

  /*
  1) Build the send buffer
  2) Exchange blocks with MPI_Alltoallv
  3) Unpack the receive buffer
  4) The unpacking writes each element into its transposed position
  */
  

  Real *sendbuf;
  sendbuf = createRealArray(gsize[rank]*m); 

  // (one could use an if to choose which loop level gets the OpenMP pragma)
  for (int i = 0; i < size; ++i)
  {
#pragma omp parallel for schedule(static)
    for (int j = 0; j < gsize[rank] ; ++j)
      for (int k = 0;k < gsize[i] ; ++k)
        sendbuf[ gsize[rank]*ssize[i] + j*gsize[i] + k ] = b[ j ][ ssize[i] + k ];
  }
      
  Real *recbuf;
  recbuf = createRealArray(l*n); 
  int *sendcount;
  int *senddisp;
  
  sendcount = (int *)malloc(size*sizeof(int));
  senddisp = (int *)malloc(size*sizeof(int));
  
  senddisp[0] = 0;
  for (int i = 0; i < size; ++i)
  {
    sendcount[i] = gsize[i]*gsize[rank];
    if (i<(size-1))
      senddisp[i+1] = senddisp[i]+sendcount[i];
    /*if (rank == 0)
    {
    printf("i = %u ... rank = %u ...  %u,%u\n",i,rank,gsize[i],ssize[i]);
    printf("i = %u ... rank = %u ...  %u,%u\n",i,rank,sendcount[i],senddisp[i]);
    }*/
  }
  
  /*
  if (rank == 1)
    for (int q = 0;q<(gsize[rank]*m);++q)
      printf("%f\n",sendbuf[q]);
    */
  
  //if (rank == 0)
  //  printVector(b[0],m*l);
  
  
  
  
  MPI_Alltoallv(sendbuf, sendcount, senddisp, MPI_DOUBLE, recbuf, sendcount , senddisp, MPI_DOUBLE, MPI_COMM_WORLD);
  
  
  
  /*if (rank == 0)
    for (int q = 0;q<(gsize[rank]*m);++q)
      printf("%f\n",recbuf[q]);
  */
  
  for (int i = 0; i < size; ++i)
  {
#pragma omp parallel for schedule(static)
    for (int j = 0; j < gsize[i] ; ++j )
      for (int k = 0;k < gsize[rank] ; ++k)
      {
        //if (rank == 0)
        //  printf("(Col: %u ,Row: %u)\n",k,  ssize[i] + j);
        bt[ k ][ ssize[i] + j ] = recbuf[ ssize[i]*gsize[rank] + j*gsize[rank] + k ];
    }
  }
  free(sendcount);
  free(senddisp);
  free(recbuf);
  free(sendbuf);
}
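A quick sanity check of the count/displacement arithmetic, using the block sizes from the comments at the top of this routine (size = 3, gsize = {3,3,2}, ssize = {0,3,6}):

/*
 * On rank 0 (gsize[rank] = 3):
 *   sendcount = {3*3, 3*3, 3*2} = {9, 9, 6}
 *   senddisp  = {0, 9, 18}
 * Each pair of ranks exchanges a gsize[rank] x gsize[i] block, and because
 * gsize[i]*gsize[rank] is symmetric, the same arrays can be reused as the
 * receive counts and displacements in MPI_Alltoallv.
 */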
Example #10
void addBenchArg(struct benchmark* bench, struct benchArg* arg)
{
    long i;
    if (bench->start == NULL)
    {
        bench->start=arg;
        bench->current=bench->start;
    }
    else
    {
        bench->current->next=arg;
        bench->current=bench->current->next;
    }

    /* We need to create the storage for the output as it was not transmitted by the client */
    if (bench->current->isOutput == OUT_MODE)
    {
        switch(bench->current->data.dataType)
        {
        case kINT_STREAM:
            bench->current->data.value.intArray=createIntArray(bench->current->data.value.arrayLength);
            break;
        case kREAL_STREAM:
            bench->current->data.value.realArray=createRealArray(bench->current->data.value.arrayLength);
            break;
        case kFLOAT4_STREAM:
            bench->current->data.value.float4Array=createFloat4Array(bench->current->data.value.arrayLength);
            break;
        }
    }

    /* We need to generate the data for the generators */
    switch(bench->current->gen.generatorType)
    {
    case kCONST_GENERATOR:
        switch(bench->current->data.dataType)
        {
        case kINT_STREAM:
            bench->current->data.value.intArray=createIntArray(bench->current->gen.genData.constant.nb);
            for(i=0; i<bench->current->gen.genData.constant.nb; i++)
            {
                bench->current->data.value.intArray->samples[i]=bench->current->gen.genData.constant.intValue;
            }
            break;
        case kREAL_STREAM:
            bench->current->data.value.realArray=createRealArray(bench->current->gen.genData.constant.nb);
            for(i=0; i<bench->current->gen.genData.constant.nb; i++)
            {
                bench->current->data.value.realArray->samples[i]=bench->current->gen.genData.constant.floatValue;
            }
            break;
        case kFLOAT4_STREAM:
            bench->current->data.value.float4Array=createFloat4Array(bench->current->gen.genData.constant.nb);
            for(i=0; i<bench->current->gen.genData.constant.nb; i++)
            {
                bench->current->data.value.float4Array->samples[i].s[0]=bench->current->gen.genData.constant.floatValue;
                bench->current->data.value.float4Array->samples[i].s[1]=0;
                bench->current->data.value.float4Array->samples[i].s[2]=0;
                bench->current->data.value.float4Array->samples[i].s[3]=0;
            }
            break;
        }
        break;
    }
}
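Example #10 comes from a different code base than the solver examples: here createRealArray returns a container whose elements are reached through ->samples, with the length carried separately in data.value.arrayLength. A hypothetical sketch of that container and allocator, inferred only from the fields accessed above (the real type and field names may differ):

#include <stdlib.h>

/* Hypothetical container type; only the samples field is visible in Example #10. */
typedef struct {
    double *samples;
} realArrayType;

realArrayType *createRealArray (long length)
{
    realArrayType *a = (realArrayType *) malloc(sizeof *a);
    a->samples = (double *) calloc((size_t) length, sizeof(double));
    return a;
}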
Example #11
int main(int argc, char **argv )
{
  Real *diag, **A, *z;
  Real pi, h, umax, globalumax, emax, globalemax, error, time;
  int i, j, n, m, nn, b, re, l, bb, bre, rank, size;

  MPI_Init (&argc, &argv);
  MPI_Comm_rank (MPI_COMM_WORLD, &rank);
  MPI_Comm_size (MPI_COMM_WORLD, &size);

  /* the total number of grid points in each spatial direction is (n+1) */
  /* the total number of degrees-of-freedom in each spatial direction is (n-1) */
  /* this version requires n to be a power of 2 */

  if( argc < 2 ) {
    if (rank == 0) printf("need a problem size\n");
    MPI_Finalize();
    return 0;
  }

  n  = atoi(argv[1]);
  m  = n-1;
  nn = 4*n;

  h    = 1./(Real)n;
  pi   = 4.*atan(1.);

  b   = m/size;          // rows per rank (integer division)
  re  = m - (size-1)*b;  // rows on the last rank
  l   = b;
  bb  = b*b;
  bre = b*re;

  if(rank+1 == size) {
    l   = re;
    bb  = bre;
    bre = re*re;
  }

  diag = createRealArray (m);
  A    = createReal2DArray (l,m);
  z    = createRealArray (nn);

  time = MPI_Wtime();

  for (i=0; i < m; i++) {
    diag[i] = 2.*(1.-cos((i+1)*pi/(Real)n));
  }

  for (j=0; j < l; j++) {
    for (i=0; i < m; i++) {
      //        h^2 * f(x,y)
      A[j][i] = h*h*5*pi*pi*sin(pi*i*h)*sin(2*pi*(j + rank*b)*h);
    }
  }
  
  for (j=0; j < l; j++) {
    fst_(A[j], &n, z, &nn);
  }

  transpose(A, l, m, size, bb, bre);

  for (i=0; i < l; i++) {
    fstinv_(A[i], &n, z, &nn);
  }  

  for (j=0; j < l; j++) {
    for (i=0; i < m; i++) {
      A[j][i] = A[j][i]/(diag[i]+diag[j + rank*b]);
    }
  }
  
  for (i=0; i < l; i++) {
    fst_(A[i], &n, z, &nn);
  }

  transpose(A, l, m, size, bb, bre);

  for (j=0; j < l; j++) {
    fstinv_(A[j], &n, z, &nn);
  }

  umax = 0.0;
  emax = 0.0;
  for (j=0; j < l; j++) {
    for (i=0; i < m; i++) {
      // error =  abs( numerical u(x,y) - exact u(x,y) )
      error = fabs(A[j][i] - sin(pi*i*h)*sin(2*pi*(j + rank*b)*h));
      if (A[j][i] > umax) umax = A[j][i];
      if (error > emax) emax = error;
    }
  }

  MPI_Reduce (&umax, &globalumax, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
  MPI_Reduce (&emax, &globalemax, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
  if (rank == 0)
  {
    printf("elapsed: %f\n", MPI_Wtime()-time);
    printf ("umax = %e \n",globalumax);
    printf ("emax = %e \n",globalemax);
  }

  free(diag);
  free(A[0]);
  free(A);
  free(z);

  MPI_Finalize();
  return 0;
}