Example No. 1
int exch_addr(void)
{
    int i, rc;

    /* Exchange queue pair numbers and LIDs with every peer (one entry per rank). */
    rc = MPI_Alltoall((void *)conn.qp_num, sizeof(uint32_t), MPI_BYTE, 
            (void *)rbuf.qp_num, sizeof(uint32_t), MPI_BYTE, l_state.world_comm);
   
    assert(!rc); 
    rc = MPI_Alltoall((void *)conn.lid, sizeof(uint16_t), MPI_BYTE, 
            (void *)rbuf.lid, sizeof(uint16_t), MPI_BYTE, l_state.world_comm);
    assert(!rc); 

#ifdef DEBUG
    for (i = 0; i < nprocs; i++) {
        if (me == i)
            continue;
        fprintf(stdout,"[%d] Remote QP %d, Remote LID %u, Rkey %u, Lkey %u\n"
                " LBuf %p, RBuf %p\n", 
                me, rbuf.qp_num[i], rbuf.lid[i], rbuf.rkey[i], lbuf.mr->lkey,
                lbuf.buf, rbuf.buf[i]);
        fflush(stdout);
    }
#endif

    return 0;
}
Example No. 2
static void apply(const plan *ego_, R *I, R *O)
{
     const P *ego = (const P *) ego_;
     plan_rdft *cld1, *cld2, *cld2rest, *cld3;

     /* transpose locally to get contiguous chunks */
     cld1 = (plan_rdft *) ego->cld1;
     if (cld1) {
	  cld1->apply(ego->cld1, I, O);
	  
	  /* transpose chunks globally */
	  if (ego->equal_blocks)
	       MPI_Alltoall(O, ego->send_block_sizes[0], FFTW_MPI_TYPE,
			    I, ego->recv_block_sizes[0], FFTW_MPI_TYPE,
			    ego->comm);
	  else
	       MPI_Alltoallv(O, ego->send_block_sizes, ego->send_block_offsets,
			     FFTW_MPI_TYPE,
			     I, ego->recv_block_sizes, ego->recv_block_offsets,
			     FFTW_MPI_TYPE,
			     ego->comm);
     }
     else { /* TRANSPOSED_IN, no need to destroy input */
	  /* transpose chunks globally */
	  if (ego->equal_blocks)
	       MPI_Alltoall(I, ego->send_block_sizes[0], FFTW_MPI_TYPE,
			    O, ego->recv_block_sizes[0], FFTW_MPI_TYPE,
			    ego->comm);
	  else
	       MPI_Alltoallv(I, ego->send_block_sizes, ego->send_block_offsets,
			     FFTW_MPI_TYPE,
			     O, ego->recv_block_sizes, ego->recv_block_offsets,
			     FFTW_MPI_TYPE,
			     ego->comm);
	  I = O; /* final transpose (if any) is in-place */
     }
     
     /* transpose locally, again, to get ordinary row-major */
     cld2 = (plan_rdft *) ego->cld2;
     if (cld2) {
	  cld2->apply(ego->cld2, I, O);
	  cld2rest = (plan_rdft *) ego->cld2rest;
	  if (cld2rest) { /* leftover from unequal block sizes */
	       cld2rest->apply(ego->cld2rest,
			       I + ego->rest_Ioff, O + ego->rest_Ooff);
	       cld3 = (plan_rdft *) ego->cld3;
	       if (cld3)
		    cld3->apply(ego->cld3, O, O);
	       /* else TRANSPOSED_OUT is true and user wants O transposed */
	  }
     }
}
Example No. 3
/*
 * Class:     mpi_Intracomm
 * Method:    Alltoall
 * Signature:
 (Ljava/lang/Object;IILmpi/Datatype;Ljava/lang/Object;IILmpi/Datatype;)V
*/
JNIEXPORT void JNICALL Java_mpi_Intracomm_alltoall(JNIEnv *env, jobject jthis,
                                                   jobject sendbuf, jint sendoffset,
                                                   jint sendcount, jobject sendtype,
                                                   jobject recvbuf, jint recvoffset,
                                                   jint recvcount, jobject recvtype)
{
    MPI_Comm mpi_comm =
        (MPI_Comm)((*env)->GetLongField(env,jthis,ompi_java.CommhandleID)) ;

    MPI_Datatype mpi_stype = (MPI_Datatype)
        ((*env)->GetLongField(env,sendtype,ompi_java.DatatypehandleID)) ;
    MPI_Datatype mpi_rtype = (MPI_Datatype)
        ((*env)->GetLongField(env, recvtype, ompi_java.DatatypehandleID)) ;

    int sbaseType = (*env)->GetIntField(env, sendtype, ompi_java.DatatypebaseTypeID) ;
    int rbaseType = (*env)->GetIntField(env, recvtype, ompi_java.DatatypebaseTypeID) ;

    void *sendptr, *recvptr ;
    void *sbufbase, *rbufbase ;

    ompi_java_clearFreeList(env) ;

    recvptr = ompi_java_getBufPtr(&rbufbase, env, recvbuf, rbaseType, recvoffset) ;
    sendptr = ompi_java_getBufPtr(&sbufbase, env, sendbuf, sbaseType, sendoffset) ;

    MPI_Alltoall(sendptr, sendcount, mpi_stype,
                 recvptr, recvcount, mpi_rtype, mpi_comm) ;

    ompi_java_releaseBufPtr(env, sendbuf, sbufbase, sbaseType) ;
    ompi_java_releaseBufPtr(env, recvbuf, rbufbase, rbaseType) ;
}
Example No. 4
int main( int argc, char* argv[] )
{
  int i;
  int myrank, nprocs;
  char *sbuf,  *rbuf;
  int dsize;

  MPI_Init( &argc, &argv );
  
  MPI_Comm_rank( MPI_COMM_WORLD, &myrank );
  MPI_Comm_size( MPI_COMM_WORLD, &nprocs );
  MPI_Type_size(DATATYPE, &dsize);

  sbuf=(char*)malloc(SIZE*dsize*nprocs);
  rbuf=(char*)malloc(SIZE*dsize*nprocs);

  for( i=0; i<REPEAT; i++ )
    {
      MPI_Alltoall( sbuf, SIZE, DATATYPE,
		    rbuf, SIZE, DATATYPE,
		    MPI_COMM_WORLD );
    }

  free(sbuf);
  free(rbuf);

  MPI_Finalize();
  return 0;
}
Example No. 5
static inline void execute_predefined_op(int opnum, void* args, void* scratch) {
	if (opnum == -1) {
		MPI_Barrier(G_GOAL_WorldComm);
	}
	else if (opnum == -2) {
		struct bcast_args* bc = (struct bcast_args*) args;
		MPI_Bcast(bc->buffer, bc->count, MPI_BYTE, bc->root, G_GOAL_WorldComm);
	}
	else if (opnum == -3) {
		struct scatter_args* sc = (struct scatter_args*) args;
		MPI_Scatter(sc->sendbuffer, sc->count, MPI_BYTE, sc->recvbuffer, sc->count, MPI_BYTE, sc->root, G_GOAL_WorldComm);
	}
	else if (opnum == -4) {
		struct scatter_args* ga = (struct scatter_args*) args;
		MPI_Gather(ga->sendbuffer, ga->count, MPI_BYTE, ga->recvbuffer, ga->count, MPI_BYTE, ga->root, G_GOAL_WorldComm);
	}
	else if (opnum == -5) {
		struct alltoall_args* aa = (struct alltoall_args*) args;
		MPI_Alltoall(aa->sendbuffer, aa->count, MPI_BYTE, aa->recvbuffer, aa->count, MPI_BYTE, G_GOAL_WorldComm);
	}
	else if (opnum == -99) {
		/* dummy op - do nothing */
	}
	else  {
		printf("Predefined op number %i is not implemented yet\n", opnum);
	}
}
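
The argument structs dereferenced above are not shown in this snippet. A minimal sketch of what they presumably contain, inferred only from the fields accessed (buffer/count/root and sendbuffer/recvbuffer/count/root), might look like the following; these layouts are assumptions, not the library's actual definitions:

/* Hypothetical layouts, reconstructed from the field accesses above. */
struct bcast_args {
	void *buffer;       /* payload broadcast from root (sent as MPI_BYTE) */
	int count;          /* number of bytes */
	int root;           /* broadcast root rank */
};

struct scatter_args {   /* reused for both the scatter and gather cases above */
	void *sendbuffer;
	void *recvbuffer;
	int count;          /* bytes per rank */
	int root;           /* root rank */
};

struct alltoall_args {
	void *sendbuffer;   /* count bytes per destination rank */
	void *recvbuffer;   /* count bytes per source rank */
	int count;          /* bytes exchanged with each rank */
};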
Example No. 6
/* Out-of-place version of transpose_mpi (or rather, in place using
   a scratch array): */
static void transpose_mpi_out_of_place(transpose_mpi_plan p, int el_size,
				       TRANSPOSE_EL_TYPE *local_data,
				       TRANSPOSE_EL_TYPE *work)
{
     local_transpose_copy(local_data, work, el_size, p->local_nx, p->ny);

     if (p->all_blocks_equal)
	  MPI_Alltoall(work, p->send_block_size * el_size, p->el_type,
		       local_data, p->recv_block_size * el_size, p->el_type,
		       p->comm);
     else {
	  int i, n_pes = p->n_pes;

	  for (i = 0; i < n_pes; ++i) {
	       p->send_block_sizes[i] *= el_size;
	       p->recv_block_sizes[i] *= el_size;
	       p->send_block_offsets[i] *= el_size;
	       p->recv_block_offsets[i] *= el_size;
	  }
	  MPI_Alltoallv(work, p->send_block_sizes, p->send_block_offsets,
			p->el_type,
			local_data, p->recv_block_sizes, p->recv_block_offsets,
			p->el_type,
			p->comm);
	  for (i = 0; i < n_pes; ++i) {
	       p->send_block_sizes[i] /= el_size;
	       p->recv_block_sizes[i] /= el_size;
	       p->send_block_offsets[i] /= el_size;
	       p->recv_block_offsets[i] /= el_size;
	  }
     }

     do_permutation(local_data, p->perm_block_dest, p->num_perm_blocks,
		    p->perm_block_size * el_size);
}
Example No. 7
int main(int argc, char** argv)
{
  // Initialize MPI
  MPI_Init(&argc, &argv);

  int size, rank;

  // Figure out the number of processes and our rank in the world group
  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  if (size % 2) {
    printf("Need an even number of processes\n");
    MPI_Finalize();
    return 1;
  }

  // setup new communicators
  MPI_Comm twocomm;
  MPI_Comm_split(MPI_COMM_WORLD, rank/2, rank%2, &twocomm);

  int senddata[2], recvdata[2];
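  // Each 2-rank communicator swaps ranks: the slot addressed to the partner
  // carries our global rank, the slot addressed to ourselves carries 0.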
  senddata[(rank+1)%2] = rank;
  senddata[rank%2] = 0;
  MPI_Alltoall(senddata, 1, MPI_INT, recvdata, 1, MPI_INT, twocomm);

  // print to tty
  printf("process %i: received %i\n", rank, recvdata[(rank+1)%2]);

  // close down MPI
  MPI_Finalize();

  // ay-oh-kay
  return 0;
}
Example No. 8
void MADRE_exchange(MC* mc, int *myRecvCount, int *mySendCount){
  int i;
  Particle *p;
  p = mc->particles;
  //cache blockLength
  int blockLength = MADRE_BLOCK_LENGTH;

  /* MADRE_pack should have constructed an integer number of blocks */
  assert(mc->nparticles % (int)MADRE_BLOCK_LENGTH == 0);
  int liveBlocks = mc->nparticles/blockLength;
  for (i=0; i<liveBlocks; ++i) destRanks[i] = p[i*blockLength].proc;

  /* By default, this was set to zero */
  myRecvCount[mc->mype] = mySendCount[mc->mype];

  /* Organize destIndices by proc-rank order */
  displ[0] = 0; 
  for (i=1;i<(mc->nprocs);++i) displ[i] = displ[i-1] + myRecvCount[i-1]/blockLength;

  /* Alltoall where each proc can start receiving particles to get destIndices */
  MPI_Alltoall(displ, 1, MPI_INT, sdispl, 1, MPI_INT, MPI_COMM_WORLD);

  for (i=0; i<liveBlocks; ++i){
    destIndices[i]= sdispl[p[i*blockLength].proc];
    sdispl[p[i*blockLength].proc]++;
  }

  MADRE_redistribute(MADRE_particle, liveBlocks, destRanks, destIndices); 

  mc->nparticles = isum(myRecvCount, mc->nprocs);
  /* Each proc should have an integer number of blocks after exchanges */
  assert(mc->nparticles % (int)MADRE_BLOCK_LENGTH == 0);
}
Example No. 9
int main( int argc, char **argv )
{
    int send[4], recv[4];
    int rank, size, k;
    
    MPI_Init( &argc, &argv );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    MPI_Comm_size( MPI_COMM_WORLD, &size );
    
    if (size != 4) {
        printf("Error!:# of processors must be equal to 4\n");
        printf("Programm aborting....\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    for (k=0;k<size;k++) send[k] = (k+1) + rank*size;
    
    printf("%d : send = %d %d %d %d\n", rank, send[0], send[1], send[2], send[3]);
    
    MPI_Alltoall(send, 1, MPI_INT, recv, 1, MPI_INT, MPI_COMM_WORLD);
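    /* With 4 ranks, rank r sends {4r+1, 4r+2, 4r+3, 4r+4} and receives
       {r+1, r+5, r+9, r+13}; e.g. rank 0 prints "recv = 1 5 9 13". */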
    
    printf("%d : recv = %d %d %d %d\n", rank, recv[0], recv[1], recv[2], recv[3]);
    
    MPI_Finalize();
    return 0;
}
Example No. 10
/* run an exchange test with msgsz bytes per proc with bytes transferred
 * actually nproc*msgsz per exchange (all-to-all).
 */
double exchangetest(int iters, int msgsz) {
  int64_t starttime, endtime;
  int i;
  char *sendbuf, *recvbuf;

  sendbuf = malloc(msgsz*nproc);
  recvbuf = malloc(msgsz*nproc);

  if (sendbuf == NULL || recvbuf == NULL) {
    perror("malloc");
    exit(-1);
  }

  barrier();

  starttime = getMicrosecondTimeStamp();
  for (i=0; i<iters; i++) {
    MPI_Alltoall(sendbuf, msgsz, MPI_CHAR, 
		 recvbuf, msgsz, MPI_CHAR, MPI_COMM_WORLD);
  }
  endtime = getMicrosecondTimeStamp();

  free(sendbuf);
  free(recvbuf);

  return (endtime-starttime);
}
Example No. 11
int main(int argc, char *argv[])
{
    int rank, size;
    int chunk = 128;
    int i;
    int *sb;
    int *rb;
    int status;

    MTest_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    for (i = 1; i < argc; ++i) {
        if (argv[i][0] != '-')
            continue;
        switch (argv[i][1]) {
        case 'm':
            chunk = atoi(argv[++i]);
            break;
        default:
            fprintf(stderr, "Unrecognized argument %s\n", argv[i]);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }
    }

    sb = (int *) malloc(size * chunk * sizeof(int));
    if (!sb) {
        perror("can't allocate send buffer");
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    rb = (int *) malloc(size * chunk * sizeof(int));
    if (!rb) {
        perror("can't allocate recv buffer");
        free(sb);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    for (i = 0; i < size * chunk; ++i) {
        sb[i] = rank + 1;
        rb[i] = 0;
    }

    /* fputs("Before MPI_Alltoall\n",stdout); */

    /* This should really send MPI_CHAR, but since sb and rb were allocated
     * as chunk*size*sizeof(int), the buffers are large enough */
    status = MPI_Alltoall(sb, chunk, MPI_INT, rb, chunk, MPI_INT, MPI_COMM_WORLD);

    /* fputs("Before MPI_Allreduce\n",stdout); */

    MTest_Finalize(status);

    free(sb);
    free(rb);

    MPI_Finalize();

    return MTestReturnValue(status);
}
Example No. 12
void mpi_alltoall (void *sendbuf, MPI_Fint *sendcount, MPI_Fint *sendtype,
		   void *recvbuf, MPI_Fint *recvcount, MPI_Fint *recvtype, 
		   MPI_Fint *comm, MPI_Fint *__ierr)
{
  *__ierr = MPI_Alltoall (sendbuf, *sendcount, MPI_Type_f2c(*sendtype),
			  recvbuf, *recvcount, MPI_Type_f2c(*recvtype), 
			  MPI_Comm_f2c (*comm));
}
Example No. 13
int ReAllocateRasterBlock( void * SendBuf, int SendCount, MPI_Datatype SendType,
                           void * RecvBuf, int RecvCount, MPI_Datatype RecvType,
                           MPI_Comm Comm )
{
    return MPI_Alltoall(SendBuf, SendCount, SendType,
                        RecvBuf, RecvCount, RecvType,
                        Comm);
}
Example No. 14
 static void all_to_all(const communicator& comm, const std::vector<T>& in, std::vector<T>& out, int n = 1)
 {
   // NB: this will fail if T is a vector
   MPI_Alltoall(Datatype::address(const_cast<T&>(in[0])), n,
                Datatype::datatype(),
                Datatype::address(out[0]), n,
                Datatype::datatype(),
                comm);
 }
Example No. 15
int
kmr_exchange_sizes(KMR *mr, long *sbuf, long *rbuf)
{
    MPI_Comm comm = mr->comm;
    int cc;
    cc = MPI_Alltoall(sbuf, 1, MPI_LONG, rbuf, 1, MPI_LONG, comm);
    assert(cc == MPI_SUCCESS);
    return MPI_SUCCESS;
}
Example No. 16
FC_FUNC( mpi_alltoall , MPI_ALLTOALL )
                        ( void *sendbuf, int *sendcount, int *sendtype,
			  void *recvbuf, int *recvcount, int *recvtype,
                          int *comm, int *ierror )
{
  *ierror=MPI_Alltoall(sendbuf, *sendcount, *sendtype,
		       recvbuf, *recvcount, *recvtype,
		       *comm);
}
Example No. 17
int binGraph::exchange_edges(uint64_t m_read, uint64_t* read_edges,
                             int32_t* ranks,etype t)
{
  int32_t* scounts = (int32_t*)malloc(PCU_Comm_Peers()*sizeof(int32_t));
  int32_t* rcounts = (int32_t*)malloc(PCU_Comm_Peers()*sizeof(int32_t));
  int32_t* sdispls = (int32_t*)malloc(PCU_Comm_Peers()*sizeof(int32_t));
  int32_t* sdispls_cpy = (int32_t*)malloc(PCU_Comm_Peers()*sizeof(int32_t));
  int32_t* rdispls = (int32_t*)malloc(PCU_Comm_Peers()*sizeof(int32_t));
  for (int i = 0; i < PCU_Comm_Peers(); ++i)
  {
    scounts[i] = 0;
    rcounts[i] = 0;
    sdispls[i] = 0;
    sdispls_cpy[i] = 0;
    rdispls[i] = 0;
  }

  uint64_t n_per_rank = num_global_verts / PCU_Comm_Peers() + 1;
  for (uint64_t i = 0; i < m_read*2; i+=2)
  {
    uint64_t vert = read_edges[i];
    int vert_task = ranks[vert];
    scounts[vert_task] += 2;
  }

  /* Exchange per-destination counts so each rank learns how many edge
     entries it will receive from every other rank. */
  MPI_Alltoall(scounts, 1, MPI_INT32_T,
               rcounts, 1, MPI_INT32_T, PCU_Get_Comm());

  for (uint64_t i = 1; i < PCU_Comm_Peers(); ++i) {
    sdispls[i] = sdispls[i-1] + scounts[i-1];
    sdispls_cpy[i] = sdispls[i];
    rdispls[i] = rdispls[i-1] + rcounts[i-1];
  }
 
  int32_t total_send = sdispls[PCU_Comm_Peers()-1] + scounts[PCU_Comm_Peers()-1];
  int32_t total_recv = rdispls[PCU_Comm_Peers()-1] + rcounts[PCU_Comm_Peers()-1];
  uint64_t* sendbuf = (uint64_t*)malloc(total_send*sizeof(uint64_t));
  edge_list[t] = (uint64_t*)malloc(total_recv*sizeof(uint64_t));
  num_local_edges[t] = total_recv / 2;

  for (uint64_t i = 0; i < m_read*2; i+=2)
  {
    uint64_t vert1 = read_edges[i];
    uint64_t vert2 = read_edges[i+1];
    int vert_task = ranks[vert1];

    sendbuf[sdispls_cpy[vert_task]++] = vert1;
    sendbuf[sdispls_cpy[vert_task]++] = vert2;
  }

  MPI_Alltoallv(sendbuf, scounts, sdispls, MPI_UINT64_T,
                edge_list[t], rcounts, rdispls, MPI_UINT64_T, PCU_Get_Comm());
  free(sendbuf);
  free(scounts);
  free(rcounts);
  free(sdispls);
  free(sdispls_cpy);
  free(rdispls);

  return 0;
}
Example No. 18
void transpose(Real** recv, Real** send) {
  int i;
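  // Exchange one block-row with every rank per iteration, then locally
  // transpose each received block to complete the global transpose.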
  for (i = 0; i < mpi_work; i++) {
    MPI_Alltoall(send[i], mpi_work, MPI_DOUBLE,
                 recv[i], mpi_work, MPI_DOUBLE, MPI_COMM_WORLD);
  }
  for (i = 0; i < mpi_size; i++) {
    local_transpose(recv, i*mpi_work);
  }
}
Example No. 19
double measure_delayed_Alltoall(int send_count, MPI_Datatype send_dt, int recv_count, MPI_Datatype recv_dt, double delay, int node)
{
  double start_time, end_time;

  start_time = start_synchronization();
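  /* The selected rank busy-waits for 'delay' seconds so the benchmark can
     measure how one late arrival delays completion of the Alltoall. */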
  if( get_measurement_rank() == node )  while( wtime() < start_time + delay ) ;
  MPI_Alltoall(get_send_buffer(), send_count, send_dt,
	       get_recv_buffer(), recv_count, recv_dt, get_measurement_comm());
  end_time = stop_synchronization();
  return end_time - start_time;
}
Example No. 20
// Given how many numbers each process is sending to the other processes, find
// out how many numbers you are receiving from each process. This function
// returns an array of counts indexed on the rank of the process from which it
// will receive the numbers.
int *get_recv_amounts_per_proc(int *send_amounts_per_proc, int world_size) {
  int *recv_amounts_per_proc = (int *)malloc(sizeof(int) * world_size);

  // Perform an Alltoall for the send counts. This will send the send counts
  // from each process and place them in the recv_amounts_per_proc array of
  // the receiving processes to let them know how many numbers they will
  // receive when binning occurs.
  MPI_Alltoall(send_amounts_per_proc, 1, MPI_INT, recv_amounts_per_proc, 1,
               MPI_INT, MPI_COMM_WORLD);
  return recv_amounts_per_proc;
}
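
A typical next step, not shown in this snippet, is to turn the send counts and the counts returned above into displacements and move the actual numbers with MPI_Alltoallv. A minimal sketch, assuming int payloads on MPI_COMM_WORLD; the helper name exchange_binned_numbers is hypothetical:

#include <stdlib.h>
#include <mpi.h>

// Sketch only: redistribute the binned numbers once the per-rank counts are known.
// Returns a malloc'd buffer of total_recv ints; the caller frees it.
int *exchange_binned_numbers(int *sendbuf, int *send_amounts_per_proc,
                             int *recv_amounts_per_proc, int world_size) {
  int *sdispls = (int *)malloc(sizeof(int) * world_size);
  int *rdispls = (int *)malloc(sizeof(int) * world_size);

  // Prefix-sum the counts to get send/receive displacements.
  sdispls[0] = rdispls[0] = 0;
  for (int i = 1; i < world_size; i++) {
    sdispls[i] = sdispls[i - 1] + send_amounts_per_proc[i - 1];
    rdispls[i] = rdispls[i - 1] + recv_amounts_per_proc[i - 1];
  }
  int total_recv = rdispls[world_size - 1] + recv_amounts_per_proc[world_size - 1];
  int *recvbuf = (int *)malloc(sizeof(int) * total_recv);

  MPI_Alltoallv(sendbuf, send_amounts_per_proc, sdispls, MPI_INT,
                recvbuf, recv_amounts_per_proc, rdispls, MPI_INT,
                MPI_COMM_WORLD);

  free(sdispls);
  free(rdispls);
  return recvbuf;
}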
Example No. 21
void timing_basic_alltoall_nelements( int DIM1, int procs, int loop, char* testname, MPI_Comm local_communicator) {
      
  float* send_array;
  float* recv_array;
  
  int myrank;
  int base, typesize, bytes, i;
  char method[50];

  send_array = malloc( DIM1 * procs * sizeof(float));
  recv_array = malloc( DIM1 * procs * sizeof(float));

  MPI_Comm_rank( local_communicator, &myrank );

  base = myrank * DIM1 + 1;
  utilities_fill_unique_array_1D_float( &send_array[0], DIM1, base );

  if ( myrank == 0 ) {
    snprintf(method, 50, "reference");
        
    MPI_Type_size( MPI_FLOAT, &typesize );
    bytes = typesize * DIM1 * procs;

    timing_init( testname, &method[0], bytes );
  }

  for( i=0 ; i<loop ; i++ ) {
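    /* The second exchange sends the received data back, restoring send_array,
       so each timed iteration is a matched pair of Alltoalls. */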
    MPI_Alltoall(&send_array[0], DIM1, MPI_FLOAT, &recv_array[0], DIM1, MPI_FLOAT, local_communicator );
    MPI_Alltoall(&recv_array[0], DIM1, MPI_FLOAT, &send_array[0], DIM1, MPI_FLOAT, local_communicator );
    if ( myrank == 0 ) {
      timing_record(3);
    }
  }

  if ( myrank == 0 ) {
    timing_print( 1 );
  }

  free(send_array);
  free(recv_array);
}     
Example No. 22
HYPRE_Int
hypre_MPI_Alltoall( void               *sendbuf,
                    HYPRE_Int           sendcount,
                    hypre_MPI_Datatype  sendtype,
                    void               *recvbuf,
                    HYPRE_Int           recvcount,
                    hypre_MPI_Datatype  recvtype,
                    hypre_MPI_Comm      comm )
{
   return (HYPRE_Int) MPI_Alltoall(sendbuf, (hypre_int)sendcount, sendtype,
                                   recvbuf, (hypre_int)recvcount, recvtype, comm);
}
Example No. 23
char *
avtSamplePointCommunicator::CommunicateMessages(char **sendmessages,
                                                int   *sendcount,
                                                char **recvmessages,
                                                int   *recvcount)
{
#ifdef PARALLEL
    //
    // Figure out how much each processor needs to send/receive.
    //
    MPI_Alltoall(sendcount, 1, MPI_INT, recvcount, 1, MPI_INT, VISIT_MPI_COMM);

    //
    // Create a buffer we can receive into.
    //
    char *recvConcatList = CreateMessageStrings(recvmessages, recvcount,
                                                numProcs);
    
    //
    // Calculate the displacement lists.
    //
    int *senddisp = new int[numProcs];
    int *recvdisp = new int[numProcs];
    senddisp[0] = 0;
    recvdisp[0] = 0;
    for (int i = 1 ; i < numProcs ; i++)
    {
        senddisp[i] = senddisp[i-1] + sendcount[i-1];
        recvdisp[i] = recvdisp[i-1] + recvcount[i-1];
    }

    //
    // Do the actual transfer of sample points.   The messages arrays are
    // actually indexes into one big array.  Since MPI expects that big
    // array, give that (which is at location 0).
    //
    MPI_Alltoallv(sendmessages[0], sendcount, senddisp, MPI_CHAR,
                  recvmessages[0], recvcount, recvdisp, MPI_CHAR,
                  VISIT_MPI_COMM);

    delete [] senddisp;
    delete [] recvdisp;

    //
    // We need to return this buffer so the calling function can delete it.
    //
    return recvConcatList;
#else
    return 0;
#endif
}
Example No. 24
int main(int argc, char **argv)
{
   int *out, *in;
   int me, tasks, i, errcount = 0;
   double start,end,diff,avg_diff_usec;

   MPI_Init(&argc, &argv);
   MPI_Comm_size(MPI_COMM_WORLD,&tasks);
   if(tasks < 2) {
     printf("MUST RUN WITH AT LEAST 2 TASKS\n");
     errcount++;
     MPI_Finalize();
     exit(0);
   }

   MPI_Comm_rank(MPI_COMM_WORLD,&me);

   out=(int *)calloc(tasks, sizeof(int));
   in=(int *)calloc(tasks,sizeof(int));
   for(i=0;i<tasks;i++)  out[i] = me;

   MPI_Barrier(MPI_COMM_WORLD);
   if (!me) {
     start = MPI_Wtime();
   }

   for(i=0;i<ALLTOALL_COUNT;i++)
     MPI_Alltoall(out,1,MPI_INT,in,1,MPI_INT,MPI_COMM_WORLD);

   if (!me) {
     end = MPI_Wtime();
     diff = end - start;
     avg_diff_usec = diff * (1000000/ALLTOALL_COUNT);
     printf("AFTER ALLTOALLS, START TIME = %f, END TIME = %f, DIFF (sec) = %f,\n",start,end,diff);
     printf("\t\tITERS = %d, AVG (usec) = %f, EXPECTED = %d\n",ALLTOALL_COUNT,avg_diff_usec, EXPECTED_AVG_uSEC);
     if (avg_diff_usec < EXPECTED_AVG_uSEC) {
       printf ("Passed\n");
     }
     else if (avg_diff_usec < (2* EXPECTED_AVG_uSEC)) {
       printf ("Acceptable\n");
     }
     else {
       printf ("SLOW\n");
     }
     fflush (stdout);
   }

   MPI_Finalize();
   return 0;
}
Example No. 25
//does global transposition from sendbuffer to recvbuffer of count data (in BYTE). be careful with 2GB limits
QMP_status_t
QMP_comm_alltoall_mpi(QMP_comm_t comm, char* recvbuffer, char* sendbuffer, int count)
{
  QMP_status_t status=QMP_SUCCESS;
  ENTER;

  int err=MPI_Alltoall( (void*)sendbuffer, count, MPI_BYTE,
			(void*)recvbuffer, count, MPI_BYTE,
			comm->mpicomm);
  if(err != MPI_SUCCESS) status = err;

  LEAVE;
  return status;
}
Example No. 26
// ===========================================================================
// ===========================================================================
void test_mpi_alltoall(int rank, int num_cores) {

  int           i;
  unsigned int  time[10];
  int           *buf_in;
  int           *buf_out;


  buf_in = kt_malloc(512 * 4);
  buf_out = kt_malloc(512 * 4);
  for (i = 0; i < 512 * 1; i++) {
    buf_in[i] = 0;
    buf_out[i] = 0;
  }

  for (i = 0; i < 10; i++) {
    if (!rank) {
      kt_printf("entering %d\r\n", i);
    }
    MPI_Barrier(MPI_COMM_WORLD);
    ar_timer_reset();
    MPI_Alltoall(buf_out, 1, MPI_INT, 
                 buf_in,  1, MPI_INT, 
                 MPI_COMM_WORLD);
    time[i] = ar_timer_get_cycles();
    MPI_Barrier(MPI_COMM_WORLD);
    if (!rank) {
      kt_printf("done %d\r\n", i);
    }
    MPI_Barrier(MPI_COMM_WORLD);
  }

  kt_free(buf_in);
  kt_free(buf_out);

  if (!rank) {
    kt_printf("Alltoall time = %12d\r\n"
              "                %12d\r\n"
              "                %12d\r\n"
              "                %12d\r\n"
              "                %12d\r\n"
              "                %12d\r\n"
              "                %12d\r\n"
              "                %12d\r\n"
              "                %12d\r\n"
              "                %12d cycles\r\n",
              time[0], time[1], time[2], time[3], time[4],
              time[5], time[6], time[7], time[8], time[9]);
  }
}
Example No. 27
static void p_fill_ineed_ptrs(
  sptensor_t const * const tt,
  idx_t const mode,
  rank_info * const rinfo,
  MPI_Comm const comm)
{
  idx_t const m = mode;
  int size;
  int rank;
  MPI_Comm_size(comm, &size);
  MPI_Comm_rank(comm, &rank);

  rinfo->nlocal2nbr[m] = 0;
  rinfo->local2nbr_ptr[m] = (int *) calloc((size+1),  sizeof(int));
  rinfo->nbr2globs_ptr[m] = (int *) splatt_malloc((size+1) * sizeof(int));

  int * const local2nbr_ptr = rinfo->local2nbr_ptr[m];
  int * const nbr2globs_ptr = rinfo->nbr2globs_ptr[m];
  idx_t const * const mat_ptrs = rinfo->mat_ptrs[m];

  int pdest = 0;
  /* count recvs for each process */
  for(idx_t i=0; i < tt->dims[m]; ++i) {
    /* grab global index */
    idx_t const gi = (tt->indmap[m] == NULL) ? i : tt->indmap[m][i];
    /* move to the next processor if necessary */
    while(gi >= mat_ptrs[pdest+1]) {
      ++pdest;
    }

    assert(pdest < size);
    assert(gi >= mat_ptrs[pdest]);
    assert(gi < mat_ptrs[pdest+1]);

    /* if it is non-local */
    if(pdest != rank) {
      local2nbr_ptr[pdest] += 1;
      rinfo->nlocal2nbr[m] += 1;
    }
  }

  /* communicate local2nbr and receive nbr2globs */
  MPI_Alltoall(local2nbr_ptr, 1, MPI_INT, nbr2globs_ptr, 1, MPI_INT, comm);

  rinfo->nnbr2globs[m] = 0;
  for(int p=0; p < size; ++p) {
    rinfo->nnbr2globs[m] += nbr2globs_ptr[p];
  }
  nbr2globs_ptr[size] = rinfo->nnbr2globs[m];
}
Example No. 28
/**
  * Opens channel and initializes all fields of the channel_t structure.
  */
void channel_open(channel_t * ch, int direction, int *swap_me)
{
	direction = (direction != 0);	// Clamps direction flag to {0, 1}.
	if(channel_registerTag(ch->tag)) error("channel_open(%s): tag '%d' is used already.", ch->name, ch->tag);

	int *swap_they = (int *) calloc(cpu_total, sizeof(int));	// Allocates second exchange list.
	MPI_Alltoall(swap_me, 1, MPI_INT, swap_they, 1, MPI_INT, MPI_COMM_WORLD);	// Exchanges by invitations.
			
	channel_allocateSide(ch, direction, swap_me);	// Allocates two array of sockets/requests.
	channel_allocateSide(ch, 1 - direction, swap_they);

	free(swap_they);	// Returns memory.
	ch->open = 1;		// Marks success.
}
Example No. 29
int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);

  int i, myrank, numranks, groupsize;
  int dims[3] = {0, 0, 0};          
  int temp[3] = {0, 0, 0};          
  int coord[3] = {0, 0, 0};          
  int periods[3] = {1, 1, 1};
  double startTime, stopTime;

  MPI_Comm cartcomm, subcomm;

  MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
  MPI_Comm_size(MPI_COMM_WORLD, &numranks);

  dims[MP_X] = atoi(argv[1]);
  dims[MP_Y] = atoi(argv[2]);
  dims[MP_Z] = atoi(argv[3]);
  MPI_Dims_create(numranks, 3, dims);
  MPI_Cart_create(MPI_COMM_WORLD, 3, dims, periods, 1, &cartcomm);
  MPI_Cart_get(cartcomm, 3, dims, periods, coord);
  temp[MP_X] = 0; temp[MP_Y] = 1; temp[MP_Z] = 0;
  MPI_Cart_sub(cartcomm, temp, &subcomm);

  MPI_Comm_size(subcomm,&groupsize);
  int perrank = atoi(argv[4]);
  char *sendbuf = (char*)malloc(perrank*groupsize);
  char *recvbuf = (char*)malloc(perrank*groupsize);

  MPI_Barrier(cartcomm);
  MPI_Pcontrol(1);
  startTime = MPI_Wtime();

  // Time MAX_ITER exchanges over the Y-direction sub-communicator created above.
  for (i=0; i<MAX_ITER; i++) {
    MPI_Alltoall(sendbuf, perrank, MPI_CHAR, recvbuf, perrank, MPI_CHAR, subcomm);
  }

  MPI_Barrier(cartcomm);
  stopTime = MPI_Wtime();
  MPI_Pcontrol(0);

  if(myrank == 0) {
    printf("Completed %d iterations for subcom size %d, perrank %d\n", i, groupsize, perrank);
    printf("Time elapsed: %f\n", stopTime - startTime);
  }

  MPI_Finalize();
  return 0;
}
Example No. 30
int main(int argc, char *argv[]) {
  int myid, size;
  int *each_vector, data_id, send_count;
  int *recv_vector, recv_count;
  //char output_msg[16];
  int i, c;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &myid);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  // print size by root
  //if (myid == 0) {
  //  printf("Process Count: %d\n", size);
  //}

  for (c = 0; c < 10; c++) {
    send_count = size * (c+1);
    recv_count = size * (c+1);

    // init vector on each process
    each_vector = (int*)malloc(send_count * sizeof(int));
    data_id = myid * 1000;
    for (i = 0; i < send_count; i++) {
      each_vector[i] = data_id + i;
    }
    // print vector
    //snprintf(output_msg, 16, "Rank[%d]: ", myid);
    //print_array(each_vector, send_count, output_msg);

    // init recv vector
    recv_vector = (int*)malloc(recv_count * sizeof(int));
    for (i = 0; i < recv_count; i++) {
      recv_vector[i] = -1;
    }

    // do alltoall
    MPI_Alltoall(each_vector, c+1, MPI_INT,
		 recv_vector, c+1, MPI_INT, MPI_COMM_WORLD);

    //print_array(recv_vector, recv_count, output_msg);

    free(each_vector);
    free(recv_vector);
  }
  
  MPI_Finalize();
  return 0;
}