Example #1
int main( int argc, char **argv )
{
    int      *sendbuf, *recvcounts;
    int      block_size;
    int      *recvbuf;
    int      size, rank, i, err = 0;
    MPI_Comm comm;
    MPI_Op left_op, right_op, nc_sum_op;

    MTest_Init( &argc, &argv );
    comm = MPI_COMM_WORLD;

    MPI_Comm_size( comm, &size );
    MPI_Comm_rank( comm, &rank );

    MPI_Op_create(&left, 0/*non-commutative*/, &left_op);
    MPI_Op_create(&right, 0/*non-commutative*/, &right_op);
    MPI_Op_create(&nc_sum, 0/*non-commutative*/, &nc_sum_op);

    for (block_size = 1; block_size < MAX_BLOCK_SIZE; block_size *= 2) {
        sendbuf = (int *) malloc( block_size * size * sizeof(int) );
        recvbuf = malloc( block_size * sizeof(int) );

        for (i=0; i<(size*block_size); i++)
            sendbuf[i] = rank + i;
        for (i=0; i<block_size; i++)
            recvbuf[i] = 0xdeadbeef;
        recvcounts = (int *)malloc( size * sizeof(int) );
        for (i=0; i<size; i++)
            recvcounts[i] = block_size;

        MPI_Reduce_scatter( sendbuf, recvbuf, recvcounts, MPI_INT, left_op, comm );
        for (i = 0; i < block_size; ++i)
            if (recvbuf[i] != (rank * block_size + i)) ++err;

        MPI_Reduce_scatter( sendbuf, recvbuf, recvcounts, MPI_INT, right_op, comm );
        for (i = 0; i < block_size; ++i)
            if (recvbuf[i] != ((size - 1) + (rank * block_size) + i)) ++err;

        MPI_Reduce_scatter( sendbuf, recvbuf, recvcounts, MPI_INT, nc_sum_op, comm );
        for (i = 0; i < block_size; ++i) {
            int x = rank * block_size + i;
            if (recvbuf[i] != (size*x + (size-1)*size/2)) ++err;
        }

        free(recvcounts);
        free(recvbuf);
        free(sendbuf);
    }

    MPI_Op_free(&left_op);
    MPI_Op_free(&right_op);
    MPI_Op_free(&nc_sum_op);

    MTest_Finalize( err );
    MPI_Finalize( );

    return err;
}
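The user-defined operations left, right, and nc_sum are defined elsewhere in the original test file; as a hedged sketch (not the test's exact implementation), a non-commutative MPI_User_function for the left_op case could look like this:

#include <mpi.h>

/* Illustrative non-commutative reduction ("take the left operand"): MPI
 * combines contributions of a non-commutative op in rank order, invec being
 * the lower-rank side, and expects inoutvec[i] = invec[i] op inoutvec[i].
 * Keeping invec therefore makes the reduction return rank 0's data, which is
 * what the left_op checks in the example expect. */
static void left(void *invec, void *inoutvec, int *len, MPI_Datatype *dtype)
{
    int i;
    int *in = (int *) invec;
    int *inout = (int *) inoutvec;
    (void) dtype;                 /* the example only reduces MPI_INT */
    for (i = 0; i < *len; i++)
        inout[i] = in[i];
}

/* Registered as in main() above, with commute = 0 marking it non-commutative:
 *   MPI_Op_create(&left, 0, &left_op);                                       */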
Example #2
int main(int argc, char **argv)
{
	MPI_Init(&argc, &argv);
	int rank, size;
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);
	int *nsizes = (int*) malloc(sizeof(int) * size);
	int total = 0;
	for (int i = 0; i < size; i++) {
		// ERROR HERE, different sizes for each process
		nsizes[i] = (i % 3 + 1) + rank; 
		total += nsizes[i];
	}
	int *data = (int*) malloc(sizeof(int) * total);

	int *out = (int*) malloc(sizeof(int) * nsizes[rank]);
	for (int i = 0; i < total; i++) {
		data[i] = (rank + 1) * 100 + i;
	}
	MPI_Reduce_scatter(data, out, nsizes, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
	free(data);
	for (int i = 0; i < nsizes[rank]; i++) {
		printf("%i %i\n", rank, out[i]);
	}
	free(out);
	MPI_Finalize();
	return 0;
}
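The marked error is that nsizes, the recvcounts argument, differs between ranks, while MPI_Reduce_scatter requires every process to pass identical recvcounts. A corrected sketch under that constraint (kept deliberately close to the code above; the fix simply drops the rank-dependent term):

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
	MPI_Init(&argc, &argv);
	int rank, size;
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);

	/* Same recvcounts on every rank: no rank-dependent term. */
	int *nsizes = (int*) malloc(sizeof(int) * size);
	int total = 0;
	for (int i = 0; i < size; i++) {
		nsizes[i] = i % 3 + 1;
		total += nsizes[i];
	}

	int *data = (int*) malloc(sizeof(int) * total);
	int *out = (int*) malloc(sizeof(int) * nsizes[rank]);
	for (int i = 0; i < total; i++) {
		data[i] = (rank + 1) * 100 + i;
	}

	/* Every rank contributes total elements; rank i receives the
	 * nsizes[i] element-wise sums of its own segment. */
	MPI_Reduce_scatter(data, out, nsizes, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

	for (int i = 0; i < nsizes[rank]; i++) {
		printf("%i %i\n", rank, out[i]);
	}

	free(data);
	free(out);
	free(nsizes);
	MPI_Finalize();
	return 0;
}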
void SparseMsg::setPattern(UInt num, const UInt *proc) {

  UInt csize = Par::Size();

  // Set dest proc
  nsend = num;

  std::vector<int> sendto(nproc, 0);
  std::vector<int> counts(nproc, 1);
  for (UInt i = 0; i < num; i++) {
    ThrowRequire(proc[i] < csize);
    sendto[proc[i]] = 1;
    if (proc[i] == (UInt) rank) {
      sendself = true;
      self_idx = i;
    }
  }

  if (!Par::Serial())
    MPI_Reduce_scatter(&sendto[0], &num_incoming, &counts[0], MPI_INT, MPI_SUM, comm);
  else
    num_incoming = sendto[0];
//std::cout << "Proc:" << rank << "to receive " << num_incoming << " messages" << std::endl;

  // Set up send buffers (so we don't have to save the proc ids)
  if (nsend > 0) outBuffers.resize(nsend); else outBuffers.clear();
  for (UInt i = 0; i < nsend; i++) {
    outBuffers[i].proc = proc[i];
  }
}
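The MPI_Reduce_scatter call above is the standard "how many ranks will send to me" idiom: each rank contributes a 0/1 flag per destination, all recvcounts are 1, and the summed flag delivered to each rank is its incoming-message count. A minimal standalone sketch of the idiom (function and parameter names here are illustrative):

#include <stdlib.h>
#include <mpi.h>

/* Returns how many ranks in comm will send this rank a message, given the
 * list of destinations this rank itself sends to. */
static int count_incoming(MPI_Comm comm, const int *dests, int ndest)
{
    int size, i, num_incoming = 0;
    MPI_Comm_size(comm, &size);

    int *sendto = (int *) calloc(size, sizeof(int));   /* 0/1 flag per rank  */
    int *counts = (int *) malloc(size * sizeof(int));  /* one element apiece */
    for (i = 0; i < size; i++) counts[i] = 1;
    for (i = 0; i < ndest; i++) sendto[dests[i]] = 1;

    /* Element r of the element-wise sum = number of ranks that flagged r. */
    MPI_Reduce_scatter(sendto, &num_incoming, counts, MPI_INT, MPI_SUM, comm);

    free(sendto);
    free(counts);
    return num_incoming;
}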
void mpi_reduce_scatter_f(char *sendbuf, char *recvbuf, 
			  MPI_Fint *recvcounts, MPI_Fint *datatype,
			  MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr)
{
    MPI_Comm c_comm;
    MPI_Datatype c_type;
    MPI_Op c_op;
    int size;
    OMPI_ARRAY_NAME_DECL(recvcounts);

    c_comm = MPI_Comm_f2c(*comm);
    c_type = MPI_Type_f2c(*datatype);
    c_op = MPI_Op_f2c(*op);

    MPI_Comm_size(c_comm, &size);
    OMPI_ARRAY_FINT_2_INT(recvcounts, size);

    sendbuf = (char *) OMPI_F2C_IN_PLACE(sendbuf);
    sendbuf = (char *) OMPI_F2C_BOTTOM(sendbuf);
    recvbuf = (char *) OMPI_F2C_BOTTOM(recvbuf);
    
    *ierr = OMPI_INT_2_FINT(MPI_Reduce_scatter(sendbuf, recvbuf,
				       OMPI_ARRAY_NAME_CONVERT(recvcounts),
				       c_type, c_op, c_comm));
}
Example #5
int main(int argc,char *argv[])
{
    int rank, num_of_processes;
    int *rdata,*sdata,*rcounts,*expected_data;
    int i,j;

    MPI_Comm comm = MPI_COMM_WORLD;

    MPI_Init(&argc,&argv);
    MPI_Comm_size( comm, &num_of_processes);
    MPI_Comm_rank( comm, &rank);
    sdata = malloc(sizeof(int)*num_of_processes); 
    rcounts = malloc(sizeof(int)*num_of_processes); 
    // malloc more than necessary .. it's a test
    rdata = malloc(sizeof(int)*num_of_processes);
    expected_data = malloc(sizeof(int)*num_of_processes); 

    /**
     * Build send arrays like the following:
     * rank_0 = 0 1 2
     * rank_1 = 1 2 3
     * rank_2 = 2 3 4
     *
     * After the reduce-scatter each receive buffer holds an equal number of
     * items (one here):
     * rank_0 = 3
     * rank_1 = 6
     * rank_2 = 9
     **/

    for(i=0; i<num_of_processes; i++) {
        sdata[i] = rank+i;
        expected_data[i] = 0;
        rcounts[i] = 1;
    }

    if(rank == 0) {
        printf("Checking mpi_reduce_scatter... (if you see no output then you are good)\n");
    }

    // need to get expected values!
    for(i=0; i<num_of_processes; i++) {
        for(j=0; j<rcounts[i]; j++) {
            expected_data[j] += i+rank;
        }
    }

    MPI_Reduce_scatter( sdata, rdata, rcounts, MPI_INT, MPI_SUM, MPI_COMM_WORLD );

    for(j=0; j<rcounts[rank]; j++) {
        if(rdata[j] != expected_data[j]) {
            printf("ERROR: Received at rank %d rdata[%d] = %d expected %d\n", rank, j, rdata[j], expected_data[j]);
        } 
    }
    MPI_Finalize();
    return 0;
}
int PLA_MPI_Reduce_scatter(	/* aka Distributed Reduce */
	void *		sendbuf, 
	void *		recvbuf, 
	int *		rcounts,
	MPI_Datatype 	datatype,
	MPI_Op 		op, 
	MPI_Comm 	comm)
{
    return (MPI_Reduce_scatter ( sendbuf, recvbuf, rcounts, datatype, op, comm ));
}
Example #7
// MPI_Allreduce with Reduce_scatter and Allgather
inline void execute_GL_Allreduce_as_ReducescatterAllgather(collective_params_t* params) {

    MPI_Reduce_scatter(params->sbuf, params->tmp_buf, params->counts_array,
            params->datatype, params->op, MPI_COMM_WORLD);

    MPI_Allgather(params->tmp_buf, params->count, params->datatype,
            params->rbuf, params->count, params->datatype,
            MPI_COMM_WORLD);

}
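The decomposition relies on the counts being consistent: for a buffer of count elements on p ranks, each entry of counts_array is one rank's chunk size and the Allgather reassembles the chunks into the full reduced vector. A self-contained sketch, assuming the element count divides evenly by the number of ranks (names here are illustrative, not the benchmark's actual parameter layout):

#include <stdlib.h>
#include <mpi.h>

/* MPI_Allreduce(sbuf, rbuf, count, MPI_INT, MPI_SUM, comm) expressed as a
 * reduce-scatter of equal chunks followed by an allgather of the chunks. */
static void allreduce_as_reducescatter_allgather(const int *sbuf, int *rbuf,
                                                 int count, MPI_Comm comm)
{
    int nprocs, i;
    MPI_Comm_size(comm, &nprocs);

    int chunk = count / nprocs;                /* assumes count % nprocs == 0 */
    int *counts = (int *) malloc(nprocs * sizeof(int));
    int *tmp = (int *) malloc(chunk * sizeof(int));
    for (i = 0; i < nprocs; i++) counts[i] = chunk;

    /* Each rank first gets the fully reduced chunk it owns... */
    MPI_Reduce_scatter((void *) sbuf, tmp, counts, MPI_INT, MPI_SUM, comm);
    /* ...then the allgather concatenates the chunks into the full result. */
    MPI_Allgather(tmp, chunk, MPI_INT, rbuf, chunk, MPI_INT, comm);

    free(counts);
    free(tmp);
}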
Example #8
//------------------------------------------------------------------------
int mirrorProcs(MPI_Comm comm, std::vector<int>& toProcs, std::vector<int>& fromProcs)
{
  fromProcs.resize(0);
#ifdef FEI_SER
  fromProcs.push_back(0);
  return(0);
#else
  int num_procs = fei::numProcs(comm);
  std::vector<int> tmpIntData(num_procs*3, 0);

  int* buf = &tmpIntData[0];
  int* recvbuf = buf+num_procs;

  for(unsigned i=0; i<toProcs.size(); ++i) {
    buf[toProcs[i]] = 1;
  }

  for(int ii=2*num_procs; ii<3*num_procs; ++ii) {
    buf[ii] = 1;
  }

  CHK_MPI( MPI_Reduce_scatter(buf, &(buf[num_procs]), &(buf[2*num_procs]),
                              MPI_INT, MPI_SUM, comm) );

  int numRecvProcs = buf[num_procs];

  int tag = 11116;
  std::vector<MPI_Request> mpiReqs(numRecvProcs);

  int offset = 0;
  for(int ii=0; ii<numRecvProcs; ++ii) {
    CHK_MPI( MPI_Irecv(&(recvbuf[ii]), 1, MPI_INT, MPI_ANY_SOURCE, tag,
                       comm, &(mpiReqs[offset++])) );
  }

  for(unsigned i=0; i<toProcs.size(); ++i) {
    CHK_MPI( MPI_Send(&(toProcs[i]), 1, MPI_INT, toProcs[i], tag, comm) );
  }

  MPI_Status status;
  for(int ii=0; ii<numRecvProcs; ++ii) {
    int index;
    MPI_Waitany(numRecvProcs, &mpiReqs[0], &index, &status);
    fromProcs.push_back(status.MPI_SOURCE);
  }

  std::sort(fromProcs.begin(), fromProcs.end());

  return(0);
#endif
}
Example #9
void compute_reduce_scatter(ATYPE *matrix, ATYPE *vector, ATYPE *result, int local_rank, int proc_size , long N, long partition) {
  ATYPE *temp_result = init_vector(N,0);
  int recvcounts[proc_size];


  for( int i=0; i < proc_size; i++) {
    recvcounts[i] = N;
  }

  for ( int i = 0 ; i < partition ; ++i ){
    for ( int j = 0 ; j < N ; ++j ){
      temp_result[j] = temp_result[j] + matrix[i*N+j] * vector[i];
    }
  }

  MPI_Reduce_scatter(temp_result, result, recvcounts, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

}
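One caveat with the function above: MPI_Reduce_scatter expects the send buffer to hold sum(recvcounts) elements on every rank, which with recvcounts[i] = N would be N * proc_size, more than the N-element temp_result provides. A hedged sketch of an evenly partitioned variant (ATYPE taken to be int for illustration, and N assumed divisible by proc_size):

#include <stdlib.h>
#include <mpi.h>

/* Each rank holds `partition` rows of the matrix, accumulates a full-length
 * partial product, and keeps only its own slice of the reduced vector. */
void compute_reduce_scatter_partitioned(const int *matrix, const int *vector,
                                        int *result, int proc_size,
                                        long N, long partition)
{
    int chunk = (int) (N / proc_size);          /* assumes N % proc_size == 0 */
    int *recvcounts = (int *) malloc(proc_size * sizeof(int));
    int *temp_result = (int *) calloc(N, sizeof(int));

    for (int i = 0; i < proc_size; i++)
        recvcounts[i] = chunk;

    for (long i = 0; i < partition; ++i)
        for (long j = 0; j < N; ++j)
            temp_result[j] += matrix[i * N + j] * vector[i];

    /* Send buffer length == sum(recvcounts) == N, as required; rank r
     * receives elements [r*chunk, (r+1)*chunk) of the summed vector. */
    MPI_Reduce_scatter(temp_result, result, recvcounts, MPI_INT, MPI_SUM,
                       MPI_COMM_WORLD);

    free(recvcounts);
    free(temp_result);
}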
int main( int argc, char **argv )
{
    int      err = 0, toterr;
    int      *sendbuf, *recvbuf, *recvcounts;
    int      size, rank, i, sumval;
    MPI_Comm comm;


    MPI_Init( &argc, &argv );
    comm = MPI_COMM_WORLD;

    MPI_Comm_size( comm, &size );
    MPI_Comm_rank( comm, &rank );
    sendbuf = (int *) malloc( size * sizeof(int) );
    for (i=0; i<size; i++)
        sendbuf[i] = rank + i;
    recvcounts = (int *)malloc( size * sizeof(int) );
    recvbuf = (int *)malloc( size * sizeof(int) );
    for (i=0; i<size; i++)
        recvcounts[i] = 1;
    MPI_Reduce_scatter( sendbuf, recvbuf, recvcounts, MPI_INT, MPI_SUM, comm );
    sumval = size * rank + ((size - 1) * size)/2;
    /* recvbuf[0] should be size * rank + ((size - 1) * size)/2 */
    if (recvbuf[0] != sumval) {
        err++;
        fprintf( stdout, "Did not get expected value for reduce scatter\n" );
        fprintf( stdout, "[%d] Got %d expected %d\n", rank, recvbuf[0], sumval );
    }

    MPI_Allreduce( &err, &toterr, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
    if (rank == 0 && toterr == 0) {
        printf( " No Errors\n" );
    }
    free(sendbuf);
    free(recvcounts);
    free(recvbuf);
    
    MPI_Finalize( );

    return toterr;
}
int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);

    int rank, num_procs;
    MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    if(num_procs < 4) {
      MPI_Finalize();
      return 1;
    }

    srand(rank);

    int *buffer = (int *)malloc(num_procs * sizeof(int));
    int i;
    printf("process %d:\n", rank);
    for (i = 0; i < num_procs; i++) {
      buffer[i] = rand() % 10;
      printf("%d ", buffer[i]);
    }
    printf("\n");
    
    int local_sum = 0;
    for (i = 0; i < num_procs; i++) {
      local_sum += buffer[i];
    }

    printf("local sum of process %d is %d\n", rank, local_sum);

    int *recv_counts = (int *)malloc(num_procs * sizeof(int));
    for (i = 0; i < num_procs; i++) {
        recv_counts[i] = 1;
    }

    int recv_buffer = 0;
    MPI_Reduce_scatter(buffer, &recv_buffer, recv_counts, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    
    printf("recv buffer of process %d is %d\n", rank, recv_buffer);
    
    MPI_Finalize();
}
Example #12
/*
 * Class:     mpi_Intracomm
 * Method:    Reduce_scatter
 * Signature:
 (Ljava/lang/Object;ILjava/lang/Object;I[ILmpi/Datatype;Lmpi/Op;)V
*/
JNIEXPORT void JNICALL Java_mpi_Intracomm_reduce_1scatter(JNIEnv *env,
                                                          jobject jthis,
                                                          jobject sendbuf, jint sendoffset,
                                                          jobject recvbuf, jint recvoffset,
                                                          jintArray recvcount,
                                                          jobject type, jobject op)
{
    jint *rcount;
    jboolean isCopy ;

    MPI_Comm mpi_comm =
        (MPI_Comm)((*env)->GetLongField(env,jthis,ompi_java.CommhandleID)) ;

    MPI_Datatype mpi_type =
        (MPI_Datatype)((*env)->GetLongField(env,type,ompi_java.DatatypehandleID)) ;

    int baseType = (*env)->GetIntField(env, type, ompi_java.DatatypebaseTypeID) ;

    void *sendptr, *recvptr ;
    void *sbufbase, *rbufbase ;

    ompi_java_clearFreeList(env) ;

    rcount=(*env)->GetIntArrayElements(env,recvcount,&isCopy);

    recvptr = ompi_java_getBufPtr(&rbufbase, env, recvbuf, baseType, recvoffset) ;
    sendptr = ompi_java_getBufPtr(&sbufbase, env, sendbuf, baseType, sendoffset) ;

    MPI_Reduce_scatter(sendptr, recvptr, (int*) rcount, mpi_type,
                       (MPI_Op)((*env)->GetLongField(env,op,ompi_java.OphandleID)),
                       mpi_comm) ;

    ompi_java_releaseBufPtr(env, sendbuf, sbufbase, baseType) ;
    ompi_java_releaseBufPtr(env, recvbuf, rbufbase, baseType) ;

    (*env)->ReleaseIntArrayElements(env,recvcount,rcount,JNI_ABORT);
}
Example #13
int ZMPI_Reduce_scatter_block(const void *sendbuf, void *recvbuf, int recvcount, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) /* zmpi_func ZMPI_Reduce_scatter_block */
{
#if MPI_VERSION > 2 || (MPI_VERSION == 2 && MPI_SUBVERSION >= 2)

  return MPI_Reduce_scatter_block((void *) sendbuf, recvbuf, recvcount, datatype, op, comm);

#else

  int comm_size, *recvcounts, i, exit_code;


  MPI_Comm_size(comm, &comm_size);

  recvcounts = z_alloc(comm_size, sizeof(int));

  for (i = 0; i < comm_size; ++i) recvcounts[i] = recvcount;

  exit_code = MPI_Reduce_scatter((void *) sendbuf, recvbuf, recvcounts, datatype, op, comm);

  z_free(recvcounts);

  return exit_code;
#endif
}
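A brief usage sketch (buffer handling is illustrative): regardless of which branch is compiled, every rank supplies comm_size * recvcount send elements and receives its own block of recvcount reduced values.

#include <stdlib.h>
#include <mpi.h>

static void example_call(MPI_Comm comm, int recvcount)
{
    int comm_size, i;
    MPI_Comm_size(comm, &comm_size);

    int *sendbuf = (int *) malloc(comm_size * recvcount * sizeof(int));
    int *recvbuf = (int *) malloc(recvcount * sizeof(int));
    for (i = 0; i < comm_size * recvcount; i++) sendbuf[i] = i;

    /* Equal-sized blocks: block r of the element-wise sums goes to rank r. */
    ZMPI_Reduce_scatter_block(sendbuf, recvbuf, recvcount, MPI_INT, MPI_SUM, comm);

    free(sendbuf);
    free(recvbuf);
}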
Example #14
void mpi_reduce_scatter_(void* sendbuf, void* recvbuf, int* recvcounts, int* datatype,
                     int* op, int* comm, int* ierr) {
  *ierr = MPI_Reduce_scatter(sendbuf, recvbuf, recvcounts, get_datatype(*datatype),
                        get_op(*op), get_comm(*comm));
}
Example #15
int main(int argc, char *argv[])
{
    int provided;
    int size, rank;
    int rc;

    MPI_Init_thread( &argc, &argv, MPI_THREAD_SINGLE, &provided );
    //assert( provided == MPI_THREAD_SINGLE );

    MPI_Comm_size( MPI_COMM_WORLD, &size );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );

    //printf( "Hello from %d of %d processors\n", rank, size );
    //fflush( stdout );

    int count = ( argc > 1 ? atoi(argv[1]) : 1 );

    MPI_Barrier( MPI_COMM_WORLD );

    /* reduce_scatter bandwidth test */
    if ( rank == 0 ) printf( "begin reduce_scatter bandwidth test\n" );
    {
        int i;
        double t0, t1;
        int * snd_buffer;
        int * rcv_buffer;
        int * counts;

        snd_buffer = malloc( size * count * sizeof(int) );
        assert( snd_buffer != NULL );

        rcv_buffer = malloc( count * sizeof(int) );
        assert( rcv_buffer != NULL );

        counts = malloc( size * sizeof(int) );
        assert( counts != NULL );

        for ( i = 0 ; i < size * count ; i++ ) snd_buffer[i] = 0;
        for ( i = 0 ; i < count ; i++ ) snd_buffer[ rank * count + i] = rank;
        for ( i = 0 ; i < count ; i++ ) rcv_buffer[i] = 0;
        for ( i = 0 ; i < size ; i++) counts[i] = count;

        MPI_Barrier( MPI_COMM_WORLD );

        t0 = MPI_Wtime();
        rc = MPI_Reduce_scatter( snd_buffer, rcv_buffer, counts, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
        t1 = MPI_Wtime();
        assert ( rc == MPI_SUCCESS );

        for ( i = 0 ; i < count ; i++ ) assert( rcv_buffer[i] == rank );

        if ( rank == 0 ) printf( "MPI_Reduce_scatter(MPI_SUM): %u bytes transferred in %lf seconds (%lf MB/s)\n", 
                                 count * (int) sizeof(int), t1 - t0, 1e-6 * count * (int) sizeof(int) / (t1-t0) );

        free(snd_buffer);
        free(rcv_buffer);
        free(counts);
    }

    if ( rank == 0 ) printf( "done with all tests\n" );
    fflush( stdout );

    MPI_Barrier( MPI_COMM_WORLD );

    MPI_Finalize();

    return 0;
}
Example #16
void IMB_reduce_scatter(struct comm_info* c_info, int size, struct iter_schedule* ITERATIONS,
                        MODES RUN_MODE, double* time)
/*

                      
                      MPI-1 benchmark kernel
                      Benchmarks MPI_Reduce_scatter
                      


Input variables: 

-c_info               (type struct comm_info*)                      
                      Collection of all base data for MPI;
                      see [1] for more information
                      

-size                 (type int)                      
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)                      
                      (only MPI-2 case: see [1])


Output variables: 

-time                 (type double*)                      
                      Timing result per sample


*/
{
    double t1, t2;
    int    i;
    size_t pos1,pos2;
#ifdef CHECK
    size_t pos;
    int    Locsize;
#endif

    Type_Size s_size;
  
#ifdef CHECK
    defect=0.;
#endif
    ierr = 0;

    /*  GET SIZE OF DATA TYPE */  
    MPI_Type_size(c_info->red_data_type,&s_size);

    for (i=0;i<c_info->num_procs ;i++)
    {
	if( size > 0)
	{
	    IMB_get_rank_portion(i, c_info->num_procs, size, s_size, 
				 &pos1, &pos2);
	    c_info->reccnt[i] = (pos2-pos1+1)/s_size;
    #ifdef CHECK
	    if( i==c_info->rank ) {pos=pos1; Locsize= s_size*c_info->reccnt[i];}
    #endif
	} else
	{
	    c_info->reccnt[i] = 0;
    #ifdef CHECK
	    if( i==c_info->rank ) {pos=0; Locsize= 0;}
    #endif
	}
    }

    if(c_info->rank!=-1)
    {
	for(i=0; i<N_BARR; i++) MPI_Barrier(c_info->communicator);

	t1 = MPI_Wtime();
	for(i=0;i< ITERATIONS->n_sample;i++)
	{
	    ierr = MPI_Reduce_scatter ((char*)c_info->s_buffer+i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs,
				       (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
				       c_info->reccnt,
				       c_info->red_data_type,c_info->op_type,
				       c_info->communicator);
	    MPI_ERRHAND(ierr);

	    CHK_DIFF("Reduce_scatter",c_info, (char*)c_info->r_buffer+i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs,
		     pos,
		     Locsize, size, asize,
		     put, 0, ITERATIONS->n_sample, i,
		     -1, &defect);

	}
	t2 = MPI_Wtime();
	*time=(t2 - t1)/ITERATIONS->n_sample;
    } else /*if(c_info->rank==-1)*/
    { 
	*time = 0.; 
    }
}
void MpiComm<Ordinal>::reduceAllAndScatter(
    const ValueTypeReductionOp<Ordinal,char> &reductOp
    ,const Ordinal sendBytes, const char sendBuffer[]
    ,const Ordinal recvCounts[], char myGlobalReducts[]
) const
{

    (void)sendBytes; // Ignore if not in debug mode

    TEUCHOS_COMM_TIME_MONITOR(
        "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::reduceAllAndScatter(...)"
    );

#ifdef TEUCHOS_DEBUG
    Ordinal sumRecvBytes = 0;
    for( Ordinal i = 0; i < size_; ++i ) {
        sumRecvBytes += recvCounts[i];
    }
    TEST_FOR_EXCEPT(!(sumRecvBytes==sendBytes));
#endif // TEUCHOS_DEBUG

#ifdef TEUCHOS_MPI_COMM_DUMP
    if(show_dump) {
        dumpBuffer<Ordinal,char>(
            "Teuchos::MpiComm<Ordinal>::reduceAllAndScatter(...)",
            "sendBuffer", sendBytes, sendBuffer );
        dumpBuffer<Ordinal,Ordinal>(
            "Teuchos::MpiComm<Ordinal>::reduceAllAndScatter(...)",
            "recvCounts", as<Ordinal>(size_), recvCounts );
        dumpBuffer<Ordinal,char>(
            "Teuchos::MpiComm<Ordinal>::reduceAllAndScatter(...)",
            "myGlobalReducts", as<char>(recvCounts[rank_]), myGlobalReducts );
    }
#endif // TEUCHOS_MPI_COMM_DUMP

    // Create a new recvCount[] if Ordinal!=int
    WorkspaceStore* wss = get_default_workspace_store().get();
    const bool Ordinal_is_int = typeid(int)==typeid(Ordinal);
    Workspace<int> ws_int_recvCounts(wss,Ordinal_is_int?0:size_);
    const int *int_recvCounts = 0;
    if(Ordinal_is_int) {
        int_recvCounts = reinterpret_cast<const int*>(recvCounts);
        // Note: We must do a reinterpret_cast since this must
        // compile even if it is not executed.  I could implement
        // code that would not need to do this using template
        // conditionals but I don't want to bother.
    }
    else {
        std::copy(recvCounts, recvCounts+size_, &ws_int_recvCounts[0]);
        int_recvCounts = &ws_int_recvCounts[0];
    }

    // Perform the operation
    MpiReductionOpSetter op(mpiReductionOp(rcp(&reductOp, false)));
    MPI_Reduce_scatter(
        const_cast<char*>(sendBuffer), myGlobalReducts,
        const_cast<int*>(int_recvCounts),
        MPI_CHAR,
        op.mpi_op(),
        *rawMpiComm_
    );

}
Example #18
int main(int argc, char* argv[])
{
    MPI_Init(&argc, &argv);

    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (size!=4) {
        if (rank==0) printf("Use 4 processes\n");
        MPI_Finalize();
        return size;
    }

    {
        if (rank==0) printf("MPI_Reduce_scatter(sendbuf, recvbuf...\n");
        fflush(stdout);
        MPI_Barrier(MPI_COMM_WORLD);

        int junk = rank+1;
        int sendbuf[4] = {junk, junk*2, junk*3, junk*4};
        int recvbuf[1] = {0};
        int recvcounts[4] = {1,1,1,1};
        MPI_Reduce_scatter(sendbuf, recvbuf, recvcounts, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
        printf("%d: sendbuf = {%d,%d,%d,%d}, recvbuf = {%d} \n",
                rank, sendbuf[0], sendbuf[1], sendbuf[2], sendbuf[3], recvbuf[0]);
    }

    fflush(stdout);
    usleep(1000);
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank==0) printf("===================\n");

    {
        if (rank==0) printf("MPI_Reduce_scatter(MPI_IN_PLACE, recvbuf...\n");
        fflush(stdout);
        MPI_Barrier(MPI_COMM_WORLD);

        int junk = rank+1;
        int recvbuf[4] = {junk, junk*2, junk*3, junk*4};
        int recvcounts[4] = {1,1,1,1};
        MPI_Reduce_scatter(MPI_IN_PLACE, recvbuf, recvcounts, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
        printf("%d: recvbuf = {%d,%d,%d,%d} \n",
                rank, recvbuf[0], recvbuf[1], recvbuf[2], recvbuf[3]);
    }

    fflush(stdout);
    usleep(1000);
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank==0) printf("===================\n");

    {
        if (rank==0) printf("MPI_Reduce_scatter_block(sendbuf, recvbuf...\n");
        fflush(stdout);
        MPI_Barrier(MPI_COMM_WORLD);

        int junk = rank+1;
        int sendbuf[4] = {junk, junk*2, junk*3, junk*4};
        int recvbuf[1] = {0};
        int recvcount = 1;
        MPI_Reduce_scatter_block(sendbuf, recvbuf, recvcount, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
        printf("%d: sendbuf = {%d,%d,%d,%d}, recvbuf = {%d} \n",
                rank, sendbuf[0], sendbuf[1], sendbuf[2], sendbuf[3], recvbuf[0]);
    }

    fflush(stdout);
    usleep(1000);
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank==0) printf("===================\n");

    {
        if (rank==0) printf("MPI_Reduce_scatter_block(MPI_IN_PLACE, recvbuf...\n");
        fflush(stdout);
        MPI_Barrier(MPI_COMM_WORLD);

        int junk = rank+1;
        int recvbuf[4] = {junk, junk*2, junk*3, junk*4};
        int recvcount = 1;
        MPI_Reduce_scatter_block(MPI_IN_PLACE, recvbuf, recvcount, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
        printf("%d: recvbuf = {%d,%d,%d,%d} \n",
                rank, recvbuf[0], recvbuf[1], recvbuf[2], recvbuf[3]);
    }

    fflush(stdout);
    usleep(1000);
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank==0) printf("===================\n");

    {
        if (rank==0) printf("MPI_Reduce(sendbuf, tempbuf... + MPI_Scatter(tempbuf, recvcount...\n");
        fflush(stdout);
        MPI_Barrier(MPI_COMM_WORLD);

        int junk = rank+1;
        int sendbuf[4] = {junk, junk*2, junk*3, junk*4};
        int tempbuf[4] = {0,0,0,0};
        int recvbuf[1] = {0};
        int recvcount = 1;
        MPI_Reduce(sendbuf, tempbuf, 4*recvcount, MPI_INT, MPI_SUM, 0 /* root */, MPI_COMM_WORLD);
        MPI_Scatter(tempbuf, recvcount, MPI_INT, recvbuf, recvcount, MPI_INT, 0 /* root */, MPI_COMM_WORLD);
        printf("%d: sendbuf = {%d,%d,%d,%d}, recvbuf = {%d} \n",
                rank, sendbuf[0], sendbuf[1], sendbuf[2], sendbuf[3], recvbuf[0]);
    }

    fflush(stdout);
    usleep(1000);
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank==0) printf("===================\n");

    {
        if (rank==0) printf("MPI_Reduce(MPI_IN_PLACE, recvbuf... + MPI_Scatter(MPI_IN_PLACE, recvcount...\n");
        fflush(stdout);
        MPI_Barrier(MPI_COMM_WORLD);

        int junk = rank+1;
        int recvbuf[4] = {junk, junk*2, junk*3, junk*4};
        int recvcount = 1;
        MPI_Reduce(rank==0 ? MPI_IN_PLACE : recvbuf, rank==0 ? recvbuf : NULL,
                   4*recvcount, MPI_INT, MPI_SUM, 0 /* root */, MPI_COMM_WORLD);
        MPI_Scatter(recvbuf, recvcount, MPI_INT, rank==0 ? MPI_IN_PLACE : recvbuf, recvcount, MPI_INT, 0 /* root */, MPI_COMM_WORLD);
        printf("%d: recvbuf = {%d,%d,%d,%d} \n",
                rank, recvbuf[0], recvbuf[1], recvbuf[2], recvbuf[3]);
    }

    MPI_Finalize();

    return 0;
}
Example #19
FC_FUNC(mpi_reduce_scatter, MPI_REDUCE_SCATTER)
                (void * sendbuf, void * recvbuf, int *recvcounts,
                 int *datatype, int *op, int *comm, int *ierr)
{
  *ierr = MPI_Reduce_scatter(sendbuf, recvbuf, recvcounts, *datatype, *op, *comm);
}
Example #20
int main(int argc, char **argv)
{
    int errs = 0;
    int i;
    int rank, size;
    int *sbuf = NULL;
    int *rbuf = NULL;
    int *scounts = NULL;
    int *rcounts = NULL;
    int *sdispls = NULL;
    int *rdispls = NULL;
    MPI_Datatype *types = NULL;
    MPI_Comm comm;

    /* intentionally not using MTest_Init/MTest_Finalize in order to make it
     * easy to take this test and use it as an NBC sanity test outside of the
     * MPICH test suite */
    MPI_Init(&argc, &argv);

    comm = MPI_COMM_WORLD;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

    /* enough space for every process to contribute at least NUM_INTS ints to any
     * collective operation */
    sbuf = malloc(NUM_INTS * size * sizeof(int));
    my_assert(sbuf);
    rbuf = malloc(NUM_INTS * size * sizeof(int));
    my_assert(rbuf);
    scounts = malloc(size * sizeof(int));
    my_assert(scounts);
    rcounts = malloc(size * sizeof(int));
    my_assert(rcounts);
    sdispls = malloc(size * sizeof(int));
    my_assert(sdispls);
    rdispls = malloc(size * sizeof(int));
    my_assert(rdispls);
    types = malloc(size * sizeof(MPI_Datatype));
    my_assert(types);

    for (i = 0; i < size; ++i) {
        sbuf[2 * i] = i;
        sbuf[2 * i + 1] = i;
        rbuf[2 * i] = i;
        rbuf[2 * i + 1] = i;
        scounts[i] = NUM_INTS;
        rcounts[i] = NUM_INTS;
        sdispls[i] = i * NUM_INTS;
        rdispls[i] = i * NUM_INTS;
        types[i] = MPI_INT;
    }

    if (rank == 0 && MPI_SUCCESS ==
        MPI_Gather(sbuf, NUM_INTS, MPI_INT, sbuf, NUM_INTS, MPI_INT, 0, comm))
        errs++;

    if (rank == 0 && MPI_SUCCESS ==
        MPI_Gatherv(sbuf, NUM_INTS, MPI_INT, sbuf, rcounts, rdispls, MPI_INT, 0, comm))
        errs++;

    if (rank == 0 && MPI_SUCCESS ==
        MPI_Scatter(sbuf, NUM_INTS, MPI_INT, sbuf, NUM_INTS, MPI_INT, 0, comm))
        errs++;

    if (rank == 0 && MPI_SUCCESS ==
        MPI_Scatterv(sbuf, scounts, sdispls, MPI_INT, sbuf, NUM_INTS, MPI_INT, 0, comm))
        errs++;

    if (MPI_SUCCESS == MPI_Allgather(&sbuf[rank], 1, MPI_INT, sbuf, 1, MPI_INT, comm))
        errs++;

    if (MPI_SUCCESS ==
        MPI_Allgatherv(&sbuf[rank * rcounts[rank]], rcounts[rank], MPI_INT, sbuf, rcounts, rdispls,
                       MPI_INT, comm))
        errs++;

    if (MPI_SUCCESS == MPI_Alltoall(sbuf, NUM_INTS, MPI_INT, sbuf, NUM_INTS, MPI_INT, comm))
        errs++;

    if (MPI_SUCCESS ==
        MPI_Alltoallv(sbuf, scounts, sdispls, MPI_INT, sbuf, scounts, sdispls, MPI_INT, comm))
        errs++;

    if (MPI_SUCCESS ==
        MPI_Alltoallw(sbuf, scounts, sdispls, types, sbuf, scounts, sdispls, types, comm))
        errs++;

    if (rank == 0 && MPI_SUCCESS == MPI_Reduce(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, 0, comm))
        errs++;

    if (MPI_SUCCESS == MPI_Allreduce(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, comm))
        errs++;

    if (MPI_SUCCESS == MPI_Reduce_scatter(sbuf, sbuf, rcounts, MPI_INT, MPI_SUM, comm))
        errs++;

    if (MPI_SUCCESS == MPI_Reduce_scatter_block(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, comm))
        errs++;

    if (MPI_SUCCESS == MPI_Scan(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, comm))
        errs++;

    if (MPI_SUCCESS == MPI_Exscan(sbuf, sbuf, NUM_INTS, MPI_INT, MPI_SUM, comm))
        errs++;

    if (sbuf)
        free(sbuf);
    if (rbuf)
        free(rbuf);
    if (scounts)
        free(scounts);
    if (rcounts)
        free(rcounts);
    if (sdispls)
        free(sdispls);
    if (rdispls)
        free(rdispls);
    if (types)
        free(types);

    if (rank == 0) {
        if (errs)
            fprintf(stderr, "Found %d errors\n", errs);
        else
            printf(" No errors\n");
    }
    MPI_Finalize();
    return 0;
}
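The reduce-scatter calls above deliberately alias sbuf as both send and receive buffer, which MPI forbids and which this test therefore expects to fail. The sanctioned way to reduce into one's own data is MPI_IN_PLACE (as Example #18 also shows); with this test's names the calls would look roughly like:

/* MPI_IN_PLACE goes in the sendbuf position; the input is then taken from
 * rbuf, and this rank's block of the element-wise sums is returned in rbuf. */
MPI_Reduce_scatter(MPI_IN_PLACE, rbuf, rcounts, MPI_INT, MPI_SUM, comm);
MPI_Reduce_scatter_block(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm);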
Example #21
File: MPI-api.c Project: 8l/rose
void declareBindings (void)
{
  /* === Point-to-point === */
  void* buf;
  int count;
  MPI_Datatype datatype;
  int dest;
  int tag;
  MPI_Comm comm;
  MPI_Send (buf, count, datatype, dest, tag, comm); // L12
  int source;
  MPI_Status status;
  MPI_Recv (buf, count, datatype, source, tag, comm, &status); // L15
  MPI_Get_count (&status, datatype, &count);
  MPI_Bsend (buf, count, datatype, dest, tag, comm);
  MPI_Ssend (buf, count, datatype, dest, tag, comm);
  MPI_Rsend (buf, count, datatype, dest, tag, comm);
  void* buffer;
  int size;
  MPI_Buffer_attach (buffer, size); // L22
  MPI_Buffer_detach (buffer, &size);
  MPI_Request request;
  MPI_Isend (buf, count, datatype, dest, tag, comm, &request); // L25
  MPI_Ibsend (buf, count, datatype, dest, tag, comm, &request);
  MPI_Issend (buf, count, datatype, dest, tag, comm, &request);
  MPI_Irsend (buf, count, datatype, dest, tag, comm, &request);
  MPI_Irecv (buf, count, datatype, source, tag, comm, &request);
  MPI_Wait (&request, &status);
  int flag;
  MPI_Test (&request, &flag, &status); // L32
  MPI_Request_free (&request);
  MPI_Request* array_of_requests;
  int index;
  MPI_Waitany (count, array_of_requests, &index, &status); // L36
  MPI_Testany (count, array_of_requests, &index, &flag, &status);
  MPI_Status* array_of_statuses;
  MPI_Waitall (count, array_of_requests, array_of_statuses); // L39
  MPI_Testall (count, array_of_requests, &flag, array_of_statuses);
  int incount;
  int outcount;
  int* array_of_indices;
  MPI_Waitsome (incount, array_of_requests, &outcount, array_of_indices,
		array_of_statuses); // L44--45
  MPI_Testsome (incount, array_of_requests, &outcount, array_of_indices,
		array_of_statuses); // L46--47
  MPI_Iprobe (source, tag, comm, &flag, &status); // L48
  MPI_Probe (source, tag, comm, &status);
  MPI_Cancel (&request);
  MPI_Test_cancelled (&status, &flag);
  MPI_Send_init (buf, count, datatype, dest, tag, comm, &request);
  MPI_Bsend_init (buf, count, datatype, dest, tag, comm, &request);
  MPI_Ssend_init (buf, count, datatype, dest, tag, comm, &request);
  MPI_Rsend_init (buf, count, datatype, dest, tag, comm, &request);
  MPI_Recv_init (buf, count, datatype, source, tag, comm, &request);
  MPI_Start (&request);
  MPI_Startall (count, array_of_requests);
  void* sendbuf;
  int sendcount;
  MPI_Datatype sendtype;
  int sendtag;
  void* recvbuf;
  int recvcount;
  MPI_Datatype recvtype;
  int recvtag;
  MPI_Sendrecv (sendbuf, sendcount, sendtype, dest, sendtag,
		recvbuf, recvcount, recvtype, source, recvtag,
		comm, &status); // L67--69
  MPI_Sendrecv_replace (buf, count, datatype, dest, sendtag, source, recvtag,
			comm, &status); // L70--71
  MPI_Datatype oldtype;
  MPI_Datatype newtype;
  MPI_Type_contiguous (count, oldtype, &newtype); // L74
  int blocklength;
  {
    int stride;
    MPI_Type_vector (count, blocklength, stride, oldtype, &newtype); // L78
  }
  {
    MPI_Aint stride;
    MPI_Type_hvector (count, blocklength, stride, oldtype, &newtype); // L82
  }
  int* array_of_blocklengths;
  {
    int* array_of_displacements;
    MPI_Type_indexed (count, array_of_blocklengths, array_of_displacements,
		      oldtype, &newtype); // L87--88
  }
  {
    MPI_Aint* array_of_displacements;
    MPI_Type_hindexed (count, array_of_blocklengths, array_of_displacements,
                       oldtype, &newtype); // L92--93
    MPI_Datatype* array_of_types;
    MPI_Type_struct (count, array_of_blocklengths, array_of_displacements,
                     array_of_types, &newtype); // L95--96
  }
  void* location;
  MPI_Aint address;
  MPI_Address (location, &address); // L100
  MPI_Aint extent;
  MPI_Type_extent (datatype, &extent); // L102
  MPI_Type_size (datatype, &size);
  MPI_Aint displacement;
  MPI_Type_lb (datatype, &displacement); // L105
  MPI_Type_ub (datatype, &displacement);
  MPI_Type_commit (&datatype);
  MPI_Type_free (&datatype);
  MPI_Get_elements (&status, datatype, &count);
  void* inbuf;
  void* outbuf;
  int outsize;
  int position;
  MPI_Pack (inbuf, incount, datatype, outbuf, outsize, &position, comm); // L114
  int insize;
  MPI_Unpack (inbuf, insize, &position, outbuf, outcount, datatype,
	      comm); // L116--117
  MPI_Pack_size (incount, datatype, comm, &size);

  /* === Collectives === */
  MPI_Barrier (comm); // L121
  int root;
  MPI_Bcast (buffer, count, datatype, root, comm); // L123
  MPI_Gather (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
	      root, comm); // L124--125
  int* recvcounts;
  int* displs;
  MPI_Gatherv (sendbuf, sendcount, sendtype,
               recvbuf, recvcounts, displs, recvtype,
	       root, comm); // L128--130
  MPI_Scatter (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
               root, comm); // L131--132
  int* sendcounts;
  MPI_Scatterv (sendbuf, sendcounts, displs, sendtype,
		recvbuf, recvcount, recvtype, root, comm); // L134--135
  MPI_Allgather (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
                 comm); // L136--137
  MPI_Allgatherv (sendbuf, sendcount, sendtype,
		  recvbuf, recvcounts, displs, recvtype,
		  comm); // L138--140
  MPI_Alltoall (sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype,
		comm); // L141--142
  int* sdispls;
  int* rdispls;
  MPI_Alltoallv (sendbuf, sendcounts, sdispls, sendtype,
                 recvbuf, recvcounts, rdispls, recvtype,
		 comm); // L145--147
  MPI_Op op;
  MPI_Reduce (sendbuf, recvbuf, count, datatype, op, root, comm); // L149
#if 0
  MPI_User_function function;
  int commute;
  MPI_Op_create (function, commute, &op); // L153
#endif
  MPI_Op_free (&op); // L155
  MPI_Allreduce (sendbuf, recvbuf, count, datatype, op, comm);
  MPI_Reduce_scatter (sendbuf, recvbuf, recvcounts, datatype, op, comm);
  MPI_Scan (sendbuf, recvbuf, count, datatype, op, comm);

  /* === Groups, contexts, and communicators === */
  MPI_Group group;
  MPI_Group_size (group, &size); // L162
  int rank;
  MPI_Group_rank (group, &rank); // L164
  MPI_Group group1;
  int n;
  int* ranks1;
  MPI_Group group2;
  int* ranks2;
  MPI_Group_translate_ranks (group1, n, ranks1, group2, ranks2); // L170
  int result;
  MPI_Group_compare (group1, group2, &result); // L172
  MPI_Group newgroup;
  MPI_Group_union (group1, group2, &newgroup); // L174
  MPI_Group_intersection (group1, group2, &newgroup);
  MPI_Group_difference (group1, group2, &newgroup);
  int* ranks;
  MPI_Group_incl (group, n, ranks, &newgroup); // L178
  MPI_Group_excl (group, n, ranks, &newgroup);
  extern int ranges[][3];
  MPI_Group_range_incl (group, n, ranges, &newgroup); // L181
  MPI_Group_range_excl (group, n, ranges, &newgroup);
  MPI_Group_free (&group);
  MPI_Comm_size (comm, &size);
  MPI_Comm_rank (comm, &rank);
  MPI_Comm comm1;
  MPI_Comm comm2;
  MPI_Comm_compare (comm1, comm2, &result);
  MPI_Comm newcomm;
  MPI_Comm_dup (comm, &newcomm);
  MPI_Comm_create (comm, group, &newcomm);
  int color;
  int key;
  MPI_Comm_split (comm, color, key, &newcomm); // L194
  MPI_Comm_free (&comm);
  MPI_Comm_test_inter (comm, &flag);
  MPI_Comm_remote_size (comm, &size);
  MPI_Comm_remote_group (comm, &group);
  MPI_Comm local_comm;
  int local_leader;
  MPI_Comm peer_comm;
  int remote_leader;
  MPI_Comm newintercomm;
  MPI_Intercomm_create (local_comm, local_leader, peer_comm, remote_leader, tag,
			&newintercomm); // L204--205
  MPI_Comm intercomm;
  MPI_Comm newintracomm;
  int high;
  MPI_Intercomm_merge (intercomm, high, &newintracomm); // L209
  int keyval;
#if 0
  MPI_Copy_function copy_fn;
  MPI_Delete_function delete_fn;
  void* extra_state;
  MPI_Keyval_create (copy_fn, delete_fn, &keyval, extra_state); // L215
#endif
  MPI_Keyval_free (&keyval); // L217
  void* attribute_val;
  MPI_Attr_put (comm, keyval, attribute_val); // L219
  MPI_Attr_get (comm, keyval, attribute_val, &flag);
  MPI_Attr_delete (comm, keyval);

  /* === Environmental inquiry === */
  char* name;
  int resultlen;
  MPI_Get_processor_name (name, &resultlen); // L226
  MPI_Errhandler errhandler;
#if 0
  MPI_Handler_function function;
  MPI_Errhandler_create (function, &errhandler); // L230
#endif
  MPI_Errhandler_set (comm, errhandler); // L232
  MPI_Errhandler_get (comm, &errhandler);
  MPI_Errhandler_free (&errhandler);
  int errorcode;
  char* string;
  MPI_Error_string (errorcode, string, &resultlen); // L237
  int errorclass;
  MPI_Error_class (errorcode, &errorclass); // L239
  MPI_Wtime ();
  MPI_Wtick ();
  int argc;
  char** argv;
  MPI_Init (&argc, &argv); // L244
  MPI_Finalize ();
  MPI_Initialized (&flag);
  MPI_Abort (comm, errorcode);
}
Example #22
void comm_recv_procs_and_msg_sizes(ParallelMachine comm ,
                                   const std::vector<CommBuffer>& send_bufs ,
                                         std::vector<CommBuffer>& recv_bufs,
                                   std::vector<int>& send_procs,
                                   std::vector<int>& recv_procs)
{
  static const char method[] = "stk::comm_procs_and_msg_recv_sizes" ;

  const int p_size = parallel_machine_size( comm );

  int result = MPI_SUCCESS ;

  MPI_Datatype uint_type = MPI_LONG_LONG;
  if (sizeof(int) == sizeof(unsigned))
    uint_type = MPI_INT;
  else if (sizeof(long) == sizeof(unsigned))
    uint_type = MPI_LONG;
  else if (sizeof(long long) == sizeof(unsigned))
    uint_type = MPI_LONG_LONG;
  else {
    std::ostringstream msg ;
    msg << method << " ERROR: No matching MPI type found for size_t argument";
    throw std::runtime_error(msg.str());
  }

  std::vector<unsigned> buf;
  buf.resize(p_size*2); // resize (not reserve) so the elements may legally be accessed
  int* recvcounts = reinterpret_cast<int*>(&buf[0]);
  unsigned * tmp = &buf[p_size];
  send_procs.clear();
  send_procs.reserve(16);
  for ( int i = 0 ; i < p_size ; ++i ) {
    recvcounts[i] = 1;
    tmp[i] = 0;
    if ( send_bufs[i].size() > 0 ) {
      tmp[i] = 1 ;
      send_procs.push_back(i);
    }
  }

  unsigned num_recv = 0;

  result = MPI_Reduce_scatter(tmp,&num_recv,recvcounts,uint_type,MPI_SUM,comm);

  if ( result != MPI_SUCCESS ) {
    // PARALLEL ERROR
    std::ostringstream msg ;
    msg << method << " ERROR: " << result << " == MPI_Reduce_scatter" ;
    throw std::runtime_error( msg.str() );
  }

  // do point-to-point send/recvs

  const int mpi_tag = STK_COMMSPARSE_MPI_TAG_PROC_SIZING;

  MPI_Request request_null = MPI_REQUEST_NULL ;
  MPI_Status init_status;
  std::vector<MPI_Request> request( num_recv , request_null );
  std::vector<MPI_Status>  status(  num_recv , init_status );

  // Post receives for point-to-point message sizes

  for ( unsigned i = 0 ; i < num_recv ; ++i ) {
    unsigned    * const p_buf     = & buf[i] ;
    MPI_Request * const p_request = & request[i] ;
    result = MPI_Irecv( p_buf , 1 , uint_type,
                        MPI_ANY_SOURCE , mpi_tag , comm , p_request );
    if ( MPI_SUCCESS != result ) {
      // LOCAL ERROR
      std::ostringstream msg ;
      msg << method << " ERROR: " << result << " == MPI_Irecv" ;
      throw std::runtime_error( msg.str() );
    }
  }

  // Send the point-to-point message sizes,

  for ( size_t i = 0 ; i < send_procs.size() ; ++i ) {
    int      dst = send_procs[i];
    unsigned value = send_bufs[dst].size();
    result = MPI_Send( & value , 1 , uint_type, dst , mpi_tag , comm );
    if ( MPI_SUCCESS != result ) {
      // LOCAL ERROR
      std::ostringstream msg ;
      msg << method << " ERROR: " << result << " == MPI_Send" ;
      throw std::runtime_error( msg.str() );
    }
  }

  // Wait for all receives

  {
    MPI_Request * const p_request = (request.empty() ? NULL : & request[0]) ;
    MPI_Status  * const p_status  = (status.empty() ? NULL : & status[0]) ;
    result = MPI_Waitall( num_recv , p_request , p_status );
  }
  if ( MPI_SUCCESS != result ) {
    // LOCAL ERROR ?
    std::ostringstream msg ;
    msg << method << " ERROR: " << result << " == MPI_Waitall" ;
    throw std::runtime_error( msg.str() );
  }

  recv_procs.resize(num_recv);

  // Set the receive message sizes

  for ( unsigned i = 0 ; i < num_recv ; ++i ) {
    MPI_Status * const recv_status = & status[i] ;
    const int recv_proc = recv_status->MPI_SOURCE ;

#ifndef NDEBUG
    //debug-mode-only error check
    const int recv_tag  = recv_status->MPI_TAG ;
    int recv_count  = 0 ;

    MPI_Get_count( recv_status , uint_type , & recv_count );

    if ( recv_tag != mpi_tag || recv_count != 1 ) {
      std::ostringstream msg ;
      const int p_rank = parallel_machine_rank( comm );
      msg << method << " ERROR: Received buffer mismatch " ;
      msg << "P" << p_rank << " <- P" << recv_proc ;
      msg << "  " << 1 << " != " << recv_count ;
      throw std::runtime_error( msg.str() );
    }
#endif

    recv_bufs[ recv_proc ].set_size(buf[i]);
    recv_procs[i] = recv_proc;
  }
}
Example #23
void Irregular::pattern(int n, int *proclist)
{
  // free any previous irregular info

  deallocate();
  init();

  patternflag = SET;
  sizestyle = NONE;

  ndatumsend = n;

  // list = 1 for procs I send to, including self
  // nrecv = # of messages I receive, not including self
  // self = 0 if no data for self, 1 if there is

  int *list = new int[nprocs];
  int *counts = new int[nprocs];

  for (int i = 0; i < nprocs; i++) {
    list[i] = 0;
    counts[i] = 1;
  }

  for (int i = 0; i < n; i++) list[proclist[i]] = 1;
  MPI_Reduce_scatter(list,&nrecv,counts,MPI_INT,MPI_SUM,comm);

  self = 0;
  if (list[me]) self = 1;
  if (self) nrecv--;

  // storage for recv info, not including self

  recvproc = new int[nrecv];
  recvcount = new int[nrecv];
  recvsize = new int[nrecv];
  request = new MPI_Request[nrecv];
  status = new MPI_Status[nrecv];

  // list = # of datums to send to each proc, including self
  // nsend = # of messages I send, not including self
  
  for (int i = 0; i < nprocs; i++) list[i] = 0;
  for (int i = 0; i < n; i++) list[proclist[i]]++;

  nsend = 0;
  for (int i = 0; i < nprocs; i++) if (list[i] > 0) nsend++;
  if (self) nsend--;

  // storage for send info, including self

  sendproc = new int[nsend+self];
  sendcount = new int[nsend+self];
  sendsize = new int[nsend+self];
  sendindices = (int *) memory->smalloc(n*sizeof(int),"sendindices");

  // setup sendprocs and sendcounts, including self
  // each proc begins with iproc > me, and continues until iproc = me
  // list ends up with pointer to which send that proc is associated with

  int iproc = me;
  int isend = 0;
  for (int i = 0; i < nprocs; i++) {
    iproc++;
    if (iproc == nprocs) iproc = 0;
    if (list[iproc] > 0) {
      sendproc[isend] = iproc;
      sendcount[isend] = list[iproc];
      list[iproc] = isend;
      isend++;
    }
  }

  // post all receives for datum counts

  for (int i = 0; i < nrecv; i++)
    MPI_Irecv(&recvcount[i],1,MPI_INT,MPI_ANY_SOURCE,0,comm,&request[i]);

  // barrier to insure receives are posted

  MPI_Barrier(comm);

  // send each datum count, packing buf with needed datums

  for (int i = 0; i < nsend; i++)
    MPI_Send(&sendcount[i],1,MPI_INT,sendproc[i],0,comm);

  // insure all MPI_ANY_SOURCE messages are received
  // set recvproc

  if (nrecv) MPI_Waitall(nrecv,request,status);
  for (int i = 0; i < nrecv; i++) recvproc[i] = status[i].MPI_SOURCE;

  // ndatumrecv = total datums received, including self

  ndatumrecv = 0;
  for (int i = 0; i < nrecv; i++)
    ndatumrecv += recvcount[i];
  if (self) ndatumrecv += sendcount[nsend];

  // setup sendindices, including self
  // counts = offset into sendindices for each proc I send to
  // let sc0 = sendcount[0], sc1 = sendcount[1], etc
  // sendindices[0:sc0-1] = indices of datums in 1st message
  // sendindices[sc0:sc0+sc1-1] = indices of datums in 2nd message, etc

  counts[0] = 0;
  for (int i = 1; i < nsend+self; i++)
    counts[i] = counts[i-1] + sendcount[i-1];

  for (int i = 0; i < n; i++) {
    isend = list[proclist[i]];
    sendindices[counts[isend]++] = i;
  }

  // clean up

  delete [] counts;
  delete [] list;
}
//==============================================================================
//---------------------------------------------------------------------------
//ComputeRecvs Method
//---------------------------------------------------------------------------
int Epetra_MpiDistributor::ComputeRecvs_( int my_proc, 
			        int nprocs )
{
  int * msg_count = new int[ nprocs ];
  int * counts = new int[ nprocs ];

  int i;
  MPI_Status status;

  for( i = 0; i < nprocs; i++ )
  {
    msg_count[i] = 0;
    counts[i] = 1;
  }

  for( i = 0; i < nsends_+self_msg_; i++ )
    msg_count[ procs_to_[i] ] = 1;

#if defined(REDUCE_SCATTER_BUG)
// the bug is found in mpich on linux platforms
  MPI_Reduce(msg_count, counts, nprocs, MPI_INT, MPI_SUM, 0, comm_);
  MPI_Scatter(counts, 1, MPI_INT, &nrecvs_, 1, MPI_INT, 0, comm_);
#else
  MPI_Reduce_scatter( msg_count, &nrecvs_, counts, MPI_INT, MPI_SUM, comm_ );
#endif

  delete [] msg_count;
  delete [] counts;

  if (nrecvs_>0) {
    lengths_from_ = new int[nrecvs_];
    procs_from_ = new int[nrecvs_];
    for(i=0; i<nrecvs_; ++i) {
      lengths_from_[i] = 0;
      procs_from_[i] = 0;
    }
  }

#ifndef NEW_COMM_PATTERN
  for( i = 0; i < (nsends_+self_msg_); i++ )
    if( procs_to_[i] != my_proc ) {
      MPI_Send( &(lengths_to_[i]), 1, MPI_INT, procs_to_[i], tag_, comm_ );
    }
    else
    {
      //set self_msg_ to end block of recv arrays
      lengths_from_[nrecvs_-1] = lengths_to_[i];
      procs_from_[nrecvs_-1] = my_proc;
    }

  for( i = 0; i < (nrecvs_-self_msg_); i++ )
  {
    MPI_Recv( &(lengths_from_[i]), 1, MPI_INT, MPI_ANY_SOURCE, tag_, comm_, &status );
    procs_from_[i] = status.MPI_SOURCE;
  }

  MPI_Barrier( comm_ );
#else
  if (nrecvs_>0) {
    if( !request_ ) {
      request_ = new MPI_Request[nrecvs_-self_msg_];
      status_ = new MPI_Status[nrecvs_-self_msg_];
    }
  }

  for( i = 0; i < (nrecvs_-self_msg_); i++ )
    MPI_Irecv( &(lengths_from_[i]), 1, MPI_INT, MPI_ANY_SOURCE, tag_, comm_, &(request_[i]) );

  MPI_Barrier( comm_ );

  for( i = 0; i < (nsends_+self_msg_); i++ )
    if( procs_to_[i] != my_proc ) {
      MPI_Rsend( &(lengths_to_[i]), 1, MPI_INT, procs_to_[i], tag_, comm_ );
    }
    else
    {
      //set self_msg_ to end block of recv arrays
      lengths_from_[nrecvs_-1] = lengths_to_[i];
      procs_from_[nrecvs_-1] = my_proc;
    }

  if( (nrecvs_-self_msg_) > 0 ) MPI_Waitall( (nrecvs_-self_msg_), request_, status_ );

  for( i = 0; i < (nrecvs_-self_msg_); i++ )
    procs_from_[i] = status_[i].MPI_SOURCE;
#endif

  Sort_ints_( procs_from_, lengths_from_, nrecvs_ );

  // Compute indices_from_
  // Seems to break some rvs communication
/* Not necessary since rvs communication is always blocked
  size_indices_from_ = 0;
  if( nrecvs_ > 0 )
  {
    for( i = 0; i < nrecvs_; i++ )  size_indices_from_ += lengths_from_[i];
    indices_from_ = new int[ size_indices_from_ ];

    for (i=0; i<size_indices_from_; i++) indices_from_[i] = i;
  }
*/

  if (nrecvs_>0) starts_from_ = new int[nrecvs_];
  int j = 0;
  for( i=0; i<nrecvs_; ++i )
  {
    starts_from_[i] = j;
    j += lengths_from_[i];
  }

  total_recv_length_ = 0;
  for( i = 0; i < nrecvs_; i++ )
    total_recv_length_ += lengths_from_[i];

  nrecvs_ -= self_msg_;

  MPI_Barrier( comm_ );
  
  return false;
}
Example #25
void BIL_Pio_exchange_blocks(BIL_Sched_IO* io_sched,
                             BIL_Sched_IO* inv_io_sched) {
  BIL_Timing_comm_start();
  int i;
  // Convenience variables
  int num_recv_blocks = inv_io_sched->num_io_blocks;
  BIL_Block* recv_blocks = inv_io_sched->io_blocks;
  int num_io_blocks = io_sched->num_io_blocks;
  BIL_Block* io_blocks = io_sched->io_blocks;
  
  // Sort your recv blocks by their read rank.
  qsort(recv_blocks, num_recv_blocks, sizeof(BIL_Block),
        BIL_Pio_compare_block_read_rank);
  // Sort the I/O blocks by their group name to perform binary searching.
  qsort(io_blocks, num_io_blocks, sizeof(BIL_Block),
        BIL_Pio_compare_block_group_name);
  qsort(BIL->blocks, BIL->num_blocks, sizeof(BIL_Block), 
        BIL_Pio_compare_block_group_name);

  // In netcdf mode, you will not know the variable size of the blocks you
  // are receiving until now. The variable sizes are stored in the
  // BIL global variable after reading. Fill in these sizes into the inverse
  // I/O schedule.
  for (i = 0; i < num_recv_blocks; i++) {
    BIL_Block* io_block =
      bsearch(&(recv_blocks[i]), BIL->blocks, BIL->num_blocks, 
              sizeof(BIL_Block), BIL_Pio_compare_block_group_name);
    recv_blocks[i].var_size = io_block->var_size;
  }

  int* all_num_send_blocks = 
    BIL_Misc_malloc(sizeof(int) * BIL->world_size);
  memset(all_num_send_blocks, 0, sizeof(int) * BIL->world_size);

  for (i = 0; i < num_recv_blocks; i++) {
    all_num_send_blocks[recv_blocks[i].read_rank]++;
  }
  int* recv_counts = BIL_Misc_malloc(sizeof(int) * BIL->world_size);
  for (i = 0; i < BIL->world_size; i++) {
    recv_counts[i] = 1;
  }
  int num_send_blocks = 0;
  MPI_Reduce_scatter(all_num_send_blocks, &num_send_blocks, 
                     recv_counts, MPI_INT, MPI_SUM, BIL->world_comm);

  BIL_Misc_free(all_num_send_blocks);
  BIL_Misc_free(recv_counts);

  // Receive the blocks that you will send.
  BIL_Block* send_blocks =
    BIL_Misc_malloc(sizeof(BIL_Block) * num_send_blocks);
  MPI_Request* recv_requests =
    BIL_Misc_malloc(sizeof(MPI_Request) * num_send_blocks);
  
  for (i = 0; i < num_send_blocks; i++) {
    MPI_Irecv(send_blocks + i, sizeof(BIL_Block), MPI_BYTE, MPI_ANY_SOURCE,
              0, BIL->world_comm, recv_requests + i);
  }
  // Send the blocks that you will receive.
  MPI_Request* send_requests =
    BIL_Misc_malloc(sizeof(MPI_Request) * inv_io_sched->num_io_blocks);
  for (i = 0; i < num_recv_blocks; i++) {
    MPI_Isend(recv_blocks + i, sizeof(BIL_Block), MPI_BYTE,
              recv_blocks[i].read_rank, 0, BIL->world_comm,
              send_requests + i);
  }

  assert(MPI_Waitall(num_send_blocks, recv_requests, MPI_STATUSES_IGNORE)
         == MPI_SUCCESS);
  assert(MPI_Waitall(num_recv_blocks, send_requests,
         MPI_STATUSES_IGNORE) == MPI_SUCCESS);
  // This barrier is needed on my mac to get this to work. Not sure why, but
  // it seems that Waitall doesn't block until the isends/irecvs are complete.
  MPI_Barrier(BIL->world_comm);
  BIL_Misc_free(recv_requests);
  BIL_Misc_free(send_requests);

  // Sort your send blocks by their request rank.
  qsort(send_blocks, num_send_blocks, sizeof(BIL_Block),
        BIL_Pio_compare_block_request_rank);

  // Find the total amount of send data.
  int64_t total_send_data = 0;
  for (i = 0; i < num_send_blocks; i++) {
    total_send_data += send_blocks[i].var_size * send_blocks[i].total_size;
  }
  // Allocate a big array for the sending data. Once this array is allocated,
  // the I/O data will be copied to it in such a manner that MPI_Alltoallv
  // can be performed.
  void* send_data = BIL_Misc_malloc(total_send_data);
  // For convenience, set the blocks send data to the appropriate offset
  // in the send_data array.
  if (num_send_blocks > 0) {
    send_blocks[0].data = send_data;
    for (i = 1; i < num_send_blocks; i++) {
      send_blocks[i].data = send_blocks[i - 1].data +
        (send_blocks[i - 1].var_size * send_blocks[i - 1].total_size);
    }
  }

  // Pack each block into send_data.
  for (i = 0; i < num_send_blocks; i++) {
    // Find the corresponding I/O block.
    BIL_Block* io_block =
      bsearch(&(send_blocks[i]), io_blocks, num_io_blocks, sizeof(BIL_Block),
              BIL_Pio_compare_block_group_name);
    assert(io_block != NULL);
    BIL_Pio_extract_block(&(send_blocks[i]), io_block);
  }
  // Free the I/O data now.
  for (i = 0; i < num_io_blocks; i++) {
    BIL_Misc_free(io_blocks[i].data);
  }

  // Allocate a big array for receiving data. This array will be populated
  // during the MPI_Alltoallv call.
  int64_t total_recv_data = 0;
  for (i = 0; i < num_recv_blocks; i++) {
    total_recv_data += recv_blocks[i].var_size * recv_blocks[i].total_size;
  }
  void* recv_data = NULL;
  // Special case - single block I/O where the user has already allocated a
  // buffer.
  if (num_recv_blocks == 1 && BIL->blocks[0].data != NULL) {
    recv_data = BIL->blocks[0].data;
  } else if (num_recv_blocks > 0) {
    recv_data = BIL_Misc_malloc(total_recv_data);
    recv_blocks[0].data = recv_data;
    for (i = 1; i < num_recv_blocks; i++) {
      recv_blocks[i].data = recv_blocks[i - 1].data +
        (recv_blocks[i - 1].var_size * recv_blocks[i - 1].total_size);
    }
  }

  // Find out the recv counts and offsets. Keep in mind that you may receive
  // multiple blocks from the same process.
  recv_counts = BIL_Misc_malloc(sizeof(int) * BIL->world_size);
  memset(recv_counts, 0, sizeof(int) * BIL->world_size);
  int* recv_offsets = BIL_Misc_malloc(sizeof(int) * BIL->world_size);
  memset(recv_offsets, 0, sizeof(int) * BIL->world_size);
  int cur_recv_offset = 0;
  for (i = 0; i < num_recv_blocks; i++) {
    if (recv_counts[recv_blocks[i].read_rank] == 0) {
      recv_offsets[recv_blocks[i].read_rank] = cur_recv_offset;
    }
    recv_counts[recv_blocks[i].read_rank] +=
      recv_blocks[i].total_size * recv_blocks[i].var_size;
    cur_recv_offset += recv_blocks[i].total_size * recv_blocks[i].var_size;
  }

  // Find out the send counts and offsets. Keep in mind you might send
  // multiple blocks to the same process.
  int* send_counts = BIL_Misc_malloc(sizeof(int) * BIL->world_size);
  memset(send_counts, 0, sizeof(int) * BIL->world_size);
  int* send_offsets = BIL_Misc_malloc(sizeof(int) * BIL->world_size);
  memset(send_offsets, 0, sizeof(int) * BIL->world_size);
  int cur_send_offset = 0;
  for (i = 0; i < num_send_blocks; i++) {
    if (send_counts[send_blocks[i].request_rank] == 0) {
      send_offsets[send_blocks[i].request_rank] = cur_send_offset;
    }
    send_counts[send_blocks[i].request_rank] +=
      send_blocks[i].total_size * send_blocks[i].var_size;
    cur_send_offset += send_blocks[i].total_size * send_blocks[i].var_size;
  }
  
  // Exchange blocks.
  MPI_Alltoallv(send_data, send_counts, send_offsets, MPI_BYTE,
                recv_data, recv_counts, recv_offsets, MPI_BYTE,
                BIL->world_comm);

  // Free your send data.
  BIL_Misc_free(send_data);
  // Copy your recv blocks into BIL->blocks. Remember that multiple recv blocks
  // might account for only one block in BIL->blocks.
  qsort(BIL->blocks, BIL->num_blocks, sizeof(BIL_Block), 
        BIL_Pio_compare_block_id);
  if (num_recv_blocks > 1) {
    for (i = 0; i < num_recv_blocks; i++) {
      // Find the corresponding I/O block.
      BIL_Block* io_block = bsearch(&(recv_blocks[i]), BIL->blocks, 
                                    BIL->num_blocks, sizeof(BIL_Block),
                                    BIL_Pio_compare_block_id);
      assert(io_block != NULL);
      if (io_block->data == NULL) {
        io_block->data =
          BIL_Misc_malloc(io_block->var_size * io_block->total_size);
      }
      BIL_Pio_insert_block(&(recv_blocks[i]), io_block);
    }
    // Free the recv_data.
    BIL_Misc_free(recv_data);
  } else if (num_recv_blocks == 1 && BIL->blocks[0].data == NULL) {
    assert(BIL->num_blocks == 1);
    // The reason we are dereferencing this void** is because the user passed
    // a NULL buffer that was allocated for them.
    BIL->blocks[0].data = recv_blocks[0].data;
  }

  BIL_Misc_free(send_counts);
  BIL_Misc_free(send_offsets);
  BIL_Misc_free(recv_counts);
  BIL_Misc_free(recv_offsets);
  BIL_Misc_free(send_blocks);

  BIL_Timing_comm_stop();
}
Example #26
int main( int argc, char **argv )
{
    int      err = 0;
    int      *recvcounts;
    int      size, rsize, rank, i;
    int      recvcount, /* Each process receives this much data */
             sendcount, /* Each process contributes this much data */
	     basecount; /* Unit of elements - basecount *rsize is recvcount, 
			   etc. */
    int      isLeftGroup;
    long long *sendbuf, *recvbuf;
    long long sumval;
    MPI_Comm comm;


    MTest_Init( &argc, &argv );
    comm = MPI_COMM_WORLD;

    basecount = 1024;

    while (MTestGetIntercomm( &comm, &isLeftGroup, 2 )) {
	if (comm == MPI_COMM_NULL) continue;

	MPI_Comm_remote_size( comm, &rsize );
	MPI_Comm_size( comm, &size );
	MPI_Comm_rank( comm, &rank );

	if (0) {
	    printf( "[%d] %s (%d,%d) remote %d\n", rank, 
		    isLeftGroup ? "L" : "R", 
		    rank, size, rsize );
	}

	recvcount = basecount * rsize;
	sendcount = basecount * rsize * size;

	recvcounts = (int *)malloc( size * sizeof(int) );
	if (!recvcounts) {
	    fprintf( stderr, "Could not allocate %d int for recvcounts\n", 
		     size );
	    MPI_Abort( MPI_COMM_WORLD, 1 );
	}
	for (i=0; i<size; i++) 
	    recvcounts[i] = recvcount;
	
	sendbuf = (long long *) malloc( sendcount * sizeof(long long) );
	if (!sendbuf) {
	    fprintf( stderr, "Could not allocate %d ints for sendbuf\n", 
		     sendcount );
	    MPI_Abort( MPI_COMM_WORLD, 1 );
	}

	for (i=0; i<sendcount; i++) {
	    sendbuf[i] = (long long)(rank*sendcount + i);
	}
	recvbuf = (long long *)malloc( recvcount * sizeof(long long) );
	if (!recvbuf) {
	    fprintf( stderr, "Could not allocate %d ints for recvbuf\n", 
		     recvcount );
	    MPI_Abort( MPI_COMM_WORLD, 1 );
	}
	for (i=0; i<recvcount; i++) {
	    recvbuf[i] = (long long)(-i);
	}
	
	MPI_Reduce_scatter( sendbuf, recvbuf, recvcounts, MPI_LONG_LONG, MPI_SUM,
			    comm );

	/* Check received data */
	for (i=0; i<recvcount; i++) {
	    sumval = (long long)(sendcount) * (long long)((rsize * (rsize-1))/2) +
		(long long)(i + rank * rsize * basecount) * (long long)rsize;
	    if (recvbuf[i] != sumval) {
		err++;
		if (err < 4) {
		    fprintf( stdout, "Did not get expected value for reduce scatter\n" );
		    fprintf( stdout, "[%d] %s recvbuf[%d] = %lld, expected %lld\n",
			     rank, 
			     isLeftGroup ? "L" : "R", 
			     i, recvbuf[i], sumval );
		}
	    }
	}
	
	free(sendbuf);
	free(recvbuf);
	free(recvcounts);

	MTestFreeComm( &comm );
    }

    MTest_Finalize( err );

    MPI_Finalize( );

    return 0;
}
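For reference, the expected value checked above comes from what each remote rank contributes: remote rank r stores r*sendcount + k at element k of its send buffer, and element i of local rank `rank` collects the contributions at k = rank*recvcount + i, so the sum over r = 0..rsize-1 is sendcount*rsize*(rsize-1)/2 + rsize*(i + rank*rsize*basecount). The small standalone program below (no MPI; the parameter values are arbitrary and chosen only for illustration) cross-checks this closed form against a brute-force sum.

#include <stdio.h>

int main(void)
{
    /* Arbitrary small parameters; the names mirror the test above. */
    int basecount = 4, size = 3, rsize = 5;
    int recvcount = basecount * rsize;
    long long sendcount = (long long)basecount * rsize * size;

    for (int rank = 0; rank < size; rank++) {
        for (int i = 0; i < recvcount; i++) {
            /* Brute force: remote rank r contributed r*sendcount + k at
             * element k = rank*recvcount + i of its send buffer. */
            long long brute = 0;
            for (int r = 0; r < rsize; r++)
                brute += (long long)r * sendcount + (long long)rank * recvcount + i;
            /* Closed form used by the test's check. */
            long long closed = sendcount * ((long long)rsize * (rsize - 1) / 2)
                             + (long long)(i + rank * rsize * basecount) * rsize;
            if (brute != closed)
                printf("mismatch at rank %d, element %d\n", rank, i);
        }
    }
    printf("closed form matches brute-force sum\n");
    return 0;
}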
Beispiel #27
0
int main( int argc, char **argv )
{
    int      err = 0;
    int      *sendbuf, *recvbuf, *recvcounts;
    int      size, rank, i, j, idx, mycount, sumval;
    MPI_Comm comm;


    MTest_Init( &argc, &argv );
    comm = MPI_COMM_WORLD;

    MPI_Comm_size( comm, &size );
    MPI_Comm_rank( comm, &rank );
    recvcounts = (int *)malloc( size * sizeof(int) );
    if (!recvcounts) {
	fprintf( stderr, "Could not allocate %d ints for recvcounts\n", 
		 size );
	MPI_Abort( MPI_COMM_WORLD, 1 );
    }
    mycount = (1024 * 1024) / size;
    for (i=0; i<size; i++) 
	recvcounts[i] = mycount;
    sendbuf = (int *) malloc( mycount * size * sizeof(int) );
    if (!sendbuf) {
	fprintf( stderr, "Could not allocate %d ints for sendbuf\n", 
		 mycount * size );
	MPI_Abort( MPI_COMM_WORLD, 1 );
    }
    idx = 0;
    for (i=0; i<size; i++) {
	for (j=0; j<mycount; j++) {
	    sendbuf[idx++] = rank + i;
	}
    }
    recvbuf = (int *)malloc( mycount * sizeof(int) );
    if (!recvbuf) {
	fprintf( stderr, "Could not allocate %d ints for recvbuf\n", 
		 mycount );
	MPI_Abort( MPI_COMM_WORLD, 1 );
    }
    for (i=0; i<mycount; i++) {
	recvbuf[i] = -1;
    }

    MPI_Reduce_scatter( sendbuf, recvbuf, recvcounts, MPI_INT, MPI_SUM, comm );

    sumval = size * rank + ((size - 1) * size)/2;
    /* each element of recvbuf should be the sum over ranks r of (r + rank),
       i.e. size*rank + size*(size-1)/2 */
    for (i=0; i<mycount; i++) {
	if (recvbuf[i] != sumval) {
	    err++;
	    if (err < MAX_ERRORS) {
		fprintf( stdout, "Did not get expected value for reduce scatter\n" );
		fprintf( stdout, "[%d] Got recvbuf[%d] = %d expected %d\n",
			 rank, i, recvbuf[i], sumval );
	    }
	}
    }

#if MTEST_HAVE_MIN_MPI_VERSION(2,2)
    MPI_Reduce_scatter( MPI_IN_PLACE, sendbuf, recvcounts, MPI_INT, MPI_SUM, 
			comm );

    sumval = size * rank + ((size - 1) * size)/2;
    /* after the in-place call, the first mycount elements of sendbuf should
       hold the same sum: size*rank + size*(size-1)/2 */
    for (i=0; i<mycount; i++) {
        if (sendbuf[i] != sumval) {
            err++;
            if (err < MAX_ERRORS) {
                fprintf( stdout, "Did not get expected value for reduce scatter (in place)\n" );
                fprintf( stdout, "[%d] Got buf[%d] = %d expected %d\n",
                    rank, i, sendbuf[i], sumval );
            }
        }
    }
#endif

    free(sendbuf);
    free(recvbuf);
    free(recvcounts);
       
    MTest_Finalize( err );

    MPI_Finalize( );

    return 0;
}
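When every entry of recvcounts is equal, as in this test, MPI-2.2 also offers MPI_Reduce_scatter_block, which takes a single recvcount instead of an array. The following is a minimal, self-contained sketch of the equivalent call; the data pattern mirrors the test above, but the buffer size is chosen only for illustration.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    int rank, size, i, count = 4;   /* per-rank receive count, arbitrary */
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int *sendbuf = malloc(count * size * sizeof(int));
    int *recvbuf = malloc(count * sizeof(int));
    /* Same data pattern as the test above: value rank + dest for each block. */
    for (i = 0; i < count * size; i++)
        sendbuf[i] = rank + i / count;

    /* All counts are equal, so one scalar recvcount replaces the array. */
    MPI_Reduce_scatter_block(sendbuf, recvbuf, count, MPI_INT, MPI_SUM,
                             MPI_COMM_WORLD);

    /* Each element should be size*rank + size*(size-1)/2, as in the test. */
    for (i = 0; i < count; i++)
        if (recvbuf[i] != size * rank + (size - 1) * size / 2)
            printf("rank %d: unexpected value %d\n", rank, recvbuf[i]);

    free(sendbuf);
    free(recvbuf);
    MPI_Finalize();
    return 0;
}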
Beispiel #28
0
static int Zoltan_Reftree_Sum_Weights(ZZ *zz)

{
/*
 * Function to sum the weights in the refinement tree.  On input the
 * refinement tree should be valid and have weights set.  On output the
 * value of summed_weight at each node is the sum of the weights in the
 * subtree with that node as root.
 * This function also sets assigned_to_me for interior nodes to be
 *  1 if the entire subtree is assigned to this processor,
 *  0 if none of the subtree is assigned to this processor, and
 * -1 if some of the subtree is assigned to this processor.
 */
char *yo = "Zoltan_Reftree_Sum_Weights";
ZOLTAN_REFTREE *root;         /* Root of the refinement tree */
int wdim;                 /* Dimension of the weight array */
int i,j;                  /* loop counters */
int count;                /* counter */
ZOLTAN_ID_PTR leaf_list = NULL;      
                          /* leaves for which some proc requests weight */
ZOLTAN_ID_PTR all_leaflist = NULL;   
                          /* leaf_list from all processors */
int reqsize;              /* length of leaf_list */
int *reqsize_all;         /* reqsize from all processors */
int sum_reqsize;          /* sum of all reqsize */
int *displs;              /* running sum of all reqsize */
int my_start;             /* position in leaf_list of this proc's list */
int nproc;                /* number of processors */
ZOLTAN_REFTREE *node;         /* a node in the refinement tree */
struct Zoltan_Reftree_hash_node **hashtab; /* hash table */
int hashsize;             /* dimension of hash table */
float *send_float;        /* sending message of floats */
float *req_weights;       /* the requested weights */
int num_gid_entries = zz->Num_GID; /* Number of array entries in a global ID */

   ZOLTAN_TRACE_ENTER(zz, yo);

  /*
   * set the root and hash table
   */

  root = ((struct Zoltan_Reftree_data_struct *)zz->LB.Data_Structure)->reftree_root;
  if (root == NULL) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Refinement tree not defined.");
    ZOLTAN_TRACE_EXIT(zz, yo);
    return(ZOLTAN_FATAL);
  }
  hashtab  = ((struct Zoltan_Reftree_data_struct *)zz->LB.Data_Structure)->hash_table;
  hashsize = ((struct Zoltan_Reftree_data_struct *)zz->LB.Data_Structure)->hash_table_size;

  /*
   * Determine the dimension of the weight array
   */

  if (zz->Obj_Weight_Dim == 0) {
    wdim = 1;
  } else {
    wdim = zz->Obj_Weight_Dim;
  }

  /*
   * In the first pass, sum the weights of the nodes that are assigned to
   * this processor, and count the leaves that are not.
   */

  count = 0;
  for (i=0; i<root->num_child; i++) {
    Zoltan_Reftree_Sum_My_Weights(zz,&(root->children[i]),&count,wdim);
  }
  root->assigned_to_me = -1;

  /*
   * Make a list of the leaves that are not assigned to this processor
   */

  if (count == 0)
    leaf_list = ZOLTAN_MALLOC_GID(zz);
  else
    leaf_list = ZOLTAN_MALLOC_GID_ARRAY(zz, count);
  if (leaf_list == NULL) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
    ZOLTAN_TRACE_EXIT(zz, yo);
    return(ZOLTAN_MEMERR);
  }

  count = 0;
  Zoltan_Reftree_List_Other_Leaves(zz, root,leaf_list,&count);

  /*
   * Get the unknown leaf weights from other processors.
   */

  nproc = zz->Num_Proc;
  reqsize = count;

  /*
   * Build a list of all processors' request lists by concatenating them in
   * the order of the processor ranks
   */

  /*
   * Determine the request size of all processors
   */

  reqsize_all = (int *)ZOLTAN_MALLOC(nproc*sizeof(int));
  displs = (int *)ZOLTAN_MALLOC(nproc*sizeof(int));
  if (reqsize_all == NULL || displs == NULL) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
    Zoltan_Multifree(__FILE__, __LINE__, 3, &displs,
                                            &reqsize_all,
                                            &leaf_list);
    ZOLTAN_TRACE_EXIT(zz, yo);
    return(ZOLTAN_MEMERR);
  }

  MPI_Allgather((void *)&reqsize,1,MPI_INT,(void *)reqsize_all,1,MPI_INT,
                zz->Communicator);
  displs[0] = 0;
  for (i=1; i<nproc; i++) displs[i] = displs[i-1]+reqsize_all[i-1];
  sum_reqsize = displs[nproc-1] + reqsize_all[nproc-1];
  my_start = displs[zz->Proc];

  /*
   * If sum_reqsize is 0, nothing needs to be communicated
   */

  if (sum_reqsize == 0) {
    Zoltan_Multifree(__FILE__, __LINE__, 3, &displs,
                                            &reqsize_all,
                                            &leaf_list);
  }
  else {

  /*
   * Gather the request list from all processors
   */

    all_leaflist = ZOLTAN_MALLOC_GID_ARRAY(zz, sum_reqsize);
    if (all_leaflist == NULL) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
      Zoltan_Multifree(__FILE__, __LINE__, 4, &all_leaflist,
                                              &displs,
                                              &reqsize_all,
                                              &leaf_list);
      ZOLTAN_TRACE_EXIT(zz, yo);
      return(ZOLTAN_MEMERR);
    }

    /* KDDKDD Changed MPI_BYTE to ZOLTAN_ID_MPI_TYPE  */

    /* Account for number of array entries in an ID. */
    for (i=0; i<nproc; i++) {
      reqsize_all[i] = reqsize_all[i]*num_gid_entries;
      displs[i] = displs[i]*num_gid_entries;
    }

    MPI_Allgatherv((void *)leaf_list,reqsize*num_gid_entries,ZOLTAN_ID_MPI_TYPE,
                   (void *)all_leaflist,reqsize_all,displs,ZOLTAN_ID_MPI_TYPE,
                   zz->Communicator);

    ZOLTAN_FREE(&displs);
    ZOLTAN_FREE(&leaf_list);

    for (i=0; i<nproc; i++) reqsize_all[i] = reqsize_all[i]/num_gid_entries;

  /* 
   * Create a list with the partial sums this processor has
   */

    send_float = (float *) ZOLTAN_MALLOC(sizeof(float)*wdim*sum_reqsize);
    if (send_float == NULL) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
      Zoltan_Multifree(__FILE__, __LINE__, 3, &send_float,
                                              &all_leaflist,
                                              &reqsize_all);
      ZOLTAN_TRACE_EXIT(zz, yo);
      return(ZOLTAN_MEMERR);
    }

    for (i=0; i<sum_reqsize; i++) {
      node = Zoltan_Reftree_hash_lookup(zz, hashtab,
                                    &(all_leaflist[i*num_gid_entries]),
                                    hashsize);
      if (node == NULL)
         for (j=0; j<wdim; j++) send_float[i*wdim+j] = 0.0;
      else
         for (j=0; j<wdim; j++) send_float[i*wdim+j] = node->my_sum_weight[j];
    }

  /*
   * Sum the weights over all the processors
   */

    if (reqsize == 0)
      req_weights = (float *) ZOLTAN_MALLOC(sizeof(float)*wdim);
    else
      req_weights = (float *) ZOLTAN_MALLOC(sizeof(float)*wdim*reqsize);
    if (req_weights == NULL) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
      Zoltan_Multifree(__FILE__, __LINE__, 4, &req_weights,
                                              &send_float,
                                              &all_leaflist,
                                              &reqsize_all);
      ZOLTAN_TRACE_EXIT(zz, yo);
      return(ZOLTAN_MEMERR);
    }

    MPI_Reduce_scatter((void *)send_float, (void *)req_weights, reqsize_all,
                       MPI_FLOAT, MPI_SUM, zz->Communicator);

    ZOLTAN_FREE(&send_float);
    ZOLTAN_FREE(&reqsize_all);

  /*
   * Set the weights this processor requested
   */

    for (i=0; i<count; i++) {
      node = Zoltan_Reftree_hash_lookup(zz, hashtab,
                                  &(all_leaflist[(i+my_start)*num_gid_entries]),
                                  hashsize);
      for (j=0; j<wdim; j++) node->summed_weight[j] = req_weights[i*wdim+j];
    }

    ZOLTAN_FREE(&req_weights);
    ZOLTAN_FREE(&all_leaflist);
  }

  /*
   * All the leaves now have summed_weight set.
   * Sum the weights throughout the tree.
   */

  Zoltan_Reftree_Sum_All_Weights(zz,root,wdim);

  ZOLTAN_TRACE_EXIT(zz, yo);
  return(ZOLTAN_OK);
}
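The communication skeleton above — MPI_Allgather the request sizes, MPI_Allgatherv the concatenated request lists, then answer with an MPI_Reduce_scatter whose counts are the original request sizes so each rank gets back only the replies to its own requests — is a reusable pattern. The toy program below sketches just that skeleton, with integer keys standing in for Zoltan global IDs and one float weight per key; all names and values in it are illustrative and not part of Zoltan.

#include <mpi.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    int rank, nproc, i;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    int reqsize = 1;                      /* this rank's request count */
    int request = (rank + 1) % nproc;     /* key whose weight we want */

    /* Gather everyone's request sizes and compute displacements. */
    int *reqsize_all = malloc(nproc * sizeof(int));
    int *displs = malloc(nproc * sizeof(int));
    MPI_Allgather(&reqsize, 1, MPI_INT, reqsize_all, 1, MPI_INT, MPI_COMM_WORLD);
    displs[0] = 0;
    for (i = 1; i < nproc; i++) displs[i] = displs[i-1] + reqsize_all[i-1];
    int sum_reqsize = displs[nproc-1] + reqsize_all[nproc-1];

    /* Gather the concatenated request lists. */
    int *all_requests = malloc(sum_reqsize * sizeof(int));
    MPI_Allgatherv(&request, reqsize, MPI_INT,
                   all_requests, reqsize_all, displs, MPI_INT, MPI_COMM_WORLD);

    /* Answer what we can: contribute rank*10.0 for keys we own, 0 otherwise. */
    float *send_float = malloc(sum_reqsize * sizeof(float));
    for (i = 0; i < sum_reqsize; i++)
        send_float[i] = (all_requests[i] == rank) ? rank * 10.0f : 0.0f;

    /* Reduce_scatter hands each rank exactly the answers to its own requests:
     * answers[0] ends up as ((rank+1)%nproc) * 10.0f on every rank. */
    float *answers = malloc(reqsize * sizeof(float));
    MPI_Reduce_scatter(send_float, answers, reqsize_all, MPI_FLOAT, MPI_SUM,
                       MPI_COMM_WORLD);

    free(reqsize_all); free(displs); free(all_requests);
    free(send_float);  free(answers);
    MPI_Finalize();
    return 0;
}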
Beispiel #29
0
int main(int argc, char *argv[])
{
    int i, numprocs, rank, size, align_size;
    int skip;
    double latency = 0.0, t_start = 0.0, t_stop = 0.0;
    double timer=0.0;
    double avg_time = 0.0, max_time = 0.0, min_time = 0.0;
    float *sendbuf, *recvbuf;
    int *recvcounts;
    int po_ret;
    size_t bufsize;

    set_header(HEADER);
    set_benchmark_name("osu_scatter");
    enable_accel_support();
    po_ret = process_options(argc, argv);

    if (po_okay == po_ret && none != options.accel) {
        if (init_accel()) {
            fprintf(stderr, "Error initializing device\n");
            exit(EXIT_FAILURE);
        }
    }

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    switch (po_ret) {
        case po_bad_usage:
            print_bad_usage_message(rank);
            MPI_Finalize();
            exit(EXIT_FAILURE);
        case po_help_message:
            print_help_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_version_message:
            print_version_message(rank);
            MPI_Finalize();
            exit(EXIT_SUCCESS);
        case po_okay:
            break;
    }

    if(numprocs < 2) {
        if (rank == 0) {
            fprintf(stderr, "This test requires at least two processes\n");
        }

        MPI_Finalize();
        exit(EXIT_FAILURE);
    }

    if (options.max_message_size > options.max_mem_limit) {
        options.max_message_size = options.max_mem_limit;
    }

    if (allocate_buffer((void**)&recvcounts, numprocs*sizeof(int), none)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    bufsize = sizeof(float)*(options.max_message_size/sizeof(float));
    if (allocate_buffer((void**)&sendbuf, bufsize, options.accel)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    set_buffer(sendbuf, options.accel, 1, bufsize);

    bufsize = sizeof(float)*((options.max_message_size/numprocs + 1)/sizeof(float));
    if (allocate_buffer((void**)&recvbuf, bufsize,
                options.accel)) {
        fprintf(stderr, "Could Not Allocate Memory [rank %d]\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
    set_buffer(recvbuf, options.accel, 0, bufsize);

    print_preamble(rank);

    for(size=1; size*sizeof(float)<= options.max_message_size; size *= 2) {

        if(size > LARGE_MESSAGE_SIZE) {
            skip = SKIP_LARGE;
            options.iterations = options.iterations_large;
        } else {
            skip = SKIP;
        }

        int portion = size / numprocs;
        int remainder = size % numprocs;

        for (i = 0; i < numprocs; i++) {
            recvcounts[i] = 0;
            if (size < numprocs) {
                if (i < size)
                    recvcounts[i] = 1;
            } else {
                if ((remainder != 0) && (i < remainder)) {
                    recvcounts[i] += 1;
                }
                recvcounts[i] += portion;
            }
        }
        MPI_Barrier(MPI_COMM_WORLD);
        
        timer = 0.0;
        for (i = 0; i < options.iterations + skip; i++) {
            t_start = MPI_Wtime();
            MPI_Reduce_scatter(sendbuf, recvbuf, recvcounts, MPI_FLOAT, MPI_SUM,
                               MPI_COMM_WORLD);
            t_stop = MPI_Wtime();
            if (i >= skip) {
                timer += t_stop - t_start;
            }
            MPI_Barrier(MPI_COMM_WORLD);
        }
        latency = (double)(timer * 1e6) / options.iterations;

        MPI_Reduce(&latency, &min_time, 1, MPI_DOUBLE, MPI_MIN, 0,
                MPI_COMM_WORLD);
        MPI_Reduce(&latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0,
                MPI_COMM_WORLD);
        MPI_Reduce(&latency, &avg_time, 1, MPI_DOUBLE, MPI_SUM, 0,
                MPI_COMM_WORLD);
        avg_time = avg_time/numprocs;

        print_stats(rank, size, avg_time, min_time, max_time);
        MPI_Barrier(MPI_COMM_WORLD);
    }

    free_buffer(recvcounts, none);
    free_buffer(sendbuf, options.accel);
    free_buffer(recvbuf, options.accel);

    MPI_Finalize();

    if (none != options.accel) {
        if (cleanup_accel()) {
            fprintf(stderr, "Error cleaning up device\n");
            exit(EXIT_FAILURE);
        }
    }

    return EXIT_SUCCESS;
}
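The recvcounts loop in the benchmark splits `size` elements as evenly as possible: every rank gets size/numprocs, the first size%numprocs ranks get one extra, and when size < numprocs only the first `size` ranks receive a single element. The small standalone check below (no MPI; fill_counts is a hypothetical copy of that logic, and the test parameters are arbitrary) verifies that the counts always sum to `size`.

#include <assert.h>
#include <stdio.h>

/* Hypothetical copy of the benchmark's count-splitting logic. */
static void fill_counts(int *recvcounts, int size, int numprocs)
{
    int portion = size / numprocs;
    int remainder = size % numprocs;
    int i;
    for (i = 0; i < numprocs; i++) {
        recvcounts[i] = 0;
        if (size < numprocs) {
            if (i < size)
                recvcounts[i] = 1;
        } else {
            if ((remainder != 0) && (i < remainder))
                recvcounts[i] += 1;
            recvcounts[i] += portion;
        }
    }
}

int main(void)
{
    int sizes[] = { 1, 3, 8, 1024 };
    int procs[] = { 2, 5, 16 };
    int counts[16];
    for (int s = 0; s < 4; s++) {
        for (int p = 0; p < 3; p++) {
            int total = 0;
            fill_counts(counts, sizes[s], procs[p]);
            for (int i = 0; i < procs[p]; i++)
                total += counts[i];
            assert(total == sizes[s]);   /* counts partition the message */
        }
    }
    printf("all count splits sum to size\n");
    return 0;
}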