Example #1
File: Comm.cpp Project: eheien/vq
VCComm::~VCComm(void) {
#ifdef MPI_C_FOUND

    if (updateFieldCounts) delete updateFieldCounts;

    if (updateFieldDisps) delete updateFieldDisps;

    if (updateFieldSendBuf) delete updateFieldSendBuf;

    if (updateFieldRecvBuf) delete updateFieldRecvBuf;

    if (updateFieldSendIDs) delete updateFieldSendIDs;

    if (updateFieldRecvIDs) delete updateFieldRecvIDs;

    if (failBlockSendBuf) delete failBlockSendBuf;

    if (failBlockRecvBuf) delete failBlockRecvBuf;

    if (failBlockCounts) delete failBlockCounts;

    if (failBlockDisps) delete failBlockDisps;

    MPI_Type_free(&block_val_type);
    MPI_Op_free(&bv_min_op);
    MPI_Op_free(&bv_max_op);
    MPI_Op_free(&bv_sum_op);
    MPI_Type_free(&element_sweep_type);
#endif
}
Example #2
VCComm::~VCComm(void) {
#ifdef MPI_C_FOUND

    // these are declared with "new type[]", so use delete []:
    //
    if (updateFieldCounts) delete [] updateFieldCounts;

    if (updateFieldDisps) delete [] updateFieldDisps;

    if (updateFieldSendBuf) delete [] updateFieldSendBuf;

    if (updateFieldRecvBuf) delete [] updateFieldRecvBuf;

    if (updateFieldSendIDs) delete [] updateFieldSendIDs;

    if (updateFieldRecvIDs) delete [] updateFieldRecvIDs;

    if (failBlockSendBuf) delete [] failBlockSendBuf;

    if (failBlockRecvBuf) delete [] failBlockRecvBuf;

    if (failBlockCounts) delete [] failBlockCounts;

    if (failBlockDisps) delete [] failBlockDisps;

    MPI_Type_free(&block_val_type);
    MPI_Op_free(&bv_min_op);
    MPI_Op_free(&bv_max_op);
    MPI_Op_free(&bv_sum_op);
    MPI_Type_free(&element_sweep_type);
#endif
}
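The two destructors above are variants of the same VCComm cleanup routine: the second switches to delete [] because, as its comment notes, the buffers were allocated with "new type[]". Both assume that the MPI operator and datatype handles (bv_min_op, bv_max_op, bv_sum_op, block_val_type, element_sweep_type) were registered earlier with MPI_Op_create and MPI_Type_commit. Below is a minimal, self-contained sketch of that create/free pairing; the BlockVal struct and the bv_min function are illustrative assumptions, not code from the vq project.

#include <mpi.h>

struct BlockVal { double val; int block_id; };   // illustrative payload, not the project's type

// Hypothetical user function: element-wise minimum by val.
static void bv_min(void *in, void *inout, int *len, MPI_Datatype *) {
    BlockVal *a = static_cast<BlockVal *>(in);
    BlockVal *b = static_cast<BlockVal *>(inout);
    for (int i = 0; i < *len; ++i)
        if (a[i].val < b[i].val) b[i] = a[i];
}

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);

    // Register a datatype describing one BlockVal and a reduction over it.
    MPI_Datatype block_val_type;
    MPI_Type_contiguous((int) sizeof(BlockVal), MPI_BYTE, &block_val_type);
    MPI_Type_commit(&block_val_type);

    MPI_Op bv_min_op;
    MPI_Op_create(bv_min, 1 /* commutative */, &bv_min_op);

    // ... reductions using block_val_type and bv_min_op would go here ...

    // Release the handles, exactly as the destructors above do.
    MPI_Op_free(&bv_min_op);
    MPI_Type_free(&block_val_type);

    MPI_Finalize();
    return 0;
}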
Example #3
int main( int argc, char **argv )
{
    int      *sendbuf;
    int      block_size;
    int      *recvbuf;
    int      size, rank, i, err = 0;
    MPI_Comm comm;
    MPI_Op left_op, right_op, nc_sum_op;

    MTest_Init( &argc, &argv );
    comm = MPI_COMM_WORLD;

    MPI_Comm_size( comm, &size );
    MPI_Comm_rank( comm, &rank );

#if MTEST_HAVE_MIN_MPI_VERSION(2,2)
    /* MPI_Reduce_scatter_block was added in MPI-2.2 */

    MPI_Op_create(&left, 0/*non-commutative*/, &left_op);
    MPI_Op_create(&right, 0/*non-commutative*/, &right_op);
    MPI_Op_create(&nc_sum, 0/*non-commutative*/, &nc_sum_op);

    for (block_size = 1; block_size < MAX_BLOCK_SIZE; block_size *= 2) {
        sendbuf = (int *) malloc( block_size * size * sizeof(int) );
        recvbuf = malloc( block_size * sizeof(int) );

        for (i=0; i<(size*block_size); i++) 
            sendbuf[i] = rank + i;
        for (i=0; i<block_size; i++)
            recvbuf[i] = 0xdeadbeef;

        MPI_Reduce_scatter_block( sendbuf, recvbuf, block_size, MPI_INT, left_op, comm );
        for (i = 0; i < block_size; ++i)
            if (recvbuf[i] != (rank * block_size + i)) ++err;

        MPI_Reduce_scatter_block( sendbuf, recvbuf, block_size, MPI_INT, right_op, comm );
        for (i = 0; i < block_size; ++i)
            if (recvbuf[i] != ((size - 1) + (rank * block_size) + i)) ++err;

        MPI_Reduce_scatter_block( sendbuf, recvbuf, block_size, MPI_INT, nc_sum_op, comm );
        for (i = 0; i < block_size; ++i) {
            int x = rank * block_size + i;
            if (recvbuf[i] != (size*x + (size-1)*size/2)) ++err;
        }

        free(recvbuf);
        free(sendbuf);
    }

    MPI_Op_free(&left_op);
    MPI_Op_free(&right_op);
    MPI_Op_free(&nc_sum_op);
#endif 

    MTest_Finalize( err );
    MPI_Finalize( );

    return err;
}
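The operator functions left, right and nc_sum used above are defined elsewhere in this MPICH test file. As a rough sketch of what such non-commutative operators can look like (the bodies below are assumptions for illustration, not the test's actual source): MPI calls a user function as inoutvec[i] = invec[i] op inoutvec[i], with invec coming from the lower-ranked operand, so a "left" operator can simply keep invec, a "right" operator can keep inoutvec, and nc_sum adds them while being registered as non-commutative.

/* Hypothetical operators with the MPI_User_function signature. */
static void left(void *in, void *inout, int *len, MPI_Datatype *)
{
    int *a = (int *) in, *b = (int *) inout;
    for (int i = 0; i < *len; ++i)
        b[i] = a[i];                  /* keep the lower-ranked (left) operand */
}

static void right(void *in, void *inout, int *len, MPI_Datatype *)
{
    (void) in; (void) len;            /* keep the operand already in inout */
}

static void nc_sum(void *in, void *inout, int *len, MPI_Datatype *)
{
    int *a = (int *) in, *b = (int *) inout;
    for (int i = 0; i < *len; ++i)
        b[i] += a[i];                 /* element-wise sum */
}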
Example #4
int main(int argc, char *argv[])
{
    MPI_Op c_uop = MPI_OP_NULL;
    MPI_Op nc_uop = MPI_OP_NULL;
    int is_commutative = 0;

    MTest_Init(&argc, &argv);

    /* make sure that user-defined ops work too */
    MPI_Op_create(&user_op, 1 /*commute */ , &c_uop);
    MPI_Op_create(&user_op, 0 /*!commute */ , &nc_uop);

#if MTEST_HAVE_MIN_MPI_VERSION(2,2)
    /* this function was added in MPI-2.2 */

#define CHECK_COMMUTATIVE(op_)                      \
    do {                                            \
        MPI_Op_commutative((op_), &is_commutative); \
        if (!is_commutative) { ++errs; }            \
    } while (0)

    /* Check all predefined reduction operations for commutativity.
     * This list is from section 5.9.2 of the MPI-2.1 standard */
    CHECK_COMMUTATIVE(MPI_MAX);
    CHECK_COMMUTATIVE(MPI_MIN);
    CHECK_COMMUTATIVE(MPI_SUM);
    CHECK_COMMUTATIVE(MPI_PROD);
    CHECK_COMMUTATIVE(MPI_LAND);
    CHECK_COMMUTATIVE(MPI_BAND);
    CHECK_COMMUTATIVE(MPI_LOR);
    CHECK_COMMUTATIVE(MPI_BOR);
    CHECK_COMMUTATIVE(MPI_LXOR);
    CHECK_COMMUTATIVE(MPI_BXOR);
    CHECK_COMMUTATIVE(MPI_MAXLOC);
    CHECK_COMMUTATIVE(MPI_MINLOC);

#undef CHECK_COMMUTATIVE

    MPI_Op_commutative(c_uop, &is_commutative);
    if (!is_commutative) {
        ++errs;
    }

    /* also check our non-commutative user defined operation */
    MPI_Op_commutative(nc_uop, &is_commutative);
    if (is_commutative) {
        ++errs;
    }
#endif

    MPI_Op_free(&nc_uop);
    MPI_Op_free(&c_uop);

    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;
}
Example #5
void Sort<Hilbert::HilbertIndices,unsigned int>::binsort()
{
  // Find the global min and max from all the
  // processors.  Do this using MPI_Allreduce.
  Hilbert::HilbertIndices
    local_min,  local_max,
    global_min, global_max;

  if (_data.empty())
    {
      local_min.rack0 = local_min.rack1 = local_min.rack2 = static_cast<Hilbert::inttype>(-1);
      local_max.rack0 = local_max.rack1 = local_max.rack2 = 0;
    }
  else
    {
      local_min = _data.front();
      local_max = _data.back();
    }

  MPI_Op hilbert_max, hilbert_min;

  MPI_Op_create       ((MPI_User_function*)__hilbert_max_op, true, &hilbert_max);
  MPI_Op_create       ((MPI_User_function*)__hilbert_min_op, true, &hilbert_min);

  // Communicate to determine the global
  // min and max for all processors.
  MPI_Allreduce(&local_min,
                &global_min,
                1,
                Parallel::StandardType<Hilbert::HilbertIndices>(),
                hilbert_min,
                this->comm().get());

  MPI_Allreduce(&local_max,
                &global_max,
                1,
                Parallel::StandardType<Hilbert::HilbertIndices>(),
                hilbert_max,
                this->comm().get());

  MPI_Op_free   (&hilbert_max);
  MPI_Op_free   (&hilbert_min);

  // Bin-Sort based on the global min and max
  Parallel::BinSorter<Hilbert::HilbertIndices> bs(this->comm(),_data);
  bs.binsort(_n_procs, global_max, global_min);

  // Now save the local bin sizes in a vector so
  // we don't have to keep around the BinSorter.
  for (processor_id_type i=0; i<_n_procs; ++i)
    _local_bin_sizes[i] = bs.sizeof_bin(i);
}
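The __hilbert_max_op and __hilbert_min_op functions are defined elsewhere in the surrounding source. The sketch below shows one plausible shape for the max operator, assuming HilbertIndices compare as a three-word integer with rack0 the most significant word; both the body and that ordering assumption are illustrative, not the library's actual code.

// Hypothetical element-wise maximum of Hilbert::HilbertIndices.
static void __hilbert_max_op(void *in, void *inout, int *len, MPI_Datatype *)
{
  Hilbert::HilbertIndices *a = static_cast<Hilbert::HilbertIndices *>(in);
  Hilbert::HilbertIndices *b = static_cast<Hilbert::HilbertIndices *>(inout);
  for (int i = 0; i < *len; ++i)
    {
      const bool a_greater =
        (a[i].rack0 != b[i].rack0) ? (a[i].rack0 > b[i].rack0) :
        (a[i].rack1 != b[i].rack1) ? (a[i].rack1 > b[i].rack1) :
                                     (a[i].rack2 > b[i].rack2);
      if (a_greater)
        b[i] = a[i];
    }
}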
Example #6
/*@C
  VecFinalizePackage - This function finalizes everything in the Vec package. It is called
  from PetscFinalize().

  Level: developer

.keywords: Vec, initialize, package
.seealso: PetscInitialize()
@*/
PetscErrorCode  VecFinalizePackage(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFunctionListDestroy(&VecList);CHKERRQ(ierr);
  ierr = MPI_Op_free(&PetscSplitReduction_Op);CHKERRQ(ierr);
  ierr = MPI_Op_free(&VecMax_Local_Op);CHKERRQ(ierr);
  ierr = MPI_Op_free(&VecMin_Local_Op);CHKERRQ(ierr);
  VecPackageInitialized = PETSC_FALSE;
  VecRegisterAllCalled  = PETSC_FALSE;
  PetscFunctionReturn(0);
}
Example #7
int
dgraphAllreduceMaxSum2 (
Gnum *                      reduloctab,           /* Pointer to array of local Gnum data   */
Gnum *                      reduglbtab,           /* Pointer to array of reduced Gnum data */
int                         redumaxsumnbr,        /* Number of max + sum Gnum operations   */
MPI_User_function *         redufuncptr,          /* Pointer to operator function          */
MPI_Comm                    proccomm)             /* Communicator to be used for reduction */
{
  MPI_Datatype      redutypedat;                  /* Data type for finding best separator              */
  MPI_Op            reduoperdat;                  /* Handle of MPI operator for finding best separator */

  if ((MPI_Type_contiguous (redumaxsumnbr, GNUM_MPI, &redutypedat) != MPI_SUCCESS) ||
      (MPI_Type_commit (&redutypedat)                              != MPI_SUCCESS) ||
      (MPI_Op_create (redufuncptr, 1, &reduoperdat)                != MPI_SUCCESS)) {
    errorPrint ("dgraphAllreduceMaxSum: communication error (1)");
    return     (1);
  }

  if (MPI_Allreduce (reduloctab, reduglbtab, 1, redutypedat, reduoperdat, proccomm) != MPI_SUCCESS) {
    errorPrint ("dgraphAllreduceMaxSum: communication error (2)");
    return     (1);
  }

  if ((MPI_Op_free   (&reduoperdat) != MPI_SUCCESS) ||
      (MPI_Type_free (&redutypedat) != MPI_SUCCESS)) {
    errorPrint ("dgraphAllreduceMaxSum: communication error (3)");
    return     (1);
  }

  return (0);
}
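The caller of dgraphAllreduceMaxSum2 supplies redufuncptr, which must combine some leading slots of the contiguous Gnum array with max and the remaining slots with sum; the split is known only to the caller. The sketch below assumes two max slots followed by two sum slots purely for illustration (Gnum is the integer type used by the surrounding source); it is not the actual operator from the project.

enum { REDU_MAX_NBR = 2, REDU_SUM_NBR = 2 };      /* illustrative split, chosen by the caller */

/* Hypothetical reduction operator passed as redufuncptr. */
static void reduMaxSumFunc(void *in, void *inout, int *len, MPI_Datatype *)
{
  Gnum *a = (Gnum *) in;
  Gnum *b = (Gnum *) inout;
  for (int j = 0; j < *len; ++j) {                /* one group of slots per element */
    for (int i = 0; i < REDU_MAX_NBR; ++i)        /* max over the leading slots */
      if (a[i] > b[i])
        b[i] = a[i];
    for (int i = REDU_MAX_NBR; i < REDU_MAX_NBR + REDU_SUM_NBR; ++i)
      b[i] += a[i];                               /* sum over the remaining slots */
    a += REDU_MAX_NBR + REDU_SUM_NBR;
    b += REDU_MAX_NBR + REDU_SUM_NBR;
  }
}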
Example #8
int main( int argc, char **argv )
{
    int              rank, size, i;
    int              data;
    int              errors=0;
    int              result = -100;
    int              correct_result;
    MPI_Op           op;

    MTest_Init( &argc, &argv );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    MPI_Comm_size( MPI_COMM_WORLD, &size );

    data = rank;
    MPI_Op_create( (MPI_User_function *)addem, 1, &op );
    MPI_Reduce ( &data, &result, 1, MPI_INT, op, 0, MPI_COMM_WORLD );
    MPI_Bcast  ( &result, 1, MPI_INT, 0, MPI_COMM_WORLD );
    MPI_Op_free( &op );
    correct_result = 0;
    for(i=0;i<size;i++) 
      correct_result += i;
    if (result != correct_result) errors++;

    MTest_Finalize( errors );
    MPI_Finalize();
    return MTestReturnValue( errors );
}
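The addem function registered above is defined elsewhere in the test; the cast to MPI_User_function * suggests it is declared with typed pointers. A minimal sketch consistent with how the test uses it (a commutative, element-wise integer sum) could be:

/* Hypothetical body for addem: element-wise integer sum. */
void addem(int *invec, int *inoutvec, int *len, MPI_Datatype *dtype)
{
    (void) dtype;
    for (int i = 0; i < *len; ++i)
        inoutvec[i] += invec[i];
}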
Example #9
void mpiReduce_pickerV3(float       *resDataAbsMaxPaddedGlobal,
                        size_t      *resDataMaxIndPaddedGlobal,
                        size_t      resSize,
                        eXCorrMerge bAbs)
{
    resSizeMPI = resSize;

    MPI_Datatype mpiType;
    MPI_Type_contiguous((int) 2, MPI_FLOAT, &mpiType);
    MPI_Type_commit(&mpiType);

    float     *resDataGlobalNode = NULL;
    float     *resDataGlobalNodeReduce = NULL;

    array_new(resDataGlobalNode, 2*resSize);
    array_new(resDataGlobalNodeReduce, 2*resSize);

    memcpy(resDataGlobalNode,
           resDataAbsMaxPaddedGlobal,
           resSize*sizeof(float));
    mpiOp_array_typecast(resDataMaxIndPaddedGlobal,
                         resDataGlobalNode+resSize,
                         resSize);

    MPI_Op mpiOp;

    switch (bAbs) {
        case XCORR_MERGE_NEGATIVE:
            MPI_Op_create((MPI_User_function *) mpiOp_xcorrMergeResultGlobalV3Abs,
                          1,            // commutative
                          &mpiOp);
            break;
        case XCORR_MERGE_POSITIVE:
            MPI_Op_create((MPI_User_function *) mpiOp_xcorrMergeResultGlobalV3,
                          1,            // commutative
                          &mpiOp);
            break;
        default:
            ERROR("mpiReduce_pickerV3", "unsupported merging mode");
    }
    MPI_Reduce(resDataGlobalNode,
               resDataGlobalNodeReduce,
               (int) resSize,       // resSize elements of size 2*sizeof(float)
               mpiType,
               mpiOp,
               0,
               MPI_COMM_WORLD);
    MPI_Op_free(&mpiOp);

    memcpy(resDataAbsMaxPaddedGlobal,
           resDataGlobalNodeReduce,
           resSize*sizeof(float));
    mpiOp_array_typecast(resDataGlobalNodeReduce+resSize,
                         resDataMaxIndPaddedGlobal,
                         resSize);

    array_delete(resDataGlobalNode);
    array_delete(resDataGlobalNodeReduce);
    MPI_Type_free(&mpiType);
}
Example #10
int main( int argc, char **argv )
{
    int              rank, size;
    int              data;
    int              errors=0;
    int              result = -100;
    MPI_Op           op;

    MPI_Init( &argc, &argv );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    MPI_Comm_size( MPI_COMM_WORLD, &size );

    data = rank;

    MPI_Op_create( (MPI_User_function*)assoc, 0, &op );
    MPI_Reduce ( &data, &result, 1, MPI_INT, op, size-1, MPI_COMM_WORLD );
    MPI_Bcast  ( &result, 1, MPI_INT, size-1, MPI_COMM_WORLD );
    MPI_Op_free( &op );
    if (result == BAD_ANSWER) errors++;

    if (errors)
      printf( "[%d] done with ERRORS(%d)!\n", rank, errors );
    else {
	if (rank == 0) 
	    printf(" No Errors\n");
    }

    MPI_Finalize();

    return errors;
}
Example #11
int main( int argc, char *argv[] )
{
    int errs = 0;
#if MTEST_HAVE_MIN_MPI_VERSION(2,2)
    int i;
    int *inbuf = NULL;
    int *inoutbuf = NULL;
    int count = -1;
    MPI_Op uop = MPI_OP_NULL;
#endif

    MTest_Init(&argc, &argv);
#if MTEST_HAVE_MIN_MPI_VERSION(2,2)
    /* this function was added in MPI-2.2 */

    inbuf = malloc(sizeof(int) * MAX_BUF_ELEMENTS);
    inoutbuf = malloc(sizeof(int) * MAX_BUF_ELEMENTS);

    for (count = 0; count < MAX_BUF_ELEMENTS; count > 0 ? count*=2 : count++) {
        for (i = 0; i < count; ++i) {
            inbuf[i] = i;
            inoutbuf[i] = i;
        }
        MPI_Reduce_local(inbuf, inoutbuf, count, MPI_INT, MPI_SUM);
        for (i = 0; i < count; ++i) {
            if (inbuf[i] != i)
                ++errs;
            if (inoutbuf[i] != (2*i))
                ++errs;
        }
    }

    /* make sure that user-defined ops work too */
    MPI_Op_create(&user_op, 0/*!commute*/, &uop);
    for (count = 0; count < MAX_BUF_ELEMENTS; count > 0 ? count*=2 : count++) {
        for (i = 0; i < count; ++i) {
            inbuf[i] = i;
            inoutbuf[i] = i;
        }
        MPI_Reduce_local(inbuf, inoutbuf, count, MPI_INT, uop);
        errs += uop_errs;
        for (i = 0; i < count; ++i) {
            if (inbuf[i] != i)
                ++errs;
            if (inoutbuf[i] != (3*i))
                ++errs;
        }
    }
    MPI_Op_free(&uop);

    free(inbuf);
    free(inoutbuf);
#endif

    MTest_Finalize(errs);
    MPI_Finalize();
    return 0;
}
Example #12
int main( int argc, char **argv )
{
    MPI_Op op;
    int    i, rank, size, bufsize, errcnt = 0, toterr;
    double *inbuf, *outbuf, value;
    
    MPI_Init( &argc, &argv );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    MPI_Comm_size( MPI_COMM_WORLD, &size );
    MPI_Op_create( (MPI_User_function *)add, 1, &op );
    
    bufsize = 1;
    while (bufsize < 100000) {
	inbuf  = (double *)malloc( bufsize * sizeof(double) );
	outbuf = (double *)malloc( bufsize * sizeof(double) );
	if (! inbuf || ! outbuf) {
	    fprintf( stderr, "Could not allocate buffers for size %d\n",
		     bufsize );
	    errcnt++;
	    break;
	}

	value = (rank & 0x1) ? 1.0 : -1.0;
	for (i=0; i<bufsize; i++) {
	    inbuf[i]  = value;
	    outbuf[i] = 100.0;
	}
	MPI_Allreduce( inbuf, outbuf, bufsize, MPI_DOUBLE, op, 
		       MPI_COMM_WORLD );
	/* Check values */
	value = (size & 0x1) ? -1.0 : 0.0;
	for (i=0; i<bufsize; i++) {
	    if (outbuf[i] != value) {
		if (errcnt < 10) 
		    printf( "outbuf[%d] = %f, should = %f\n", i, outbuf[i],
			    value );
		errcnt ++;
	    }
	}
	free( inbuf );
	free( outbuf );
	bufsize *= 2;
    }
    
    MPI_Allreduce( &errcnt, &toterr, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    if (rank == 0) {
	if (toterr == 0) 
	    printf( " No Errors\n" );
	else 
	    printf( "*! %d errors!\n", toterr );
    }

    MPI_Op_free( &op );
    MPI_Finalize( );
    return 0;
}
Example #13
Int128 Comm::add_int128(Int128 x) const {
#ifdef OMEGA_H_USE_MPI
  MPI_Op op;
  int commute = true;
  CALL(MPI_Op_create(mpi_add_int128, commute, &op));
  CALL(MPI_Allreduce(MPI_IN_PLACE, &x, sizeof(Int128), MPI_PACKED, op, impl_));
  CALL(MPI_Op_free(&op));
#endif
  return x;
}
Example #14
/*
 * Class:     mpi_Op
 * Method:    Free
 * Signature: ()V
 */
JNIEXPORT void JNICALL Java_mpi_Op_free(JNIEnv *env, jobject jthis)
{
    MPI_Op op = (MPI_Op)((*env)->GetLongField(env, jthis, ompi_java.OpHandle));

    if(op != NULL && op != MPI_OP_NULL)
    {
        int rc = MPI_Op_free(&op);
        ompi_java_exceptionCheck(env, rc);
        ((*env)->SetLongField(env,jthis,ompi_java.OpHandle,(long)MPI_OP_NULL));
    }
}
Example #15
 ~user_op()
 {
     if (std::uncaught_exception()) {
         // Ignore failure cases: there are obviously other problems
         // already, and we don't want to cause program termination if
         // MPI_Op_free fails.
         MPI_Op_free(&mpi_op);
     } else {
         BOOST_MPI_CHECK_RESULT(MPI_Op_free, (&mpi_op));
     }
 }
Example #16
        std::pair<MPI_Request, const K*>* buildTwo(Prcndtnr* B, const MPI_Comm& comm) {
            static_assert(std::is_same<typename Prcndtnr::super&, decltype(*this)>::value || std::is_same<typename Prcndtnr::super::super&, decltype(*this)>::value, "Wrong preconditioner");
            std::pair<MPI_Request, const K*>* ret = nullptr;
            constexpr unsigned short N = std::is_same<typename Prcndtnr::super&, decltype(*this)>::value ? 2 : 3;
            unsigned short allUniform[N + 1];
            allUniform[0] = Subdomain<K>::_map.size();
            const Option& opt = *Option::get();
            unsigned short nu = allUniform[1] = (_co ? _co->getLocal() : static_cast<unsigned short>(opt["geneo_nu"]));
            allUniform[2] = static_cast<unsigned short>(~nu);
            if(N == 3)
                allUniform[3] = nu > 0 ? nu : std::numeric_limits<unsigned short>::max();
            {
                MPI_Op op;
#ifdef __MINGW32__
                MPI_Op_create(&f<N>, 1, &op);
#else
                auto f = [](void* in, void* inout, int*, MPI_Datatype*) -> void {
                    HPDDM_LAMBDA_F(in, input, inout, output, N)
                };
                MPI_Op_create(f, 1, &op);
#endif
                MPI_Allreduce(MPI_IN_PLACE, allUniform, N + 1, MPI_UNSIGNED_SHORT, op, comm);
                MPI_Op_free(&op);
            }
            if(nu > 0 || allUniform[1] != 0 || allUniform[2] != std::numeric_limits<unsigned short>::max()) {
                if(!_co) {
                    _co = new CoarseOperator;
                    _co->setLocal(nu);
                }
                double construction = MPI_Wtime();
                if(allUniform[1] == nu && allUniform[2] == static_cast<unsigned short>(~nu))
                    ret = _co->template construction<1, excluded>(Operator(*B, allUniform[0]), comm);
                else if(N == 3 && allUniform[1] == 0 && allUniform[2] == static_cast<unsigned short>(~allUniform[3]))
                    ret = _co->template construction<2, excluded>(Operator(*B, allUniform[0]), comm);
                else
                    ret = _co->template construction<0, excluded>(Operator(*B, allUniform[0]), comm);
                construction = MPI_Wtime() - construction;
                if(_co->getRank() == 0 && opt.val<int>("verbosity") > 0) {
                    std::stringstream ss;
                    ss << std::setprecision(2) << construction;
                    std::string line = " --- coarse operator transferred and factorized by " + to_string(static_cast<int>(opt["master_p"])) + " process" + (static_cast<int>(opt["master_p"]) == 1 ? "" : "es") + " (in " + ss.str() + "s)";
                    std::cout << line << std::endl;
                    std::cout << std::right << std::setw(line.size()) << "(criterion = " + to_string(allUniform[1] == nu && allUniform[2] == static_cast<unsigned short>(~nu) ? nu : (N == 3 && allUniform[2] == static_cast<unsigned short>(~allUniform[3]) ? -_co->getLocal() : 0)) + " -- topology = " + to_string(static_cast<int>(opt["master_topology"])) + " -- distribution = " + to_string(static_cast<int>(opt["master_distribution"])) + ")" << std::endl;
                    std::cout.unsetf(std::ios_base::adjustfield);
                }
            }
            else {
                delete _co;
                _co = nullptr;
            }
            return ret;
        }
Example #17
void ADIO_End(int *error_code)
{
    ADIOI_Flatlist_node *curr, *next;
    ADIOI_Datarep *datarep, *datarep_next;
    
/*    FPRINTF(stderr, "reached end\n"); */

    /* if a default errhandler was set on MPI_FILE_NULL then we need to ensure
     * that our reference to that errhandler is released */
/* Open MPI: The call to PMPI_File_set_errhandler has to be done in romio/src/io_romio_file_open.c
   in routine mca_io_romio_file_close()
*/
#if 0
    PMPI_File_set_errhandler(MPI_FILE_NULL, MPI_ERRORS_RETURN);
#endif

/* delete the flattened datatype list */
    curr = ADIOI_Flatlist;
    while (curr) {
	if (curr->blocklens) ADIOI_Free(curr->blocklens);
	if (curr->indices) ADIOI_Free(curr->indices);
	next = curr->next;
	ADIOI_Free(curr);
	curr = next;
    }
    ADIOI_Flatlist = NULL;

/* free file and info tables used for Fortran interface */
    if (ADIOI_Ftable) ADIOI_Free(ADIOI_Ftable);
#ifndef HAVE_MPI_INFO
    if (MPIR_Infotable) ADIOI_Free(MPIR_Infotable);
#endif


/* free the memory allocated for a new data representation, if any */
    datarep = ADIOI_Datarep_head;
    while (datarep) {
        datarep_next = datarep->next;
        ADIOI_Free(datarep->name);
        ADIOI_Free(datarep);
        datarep = datarep_next;
    }

    if( ADIOI_syshints != MPI_INFO_NULL)
	    MPI_Info_free(&ADIOI_syshints);

    MPI_Op_free(&ADIO_same_amode);

    *error_code = MPI_SUCCESS;
}
Example #18
        inline std::pair<MPI_Request, const K*>* buildTwo(Operator&& A, const MPI_Comm& comm, Container& parm) {
            static_assert(N == 2 || N == 3, "Wrong template parameter");
            std::pair<MPI_Request, const K*>* ret = nullptr;
            unsigned short allUniform[N + 1];
            allUniform[0] = Subdomain<K>::_map.size();
            allUniform[1] = parm[NU];
            allUniform[2] = static_cast<unsigned short>(~parm[NU]);
            if(N == 3)
                allUniform[3] = parm[NU] > 0 ? parm[NU] : std::numeric_limits<unsigned short>::max();
            {
                MPI_Op op;
#ifndef __MINGW32__
                auto f = [](void* in, void* inout, int*, MPI_Datatype*) -> void {
                    HPDDM_LAMBDA_F(in, input, inout, output, N)
                };
                MPI_Op_create(f, 1, &op);
#else
                MPI_Op_create(&f<N>, 1, &op);
#endif
                MPI_Allreduce(MPI_IN_PLACE, allUniform, N + 1, MPI_UNSIGNED_SHORT, op, comm);
                MPI_Op_free(&op);
            }
            A.sparsity(allUniform[0]);
            if(parm[NU] > 0 || allUniform[1] != 0 || allUniform[2] != std::numeric_limits<unsigned short>::max()) {
                if(!_co)
                    _co = new CoarseOperator;

                _co->setLocal(parm[NU]);

                double construction = MPI_Wtime();
                if(allUniform[1] == parm[NU] && allUniform[2] == static_cast<unsigned short>(~parm[NU]))
                    ret = _co->template construction<1, excluded>(A, comm, parm);
                else if(N == 3 && allUniform[1] == 0 && allUniform[2] == static_cast<unsigned short>(~allUniform[3]))
                    ret = _co->template construction<2, excluded>(A, comm, parm);
                else
                    ret = _co->template construction<0, excluded>(A, comm, parm);
                construction = MPI_Wtime() - construction;
                if(_co->getRank() == 0) {
                    std::cout << "                 (" << parm[P] << " process" << (parm[P] > 1 ? "es" : "") << " -- topology = " << parm[TOPOLOGY] << " -- distribution = " << _co->getDistribution() << ")" << std::endl;
                    std::cout << std::scientific << " --- coarse operator transferred and factorized (in " << construction << ")" << std::endl;
                    std::cout << "                                     (criterion: " << (allUniform[1] == parm[NU] && allUniform[2] == static_cast<unsigned short>(~parm[NU]) ? parm[NU] : (N == 3 && allUniform[2] == static_cast<unsigned short>(~allUniform[3]) ? -_co->getLocal() : 0)) << ")" << std::endl;
                }
                _uc = new K[_co->getSizeRHS()];
            }
            return ret;
        }
Example #19
int main(int argc, char *argv[])
{
    int errs = 0;
    int rank, size;
    int minsize = 2, count;
    MPI_Comm comm;
    MPI_Op op;
    int *buf, i;

    MTest_Init(&argc, &argv);

    MPI_Op_create(mysum, 0, &op);

    while (MTestGetIntracommGeneral(&comm, minsize, 1)) {
        if (comm == MPI_COMM_NULL)
            continue;
        MPI_Comm_size(comm, &size);
        MPI_Comm_rank(comm, &rank);

        for (count = 1; count < 65000; count = count * 2) {
            /* Contiguous data */
            buf = (int *) malloc(count * sizeof(int));
            for (i = 0; i < count; i++)
                buf[i] = rank + i;
            MPI_Allreduce(MPI_IN_PLACE, buf, count, MPI_INT, op, comm);
            /* Check the results */
            for (i = 0; i < count; i++) {
                int result = i * size + (size * (size - 1)) / 2;
                if (buf[i] != result) {
                    errs++;
                    if (errs < 10) {
                        fprintf(stderr, "buf[%d] = %d expected %d\n", i, buf[i], result);
                    }
                }
            }
            free(buf);
        }
        MTestFreeComm(&comm);
    }
    MPI_Op_free(&op);

    MTest_Finalize(errs);
    return MTestReturnValue(errs);
}
Example #20
	/**
	 * Reduce all server timers to server master.
	 * @param timer
	 */
	void mpi_reduce_timers(TIMER& timer){
		sip::SIPMPIAttr &attr = sip::SIPMPIAttr::get_instance();
		sip::check(attr.is_server(), "Trying to reduce timer on a non-server rank !");
		long long * timers = timer.get_timers();
		long long * timer_counts = timer.get_timer_count();

		// Data to send to reduce
		long long * sendbuf = new long long[2*timer.max_slots + 1];
		sendbuf[0] = timer.max_slots;
		// The data will be structured as
		// Length of arrays 1 & 2
		// Array1 -> timer_switched_ array
		// Array2 -> timer_list_ array
		std::copy(timer_counts + 0, timer_counts + timer.max_slots, sendbuf+1);
		std::copy(timers + 0, timers + timer.max_slots, sendbuf+1+ timer.max_slots);

		long long * recvbuf = new long long[2*timer.max_slots + 1]();

		int server_master = attr.COMPANY_MASTER_RANK;
		MPI_Comm server_company = attr.company_communicator();

		MPI_Datatype server_timer_reduce_dt; // MPI Type for timer data to be reduced.
		MPI_Op server_timer_reduce_op;	// MPI OP to reduce timer data.
		SIPMPIUtils::check_err(MPI_Type_contiguous(timer.max_slots*2+1, MPI_LONG_LONG, &server_timer_reduce_dt));
		SIPMPIUtils::check_err(MPI_Type_commit(&server_timer_reduce_dt));
		SIPMPIUtils::check_err(MPI_Op_create((MPI_User_function *)server_timer_reduce_op_function, 1, &server_timer_reduce_op));

		SIPMPIUtils::check_err(MPI_Reduce(sendbuf, recvbuf, 1, server_timer_reduce_dt, server_timer_reduce_op, server_master, server_company));

		if (attr.is_company_master()){
			std::copy(recvbuf+1, recvbuf+1+timer.max_slots, timer_counts);
			std::copy(recvbuf+1+timer.max_slots, recvbuf+1+2*timer.max_slots, timers);
		}

		// Cleanup
		delete [] sendbuf;
		delete [] recvbuf;

		SIPMPIUtils::check_err(MPI_Type_free(&server_timer_reduce_dt));
		SIPMPIUtils::check_err(MPI_Op_free(&server_timer_reduce_op));
	}
Example #21
int main(int argc, char **argv)
{
    int rank, size;
    int data;
    int errors = 0;
    int result = -100;
    MPI_Op op;

    MTest_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    data = rank;

    MPI_Op_create((MPI_User_function *) assoc, 0, &op);
    MPI_Reduce(&data, &result, 1, MPI_INT, op, size - 1, MPI_COMM_WORLD);
    MPI_Bcast(&result, 1, MPI_INT, size - 1, MPI_COMM_WORLD);
    MPI_Op_free(&op);
    if (result == BAD_ANSWER)
        errors++;

    MTest_Finalize(errors);
    return MTestReturnValue(errors);
}
Example #22
int split_bin_2level(
  int bins,
  int bin,
  const lwgrp_ring* lev1_ring,
  const lwgrp_logring* lev1_logring,
  const lwgrp_ring* lev2_ring,
  const lwgrp_logring* lev2_logring,
  lwgrp_ring* new_lev1_ring,
  lwgrp_logring* new_lev1_logring,
  lwgrp_ring* new_lev2_ring,
  lwgrp_logring* new_lev2_logring)
{
  int i;

  /* initialize new rings and logrings to empty groups,
   * we'll overwrite these if proc is really in a group */
  lwgrp_ring_set_null(new_lev1_ring);
  lwgrp_ring_set_null(new_lev2_ring);
  lwgrp_logring_build_from_ring(new_lev1_ring, new_lev1_logring);
  lwgrp_logring_build_from_ring(new_lev2_ring, new_lev2_logring);

  if (bins <= 0) {
    return 0;
  }

  /* get our rank within and the size of the parent communicator */
  int comm_size;
  int comm_rank = lev1_ring->comm_rank;
  MPI_Comm_size(lev1_ring->comm, &comm_size);

  /* allocate memory to execute collectives */
  int* reduce_inbuf   = (int*) malloc(bins * sizeof(int));
  int* reduce_outbuf  = (int*) malloc(bins * sizeof(int));
  int* scan_inbuf     = (int*) malloc(2 * bins * sizeof(int));
  int* scan_recvleft  = (int*) malloc(2 * bins * sizeof(int));
  int* scan_recvright = (int*) malloc(2 * bins * sizeof(int));

  /* initialize all bins to MPI_PROC_NULL, except for our
   * bin in which case we list our rank within comm */
  for (i = 0; i < bins; i++) {
    /* initialize all bins to size(lev1), would like MPI_PROC_NULL,
     * but we use size instead so that reduce(min) does the right thing */
    reduce_inbuf[i] = comm_size;
  }
  if (bin >= 0) {
    reduce_inbuf[bin] = comm_rank;
  }

  /* reduce to node leader to find lowest rank in each bin */
  lwgrp_logring_reduce(
    reduce_inbuf, reduce_outbuf, bins, MPI_INT, MPI_MIN,
    0, lev1_ring, lev1_logring
  );

  /* create the scan type (a rank and a count pair) */
  MPI_Datatype scan_type;
  MPI_Type_contiguous(2, MPI_INT, &scan_type);
  MPI_Type_commit(&scan_type);

  /* double exscan across node leaders to
   * build info for new node leader chains */
  int lev1_rank = lev1_ring->group_rank;
  if (lev1_rank == 0) {
    /* prepare data for input to double scan, for each bin
     * record the lowest rank and a count of either 0 or 1 */
    for (i = 0; i < bins; i++) {
      if (reduce_outbuf[i] != comm_size) {
        scan_inbuf[i*2 + SCAN_RANK]  = reduce_outbuf[i];
        scan_inbuf[i*2 + SCAN_COUNT] = 1;
      } else {
        scan_inbuf[i*2 + SCAN_RANK]  = MPI_PROC_NULL;
        scan_inbuf[i*2 + SCAN_COUNT] = 0;
      }
    }

    /* create the scan operation */
    MPI_Op scan_op;
    int commutative = 0;
    MPI_Op_create(scan_chain, commutative, &scan_op);

    /* execute the double exclusive scan to get next rank and
     * count of ranks to either side for each bin */
    lwgrp_logring_double_exscan(
      scan_inbuf, scan_recvright, scan_inbuf, scan_recvleft,
      bins, scan_type, scan_op, lev2_ring, lev2_logring
    );

    /* if we're on the end of the level 2 group, need to initialize
     * the recv values */
    int lev2_rank = lev2_ring->group_rank;
    int lev2_size = lev2_ring->group_size;
    if (lev2_rank == 0) {
      /* we're on the left end of lev2 group, so we didn't get
       * anything from the left side */
      for (i = 0; i < bins; i++) {
        scan_recvleft[i*2 + SCAN_RANK]  = MPI_PROC_NULL;
        scan_recvleft[i*2 + SCAN_COUNT] = 0;
      }
    }
    if (lev2_rank == lev2_size-1) {
      /* we're on the right end of lev2 group, so we didn't get
       * anything from the right side */
      for (i = 0; i < bins; i++) {
        scan_recvright[i*2 + SCAN_RANK]  = MPI_PROC_NULL;
        scan_recvright[i*2 + SCAN_COUNT] = 0;
      }
    }

    /* free the scan op */
    MPI_Op_free(&scan_op);
  }

  /* broadcast scan results to local comm */
  lwgrp_logring_bcast(scan_recvleft,  bins, scan_type, 0, lev1_ring, lev1_logring);
  lwgrp_logring_bcast(scan_recvright, bins, scan_type, 0, lev1_ring, lev1_logring);

  /* free the scan type */
  MPI_Type_free(&scan_type);

  /* call bin_split on local chain */
  lwgrp_ring_split_bin_radix(bins, bin, lev1_ring, new_lev1_ring);
  lwgrp_logring_build_from_ring(new_lev1_ring, new_lev1_logring);

  /* for each valid bin, all rank 0 procs of new lev1 groups form new lev2 groups */
  if (bin >= 0) {
    int new_lev1_rank = new_lev1_ring->group_rank;
    if (new_lev1_rank == 0) {
      /* extract chain values from scan results */
      MPI_Comm comm = new_lev1_ring->comm;
      int left  = scan_recvleft[2*bin  + SCAN_RANK];
      int right = scan_recvright[2*bin + SCAN_RANK];
      int size  = scan_recvleft[2*bin + SCAN_COUNT] + scan_recvright[2*bin + SCAN_COUNT] + 1;
      int rank  = scan_recvleft[2*bin + SCAN_COUNT];

      /* build chain, then ring, then logring, and finally free chain */
      lwgrp_chain tmp_chain;
      lwgrp_chain_build_from_vals(comm, left, right, size, rank, &tmp_chain);
      lwgrp_ring_build_from_chain(&tmp_chain, new_lev2_ring);
      lwgrp_logring_build_from_ring(new_lev2_ring, new_lev2_logring);
      lwgrp_chain_free(&tmp_chain);
    }
  }

  /* free our temporary memory */
  free(scan_recvright);
  free(scan_recvleft);
  free(scan_inbuf);
  free(reduce_outbuf);
  free(reduce_inbuf);

  return 0;
}
Example #23
F_VOID_FUNC dgamn2d_(int *ConTxt, F_CHAR scope, F_CHAR top, int *m, int *n,
                     double *A, int *lda, int *rA, int *cA, int *ldia,
                     int *rdest, int *cdest)
#endif
/*
 *  -- V1.1 BLACS routine --
 *  University of Tennessee, May 1, 1996
 *  Written by Clint Whaley.
 *
 *  Purpose
 *  =======
 *  Combine amn operation for double precision rectangular matrices.
 *
 *  Arguments
 *  =========
 *
 *  ConTxt  (input) Ptr to int
 *          Index into MyConTxts00 (my contexts array).
 *
 *  SCOPE   (input) Ptr to char
 *          Limit the scope of the operation.
 *          = 'R' :   Operation is performed by a process row.
 *          = 'C' :   Operation is performed by a process column.
 *          = 'A' :   Operation is performed by all processes in grid.
 *
 *  TOP     (input) Ptr to char
 *          Controls fashion in which messages flow within the operation.
 *
 *  M       (input) Ptr to int
 *          The number of rows of the matrix A.  M >= 0.
 *
 *  N       (input) Ptr to int
 *          The number of columns of the matrix A.  N >= 0.
 *
 *  A       (output) Ptr to double precision two dimensional array
 *          The m by n matrix A.  Fortran77 (column-major) storage
 *          assumed.
 *
 *  LDA     (input) Ptr to int
 *          The leading dimension of the array A.  LDA >= M.
 *
 *  RA      (output) Integer Array, dimension (LDIA, N)
 *          Contains process row that the amn of each element
 *          of A was found on: i.e., rA(1,2) contains the process
 *          row that the amn of A(1,2) was found on.
 *          Values are left on process {rdest, cdest} only, others
 *          may be modified, but not left with interesting data.
 *          If rdest == -1, then result is left on all processes in scope.
 *          If LDIA == -1, this array is not accessed, and need not exist.
 *
 *  CA      (output) Integer Array, dimension (LDIA, N)
 *          Contains process column that the amn of each element
 *          of A was found on: i.e., cA(1,2) contains the process
 *          column that the max/min of A(1,2) was found on.
 *          Values are left on process {rdest, cdest} only, others
 *          may be modified, but not left with interesting data.
 *          If rdest == -1, then result is left on all processes in scope.
 *          If LDIA == -1, this array is not accessed, and need not exist.
 *
 *  LDIA    (input) Ptr to int
 *          If (LDIA == -1), then the arrays RA and CA are not accessed.
 *          ELSE leading dimension of the arrays RA and CA.  LDIA >= M.
 *
 *  RDEST   (input) Ptr to int
 *          The process row of the destination of the amn.
 *          If rdest == -1, then result is left on all processes in scope.
 *
 *  CDEST   (input) Ptr to int
 *          The process column of the destination of the amn.
 *          If rdest == -1, then CDEST ignored.
 *
 * ------------------------------------------------------------------------
 */
{
   void BI_ArgCheck(int, int, char *, char, char, char, int, int, int, int,
                    int *, int *);
   void BI_UpdateBuffs(BLACBUFF *);
   BLACBUFF *BI_GetBuff(int);
   int BI_BuffIsFree(BLACBUFF *, int);
   MPI_Datatype BI_GetMpiGeType(BLACSCONTEXT *, int, int, int,
                                   MPI_Datatype, int *);
   BLACBUFF *BI_Pack(BLACSCONTEXT *, BVOID *, BLACBUFF *, MPI_Datatype);
   MPI_Datatype BI_GetMpiGeType(BLACSCONTEXT *, int, int, int,
                                   MPI_Datatype, int *);
   void BI_Unpack(BLACSCONTEXT *, BVOID *, BLACBUFF *, MPI_Datatype);
   void BI_MringComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR,
                     int, int);
   void BI_TreeComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR,
                    int, int);
   void BI_BeComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR);
   void BI_dvvamn(int, char *, char *);
   void BI_dvvamn2(int, char *, char *);
   void BI_dMPI_amn(void *, void *, int *, MPI_Datatype *);
   void BI_dMPI_amn2(void *, void *, int *, MPI_Datatype *);
/*
 *  Variable Declarations
 */
   VVFUNPTR vvop;
   BLACBUFF *bp, *bp2;
   BLACSCONTEXT *ctxt;
   char ttop, tscope;
   int i, j, N, dest, idist, length, tlda, tldia, trdest, ierr;
   int len[2];
   MPI_Aint disp[2];
   MPI_Datatype dtypes[2];
   MPI_Op BlacComb;
   MPI_Datatype MyType;
   BI_DistType *dist, mydist;
   extern BLACBUFF *BI_ActiveQ;
   extern BLACBUFF BI_AuxBuff;

   MGetConTxt(Mpval(ConTxt), ctxt);
   ttop = F2C_CharTrans(top);
   ttop = Mlowcase(ttop);
   tscope = F2C_CharTrans(scope);
   tscope = Mlowcase(tscope);
/*
 *  If the user has set the default combine topology, use it instead of
 *  BLACS default
 */
#ifdef DefCombTop
   if (ttop == ' ') ttop = DefCombTop;
#endif
   if (Mpval(cdest) == -1) trdest = -1;
   else trdest = Mpval(rdest);
#if (BlacsDebugLvl > 0)
   BI_ArgCheck(Mpval(ConTxt), RT_COMB, __FILE__, tscope, 'u', 'u', Mpval(m),
               Mpval(n), Mpval(lda), 1, &trdest, Mpaddress(cdest));
   if (Mpval(ldia) < Mpval(m))
   {
      if (Mpval(ldia) != -1)
         BI_BlacsWarn(Mpval(ConTxt), __LINE__, __FILE__,
                      "LDIA too small (LDIA=%d, but M=%d)", Mpval(ldia),
                      Mpval(m));
   }
#endif
   if (Mpval(lda) >= Mpval(m)) tlda = Mpval(lda);
   else tlda = Mpval(m);
   if (Mpval(ldia) < Mpval(m)) tldia = Mpval(m);
   else tldia = Mpval(ldia);
   switch(tscope)
   {
   case 'r':
      ctxt->scp = &ctxt->rscp;
      if (trdest == -1) dest = -1;
      else dest = Mpval(cdest);
      break;
   case 'c':
      ctxt->scp = &ctxt->cscp;
      dest = trdest;
      break;
   case 'a':
      ctxt->scp = &ctxt->ascp;
      if (trdest == -1) dest = -1;
      else dest = Mvkpnum(ctxt, trdest, Mpval(cdest));
      break;
   default:
      BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown scope '%c'",
                  tscope);
   }


/*
 * It's not defined how MPI reacts to 0 element reductions, so use BLACS 1-tree
 * topology if we've got one
 */
   if (ttop == ' ')
      if ( (Mpval(m) < 1) || (Mpval(n) < 1) || (ctxt->TopsRepeat) ) ttop = '1';
   N = Mpval(m) * Mpval(n);
/*
 * If process who has amn is to be communicated, must set up distance
 * vector after value vector
 */
   if (Mpval(ldia) != -1)
   {
      vvop = BI_dvvamn;
      length = N * sizeof(double);
      i = length % sizeof(BI_DistType);  /* ensure dist vec aligned correctly */
      if (i) length += sizeof(BI_DistType) - i;
      idist = length;
      length += N * sizeof(BI_DistType);
/*
 *    For performance, insist second buffer is at least 8-byte aligned
 */
      j = 8;
      if (sizeof(double) > j) j = sizeof(double);
      i = length % j;
      if (i) length += j - i;
      i = 2 * length;

      bp = BI_GetBuff(i);
      bp2 = &BI_AuxBuff;
      bp2->Buff = &bp->Buff[length];
      BI_dmvcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
/*
 *    Fill in distance vector
 */
      if (dest == -1) mydist = ctxt->scp->Iam;
      else mydist = (ctxt->scp->Np + ctxt->scp->Iam - dest) % ctxt->scp->Np;
      dist = (BI_DistType *) &bp->Buff[idist];
      for (i=0; i < N; i++) dist[i] = mydist;

/*
 *    Create the MPI datatype holding both user's buffer and distance vector
 */
      len[0] = len[1] = N;
      disp[0] = 0;
      disp[1] = idist;
      dtypes[0] = MPI_DOUBLE;
      dtypes[1] = BI_MpiDistType;
#ifdef ZeroByteTypeBug
      if (N > 0)
      {
#endif
      i = 2;
      ierr=MPI_Type_struct(i, len, disp, dtypes, &MyType);
      ierr=MPI_Type_commit(&MyType);
      bp->N = bp2->N = 1;
      bp->dtype = bp2->dtype = MyType;
#ifdef ZeroByteTypeBug
      }
      else
      {
         bp->N = bp2->N = 0;
         bp->dtype = bp2->dtype = MPI_INT;
      }
#endif
   }
   else
   {
      vvop = BI_dvvamn2;
      length = N * sizeof(double);
/*
 *    If A is contiguous, we can use it as one of our buffers
 */
      if ( (Mpval(m) == tlda) || (Mpval(n) == 1) )
      {
         bp = &BI_AuxBuff;
         bp->Buff = (char *) A;
         bp2 = BI_GetBuff(length);
      }
      else
      {
         bp = BI_GetBuff(length*2);
         bp2 = &BI_AuxBuff;
         bp2->Buff = &bp->Buff[length];
         BI_dmvcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
      }
      bp->N = bp2->N = N;
      bp->dtype = bp2->dtype = MPI_DOUBLE;
   }

   switch(ttop)
   {
   case ' ':         /* use MPI's reduction by default */
      i = 1;
      if (Mpval(ldia) == -1)
      {
         ierr=MPI_Op_create(BI_dMPI_amn2, i, &BlacComb);
      }
      else
      {
         ierr=MPI_Op_create(BI_dMPI_amn, i, &BlacComb);
         BI_AuxBuff.Len = N;  /* set this up for the MPI OP wrappers */
      }

      if (trdest != -1)
      {
         ierr=MPI_Reduce(bp->Buff, bp2->Buff, bp->N, bp->dtype, BlacComb, dest,
	 	       ctxt->scp->comm);
         if (ctxt->scp->Iam == dest)
	 {
	    BI_dvmcopy(Mpval(m), Mpval(n), A, tlda, bp2->Buff);
	    if (Mpval(ldia) != -1)
               BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
                            (BI_DistType *) &bp2->Buff[idist],
			    trdest, Mpval(cdest));
	 }
      }
      else
      {
         ierr=MPI_Allreduce(bp->Buff, bp2->Buff, bp->N, bp->dtype, BlacComb,
		          ctxt->scp->comm);
	 BI_dvmcopy(Mpval(m), Mpval(n), A, tlda, bp2->Buff);
         if (Mpval(ldia) != -1)
            BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
                         (BI_DistType *) &bp2->Buff[idist],
                         trdest, Mpval(cdest));
      }
      ierr=MPI_Op_free(&BlacComb);
      if (Mpval(ldia) != -1)
#ifdef ZeroByteTypeBug
         if (N > 0)
#endif
         ierr=BI_MPI_TYPE_FREE(&MyType);
      if (BI_ActiveQ) BI_UpdateBuffs(NULL);
      return;
      break;
   case 'i':
      BI_MringComb(ctxt, bp, bp2, N, vvop, dest, 1);
      break;
   case 'd':
      BI_MringComb(ctxt, bp, bp2, N, vvop, dest, -1);
      break;
   case 's':
      BI_MringComb(ctxt, bp, bp2, N, vvop, dest, 2);
      break;
   case 'm':
      BI_MringComb(ctxt, bp, bp2, N, vvop, dest, ctxt->Nr_co);
      break;
   case '1':
   case '2':
   case '3':
   case '4':
   case '5':
   case '6':
   case '7':
   case '8':
   case '9':
      BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, ttop-47);
      break;
   case 'f':
      BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, FULLCON);
      break;
   case 't':
      BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, ctxt->Nb_co);
      break;
   case 'h':
/*
 *    Use bidirectional exchange if everyone wants answer
 */
      if ( (trdest == -1) && !(ctxt->TopsCohrnt) )
         BI_BeComb(ctxt, bp, bp2, N, vvop);
      else
         BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, 2);
      break;
   default :
      BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown topology '%c'",
                  ttop);
   }

   if (Mpval(ldia) != -1)
#ifdef ZeroByteTypeBug
      if (N > 0)
#endif
      ierr=BI_MPI_TYPE_FREE(&MyType);
/*
 * If I am selected to receive answer
 */
   if ( (ctxt->scp->Iam == dest) || (dest == -1) )
   {
/*
 *    Translate the distances stored in the latter part of bp->Buff into
 *    process grid coordinates, and output these coordinates in the
 *    arrays rA and cA.
 */
      if (Mpval(ldia) != -1)
         BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
                      dist, trdest, Mpval(cdest));
/*
 *    Unpack the amn array
 */
      if (bp != &BI_AuxBuff) BI_dvmcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
   }
}
Example #24
int main( int argc, char **argv )
{
    int              rank, size, i;
    int              data;
    int              errors=0;
    int              result = -100;
    int              correct_result;
    MPI_Op           op_assoc, op_addem;
    MPI_Comm         comm;

    MPI_Init( &argc, &argv );
    MPI_Op_create( (MPI_User_function *)assoc, 0, &op_assoc );
    MPI_Op_create( (MPI_User_function *)addem, 1, &op_addem );

    /* Run this for a variety of communicator sizes */
    while ((comm = GetNextComm()) != MPI_COMM_NULL) {
	MPI_Comm_rank( comm, &rank );
	MPI_Comm_size( comm, &size );

	data = rank;
	
	correct_result = 0;
	for (i=0;i<=rank;i++)
	    correct_result += i;

	MPI_Scan ( &data, &result, 1, MPI_INT, MPI_SUM, comm );
	if (result != correct_result) {
	    fprintf( stderr, "[%d] Error summing ints with scan\n", rank );
	    errors++;
	}

	MPI_Scan ( &data, &result, 1, MPI_INT, MPI_SUM, comm );
	if (result != correct_result) {
	    fprintf( stderr, "[%d] Error summing ints with scan (2)\n", rank );
	    errors++;
	}

	data = rank;
	result = -100;
	MPI_Scan ( &data, &result, 1, MPI_INT, op_addem, comm );
	if (result != correct_result) {
	    fprintf( stderr, "[%d] Error summing ints with scan (userop)\n", 
		     rank );
	    errors++;
	}

	MPI_Scan ( &data, &result, 1, MPI_INT, op_addem, comm );
	if (result != correct_result) {
	    fprintf( stderr, "[%d] Error summing ints with scan (userop2)\n", 
		     rank );
	    errors++;
	}
/*	result = -100;*/
/*	data = rank;*/
/*	MPI_Scan ( &data, &result, 1, MPI_INT, op_assoc, comm );*/
/*	if (result == BAD_ANSWER) {*/
/*	    fprintf( stderr, "[%d] Error scanning with non-commutative op\n",*/
/*		     rank );*/
/*	    errors++;*/
/*	}*/
	MPI_Comm_free( &comm );
    }

    MPI_Op_free( &op_assoc );
    MPI_Op_free( &op_addem );

    if (errors) {
	MPI_Comm_rank( MPI_COMM_WORLD, &rank );
	printf( "[%d] done with ERRORS(%d)!\n", rank, errors );
    }

    Test_Waitforall( );
    MPI_Finalize();
    return errors;
}
Example #25
int Zoltan_LB_Build_PartDist(ZZ *zz)
{
char *yo = "Zoltan_LB_Build_PartDist";
int ierr = ZOLTAN_OK;
int inflag[6], outflag[6] = {0,0,-1,0,0,0};
int global_parts_set = 0;   /* number of procs on which NUM_GLOBAL_PARTS 
                               parameter was set. */
int local_parts_set = 0;    /* number of procs on which NUM_LOCAL_PARTS
                               parameter was set. */
int max_global_parts = 0;   /* Max value of Num_Global_Parts_Param on all 
                               procs. */
int sum_local_parts = 0;    /* Sum of Num_Local_Parts over all procs.
                               Procs on which NUM_LOCAL_PARTS was not
                               set assume zero parts on them.  Thus,
                               sum_local_parts may be < max_global_parts. */
int remaining_procs;        /* Num of procs not setting NUM_LOCAL_PARTS */
int avail_local_parts;      /* max_global_parts - sum_local_parts */
int num_proc = zz->Num_Proc;
int *pdist;
int local_parts = 0;
int *local_parts_params = NULL;
int i, j, cnt, pcnt;
int frac = 0, mod = 0;
MPI_Op op;
MPI_User_function Zoltan_PartDist_MPIOp;
struct Zoltan_part_info *part_sizes=NULL;
float sum_parts, part_total;

  MPI_Op_create(&Zoltan_PartDist_MPIOp,1,&op);

  /* Check whether global parts or local parts parameters were used. */
  inflag[0] = (zz->LB.Num_Global_Parts_Param != -1); 
  inflag[1] = (zz->LB.Num_Local_Parts_Param != -1); 
  inflag[2] =  zz->LB.Num_Global_Parts_Param;
  inflag[3] = ((zz->LB.Num_Local_Parts_Param == -1) 
                    ? 0 : zz->LB.Num_Local_Parts_Param);
  inflag[4] = (zz->LB.Num_Global_Parts_Param != zz->LB.Prev_Global_Parts_Param);
  inflag[5] = (zz->LB.Num_Local_Parts_Param != zz->LB.Prev_Local_Parts_Param);

  MPI_Allreduce(inflag, outflag, 6, MPI_INT, op, zz->Communicator);
  MPI_Op_free(&op);

  if (!outflag[4] && !outflag[5]) {
    /* Parameter values have not changed since last invocation of Zoltan. */
    /* Do not have to change PartDist or Num_Global_Parts. */
    goto End;
  }

  /* Since PartDist is changing, can't reuse old parts.
   * Free LB.Data_Structure to prevent reuse. 
   * Also free LB.PartDist and LB.ProcDist.
   */
  if (zz->LB.Free_Structure != NULL)
    zz->LB.Free_Structure(zz);
  ZOLTAN_FREE(&(zz->LB.PartDist));
  ZOLTAN_FREE(&(zz->LB.ProcDist));

  zz->LB.Prev_Global_Parts_Param = zz->LB.Num_Global_Parts_Param;
  zz->LB.Prev_Local_Parts_Param = zz->LB.Num_Local_Parts_Param;

  global_parts_set = outflag[0];  /* Sum of inflag[0] */
  local_parts_set = outflag[1];   /* Sum of inflag[1] */
  max_global_parts = outflag[2];  /* Max of inflag[2] */
  sum_local_parts = outflag[3];   /* Sum of inflag[3] */

  /* Check whether any parameters were set;
   * No need to build the PartDist array if not. 
   */
  if ((!global_parts_set || (max_global_parts==num_proc)) && !local_parts_set) {
    /* Number of parts == number of procs, uniformly distributed; */
    zz->LB.Num_Global_Parts = num_proc;
    zz->LB.Single_Proc_Per_Part = 1;
  }

  else {
    /* Either NUM_GLOBAL_PARTS is set != num_proc or NUM_LOCAL_PARTS
     * is set.  Build PartDist, distributing parts to processors as 
     * specified. 
     */

    /* error checking. */
    if (local_parts_set) {
      if (!global_parts_set) 
        max_global_parts = sum_local_parts;
      else if (sum_local_parts > max_global_parts) {
        char emsg[256];
        sprintf(emsg, 
                "Sum of NUM_LOCAL_PARTS %d > NUM_GLOBAL_PARTS %d", 
                sum_local_parts, max_global_parts);
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, emsg);
        ierr = ZOLTAN_FATAL;
        goto End;
      }
      else if (sum_local_parts < max_global_parts && 
               local_parts_set == num_proc) {
        char emsg[256];
        sprintf(emsg, 
                "Sum of NUM_LOCAL_PARTS %d < NUM_GLOBAL_PARTS %d", 
                sum_local_parts, max_global_parts);
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, emsg);
        ierr = ZOLTAN_FATAL;
        goto End;
      }
    }

    if (max_global_parts == 0) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Zero parts requested");
      ierr = ZOLTAN_FATAL;
      goto End;
    }

    /* Allocate space for PartDist. */
    zz->LB.PartDist = (int *) ZOLTAN_MALLOC((max_global_parts+1)*sizeof(int));
    if (zz->LB.PartDist == NULL) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      goto End;
    }

    pdist = zz->LB.PartDist;
 
    /* Compute the PartDist array. */

    if (!local_parts_set) {
      part_sizes = zz->LB.Part_Info;

      if (part_sizes != NULL){
        /* Use ratio of part sizes to assign parts to procs */

        part_total = 0.0;
        for (i=0; i < zz->LB.Part_Info_Len; i++){
          part_total += part_sizes[i].Size;
        }

        pdist[0] = 0;
        zz->LB.Single_Proc_Per_Part = 1;

        sum_parts = 0.0;

        for (i = 1; i < max_global_parts; i++){

          sum_parts += (part_sizes[i-1].Size / part_total);

          pdist[i] = sum_parts * num_proc;

          if (pdist[i] > pdist[i-1] + 1){
             zz->LB.Single_Proc_Per_Part = 0;
          }
        }
        pdist[max_global_parts] = num_proc;
      }
      else{
        if (max_global_parts > num_proc) {
          /* NUM_LOCAL_PARTS is not set; NUM_GLOBAL_PARTS > num_proc. */
          /* Even distribution of parts to processors. */
          zz->LB.Single_Proc_Per_Part = 1;
          frac = max_global_parts / num_proc;
          mod  = max_global_parts % num_proc;
  
          for (cnt = 0, i = 0; i < num_proc; i++) {
            local_parts = frac + ((num_proc - i) <= mod);
            for (j = 0; j < local_parts; j++)
              pdist[cnt++] = i;
          }
          pdist[cnt] = num_proc;
        }
        else { /* num_proc < max_global_parts */
          /* NUM_LOCAL_PARTS is not set; NUM_GLOBAL_PARTS < num_proc. */
          /* Even distribution of processors to parts. */
          zz->LB.Single_Proc_Per_Part = 0;  /* Parts are spread across procs */
          pdist[0] = 0;
          frac = num_proc / max_global_parts;
          mod  = num_proc % max_global_parts;
          for (i = 1; i < max_global_parts; i++)
            pdist[i] = pdist[i-1] + frac + ((max_global_parts - i) <= mod);
          pdist[max_global_parts] = num_proc;
        }
      }
    }
    else /* local_parts_set */ {

      /* NUM_LOCAL_PARTS is set on at least some processors. */
      /* Distribute parts to processors to match NUM_LOCAL_PARTS
         where specified; distribute remaining parts 
         to processors that didn't specify NUM_LOCAL_PARTS */

      zz->LB.Single_Proc_Per_Part = 1;

      /* Gather the parameter values from all processors. */
      local_parts_params = (int *) ZOLTAN_MALLOC((num_proc+1)* sizeof(int));
      MPI_Allgather(&(zz->LB.Num_Local_Parts_Param), 1, MPI_INT, 
                    local_parts_params, 1, MPI_INT, zz->Communicator);

      /* Compute number of parts not specified by NUM_LOCAL_PARTS */
      /* In MPI_Allreduce above, processors that did not specify NUM_LOCAL_PARTS
       * contributed zero parts to sum_local_parts.  */

      remaining_procs = num_proc - local_parts_set;
      avail_local_parts = max_global_parts - sum_local_parts;
      if (remaining_procs > 0) {
        frac = avail_local_parts / remaining_procs;
        mod  = avail_local_parts % remaining_procs;
      }

      for (cnt = 0, pcnt = 0, i = 0; i < num_proc; i++)
        if (local_parts_params[i] != -1) {
          /* Fill in processor for its NUM_LOCAL_PARTS parts. */
          for (j = 0; j < local_parts_params[i]; j++)
            pdist[cnt++] = i;
        }
        else {
          /* Equally distribute avail_local_parts among remaining_procs. */
          local_parts = frac + ((remaining_procs - pcnt) <= mod);
          for (j = 0; j < local_parts; j++)
            pdist[cnt++] = i;
          pcnt++;
        }
  
      pdist[cnt] = num_proc;
      ZOLTAN_FREE(&local_parts_params);
    }

    /* Reset Num_Global_Parts.  */
    zz->LB.Num_Global_Parts = max_global_parts;

    if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL && zz->LB.PartDist != NULL) {
      printf("[%1d] Debug: LB.PartDist = ", zz->Proc);
      for (i=0; i<=zz->LB.Num_Global_Parts; i++)
        printf("%d ", zz->LB.PartDist[i]);
      
    }
  }

End:
  return ierr;
}
Example #26
static PetscErrorCode TestCellShape(DM dm)
{
  PetscMPIInt    rank;
  PetscInt       dim, c, cStart, cEnd, count = 0;
  ex1_stats_t    stats, globalStats;
  PetscReal      *J, *invJ, min = 0, max = 0, mean = 0, stdev = 0;
  MPI_Comm       comm = PetscObjectComm((PetscObject)dm);
  DM             dmCoarse;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  stats.min = PETSC_MAX_REAL;
  stats.max = PETSC_MIN_REAL;
  stats.sum = stats.squaresum = 0.;
  stats.count = 0;

  ierr = DMGetDimension(dm,&dim);CHKERRQ(ierr);

  ierr = PetscMalloc2(dim * dim, &J, dim * dim, &invJ);CHKERRQ(ierr);

  ierr = DMPlexGetHeightStratum(dm,0,&cStart,&cEnd);CHKERRQ(ierr);
  for (c = cStart; c < cEnd; c++) {
    PetscInt  i;
    PetscReal frobJ = 0., frobInvJ = 0., cond2, cond, detJ;

    ierr = DMPlexComputeCellGeometryAffineFEM(dm,c,NULL,J,invJ,&detJ);CHKERRQ(ierr);

    for (i = 0; i < dim * dim; i++) {
      frobJ += J[i] * J[i];
      frobInvJ += invJ[i] * invJ[i];
    }
    cond2 = frobJ * frobInvJ;
    cond  = PetscSqrtReal(cond2);

    stats.min = PetscMin(stats.min,cond);
    stats.max = PetscMax(stats.max,cond);
    stats.sum += cond;
    stats.squaresum += cond2;
    stats.count++;
  }

  {
    PetscMPIInt    blockLengths[2] = {4,1};
    MPI_Aint       blockOffsets[2] = {offsetof(ex1_stats_t,min),offsetof(ex1_stats_t,count)};
    MPI_Datatype   blockTypes[2]   = {MPIU_REAL,MPIU_INT}, statType;
    MPI_Op         statReduce;

    ierr = MPI_Type_create_struct(2,blockLengths,blockOffsets,blockTypes,&statType);CHKERRQ(ierr);
    ierr = MPI_Type_commit(&statType);CHKERRQ(ierr);
    ierr = MPI_Op_create(ex1_stats_reduce, PETSC_TRUE, &statReduce);CHKERRQ(ierr);
    ierr = MPI_Reduce(&stats,&globalStats,1,statType,statReduce,0,comm);CHKERRQ(ierr);
    ierr = MPI_Op_free(&statReduce);CHKERRQ(ierr);
    ierr = MPI_Type_free(&statType);CHKERRQ(ierr);
  }

  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    count = globalStats.count;
    min = globalStats.min;
    max = globalStats.max;
    mean = globalStats.sum / globalStats.count;
    stdev = PetscSqrtReal(globalStats.squaresum / globalStats.count - mean * mean);
  }
  ierr = PetscPrintf(comm,"Mesh with %d cells, shape condition numbers: min = %g, max = %g, mean = %g, stddev = %g\n", count, (double) min, (double) max, (double) mean, (double) stdev);

  ierr = PetscFree2(J,invJ);CHKERRQ(ierr);

  ierr = DMGetCoarseDM(dm,&dmCoarse);CHKERRQ(ierr);
  if (dmCoarse) {
    ierr = TestCellShape(dmCoarse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
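The ex1_stats_reduce operator is defined elsewhere in the PETSc example. Given the MPI struct type built above (four MPIU_REAL fields starting at min, plus an MPIU_INT count), a plausible sketch is the following; the field layout is inferred from that type and is an assumption, not the example's verbatim code.

/* Layout inferred from the MPI_Type_create_struct call above. */
typedef struct {
  PetscReal min, max, sum, squaresum;
  PetscInt  count;
} ex1_stats_t;

/* Hypothetical reduction: combine min/max, accumulate sums and counts. */
static void ex1_stats_reduce(void *a, void *b, int *len, MPI_Datatype *datatype)
{
  ex1_stats_t *in = (ex1_stats_t *) a, *inout = (ex1_stats_t *) b;
  PetscInt     i;

  (void) datatype;
  for (i = 0; i < *len; i++) {
    inout[i].min        = PetscMin(inout[i].min, in[i].min);
    inout[i].max        = PetscMax(inout[i].max, in[i].max);
    inout[i].sum       += in[i].sum;
    inout[i].squaresum += in[i].squaresum;
    inout[i].count     += in[i].count;
  }
}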
Example #27
int main( int argc, char *argv[] )
{
    int errs = 0;
    int rank, size, root;
    int minsize = 2, count; 
    MPI_Comm      comm;
    int *buf, *bufout;
    MPI_Op op;
    MPI_Datatype mattype;

    MTest_Init( &argc, &argv );

    MPI_Op_create( uop, 0, &op );
    
    while (MTestGetIntracommGeneral( &comm, minsize, 1 )) {
	if (comm == MPI_COMM_NULL) continue;

	MPI_Comm_size( comm, &size );
	MPI_Comm_rank( comm, &rank );

	matSize = size;  /* used by the user-defined operation */
	/* Only one matrix for now */
	count = 1;

	/* A single matrix, the size of the communicator */
	MPI_Type_contiguous( size*size, MPI_INT, &mattype );
	MPI_Type_commit( &mattype );
	
	buf = (int *)malloc( count * size * size * sizeof(int) );
	if (!buf) MPI_Abort( MPI_COMM_WORLD, 1 );
	bufout = (int *)malloc( count * size * size * sizeof(int) );
	if (!bufout) MPI_Abort( MPI_COMM_WORLD, 1 );

	for (root = 0; root < size; root ++) {
	    initMat( comm, buf );
	    MPI_Reduce( buf, bufout, count, mattype, op, root, comm );
	    if (rank == root) {
		errs += isShiftLeft( comm, bufout );
	    }

	    /* Try the same test, but using MPI_IN_PLACE */
	    initMat( comm, bufout );
	    if (rank == root) {
		MPI_Reduce( MPI_IN_PLACE, bufout, count, mattype, op, root, comm );
	    }
	    else {
		MPI_Reduce( bufout, NULL, count, mattype, op, root, comm );
	    }
	    if (rank == root) {
		errs += isShiftLeft( comm, bufout );
	    }

#if MTEST_HAVE_MIN_MPI_VERSION(2,2)
            /* Try one more time without IN_PLACE to make sure we check
             * aliasing correctly */
            if (rank == root) {
                MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
                if (MPI_SUCCESS == MPI_Reduce( bufout, bufout, count, mattype, op, root, comm ))
                    errs++;
            }
#endif
	}

	free( buf );
	free( bufout );
	
	MPI_Type_free( &mattype );

	MTestFreeComm( &comm );
    }

    MPI_Op_free( &op );

    MTest_Finalize( errs );
    MPI_Finalize();
    return 0;
}
Example #28
 /* Destroy the two ops that we created */
 ~modeler_t () {
   MPI_Op_free (&MPI_KPI_MAX_OP);
   MPI_Op_free (&MPI_KPI_MIN_OP);
   MPI_Type_free (&MPI_Kpi_pair);
 }
Example #29
int main(int argc, char *argv[])
{
	unsigned char str[154];
	unsigned int arr[] = {9,2,5,8,4,2,4,1,6,9,1,8,9,9,6,1,5,7,0,7,7,4,3,7,6,3,9,5,4,2,3,0,4,4,1,5,3,3,7,2,3,3,7,0,9,4,5,2,8,4,6,\
                              2,1,3,4,1,4,2,6,0,8,5,1,7,3,1,4,4,7,0,5,3,4,4,8,9,1,1,9,8,3,5,1,8,3,4,4,8,3,2,8,1,2,8,7,4,1,8,1,8,0,4,\
                              8,4,2,4,4,5,4,9,1,8,3,4,9,5,6,3,3,1,4,6,4,1,0,2,0,2,5,1,4,8,5,9,9,6,9,4,0,3,6,5,5,9,5,4,2,2,3,7,8,5,9,7};

	long i;

	double t1, t2, Itime;
	int provided;

	/* Allocation */

	v1 = (vector *) malloc (VLEN * sizeof (vector));
	v2 = (vector *) malloc (VLEN * sizeof (vector));
	v3 = (vector *) malloc (VLEN * sizeof (vector));

	fin_sum = (mp_limb_t *) malloc ((2*LIMBS+1) * sizeof (mp_limb_t));
	result = (mp_limb_t *) malloc ((2*LIMBS+1) * sizeof (mp_limb_t));
	q = (mp_limb_t *) malloc (LIMBS * sizeof (mp_limb_t));

	MPI_Init_thread (&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
	MPI_Comm_rank (MPI_COMM_WORLD, &id);
	MPI_Comm_size (MPI_COMM_WORLD, &p);

	MPI_Type_contiguous (2*LIMBS+1, MPI_UNSIGNED_LONG_LONG, &mpntype0);
	MPI_Type_commit (&mpntype0);

	MPI_Type_contiguous (LIMBS, MPI_UNSIGNED_LONG_LONG, &mpntype1);
	MPI_Type_commit (&mpntype1);

	MPI_Op_create ((MPI_User_function *)addmpn, 1, &mpn_sum);

	for (i=0; i<154; ++i)	str[i] = (unsigned char)arr[i];
	mpn_set_str (q, str, 154, 10);
	//if (!id) gmp_printf ("Modulus: %Nd\n", q, LIMBS);

	MPI_Barrier (MPI_COMM_WORLD);

	/* Setting limits for 2 MPI nodes */

	VOffset = BLOCK_LOW(id,p,VLEN);
	VChunk  = BLOCK_SIZE(id,p,VLEN);

	/* Setting limits for NCORES-1 threads */

	for (i=0; i<NCORES-1; ++i)
	{
		VStart[i] = VOffset + BLOCK_LOW(i,NCORES-1,VChunk);
		VEnd[i]   = VOffset + BLOCK_HIGH(i,NCORES-1,VChunk);
	}

	for (i=0; i<VLEN; ++i)	mpn_random (v1[i], LIMBS);
	for (i=0; i<VLEN; ++i)	mpn_random (v2[i], LIMBS);
	for (i=BLOCK_LOW(id,p,VLEN); i<=BLOCK_HIGH(id,p,VLEN); ++i)	mpn_random (v3[i], LIMBS);
		
	MPI_Barrier (MPI_COMM_WORLD);

	t1 = MPI_Wtime ();

	for (i=0; i<NCORES; ++i)
		pthread_create(&threads[i], &attr, VectMul, (void *) i);

	for (i=0; i<NCORES; ++i)
		pthread_join (threads[i], NULL);

	t2 = MPI_Wtime ();
	Itime = t2 - t1;
	if (!id) printf ("Total time taken: %lf\n",Itime);
	
	if (!id) gmp_printf ("Result: %Nd\n", cnum, LIMBS);

	MPI_Op_free(&mpn_sum);
	MPI_Request_free (&Rrqst);
	MPI_Request_free (&Srqst);
	MPI_Finalize ();

	return 0;	
}
Example #30
HYPRE_Int
hypre_MPI_Op_free( hypre_MPI_Op *op )
{
   return (HYPRE_Int) MPI_Op_free(op);
}