Esempio n. 1
0
/**
 * Binomial-tree reduce.
 *
 * Chooses a segment size from the communicator size and the total message
 * size, turns it into a per-segment element count through
 * COLL_TUNED_COMPUTED_SEGCOUNT, and delegates the actual reduction to
 * smpi_coll_tuned_ompi_reduce_generic() using an in-order binomial tree
 * built for @p root.
 *
 * @param sendbuf   buffer forwarded unchanged to the generic reduce
 * @param recvbuf   buffer forwarded unchanged to the generic reduce
 * @param count     number of elements of @p datatype
 * @param datatype  element datatype (its size drives the segmentation choice)
 * @param op        reduction operation, forwarded unchanged
 * @param root      rank used as root of the binomial tree
 * @param comm      communicator
 * @return the value returned by smpi_coll_tuned_ompi_reduce_generic()
 */
int smpi_coll_tuned_reduce_ompi_binomial( void *sendbuf, void *recvbuf,
                                           int count, MPI_Datatype datatype,
                                           MPI_Op  op, int root,
                                           MPI_Comm  comm)
{
    /* Linear threshold on the communicator size: slope * bytes + offset */
    const double slope  = 0.6016 / 1024.0; /* [1/B] */
    const double offset = 1.3496;

//    COLL_TUNED_UPDATE_IN_ORDER_BMTREE( comm, tuned_module, root );

    /* Gather the quantities the decision depends on */
    size_t elem_size  = smpi_datatype_size(datatype);
    int    nb_procs   = smpi_comm_size(comm);
    size_t total_size = elem_size * count;

    /* "Binomial_0K": small communicator with a moderate message, a small
     * message, or at most one element -> no segmentation at all */
    const bool unsegmented = ((nb_procs < 8) && (total_size < 20480))
                             || (total_size < 2048)
                             || (count <= 1);

    /* "Binomial_1K": 1024-byte segments once the communicator is larger than
     * the linear threshold; in every other case the segment size stays 0 */
    uint32_t seg_bytes = 0;
    if (!unsegmented && (nb_procs > (slope * total_size + offset))) {
        seg_bytes = 1024;
    }

    XBT_DEBUG("coll:tuned:reduce_intra_binomial rank %d ss %5d",
                 smpi_comm_rank(comm), seg_bytes);

    /* Starts as the full element count; the macro may adjust it from the
     * segment size and the element size */
    int seg_elems = count;
    COLL_TUNED_COMPUTED_SEGCOUNT( seg_bytes, elem_size, seg_elems );

    /* NOTE(review): the meaning of the trailing 0 argument is not visible
     * from this function - confirm against the generic reduce signature */
    return smpi_coll_tuned_ompi_reduce_generic( sendbuf, recvbuf, count, datatype,
                                           op, root, comm,
                                           ompi_coll_tuned_topo_build_in_order_bmtree(comm, root),
                                           seg_elems, 0);
}
Esempio n. 2
0
/**
 * Scatter along an in-order binomial tree.
 *
 * Ranks are renumbered relative to the root (vrank = (rank - root + size) % size,
 * so the root is vrank 0). Even vranks are inner nodes: they receive a block
 * from their parent (except the root, which owns the data), keep the first
 * chunk for themselves and forward the rest to their children. Odd vranks are
 * leaves and receive directly into @p rbuf.
 *
 * @param sbuf    send buffer, read on the root only
 * @param scount  number of elements sent to each rank (root side)
 * @param sdtype  datatype of the send elements (root side)
 * @param rbuf    receive buffer; MPI_IN_PLACE on the root skips the local copy
 * @param rcount  number of elements received by each rank
 * @param rdtype  datatype of the received elements
 * @param root    rank that owns the data to scatter
 * @param comm    communicator
 * @return MPI_SUCCESS, MPI_ERR_OTHER when a temporary buffer cannot be
 *         allocated, or the error returned by a failed smpi_datatype_copy()
 */
int
smpi_coll_tuned_scatter_ompi_binomial(void *sbuf, int scount,
				       MPI_Datatype sdtype,
				       void *rbuf, int rcount,
				       MPI_Datatype rdtype,
				       int root,
				       MPI_Comm comm
				       )
{
    /* All locals are declared up front so that the "goto err_hndl" jumps
     * never cross an initialization. */
    int line = -1;
    int i;
    int rank;
    int vrank;
    int size;
    int total_send = 0;
    char *ptmp     = NULL;
    char *tempbuf  = NULL;
    int err;
    ompi_coll_tree_t* bmtree;
    MPI_Status status;
    MPI_Aint sextent, slb, strue_lb, strue_extent;
    MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent;

    size = smpi_comm_size(comm);
    rank = smpi_comm_rank(comm);

    XBT_DEBUG(
                 "smpi_coll_tuned_scatter_ompi_binomial rank %d", rank);

    /* create the binomial tree */

//    COLL_TUNED_UPDATE_IN_ORDER_BMTREE( comm, tuned_module, root );
    bmtree =  ompi_coll_tuned_topo_build_in_order_bmtree( comm, root);//ompi_ data->cached_in_order_bmtree;

    /* The "true" lower bound/extent are obtained from the same call as the
     * regular ones, so both pairs hold identical values here. */
    smpi_datatype_extent(sdtype, &slb, &sextent);
    smpi_datatype_extent(sdtype, &strue_lb, &strue_extent);
    smpi_datatype_extent(rdtype, &rlb, &rextent);
    smpi_datatype_extent(rdtype, &rtrue_lb, &rtrue_extent);

    /* rank relative to the root: the root is always vrank 0 */
    vrank = (rank - root + size) % size;

    if (rank == root) {
        if (0 == root) {
            /* root on 0, just use the send buffer */
            ptmp = (char *) sbuf;
            if (rbuf != MPI_IN_PLACE) {
                /* local copy to rbuf */
                err = smpi_datatype_copy(sbuf, scount, sdtype,
                                         rbuf, rcount, rdtype);
                if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
            }
        } else {
            /* root is not on 0, allocate temp buffer for send */
            tempbuf = (char *) malloc(strue_extent + (scount*size - 1) * sextent);
            if (NULL == tempbuf) {
                err = MPI_ERR_OTHER; line = __LINE__; goto err_hndl;
            }

            ptmp = tempbuf - slb;

            /* Rotate the data so that the chunk of vrank v sits at offset v:
             * first the chunks of ranks root..size-1 ... */
            err = smpi_datatype_copy((char *) sbuf + sextent*root*scount, scount*(size-root), sdtype,
                                     ptmp, scount*(size-root), sdtype);
            if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

            /* ... then the chunks of ranks 0..root-1 */
            err = smpi_datatype_copy((char*)sbuf, scount*root, sdtype,
                                     ptmp + sextent*scount*(size - root), scount*root, sdtype);
            if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }

            if (rbuf != MPI_IN_PLACE) {
                /* local copy to rbuf */
                err = smpi_datatype_copy(ptmp, scount, sdtype,
                                         rbuf, rcount, rdtype);
                if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
            }
        }
        /* the first chunk belongs to the root itself */
        total_send = scount;
    } else if (!(vrank % 2)) {
        /* non-root, non-leaf nodes, allocate temp buffer for recv
         * the most we need is rcount*size/2 */
        tempbuf = (char *) malloc(rtrue_extent + (rcount*size - 1) * rextent);
        if (NULL == tempbuf) {
            err= MPI_ERR_OTHER; line = __LINE__; goto err_hndl;
        }

        ptmp = tempbuf - rlb;

        /* from here on the send-side description is replaced by the
         * receive-side one, which is what the forwarding below uses */
        sdtype = rdtype;
        scount = rcount;
        sextent = rextent;
        total_send = scount;
    } else {
        /* leaf nodes, just use rbuf */
        ptmp = (char *) rbuf;
    }

    if (!(vrank % 2)) {
        if (rank != root) {
            /* recv from parent on non-root */
            smpi_mpi_recv(ptmp, rcount*size, rdtype, bmtree->tree_prev,
                          COLL_TAG_SCATTER, comm, &status);
            /* local copy to rbuf; a failure is now reported like for every
             * other copy instead of being silently ignored */
            err = smpi_datatype_copy(ptmp, scount, sdtype,
                                     rbuf, rcount, rdtype);
            if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
        }
        /* send to children on all non-leaf */
        for (i = 0; i < bmtree->tree_nextsize; i++) {
            int mycount = 0, vkid;
            /* figure out how much data I have to send to this child:
             * the distance between the child and me in vrank space,
             * capped by the number of ranks from the child to the end */
            vkid = (bmtree->tree_next[i] - root + size) % size;
            mycount = vkid - vrank;
            if (mycount > (size - vkid))
                mycount = size - vkid;
            mycount *= scount;

            smpi_mpi_send(ptmp + total_send*sextent, mycount, sdtype,
                          bmtree->tree_next[i],
                          COLL_TAG_SCATTER,
                          comm);

            total_send += mycount;
        }

        if (NULL != tempbuf)
            free(tempbuf);
    } else {
        /* recv from parent on leaf nodes */
        smpi_mpi_recv(ptmp, rcount, rdtype, bmtree->tree_prev,
                      COLL_TAG_SCATTER, comm, &status);
    }
    //!FIXME : store the tree, as done in ompi, instead of calculating it each time ?
    xbt_free(bmtree);

    return MPI_SUCCESS;

 err_hndl:
    if (NULL != tempbuf)
        free(tempbuf);
    /* the tree is built before any jump to this label, so it must be
     * released here as well, otherwise every error path leaks it */
    xbt_free(bmtree);

    XBT_DEBUG(  "%s:%4d\tError occurred %d, rank %2d",
                __FILE__, line, err, rank);
    return err;
}
Esempio n. 3
0
/**
 * Gather along an in-order binomial tree.
 *
 * Ranks are renumbered relative to the root (vrank = (rank - root + size) % size,
 * so the root is vrank 0). Odd vranks are leaves and send their own data
 * straight from @p sbuf. Even vranks are inner nodes: they collect the blocks
 * of their children behind their own contribution and, unless they are the
 * root, forward the whole block to their parent. When the root is not rank 0
 * the data is collected in vrank order in a temporary buffer and rotated into
 * @p rbuf at the end.
 *
 * @param sbuf    send buffer; MPI_IN_PLACE on the root means its contribution
 *                is already in @p rbuf
 * @param scount  number of elements sent by each rank
 * @param sdtype  datatype of the sent elements
 * @param rbuf    receive buffer, used on the root only
 * @param rcount  number of elements received from each rank (root only; on
 *                other ranks it is unused or overwritten with @p scount)
 * @param rdtype  datatype of the received elements (root only; on other
 *                ranks it is unused or overwritten with @p sdtype)
 * @param root    rank that collects the data
 * @param comm    communicator
 * @return MPI_SUCCESS, MPI_ERR_OTHER when a temporary buffer cannot be
 *         obtained, or the error returned by a failed Datatype::copy()
 */
int Coll_gather_ompi_binomial::gather(void* sbuf, int scount, MPI_Datatype sdtype, void* rbuf, int rcount,
                                      MPI_Datatype rdtype, int root, MPI_Comm comm)
{
    /* All locals are declared up front so that the "goto err_hndl" jumps
     * never cross an initialization. */
    int line = -1;
    int i;
    int rank;
    int vrank;
    int size;
    int total_recv = 0;
    char *ptmp     = NULL;
    char *tempbuf  = NULL;
    int err;
    ompi_coll_tree_t* bmtree;
    MPI_Status status;
    MPI_Aint sextent, slb, strue_lb, strue_extent;
    MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent;


    size = comm->size();
    rank = comm->rank();

    XBT_DEBUG("smpi_coll_tuned_gather_ompi_binomial rank %d", rank);

    /* create the binomial tree */
   // COLL_TUNED_UPDATE_IN_ORDER_BMTREE( comm, tuned_module, root );
    bmtree = ompi_coll_tuned_topo_build_in_order_bmtree(comm, root);
    // data->cached_in_order_bmtree;

    /* The "true" lower bound/extent come from the same call as the regular
     * ones, so both pairs hold identical values here. */
    sdtype->extent(&slb, &sextent);
    sdtype->extent(&strue_lb, &strue_extent);

    /* rank relative to the root: the root is always vrank 0 */
    vrank = (rank - root + size) % size;

    if (rank == root) {
        /* the receive-side description is only queried on the root */
        rdtype->extent(&rlb, &rextent);
        rdtype->extent(&rtrue_lb, &rtrue_extent);
        if (0 == root) {
          /* root on 0, just use the recv buffer */
          ptmp = (char*)rbuf;
          if (sbuf != MPI_IN_PLACE) {
            err = Datatype::copy(sbuf, scount, sdtype, ptmp, rcount, rdtype);
            if (MPI_SUCCESS != err) {
              line = __LINE__;
              goto err_hndl;
            }
          }
        } else {
          /* root is not on 0, allocate temp buffer for recv,
           * rotate data at the end */
          tempbuf = (char*)smpi_get_tmp_recvbuffer(rtrue_extent + (rcount * size - 1) * rextent);
          if (NULL == tempbuf) {
            err  = MPI_ERR_OTHER;
            line = __LINE__;
            goto err_hndl;
          }

          ptmp = tempbuf - rlb;
          if (sbuf != MPI_IN_PLACE) {
            /* copy from sbuf to temp buffer */
            err = Datatype::copy(sbuf, scount, sdtype, ptmp, rcount, rdtype);
            if (MPI_SUCCESS != err) {
              line = __LINE__;
              goto err_hndl;
            }
          } else {
            /* copy from rbuf to temp buffer  */
            err = Datatype::copy((char*)rbuf + rank * rextent * rcount, rcount, rdtype, ptmp, rcount, rdtype);
            if (MPI_SUCCESS != err) {
              line = __LINE__;
              goto err_hndl;
            }
          }
        }
        /* the root's own contribution occupies the first chunk */
        total_recv = rcount;
    } else if (!(vrank % 2)) {
      /* other non-leaf nodes, allocate temp buffer for data received from
       * children, the most we need is half of the total data elements due
       * to the property of binomial tree */
      tempbuf = (char*)smpi_get_tmp_sendbuffer(strue_extent + (scount * size - 1) * sextent);
      if (NULL == tempbuf) {
        err  = MPI_ERR_OTHER;
        line = __LINE__;
        goto err_hndl;
      }

      ptmp = tempbuf - slb;
      /* local copy to tempbuf */
      err = Datatype::copy(sbuf, scount, sdtype, ptmp, scount, sdtype);
      if (MPI_SUCCESS != err) {
        line = __LINE__;
        goto err_hndl;
      }

      /* use sdtype,scount as rdtype,rdcount since they are ignored on
       * non-root procs */
      rdtype     = sdtype;
      rcount     = scount;
      rextent    = sextent;
      total_recv = rcount;
    } else {
      /* leaf nodes, no temp buffer needed, use sdtype,scount as
       * rdtype,rdcount since they are ignored on non-root procs */
      ptmp       = (char*)sbuf;
      total_recv = scount;
    }

    if (!(vrank % 2)) {
      /* all non-leaf nodes recv from children */
      for (i = 0; i < bmtree->tree_nextsize; i++) {
        int mycount = 0, vkid;
        /* figure out how much data I have to receive from this child:
         * the distance between the child and me in vrank space, capped by
         * the number of ranks from the child to the end */
        vkid    = (bmtree->tree_next[i] - root + size) % size;
        mycount = vkid - vrank;
        if (mycount > (size - vkid))
          mycount = size - vkid;
        mycount *= rcount;

        XBT_DEBUG("smpi_coll_tuned_gather_ompi_binomial rank %d recv %d mycount = %d", rank, bmtree->tree_next[i],
                  mycount);

        /* append the child's block right after what has been gathered so far */
        Request::recv(ptmp + total_recv * rextent, mycount, rdtype, bmtree->tree_next[i], COLL_TAG_GATHER, comm,
                      &status);

        total_recv += mycount;
      }
    }

    if (rank != root) {
      /* all nodes except root send to parents */
      XBT_DEBUG("smpi_coll_tuned_gather_ompi_binomial rank %d send %d count %d\n", rank, bmtree->tree_prev, total_recv);

      Request::send(ptmp, total_recv, sdtype, bmtree->tree_prev, COLL_TAG_GATHER, comm);
    }
    if (rank == root) {
      if (root != 0) {
        /* rotate received data on root if root != 0:
         * first the chunks of ranks root..size-1 ... */
        err = Datatype::copy(ptmp, rcount * (size - root), rdtype, (char*)rbuf + rextent * root * rcount,
                             rcount * (size - root), rdtype);
        if (MPI_SUCCESS != err) {
          line = __LINE__;
          goto err_hndl;
        }

        /* ... then the chunks of ranks 0..root-1 */
        err = Datatype::copy(ptmp + rextent * rcount * (size - root), rcount * root, rdtype, (char*)rbuf, rcount * root,
                             rdtype);
        if (MPI_SUCCESS != err) {
          line = __LINE__;
          goto err_hndl;
        }

        smpi_free_tmp_buffer(tempbuf);
      }
    } else if (!(vrank % 2)) {
      /* other non-leaf nodes */
      smpi_free_tmp_buffer(tempbuf);
    }
    ompi_coll_tuned_topo_destroy_tree(&bmtree);
    return MPI_SUCCESS;

 err_hndl:
    if (NULL != tempbuf)
      smpi_free_tmp_buffer(tempbuf);
    /* the tree is built before any jump to this label, so it must be
     * released here as well, otherwise every error path leaks it */
    ompi_coll_tuned_topo_destroy_tree(&bmtree);

    XBT_DEBUG("%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank);
    return err;
}