Пример #1
0
/* ****************************************************************** */
/*
 * Allgather expressed as an in-place bitwise-OR allreduce.
 *
 * The calling rank's contribution (send_size bytes) is placed at byte offset
 * rank * send_size of the receive buffer, every other byte of the recv_size
 * byte receive buffer is cleared, and the whole buffer is then reduced in
 * place with MPI_BOR over recv_size / sizeof(int) MPI_INT elements.
 *
 * sendbuf        source data, or MPI_IN_PLACE when the contribution is
 *                already sitting in its slot of recvbuf
 * send_true_lb   byte offset added to sendbuf before copying
 * recv_true_lb   byte offset added to recvbuf to find the start of the data
 * send_size      bytes contributed by this rank
 * recv_size      bytes in the complete receive buffer
 * comm_ptr       communicator; only its rank field is read here
 * mpierrno       forwarded untouched to MPIDO_Allreduce
 *
 * sendcount, sendtype, recvcount and recvtype are accepted but not read.
 *
 * Returns whatever MPIDO_Allreduce returns.
 */
int MPIDO_Allgather_allreduce(const void *sendbuf,
			      int sendcount,
			      MPI_Datatype sendtype,
			      void *recvbuf,
			      int recvcount,
			      MPI_Datatype recvtype,
			      MPI_Aint send_true_lb,
			      MPI_Aint recv_true_lb,
			      size_t send_size,
			      size_t recv_size,
			      MPID_Comm * comm_ptr,
                              int *mpierrno)

{
  const int my_rank = comm_ptr->rank;
  char *const base = (char *) recvbuf + recv_true_lb;
  char *const my_slot = base + my_rank * send_size;

  /* Zero the bytes in front of and behind this rank's slot so that they
   * do not disturb the OR of the other ranks' contributions. */
  memset(base, 0, my_rank * send_size);
  memset(my_slot + send_size, 0, recv_size - (my_rank + 1) * send_size);

  /* With MPI_IN_PLACE the slot already holds the data. */
  if (sendbuf != MPI_IN_PLACE)
    memcpy(my_slot, (char *) sendbuf + send_true_lb, send_size);

  /* TODO: Change to PAMI */
  return MPIDO_Allreduce(MPI_IN_PLACE,
                         base,
                         recv_size / sizeof(int),
                         MPI_INT,
                         MPI_BOR,
                         comm_ptr,
                         mpierrno);
}
Пример #2
0
/*
 * MPIDO_Scatter - choose and run a scatter implementation for comm.
 *
 * Flow:
 *   1. Each rank inspects the datatype(s) that matter to it (send and recv
 *      types at the root, recv type elsewhere) and records in `success`
 *      whether they are usable and contiguous.
 *   2. If the communicator properties request the MPICH algorithm, mark the
 *      communicator irregular, or the message is small (<= 64 bytes) and no
 *      tree broadcast is available, MPIR_Scatter_intra is used right away.
 *   3. Otherwise `success` is AND-reduced across the communicator; if any
 *      rank reported failure, MPIR_Scatter_intra is used.
 *   4. Without STAR tuning the broadcast-based scatter is used when enabled,
 *      else MPIR_Scatter_intra.  With STAR tuning the call site is identified
 *      from the backtrace and STAR_Scatter is invoked, falling back to
 *      MPIR_Scatter_intra on STAR_FAILURE or call-site disagreement.
 *
 * Parameters mirror MPI_Scatter; comm is the communicator object.
 * Returns the return code of whichever scatter implementation ran.
 */
int MPIDO_Scatter(void *sendbuf,
                  int sendcount,
                  MPI_Datatype sendtype,
                  void *recvbuf,
                  int recvcount,
                  MPI_Datatype recvtype,
                  int root,
                  MPID_Comm * comm)
{
  MPIDO_Embedded_Info_Set * properties = &(comm->dcmf.properties);
  MPID_Datatype * data_ptr;
  /* Separate lower bounds: the send and receive types may differ, so each
   * buffer must be adjusted by the bound of its own datatype. */
  MPI_Aint send_true_lb = 0;
  MPI_Aint recv_true_lb = 0;
  char *sbuf = sendbuf, *rbuf = recvbuf;
  int contig, nbytes = 0, rc = 0;
  int rank = comm->rank;
  int success = 1;

  if (rank == root)
  {
    /* The send arguments are validated at the root only. */
    if (sendtype != MPI_DATATYPE_NULL && sendcount >= 0)
    {
      MPIDI_Datatype_get_info(sendcount, sendtype, contig,
                              nbytes, data_ptr, send_true_lb);
      if (!contig) success = 0;
    }
    else
      success = 0;

    if (success)
    {
      if (recvtype != MPI_DATATYPE_NULL && recvcount >= 0)
      {
        /* nbytes ends up holding the size of the receive side */
        MPIDI_Datatype_get_info(recvcount, recvtype, contig,
                                nbytes, data_ptr, recv_true_lb);
        if (!contig) success = 0;
      }
      else success = 0;
    }
  }

  else
  {
    /* Non-root ranks only receive, so only the receive arguments are
     * validated here. */
    if (recvtype != MPI_DATATYPE_NULL && recvcount >= 0)
    {
      MPIDI_Datatype_get_info(recvcount, recvtype, contig,
                              nbytes, data_ptr, recv_true_lb);
      if (!contig) success = 0;
    }
    else
      success = 0;
  }

  if (MPIDO_INFO_ISSET(properties, MPIDO_USE_MPICH_SCATTER) ||
      MPIDO_INFO_ISSET(properties, MPIDO_IRREG_COMM) ||
      (!MPIDO_INFO_ISSET(properties, MPIDO_USE_TREE_BCAST) && nbytes <= 64))
  {
    comm->dcmf.last_algorithm = MPIDO_USE_MPICH_SCATTER;
    return MPIR_Scatter_intra(sendbuf, sendcount, sendtype,
                              recvbuf, recvcount, recvtype,
                              root, comm);
  }
  /* set the internal control flow to disable internal star tuning */
  STAR_info.internal_control_flow = 1;

  /* Every rank must take the same path: AND the per-rank verdicts. */
  MPIDO_Allreduce(MPI_IN_PLACE, &success, 1, MPI_INT, MPI_BAND, comm);

  /* reset flag */
  STAR_info.internal_control_flow = 0;

  if (!success)
    return MPIR_Scatter_intra(sendbuf, sendcount, sendtype,
                              recvbuf, recvcount, recvtype,
                              root, comm);

  MPIDI_VerifyBuffer(sendbuf, sbuf, send_true_lb);
  MPIDI_VerifyBuffer(recvbuf, rbuf, recv_true_lb);

  if (!STAR_info.enabled || STAR_info.internal_control_flow ||
      STAR_info.scatter_algorithms == 1)
  {
    if (MPIDO_INFO_ISSET(properties, MPIDO_USE_BCAST_SCATTER))
    {
      comm->dcmf.last_algorithm = MPIDO_USE_BCAST_SCATTER;
      return MPIDO_Scatter_bcast(sbuf, sendcount, sendtype,
                                 rbuf, recvcount, recvtype,
                                 root, comm);
    }

    /* No optimized protocol is enabled: the scatter must still happen, so
     * use the MPICH implementation instead of returning without moving
     * any data. */
    comm->dcmf.last_algorithm = MPIDO_USE_MPICH_SCATTER;
    return MPIR_Scatter_intra(sendbuf, sendcount, sendtype,
                              recvbuf, recvcount, recvtype,
                              root, comm);
  }
  else
  {
    int id = 0;
    int frames;
    unsigned char same_callsite = 1;

    void ** tb_ptr = (void **) MPIU_Malloc(sizeof(void *) *
                                           STAR_info.traceback_levels);

    /* set the internal control flow to disable internal star tuning */
    STAR_info.internal_control_flow = 1;

    /* get backtrace info for caller to this func, use that as callsite_id.
     * backtrace() may fill fewer entries than requested, so the outermost
     * entry actually written is used; when the requested depth is reached
     * this is tb_ptr[traceback_levels - 1].  If the buffer could not be
     * allocated, or no frame was captured, id stays 0. */
    if (tb_ptr)
    {
      frames = backtrace(tb_ptr, STAR_info.traceback_levels);
      if (frames > 0)
        id = (int) (MPI_Aint) tb_ptr[frames - 1];
    }

    /* find out if all participants agree on the callsite id */
    if (STAR_info.agree_on_callsite)
    {
      /* The buffers are reduced as MPI_UNSIGNED_LONG, so they must really
       * be unsigned long.  MAX(id) equals ~MAX(~id) only when every rank
       * contributed the same id. */
      unsigned long tmp[2], result[2];
      tmp[0] = (unsigned long) id;
      tmp[1] = ~tmp[0];
      MPIDO_Allreduce(tmp, result, 2, MPI_UNSIGNED_LONG, MPI_MAX, comm);
      if (result[0] != (~result[1]))
        same_callsite = 0;
    }

    if (same_callsite)
    {
      STAR_Callsite collective_site;

      /* create a signature callsite info for this particular call site */
      collective_site.call_type = SCATTER_CALL;
      collective_site.comm = comm;
      collective_site.bytes = nbytes;
      collective_site.op_type_support = MPIDO_SUPPORT_NOT_NEEDED;
      collective_site.id = id;

      rc = STAR_Scatter(sbuf, sendcount, sendtype,
                        rbuf, recvcount, recvtype,
                        root, &collective_site,
                        STAR_scatter_repository,
                        STAR_info.scatter_algorithms);
    }

    if (rc == STAR_FAILURE || !same_callsite)
      rc = MPIR_Scatter_intra(sendbuf, sendcount, sendtype,
                              recvbuf, recvcount, recvtype,
                              root, comm);

    /* unset the internal control flow */
    STAR_info.internal_control_flow = 0;

    if (tb_ptr)
      MPIU_Free(tb_ptr);
  }
  return rc;
}
Пример #3
0
/*
 * Allgather expressed as an in-place allreduce over the receive buffer.
 *
 * The calling rank's send_size bytes are placed at byte offset
 * rank * send_size of the receive buffer (unless sendbuf is MPI_IN_PLACE,
 * in which case they are taken to be there already).  One of three
 * reductions is then run:
 *
 *   - conversion path: when recv_size is at most
 *     MAX_ALLGATHER_ALLREDUCE_BUFFER_SIZE, send_size is a multiple of 4,
 *     the types are not both MPI_DOUBLE, and a scratch buffer of
 *     2 * recv_size bytes can be allocated, the data is viewed as ints,
 *     widened to doubles in the scratch buffer (other ranks' slots zeroed),
 *     summed with MPI_DOUBLE / MPI_SUM, and narrowed back to ints for every
 *     slot except this rank's own;
 *   - both types MPI_DOUBLE: the other ranks' slots are zeroed and the
 *     buffer is summed in place as doubles;
 *   - otherwise: the other ranks' slots are zeroed and the buffer is OR-ed
 *     in place as MPI_UNSIGNED.
 *
 * sendcount and recvcount are accepted but not read.
 * Returns the return code of the MPIDO_Allreduce call that was made.
 */
int MPIDO_Allgather_allreduce(const void *sendbuf,
			      int sendcount,
			      MPI_Datatype sendtype,
			      void *recvbuf,
			      int recvcount,
			      MPI_Datatype recvtype,
			      MPI_Aint send_true_lb,
			      MPI_Aint recv_true_lb,
			      size_t send_size,
			      size_t recv_size,
			      MPID_Comm * comm_ptr,
                              int *mpierrno)

{
  int rc, idx;
  const int rank = comm_ptr->rank;
  char *const recv_base = (char *) recvbuf + recv_true_lb;
  char *const my_slot = recv_base + rank * send_size;
  const int both_double = (sendtype == MPI_DOUBLE && recvtype == MPI_DOUBLE);
  double *scratch = NULL;

  if (sendbuf != MPI_IN_PLACE)
    memcpy(my_slot, (char *) sendbuf + send_true_lb, send_size);

  /* TODO: Change to PAMI */
  /* Conversion path: integer/long allgathers only.  A failed allocation
   * leaves scratch NULL and drops through to the direct algorithm. */
  if (recv_size <= MAX_ALLGATHER_ALLREDUCE_BUFFER_SIZE &&
      (send_size & 0x3) == 0 &&
      !both_double)
    scratch = (double *) MPL_malloc(recv_size * 2);

  if (scratch != NULL)
  {
    const size_t words_before = (rank * send_size) / sizeof(int);
    const size_t words_mine = send_size / sizeof(int);
    const size_t words_total = recv_size / sizeof(int);
    double *my_dbl_slot = scratch + words_before;
    const int *src_words = (const int *) my_slot;
    int *dst_words = (int *) recv_base;

    /* One double per int: clear the doubles that stand for other ranks'
     * data, before and after this rank's slot. */
    memset(scratch, 0, rank * send_size * 2);
    memset(my_dbl_slot + words_mine, 0,
           (recv_size - (rank + 1) * send_size) * 2);

    for (idx = 0; idx < words_mine; ++idx)
      my_dbl_slot[idx] = (double) src_words[idx];

    /* Switch to comm->coll_fns->fn() */
    rc = MPIDO_Allreduce(MPI_IN_PLACE,
                         scratch,
                         recv_size / sizeof(int),
                         MPI_DOUBLE,
                         MPI_SUM,
                         comm_ptr,
                         mpierrno);

    /* Narrow the results back, skipping this rank's own slot which
     * already holds the original data. */
    for (idx = 0; idx < words_before; ++idx)
      dst_words[idx] = (int) scratch[idx];

    for (idx = (rank + 1) * send_size / sizeof(int); idx < words_total; ++idx)
      dst_words[idx] = (int) scratch[idx];

    MPL_free(scratch);
    return rc;
  }

  /* Direct algorithm: reduce the receive buffer itself. */
  memset(recv_base, 0, rank * send_size);
  memset(my_slot + send_size, 0, recv_size - (rank + 1) * send_size);

  if (both_double)
  {
    /* Switch to comm->coll_fns->fn() */
    return MPIDO_Allreduce(MPI_IN_PLACE,
                           recv_base,
                           recv_size / sizeof(double),
                           MPI_DOUBLE,
                           MPI_SUM,
                           comm_ptr,
                           mpierrno);
  }

  /* Switch to comm->coll_fns->fn() */
  return MPIDO_Allreduce(MPI_IN_PLACE,
                         recv_base,
                         recv_size / sizeof(int),
                         MPI_UNSIGNED,
                         MPI_BOR,
                         comm_ptr,
                         mpierrno);
}
Пример #4
0
int MPIDO_Reduce(const void *sendbuf, 
                 void *recvbuf, 
                 int count, 
                 MPI_Datatype datatype,
                 MPI_Op op, 
                 int root, 
                 MPID_Comm *comm_ptr, 
                 int *mpierrno)

{
#ifndef HAVE_PAMI_IN_PLACE
  if (sendbuf == MPI_IN_PLACE)
  {
    MPID_Abort (NULL, 0, 1, "'MPI_IN_PLACE' requries support for `PAMI_IN_PLACE`");
    return -1;
  }
#endif
   MPID_Datatype *dt_null = NULL;
   MPI_Aint true_lb = 0;
   int dt_contig ATTRIBUTE((unused)), tsize;
   int mu;
   char *sbuf, *rbuf;
   pami_data_function pop;
   pami_type_t pdt;
   int rc;
   int alg_selected = 0;
   const int rank = comm_ptr->rank;
#if ASSERT_LEVEL==0
   /* We can't afford the tracing in ndebug/performance libraries */
    const unsigned verbose = 0;
#else
    const unsigned verbose = (MPIDI_Process.verbose >= MPIDI_VERBOSE_DETAILS_ALL) && (rank == 0);
#endif
   const struct MPIDI_Comm* const mpid = &(comm_ptr->mpid);
   const int selected_type = mpid->user_selected_type[PAMI_XFER_REDUCE];

   rc = MPIDI_Datatype_to_pami(datatype, &pdt, op, &pop, &mu);
   if(unlikely(verbose))
      fprintf(stderr,"reduce - rc %u, root %u, count %d, dt: %p, op: %p, mu: %u, selectedvar %u != %u (MPICH) sendbuf %p, recvbuf %p\n",
	      rc, root, count, pdt, pop, mu, 
	      (unsigned)selected_type, MPID_COLL_USE_MPICH,sendbuf, recvbuf);

   pami_xfer_t reduce;
   pami_algorithm_t my_reduce=0;
   const pami_metadata_t *my_md = (pami_metadata_t *)NULL;
   int queryreq = 0;
   volatile unsigned reduce_active = 1;

   MPIDI_Datatype_get_info(count, datatype, dt_contig, tsize, dt_null, true_lb);
   rbuf = (char *)recvbuf + true_lb;
   sbuf = (char *)sendbuf + true_lb;
   if(sendbuf == MPI_IN_PLACE) 
   {
      if(unlikely(verbose))
	fprintf(stderr,"reduce MPI_IN_PLACE send buffering (%d,%d)\n",count,tsize);
      sbuf = PAMI_IN_PLACE;
   }

   reduce.cb_done = reduce_cb_done;
   reduce.cookie = (void *)&reduce_active;
   if(mpid->optreduce) /* GLUE_ALLREDUCE */
   {
      char* tbuf = NULL;
      if(unlikely(verbose))
         fprintf(stderr,"Using protocol GLUE_ALLREDUCE for reduce (%d,%d)\n",count,tsize);
      MPIDI_Update_last_algorithm(comm_ptr, "REDUCE_OPT_ALLREDUCE");
      void *destbuf = recvbuf;
      if(rank != root) /* temp buffer for non-root destbuf */
      {
         tbuf = destbuf = MPL_malloc(tsize);
      }
      /* Switch to comm->coll_fns->fn() */
      MPIDO_Allreduce(sendbuf,
                      destbuf,
                      count,
                      datatype,
                      op,
                      comm_ptr,
                      mpierrno);
      if(tbuf)
         MPL_free(tbuf);
      return 0;
   }
   if(selected_type == MPID_COLL_USE_MPICH || rc != MPI_SUCCESS)
   {
      if(unlikely(verbose))
         fprintf(stderr,"Using MPICH reduce algorithm\n");
#if CUDA_AWARE_SUPPORT
      if(MPIDI_Process.cuda_aware_support_on)
      {
         MPI_Aint dt_extent;
         MPID_Datatype_get_extent_macro(datatype, dt_extent);
         char *scbuf = NULL;
         char *rcbuf = NULL;
         int is_send_dev_buf = MPIDI_cuda_is_device_buf(sendbuf);
         int is_recv_dev_buf = MPIDI_cuda_is_device_buf(recvbuf);
         if(is_send_dev_buf)
         {
           scbuf = MPL_malloc(dt_extent * count);
           cudaError_t cudaerr = CudaMemcpy(scbuf, sendbuf, dt_extent * count, cudaMemcpyDeviceToHost);
           if (cudaSuccess != cudaerr) 
             fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
         }
         else
           scbuf = sendbuf;
         if(is_recv_dev_buf)
         {
           rcbuf = MPL_malloc(dt_extent * count);
           if(sendbuf == MPI_IN_PLACE)
           {
             cudaError_t cudaerr = CudaMemcpy(rcbuf, recvbuf, dt_extent * count, cudaMemcpyDeviceToHost);
             if (cudaSuccess != cudaerr)
               fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
           }
           else
             memset(rcbuf, 0, dt_extent * count);
         }
         else
           rcbuf = recvbuf;
         int cuda_res =  MPIR_Reduce(scbuf, rcbuf, count, datatype, op, root, comm_ptr, mpierrno);
         if(is_send_dev_buf)MPL_free(scbuf);
         if(is_recv_dev_buf)
         {
           cudaError_t cudaerr = CudaMemcpy(recvbuf, rcbuf, dt_extent * count, cudaMemcpyHostToDevice);
           if (cudaSuccess != cudaerr)
             fprintf(stderr, "cudaMemcpy failed: %s\n", CudaGetErrorString(cudaerr));
           MPL_free(rcbuf);
         }
         return cuda_res;
      }
      else
#endif
      return MPIR_Reduce(sendbuf, recvbuf, count, datatype, op, root, comm_ptr, mpierrno);
   }

   if(selected_type == MPID_COLL_OPTIMIZED)
   {
      if((mpid->cutoff_size[PAMI_XFER_REDUCE][0] == 0) || 
          (mpid->cutoff_size[PAMI_XFER_REDUCE][0] >= tsize && mpid->cutoff_size[PAMI_XFER_REDUCE][0] > 0))
      {
        TRACE_ERR("Optimized Reduce (%s) was pre-selected\n",
         mpid->opt_protocol_md[PAMI_XFER_REDUCE][0].name);
        my_reduce    = mpid->opt_protocol[PAMI_XFER_REDUCE][0];
        my_md = &mpid->opt_protocol_md[PAMI_XFER_REDUCE][0];
        queryreq     = mpid->must_query[PAMI_XFER_REDUCE][0];
      }

   }
   else
   {
      TRACE_ERR("Optimized reduce (%s) was specified by user\n",
      mpid->user_metadata[PAMI_XFER_REDUCE].name);
      my_reduce    =  mpid->user_selected[PAMI_XFER_REDUCE];
      my_md = &mpid->user_metadata[PAMI_XFER_REDUCE];
      queryreq     = selected_type;
   }
   reduce.algorithm = my_reduce;
   reduce.cmd.xfer_reduce.sndbuf = sbuf;
   reduce.cmd.xfer_reduce.rcvbuf = rbuf;
   reduce.cmd.xfer_reduce.stype = pdt;
   reduce.cmd.xfer_reduce.rtype = pdt;
   reduce.cmd.xfer_reduce.stypecount = count;
   reduce.cmd.xfer_reduce.rtypecount = count;
   reduce.cmd.xfer_reduce.op = pop;
   reduce.cmd.xfer_reduce.root = MPIDI_Task_to_endpoint(MPID_VCR_GET_LPID(comm_ptr->vcr, root), 0);


   if(unlikely(queryreq == MPID_COLL_ALWAYS_QUERY || 
               queryreq == MPID_COLL_CHECK_FN_REQUIRED))
   {
      metadata_result_t result = {0};
      TRACE_ERR("Querying reduce protocol %s, type was %d\n",
                my_md->name,
                queryreq);
      if(my_md->check_fn == NULL)
      {
         /* process metadata bits */
         if((!my_md->check_correct.values.inplace) && (sendbuf == MPI_IN_PLACE))
            result.check.unspecified = 1;
         if(my_md->check_correct.values.rangeminmax)
         {
            MPI_Aint data_true_lb ATTRIBUTE((unused));
            MPID_Datatype *data_ptr;
            int data_size, data_contig ATTRIBUTE((unused));
            MPIDI_Datatype_get_info(count, datatype, data_contig, data_size, data_ptr, data_true_lb); 
            if((my_md->range_lo <= data_size) &&
               (my_md->range_hi >= data_size))
               ; /* ok, algorithm selected */
            else
            {
               result.check.range = 1;
               if(unlikely(verbose))
               {   
                  fprintf(stderr,"message size (%u) outside range (%zu<->%zu) for %s.\n",
                          data_size,
                          my_md->range_lo,
                          my_md->range_hi,
                          my_md->name);
               }
            }
         }
      }
Пример #5
0
/*
 * MPIDO_Allgatherv - choose and run an allgatherv implementation for comm.
 *
 * Flow:
 *   1. If the communicator properties request the MPICH algorithm, call
 *      MPIR_Allgatherv_intra immediately.
 *   2. Gather facts about the buffers into `config`: contiguity of the send
 *      and receive datatypes, and whether displs[] describes a gap-free
 *      packing of the blocks (recv_continuous).  The total receive size in
 *      bytes is accumulated in buffer_sum; msize is that total divided by
 *      the number of ranks.
 *   3. Optionally AND-reduce `config` across the communicator
 *      (MPIDO_USE_PREALLREDUCE_ALLGATHERV).
 *   4. Without STAR tuning (or below its size threshold) pick one of the
 *      allreduce / bcast / alltoall / async-rectangle-bcast variants, either
 *      as forced through the environment or from message-size and
 *      rank-count cutoffs, falling back to MPIR_Allgatherv_intra when none
 *      applies.  Otherwise hand the call to STAR_Allgatherv, falling back to
 *      MPIR_Allgatherv_intra on STAR_FAILURE.
 *
 * Parameters mirror MPI_Allgatherv; comm is the communicator object.
 * Returns the return code of whichever implementation ran.
 */
int
MPIDO_Allgatherv(void *sendbuf,
                 int sendcount,
                 MPI_Datatype sendtype,
                 void *recvbuf,
                 int *recvcounts,
                 int *displs,
                 MPI_Datatype recvtype,
                 MPID_Comm * comm)
{
  /* function pointer to be used to point to the appropriate algorithm */
  allgatherv_fptr func = NULL;

  /* Check the nature of the buffers */
  MPID_Datatype *dt_null = NULL;
  MPI_Aint send_true_lb  = 0;
  MPI_Aint recv_true_lb  = 0;
  size_t   send_size     = 0;
  size_t   recv_size     = 0;
  MPIDO_Coll_config config = {1,1,1,1,1};

  double msize;

  int i, rc, buffer_sum = 0, np = comm->local_size;
  char use_tree_reduce, use_alltoall, use_rect_async, use_bcast;
  /* sbuf and rbuf are filled in by MPIDI_VerifyBuffer but are not read
   * afterwards; the algorithms below receive sendbuf/recvbuf directly. */
  char *sbuf, *rbuf;

  MPIDO_Embedded_Info_Set * comm_prop = &(comm->dcmf.properties);
  MPIDO_Embedded_Info_Set * coll_prop = &MPIDI_CollectiveProtocols.properties;

  unsigned char userenvset = MPIDO_INFO_ISSET(comm_prop,
                                             MPIDO_ALLGATHERV_ENVVAR);

  if (MPIDO_INFO_ISSET(comm_prop, MPIDO_USE_MPICH_ALLGATHERV))
  {
    comm->dcmf.last_algorithm = MPIDO_USE_MPICH_ALLGATHERV;
    return MPIR_Allgatherv_intra(sendbuf, sendcount, sendtype,
                                 recvbuf, recvcounts, displs, recvtype,
                                 comm);
  }

  /* recv_size is the size of ONE element of recvtype */
  MPIDI_Datatype_get_info(1,
                          recvtype,
                          config.recv_contig,
                          recv_size,
                          dt_null,
                          recv_true_lb);

  /* With MPI_IN_PLACE the send arguments are not inspected: send_size
   * stays 0 and config.send_contig keeps its initial value of 1. */
  if (sendbuf != MPI_IN_PLACE)
  {
    MPIDI_Datatype_get_info(sendcount,
                            sendtype,
                            config.send_contig,
                            send_size,
                            dt_null,
                            send_true_lb);
    MPIDI_VerifyBuffer(sendbuf, sbuf, send_true_lb);
  }

  if (displs[0])
    config.recv_continuous = 0;

  /* Walk every rank: the displacement check decides recv_continuous, but
   * the element total must cover ALL ranks even after a mismatch is found,
   * because buffer_sum feeds msize and is passed on to the algorithms. */
  for (i = 1; i < np; i++)
  {
    buffer_sum += recvcounts[i - 1];
    if (buffer_sum != displs[i])
      config.recv_continuous = 0;
  }

  buffer_sum += recvcounts[np - 1];

  /* elements -> bytes */
  buffer_sum *= recv_size;
  msize = (double)buffer_sum / (double)np;

  MPIDI_VerifyBuffer(recvbuf, rbuf, (recv_true_lb + buffer_sum));

  if (MPIDO_INFO_ISSET(coll_prop, MPIDO_USE_PREALLREDUCE_ALLGATHERV))
  {
    /* make every rank see the same (most restrictive) buffer properties */
    STAR_info.internal_control_flow = 1;
    MPIDO_Allreduce(MPI_IN_PLACE, &config, 5, MPI_INT, MPI_BAND, comm);
    STAR_info.internal_control_flow = 0;
  }

  if (!STAR_info.enabled || STAR_info.internal_control_flow ||
      msize < STAR_info.allgather_threshold)
  {
    use_tree_reduce = MPIDO_INFO_ISSET(comm_prop, MPIDO_USE_TREE_ALLREDUCE) &&
      MPIDO_INFO_ISSET(comm_prop,
                      MPIDO_USE_ALLREDUCE_ALLGATHERV) &&
      config.recv_contig &&
      config.send_contig &&
      config.recv_continuous &&
      buffer_sum % sizeof(int) == 0;

    use_alltoall = MPIDO_INFO_ISSET(comm_prop, MPIDO_USE_TORUS_ALLTOALL) &&
      MPIDO_INFO_ISSET(comm_prop, MPIDO_USE_ALLTOALL_ALLGATHERV) &&
      config.recv_contig &&
      config.send_contig;

    use_rect_async = MPIDO_INFO_ISSET(comm_prop,
                                     MPIDO_USE_ARECT_BCAST_ALLGATHERV) &&
      MPIDO_INFO_ISSET(comm_prop, MPIDO_USE_ARECT_BCAST) &&
      config.recv_contig &&
      config.send_contig;

    use_bcast = MPIDO_INFO_ISSET(comm_prop, MPIDO_USE_BCAST_ALLGATHERV);

    if(userenvset)
    {
      /* Later matches override earlier ones, so the effective priority is
       * rect-async > alltoall > allreduce > bcast. */
      if(use_bcast)
      {
        func = MPIDO_Allgatherv_bcast;
        comm->dcmf.last_algorithm = MPIDO_USE_BCAST_ALLGATHERV;
      }
      if(use_tree_reduce)
      {
        func = MPIDO_Allgatherv_allreduce;
        comm->dcmf.last_algorithm = MPIDO_USE_ALLREDUCE_ALLGATHERV;
      }
      if(use_alltoall)
      {
        func = MPIDO_Allgatherv_alltoall;
        comm->dcmf.last_algorithm = MPIDO_USE_ALLTOALL_ALLGATHERV;
      }
      if(use_rect_async)
      {
        func = MPIDO_Allgatherv_bcast_rect_async;
        comm->dcmf.last_algorithm = MPIDO_USE_ARECT_BCAST_ALLGATHERV;
      }
    }
    else
    {
      /* Here the FIRST matching rule wins (each later rule tests !func). */
      if (!MPIDO_INFO_ISSET(comm_prop, MPIDO_IRREG_COMM))
      {
        if (np <= 512)
        {
          if (use_tree_reduce && msize < 128 * np)
          {
            func = MPIDO_Allgatherv_allreduce;
            comm->dcmf.last_algorithm = MPIDO_USE_ALLREDUCE_ALLGATHERV;
          }
          if (!func && use_bcast && msize >= 128 * np)
          {
            func = MPIDO_Allgatherv_bcast;
            comm->dcmf.last_algorithm = MPIDO_USE_BCAST_ALLGATHERV;
          }
          if (!func && use_alltoall &&
              msize > 128 && msize <= 8*np)
          {
            func = MPIDO_Allgatherv_alltoall;
            comm->dcmf.last_algorithm = MPIDO_USE_ALLTOALL_ALLGATHERV;
          }
          if (!func && use_rect_async && msize > 8*np)
          {
            func = MPIDO_Allgatherv_bcast_rect_async;
            comm->dcmf.last_algorithm = MPIDO_USE_ARECT_BCAST_ALLGATHERV;
          }
        }
        else
        {
          if (use_tree_reduce && msize < 512)
          {
            func = MPIDO_Allgatherv_allreduce;
            comm->dcmf.last_algorithm = MPIDO_USE_ALLREDUCE_ALLGATHERV;
          }
          if (!func && use_alltoall &&
              msize > 128 * (512.0 / (float) np) &&
              msize <= 128)
          {
            func = MPIDO_Allgatherv_alltoall;
            comm->dcmf.last_algorithm = MPIDO_USE_ALLTOALL_ALLGATHERV;
          }
          if (!func && use_rect_async &&
              msize >= 512 && msize <= 65536)
          {
            func = MPIDO_Allgatherv_bcast_rect_async;
            comm->dcmf.last_algorithm = MPIDO_USE_ARECT_BCAST_ALLGATHERV;
          }
          if (!func && use_bcast && msize > 65536)
          {
            func = MPIDO_Allgatherv_bcast;
            comm->dcmf.last_algorithm = MPIDO_USE_BCAST_ALLGATHERV;
          }
        }
      }
      else
      {
        if (msize >= 64 && use_alltoall)
        {
          func = MPIDO_Allgatherv_alltoall;
          comm->dcmf.last_algorithm = MPIDO_USE_ALLTOALL_ALLGATHERV;
        }
      }
    }

    if(!func)
    {
      comm->dcmf.last_algorithm = MPIDO_USE_MPICH_ALLGATHERV;
      return MPIR_Allgatherv_intra(sendbuf, sendcount, sendtype,
                                   recvbuf, recvcounts, displs, recvtype,
                                   comm);
    }

    rc = (func)(sendbuf, sendcount, sendtype,
                recvbuf, recvcounts, buffer_sum, displs, recvtype,
                send_true_lb, recv_true_lb, send_size, recv_size,
                comm);
  }
  else
  {
    STAR_Callsite collective_site;
    int frames;
    void ** tb_ptr = (void **) MPIU_Malloc(sizeof(void *) *
                                           STAR_info.traceback_levels);

    /* set the internal control flow to disable internal star tuning */
    STAR_info.internal_control_flow = 1;

    /* get backtrace info for caller to this func, use that as callsite_id.
     * backtrace() may fill fewer entries than requested, so the outermost
     * entry actually written is used; when the requested depth is reached
     * this is tb_ptr[traceback_levels - 1].  If the buffer could not be
     * allocated, or no frame was captured, the id stays 0. */
    collective_site.id = 0;
    if (tb_ptr)
    {
      frames = backtrace(tb_ptr, STAR_info.traceback_levels);
      if (frames > 0)
        collective_site.id = (int) (MPI_Aint) tb_ptr[frames - 1];
    }

    /* create a signature callsite info for this particular call site */
    collective_site.call_type = ALLGATHERV_CALL;
    collective_site.comm = comm;
    collective_site.bytes = buffer_sum;
    collective_site.op_type_support = MPIDO_SUPPORT_NOT_NEEDED;
    collective_site.buff_attributes[0] = config.send_contig;
    collective_site.buff_attributes[1] = config.recv_contig;
    collective_site.buff_attributes[2] = config.recv_continuous;

    /* decide buffer alignment: both buffers must sit on a 16-byte boundary.
     * The addresses go through MPI_Aint so no pointer bits are dropped by
     * the conversion itself. */
    collective_site.buff_attributes[3] = 1; /* assume aligned */
    if (((MPI_Aint) sendbuf & 0x0F) || ((MPI_Aint) recvbuf & 0x0F))
      collective_site.buff_attributes[3] = 0; /* set to not aligned */

    rc = STAR_Allgatherv(sendbuf,
                         sendcount,
                         sendtype,
                         recvbuf,
                         recvcounts,
                         buffer_sum,
                         displs,
                         recvtype,
                         send_true_lb,
                         recv_true_lb,
                         send_size,
                         recv_size,
                         &collective_site,
                         STAR_allgatherv_repository,
                         STAR_info.allgatherv_algorithms);

    /* unset the internal control flow */
    STAR_info.internal_control_flow = 0;

    if (rc == STAR_FAILURE)
      rc = MPIR_Allgatherv_intra(sendbuf, sendcount, sendtype,
                                 recvbuf, recvcounts, displs, recvtype,
                                 comm);
    if (tb_ptr)
      MPIU_Free(tb_ptr);
  }
  return rc;
}
Пример #6
0
/*
 * Allgatherv expressed as an in-place allreduce over the receive buffer.
 *
 * The calling rank's send_size bytes are placed at byte offset
 * displs[rank] * recv_size of the receive buffer (unless sendbuf is
 * MPI_IN_PLACE).  buffer_sum is used as the total byte length of the
 * receive buffer; recv_size is multiplied by displs[] / recvcounts[]
 * entries to obtain byte offsets.
 *
 *   - conversion path: when buffer_sum is at most
 *     MAX_ALLGATHERV_ALLREDUCE_BUFFER_SIZE, send_size and recv_size are
 *     multiples of 4, and a scratch buffer of 2 * buffer_sum bytes can be
 *     allocated, the data is viewed as ints, widened to doubles in the
 *     scratch buffer (everything outside this rank's block zeroed), summed
 *     with MPI_DOUBLE / MPI_SUM, and narrowed back to ints everywhere
 *     except this rank's own block;
 *   - direct path: everything outside this rank's block is zeroed in the
 *     receive buffer and the buffer is OR-ed in place as MPI_UNSIGNED.
 *
 * sendcount, sendtype and recvtype are accepted but not read.
 * Returns the return code of the MPIDO_Allreduce call that was made.
 */
int MPIDO_Allgatherv_allreduce(const void *sendbuf,
			       int sendcount,
			       MPI_Datatype sendtype,
			       void *recvbuf,
			       const int *recvcounts,
			       int buffer_sum,
			       const int *displs,
			       MPI_Datatype recvtype,
			       MPI_Aint send_true_lb,
			       MPI_Aint recv_true_lb,
			       size_t send_size,
			       size_t recv_size,
			       MPID_Comm * comm_ptr,
                               int *mpierrno)
{
  int rc, idx;
  int head_len;             /* bytes in front of this rank's block      */
  int tail_off;             /* byte offset just past this rank's block  */
  int tail_len;             /* bytes from tail_off to the buffer's end  */
  char *recv_base = NULL;
  char *my_block = NULL;
  double *scratch = NULL;
  const int rank = comm_ptr->rank;
  TRACE_ERR("Entering MPIDO_Allgatherv_allreduce\n");

  recv_base = (char *) recvbuf + recv_true_lb;
  my_block  = recv_base + displs[rank] * recv_size;

  if (sendbuf != MPI_IN_PLACE)
    memcpy(my_block, (char *) sendbuf + send_true_lb, send_size);

  /* Both algorithms need the extent of the region behind our block. */
  tail_off = (displs[rank] + recvcounts[rank]) * recv_size;
  tail_len = buffer_sum - (displs[rank] + recvcounts[rank]) * recv_size;

  /* TODO: Change to PAMI */
  /* Conversion path: integer/long/double allgathers only.  A failed
   * allocation leaves scratch NULL and drops through to the direct
   * algorithm. */
  if (buffer_sum <= MAX_ALLGATHERV_ALLREDUCE_BUFFER_SIZE &&
      (send_size & 0x3) == 0 &&
      (recv_size & 0x3) == 0)
    scratch = (double *) MPIU_Malloc(buffer_sum * 2);

  if (scratch != NULL)
  {
    double *my_dbl_block = scratch + (displs[rank] * recv_size) / sizeof(int);
    int *words = (int *) my_block;

    /* One double per int: clear the doubles standing for other ranks'
     * data, in front of and behind this rank's block. */
    memset(scratch, 0, displs[rank] * recv_size * 2);
    memset(scratch + tail_off / sizeof(int), 0, tail_len * 2);

    for (idx = 0; idx < (send_size / sizeof(int)); ++idx)
      my_dbl_block[idx] = (double) words[idx];

    /* Switch to comm->coll_fns->fn() */
    rc = MPIDO_Allreduce(MPI_IN_PLACE,
                         scratch,
                         buffer_sum / sizeof(int),
                         MPI_DOUBLE,
                         MPI_SUM,
                         comm_ptr,
                         mpierrno);

    /* Narrow the results back, skipping this rank's own block which
     * already holds the original data. */
    words = (int *) recv_base;
    for (idx = 0; idx < (displs[rank] * recv_size / sizeof(int)); ++idx)
      words[idx] = (int) scratch[idx];

    for (idx = tail_off / sizeof(int); idx < buffer_sum / sizeof(int); ++idx)
      words[idx] = (int) scratch[idx];

    MPIU_Free(scratch);
    return rc;
  }

  /* Direct algorithm: zero everything that is not ours, then OR. */
  head_len = displs[rank] * recv_size;
  memset(recv_base, 0, head_len);
  memset(recv_base + tail_off, 0, tail_len);

  TRACE_ERR("Calling MPIDO_Allreduce from MPIDO_Allgatherv_allreduce\n");
  /* Switch to comm->coll_fns->fn() */
  rc = MPIDO_Allreduce(MPI_IN_PLACE,
                       recv_base,
                       buffer_sum / sizeof(unsigned),
                       MPI_UNSIGNED,
                       MPI_BOR,
                       comm_ptr,
                       mpierrno);

  TRACE_ERR("Leaving MPIDO_Allgatherv_allreduce\n");
  return rc;
}