Example #1
    /// combine (per-process) messages
    Opm::DeferredLogger gatherDeferredLogger(const Opm::DeferredLogger& local_deferredlogger)
    {

        int num_messages = local_deferredlogger.messages_.size();

        int int64_mpi_pack_size;
        MPI_Pack_size(1, MPI_INT64_T, MPI_COMM_WORLD, &int64_mpi_pack_size);
        int unsigned_int_mpi_pack_size;
        MPI_Pack_size(1, MPI_UNSIGNED, MPI_COMM_WORLD, &unsigned_int_mpi_pack_size);

        // store number of messages;
        int message_size = unsigned_int_mpi_pack_size;
        // store 1 int64 per message for flag
        message_size += num_messages*int64_mpi_pack_size;
        // store 2 unsigned ints per message for length of tag and length of text
        message_size += num_messages*2*unsigned_int_mpi_pack_size;

        for (const auto& lm : local_deferredlogger.messages_) {
            int string_mpi_pack_size;
            MPI_Pack_size(lm.tag.size(), MPI_CHAR, MPI_COMM_WORLD, &string_mpi_pack_size);
            message_size += string_mpi_pack_size;
            MPI_Pack_size(lm.text.size(), MPI_CHAR, MPI_COMM_WORLD, &string_mpi_pack_size);
            message_size += string_mpi_pack_size;
        }

        // Pack local messages.
        std::vector<char> buffer(message_size);

        int offset = 0;
        packMessages(local_deferredlogger.messages_, buffer, offset);
        assert(offset == message_size);

        // Get message sizes and create offset/displacement array for gathering.
        int num_processes = -1;
        MPI_Comm_size(MPI_COMM_WORLD, &num_processes);
        std::vector<int> message_sizes(num_processes);
        MPI_Allgather(&message_size, 1, MPI_INT, message_sizes.data(), 1, MPI_INT, MPI_COMM_WORLD);
        std::vector<int> displ(num_processes + 1, 0);
        std::partial_sum(message_sizes.begin(), message_sizes.end(), displ.begin() + 1);

        // Gather.
        std::vector<char> recv_buffer(displ.back());
        MPI_Allgatherv(buffer.data(), buffer.size(), MPI_PACKED,
                       recv_buffer.data(), message_sizes.data(),
                       displ.data(), MPI_PACKED,
                       MPI_COMM_WORLD);

        // Unpack.
        Opm::DeferredLogger global_deferredlogger;
        global_deferredlogger.messages_ = unpackMessages(recv_buffer, displ);
        return global_deferredlogger;
    }
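
packMessages() and unpackMessages() are not shown above. As a minimal sketch of the layout that the size computation implies, assuming each message carries an int64 flag plus tag and text strings (the struct and function names below are hypothetical):

#include <stdint.h>
#include <mpi.h>

/* Hypothetical message layout matching Example #1's size computation:
 * an int64 flag, then tag length, tag bytes, text length, text bytes.
 * packMessages() would first pack the message count (one unsigned),
 * then pack each message like this. */
struct log_message {
    int64_t flag;
    unsigned tag_len, text_len;
    const char *tag, *text;
};

static void pack_one_message(const struct log_message *m,
                             char *buf, int bufsize, int *pos)
{
    MPI_Pack((void *) &m->flag,     1, MPI_INT64_T,  buf, bufsize, pos, MPI_COMM_WORLD);
    MPI_Pack((void *) &m->tag_len,  1, MPI_UNSIGNED, buf, bufsize, pos, MPI_COMM_WORLD);
    MPI_Pack((void *) m->tag,  (int) m->tag_len,  MPI_CHAR, buf, bufsize, pos, MPI_COMM_WORLD);
    MPI_Pack((void *) &m->text_len, 1, MPI_UNSIGNED, buf, bufsize, pos, MPI_COMM_WORLD);
    MPI_Pack((void *) m->text, (int) m->text_len, MPI_CHAR, buf, bufsize, pos, MPI_COMM_WORLD);
}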
Example #2
/* Extract the source array into the dest array using the DARRAY datatype.
   "count" integers are returned in destArray */
int PackUnpack( MPI_Datatype darraytype, const int srcArray[], int destArray[],
		int count )
{
    int packsize, position;
    int *packArray;

    MPI_Type_commit( &darraytype );
    MPI_Pack_size( 1, darraytype, MPI_COMM_SELF, &packsize );
    packArray = (int *)malloc( packsize );
    if (!packArray) {
	fprintf( stderr, "Unable to allocate pack array of size %d\n", 
		 packsize );
	MPI_Abort( MPI_COMM_WORLD, 1 );
        exit(1);
    }
    position = 0;
    MPI_Pack( (int*)srcArray, 1, darraytype, packArray, packsize, &position, 
	      MPI_COMM_SELF );
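    /* Use the number of bytes actually packed (position), not the
       MPI_Pack_size upper bound, as the unpack buffer size. */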
    packsize = position;
    position = 0;
    MPI_Unpack( packArray, packsize, &position, destArray, count, MPI_INT, 
		MPI_COMM_SELF );
    free( packArray );
    return 0;
}
Example #3
int
MPIBuffer :: givePackSize(MPI_Comm communicator, MPI_Datatype type, int size)
{
    int requiredSpace;
    MPI_Pack_size(size, type, communicator, & requiredSpace);
    return requiredSpace;
}
Example #4
/* Function:  p7_hmm_mpi_Send()
 * Synopsis:  Send an HMM as an MPI work unit.
 *
 * Purpose:   Sends an HMM <hmm> as a work unit to MPI process
 *            <dest> (where <dest> ranges from 0..<nproc-1>), tagged
 *            with MPI tag <tag>, for MPI communicator <comm>, as 
 *            the sole workunit or result. 
 *            
 *            Work units are prefixed by a status code indicating the
 *            number of HMMs sent. If <hmm> is <NULL>, this code is 0,
 *            and <_Recv()> interprets such a unit as an EOD
 *            (end-of-data) signal, a signal to cleanly shut down
 *            worker processes.
 *            
 *            In order to minimize alloc/free cycles in this routine,
 *            caller passes a pointer to a working buffer <*buf> of
 *            size <*nalloc> characters. If necessary (i.e. if <hmm> is
 *            too big to fit), <*buf> will be reallocated and <*nalloc>
 *            increased to the new size. As a special case, if <*buf>
 *            is <NULL> and <*nalloc> is 0, the buffer will be
 *            allocated appropriately, but the caller is still
 *            responsible for free'ing it.
 *            
 * Returns:   <eslOK> on success; <*buf> may have been reallocated and
 *            <*nalloc> may have been increased.
 * 
 * Throws:    <eslESYS> if an MPI call fails; <eslEMEM> if a malloc/realloc
 *            fails. In either case, <*buf> and <*nalloc> remain valid and useful
 *            memory (though the contents of <*buf> are undefined). 
 * 
 * Note:      Compare to p7_hmmfile_WriteBinary(). The two operations (sending
 *            an HMM via MPI, or saving it as a binary file to disk) are
 *            similar.
 */
int
p7_hmm_mpi_Send(const P7_HMM *hmm, int dest, int tag, MPI_Comm comm, char **buf, int *nalloc)
{
  int   n = 0;
  int   code;
  int   sz, pos;
  int   status;

  /* Figure out size. We always send at least a status code (0=EOD=nothing sent) */
  if ( MPI_Pack_size(1, MPI_INT, comm, &sz)          != MPI_SUCCESS) ESL_EXCEPTION(eslESYS, "mpi pack size failed");  n += sz;
  if ((status = p7_hmm_mpi_PackSize(hmm, comm, &sz)) != eslOK)       return status;                                   n += sz;

  /* Make sure the buffer is allocated appropriately */
  if (*buf == NULL || n > *nalloc) 
    {
      ESL_REALLOC(*buf, sizeof(char) * n);
      *nalloc = n; 
    }

  /* Pack the status code and HMM into the buffer */
  /* The status code is the # of HMMs being sent as one MPI message; here 1 or 0 */
  pos  = 0;
  code = (hmm ? 1 : 0);
  if (MPI_Pack(&code, 1, MPI_INT,           *buf, n, &pos, comm)  != MPI_SUCCESS) ESL_EXCEPTION(eslESYS, "mpi pack failed");
  if (hmm && (status = p7_hmm_mpi_Pack(hmm, *buf, n, &pos, comm)) != eslOK)       return status;
  
  /* Send the packed HMM to the destination. */
  if (MPI_Send(*buf, n, MPI_PACKED, dest, tag, comm) != MPI_SUCCESS)  ESL_EXCEPTION(eslESYS, "mpi send failed");
  return eslOK;

 ERROR:
  return status;
}
Example #5
File: mpi.cpp Project: shy3u/GeRelion
// MPI_TEST will be executed once every this many seconds, so this determines the minimum time taken by every send operation.
//#define VERBOSE_MPISENDRECV
int MpiNode::relion_MPI_Send(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm)
{

	int result;
	double start_time = MPI_Wtime();

#define ONLY_NORMAL_SEND
#ifdef ONLY_NORMAL_SEND
	result = MPI_Send(buf, count, datatype, dest, tag, comm);
	if (result != MPI_SUCCESS)
	{
		report_MPI_ERROR(result);
	}
#else
	// Only use Bsend for larger messages, otherwise use normal send
	if (count > 100)
	{
		int size;
		MPI_Pack_size( count, datatype, comm, &size );
		char *membuff;

		// Allocate memory for the package to be sent
		int attach_result = MPI_Buffer_attach( malloc(size + MPI_BSEND_OVERHEAD ), size + MPI_BSEND_OVERHEAD );
		if (attach_result != MPI_SUCCESS)
		{
			report_MPI_ERROR(attach_result);
		}

		// Actually start sending the message
		result = MPI_Bsend(buf, count, datatype, dest, tag, comm);
		if (result != MPI_SUCCESS)
		{
			report_MPI_ERROR(result);
		}

		// The following will only complete once the message has been successfully sent (i.e. also received on the other side)
		int deattach_result = MPI_Buffer_detach( &membuff, &size);
		if (deattach_result != MPI_SUCCESS)
		{
			report_MPI_ERROR(deattach_result);
		}
	}
	else
	{
		result = MPI_Send(buf, count, datatype, dest, tag, comm);
		if (result != MPI_SUCCESS)
		{
			report_MPI_ERROR(result);
		}
	}
#endif

#ifdef VERBOSE_MPISENDRECV
	if (count > 100)
		std::cerr <<" relion_MPI_Send: message to " << dest << " of size "<< count << " arrived in " << MPI_Wtime() - start_time << " seconds" << std::endl;
#endif
	return result;

}
Example #6
FORTRAN_API void FORT_CALL mpi_pack_size_ ( MPI_Fint *incount, MPI_Fint *datatype, MPI_Fint *comm, MPI_Fint *size, MPI_Fint *__ierr )
{
    int lsize;

    *__ierr = MPI_Pack_size((int)*incount, MPI_Type_f2c(*datatype),
                            MPI_Comm_f2c(*comm), &lsize);
    *size = (MPI_Fint)lsize;
}
Example #7
int
DynamicCommunicationBuffer :: giveFitSize(MPI_Datatype type, int availableSpace, int arrySize)
{
    int arrySpace, guessSize;
    MPI_Pack_size(arrySize, type, communicator, & arrySpace);
    if ( availableSpace >= arrySpace ) {
        return arrySize;
    }

    guessSize = ( int ) floor( ( ( double ) arrySize / ( double ) arrySpace ) * availableSpace ) + 1;
    do {
        guessSize--;
        MPI_Pack_size(guessSize, type, communicator, & arrySpace);
    } while ( ( availableSpace < arrySpace ) && ( guessSize > 0 ) );

    return guessSize;
}
Example #8
void slave(const struct fracInfo info)
{
    MPI_Status status;
    int msgsize;
    struct fracData *data = malloc(sizeof(*data));
    data->pixels = (unsigned char*)malloc(get_max_work_size(&info)*sizeof(unsigned char));  

    // Allocate buffers
    int membersize, emptysize, fullsize;
    int position;
    char *buffer; // reused for both the small work message and the full result with pixels
    MPI_Pack_size(1, MPI_INT, MPI_COMM_WORLD, &membersize);
    emptysize = membersize;
    MPI_Pack_size(1, MPI_INT, MPI_COMM_WORLD, &membersize);
    emptysize += membersize;
    MPI_Pack_size(get_max_work_size(&info), MPI_UNSIGNED_CHAR, MPI_COMM_WORLD, &membersize);
    fullsize = emptysize+membersize;
    buffer = malloc(fullsize);

    while(1) {
        // Receive and unpack work
        MPI_Recv(buffer, emptysize, MPI_PACKED, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
        // Check tag for work/die
        if(status.MPI_TAG == DIETAG) {
            return;
        }

        // Unpack work info
        position = 0;
        MPI_Get_count(&status, MPI_PACKED, &msgsize);
        MPI_Unpack(buffer, msgsize, &position, &data->startRow,1,MPI_INT,MPI_COMM_WORLD);
        MPI_Unpack(buffer, msgsize, &position, &data->nRows,1,MPI_INT,MPI_COMM_WORLD);

        // calcPixels
        calcPixels(&info, data);        

        // Pack and send data back
        position = 0;
        MPI_Pack(&data->startRow,1,MPI_INT,buffer,fullsize,&position,MPI_COMM_WORLD);
        MPI_Pack(&data->nRows,1,MPI_INT,buffer,fullsize,&position,MPI_COMM_WORLD);
        MPI_Pack(data->pixels, data->nRows*info.nCols, MPI_UNSIGNED_CHAR,buffer,fullsize,&position,MPI_COMM_WORLD);
        MPI_Send(buffer, position, MPI_PACKED, 0, WORKTAG, MPI_COMM_WORLD);
    }
}
Example #9
VT_MPI_INT VTUnify_MPI_Pack_size( VT_MPI_INT incount,
                                  VTUnify_MPI_Datatype utype,
                                  VTUnify_MPI_Comm ucomm, VT_MPI_INT * size )
{
   VT_MPI_INT error;
   MPI_Datatype type = get_mpi_type( utype );
   MPI_Comm comm = get_mpi_comm( ucomm );

   error = CALL_MPI( MPI_Pack_size( incount, type, comm, size ) );

   return (error == MPI_SUCCESS) ? 1 : 0;
}
Example #10
snet_msg_t SNetDistribRecvMsg(void)
{
  int count;
  snet_msg_t result;
  MPI_Status status;
  static mpi_buf_t recvBuf = {0, 0, NULL};

  MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
  MPI_Get_count(&status, MPI_PACKED, &count);

  MPI_Pack_size(count, MPI_PACKED, MPI_COMM_WORLD, &recvBuf.offset);
  if (recvBuf.offset > recvBuf.size) {
    recvBuf.data = SNetMemResize(recvBuf.data, recvBuf.offset);
    recvBuf.size = recvBuf.offset;
  }

  MPI_Recv(recvBuf.data, count, MPI_PACKED, status.MPI_SOURCE, status.MPI_TAG,
            MPI_COMM_WORLD, &status);

  recvBuf.offset = 0;
  result.type = status.MPI_TAG;

  switch (status.MPI_TAG) {
    case snet_rec:
        result.rec = SNetRecDeserialise(&recvBuf);
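        /* fall through: a record message also carries a destination */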
    case snet_block:
    case snet_unblock:
      result.dest = SNetDestDeserialise(&recvBuf);
      result.dest.node = status.MPI_SOURCE;
      break;
    case snet_ref_set:
      result.ref = SNetRefDeserialise(&recvBuf);
      result.data = (uintptr_t) SNetInterfaceGet(SNetRefInterface(result.ref))->unpackfun(&recvBuf);
      break;
    case snet_ref_fetch:
      result.ref = SNetRefDeserialise(&recvBuf);
      result.data = status.MPI_SOURCE;
      break;
    case snet_ref_update:
      result.ref = SNetRefDeserialise(&recvBuf);
      SNetDistribUnpack(&recvBuf, &result.val, sizeof(result.val));
      break;
    case snet_update:
      break;
    case snet_stop:
      break;
    default:
      SNetUtilDebugFatal("[%s]: Unexpected MPI TAG %d\n", __func__, result.type);
      break;
  }

  return result;
}
Example #11
File: hmmsim.c Project: EddyRivasLab/hmmer
/* the pack send/recv buffer must be big enough to hold either an error message or a result vector.
 * it may even grow larger than that, to hold largest HMM we send.
 */
static int
minimum_mpi_working_buffer(ESL_GETOPTS *go, int N, int *ret_wn)
{
  int n;
  int nerr    = 0;
  int nresult = 0;

  /* error packet */
  if (MPI_Pack_size(eslERRBUFSIZE, MPI_CHAR,   MPI_COMM_WORLD, &nerr)!= 0)return eslESYS;   
  
  /* results packet */
  if (MPI_Pack_size(N,             MPI_DOUBLE, MPI_COMM_WORLD, &n)  != 0) return eslESYS;   nresult += n;   /* scores */
  if (esl_opt_GetBoolean(go, "-a")) {
    if (MPI_Pack_size(N,           MPI_INT,    MPI_COMM_WORLD, &n)  != 0) return eslESYS;   nresult += n;   /* alignment lengths */
  }
  if (MPI_Pack_size(1,             MPI_DOUBLE, MPI_COMM_WORLD, &n)  != 0) return eslESYS;   nresult += n*2; /* mu, lambda */

  /* add the shared status code to the max of the two possible kinds of packets */
  *ret_wn =  ESL_MAX(nresult, nerr);
  if (MPI_Pack_size(1,             MPI_INT,    MPI_COMM_WORLD, &n)  != 0) return eslESYS;   *ret_wn += n;   /* status code */
  return eslOK;
}
Example #12
File: transmit.c Project: foliern/snet-rts
snet_msg_t SNetDistribRecvMsg(void)
{
  int count;
  snet_msg_t result;
  MPI_Status status;
  static mpi_buf_t recvBuf = {0, 0, NULL};

  MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
  MPI_Get_count(&status, MPI_PACKED, &count);

  MPI_Pack_size(count, MPI_PACKED, MPI_COMM_WORLD, &recvBuf.offset);
  if ((unsigned) recvBuf.offset > recvBuf.size) {
    SNetMemFree(recvBuf.data);
    recvBuf.data = SNetMemAlloc(recvBuf.offset);
    recvBuf.size = recvBuf.offset;
  }

  MPI_Recv(recvBuf.data, count, MPI_PACKED, MPI_ANY_SOURCE, status.MPI_TAG,
            MPI_COMM_WORLD, &status);

  recvBuf.offset = 0;
  result.type = status.MPI_TAG;

  switch (result.type) {
    case snet_rec:
        result.rec = SNetRecDeserialise(&recvBuf, &UnpackInt, &UnpackRef);
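        /* fall through: a record message also carries a destination */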
    case snet_block:
    case snet_unblock:
      result.dest.node = status.MPI_SOURCE;
      UnpackDest(&recvBuf, &result.dest);
      break;
    case snet_ref_set:
      result.ref = SNetRefDeserialise(&recvBuf, &UnpackInt, &UnpackByte);
      result.data = (uintptr_t) SNetInterfaceGet(SNetRefInterface(result.ref))->unpackfun(&recvBuf);
      break;
    case snet_ref_fetch:
      result.ref = SNetRefDeserialise(&recvBuf, &UnpackInt, &UnpackByte);
      result.data = status.MPI_SOURCE;
      break;
    case snet_ref_update:
      result.ref = SNetRefDeserialise(&recvBuf, &UnpackInt, &UnpackByte);
      UnpackInt(&recvBuf, 1, &result.val);
      break;
    default:
      break;
  }

  return result;
}
Example #13
File: main.c Project: pallamidessi/Courses
/*-------------------------------------------------------------------------------*/
void OneStepCirculation(int step)
{
 MPI_Status   status;
 int n = SIZE * LOCAL_SIZE;
 int m = 1;
 int sizeOneMsg;

 MPI_Pack_size(n, MPI_DOUBLE, MPI_COMM_WORLD, &sizeOneMsg);
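 /* buffer to attach: m messages, each needing the packed payload size plus MPI_BSEND_OVERHEAD */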
 int size = m * (sizeOneMsg + MPI_BSEND_OVERHEAD);
 
 double *buf = (double*) malloc(size);
 MPI_Buffer_attach(buf, size);
 MPI_Bsend(A_Slice, SIZE * LOCAL_SIZE, MPI_DOUBLE, ((Me - 1) + NbPE) % NbPE, 0, MPI_COMM_WORLD);
 MPI_Recv(A_Slice, SIZE * LOCAL_SIZE, MPI_DOUBLE, ((Me + 1)) % NbPE, 0, MPI_COMM_WORLD, &status);
 MPI_Buffer_detach(&buf, &size);
/******************************** TO DO ******************************************/
}
Example #14
File: pack_size_f.c Project: aosm/openmpi
void mpi_pack_size_f(MPI_Fint *incount, MPI_Fint *datatype, 
		     MPI_Fint *comm, MPI_Fint *size, MPI_Fint *ierr)
{
    MPI_Comm c_comm;
    MPI_Datatype c_type;
    OMPI_SINGLE_NAME_DECL(size);
    
    c_comm = MPI_Comm_f2c(*comm);
    c_type = MPI_Type_f2c(*datatype);

    *ierr = OMPI_INT_2_FINT(MPI_Pack_size(OMPI_FINT_2_INT(*incount),
					  c_type, c_comm, 
					  OMPI_SINGLE_NAME_CONVERT(size)));

    if (MPI_SUCCESS == OMPI_FINT_2_INT(*ierr)) {
        OMPI_SINGLE_INT_2_FINT(size);
    }
}
Example #15
template < class T > void
AbstractCommunicator::packArray (const T * p, int n)
{
  int size;
  int newsize;
  unsigned char *old;
  MPI_Datatype type;

  if(n > 0) {
    type = getMPIType < T > ();
    MPI_Pack_size (n, type, *(MPI_Comm*)mAuxData, &size);
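    /* size is an upper bound on the packed bytes, so the resized buffer cannot overflow */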
    newsize = mSendBufferPosition + size;
    mSendBuffer.resize(newsize);
    MPI_Pack ((void *) p, n, type, mSendBuffer.getBuf(), mSendBuffer.getSize(),
	      &(mSendBufferPosition), *(MPI_Comm*)mAuxData);
  }

}
Example #16
bool L::SendBuffer::pack (const void* buf, int n, MPI_Datatype t)
{
   if (! full)
   {
      int maxSize;

      MPI_Pack_size (n, t, comm, &maxSize);
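      // maxSize is an upper bound, so this check may conservatively report the buffer as full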

      full = pos + maxSize > SIZE;

      if (! full)
      {
         MPI_Pack (const_cast<void*> (buf), n, t, data, SIZE, &pos, comm);
      }
   }

   return !full;
}
Example #17
File: vsgpackedmsg.c Project: pigay/vsg
/**
 * vsg_packed_msg_send_append:
 * @pm: a #VsgPackedMsg.
 * @buf: pointer to the beginning of data to be stored.
 * @count: number of @type data to store.
 * @type: type of the data to be stored.
 *
 * Appends @count instances of @type data to the message buffer.
 */
void vsg_packed_msg_send_append (VsgPackedMsg *pm, gpointer buf,
                                 gint count, MPI_Datatype type)
{
  gint pos, size, addsize;
  gint ierr;

  g_return_if_fail (pm != NULL);

  g_assert (pm->own_buffer == TRUE);

#ifdef VSG_PACKED_MSG_TRACE
  /* init msg id */
  if (pm->position == 0)
    {
      if (pm->buffer == NULL) pm->buffer = g_malloc0 (_PM_ID_SIZE);
      pm->size = MAX (pm->size, _PM_ID_SIZE);
      pm->allocated = pm->size;
      pm->position = _PM_BEGIN_POS;
    }
#endif

  pos = pm->position;
  size = pm->size;

  /* compute size of this new message part */
  MPI_Pack_size (count, type, pm->communicator, &addsize);

  /* allocate enough memory in message msg to store this message */
  if ((addsize + pos) > size)
    {
      size = MAX (size + 1024, addsize+pos);

      pm->buffer = g_realloc (pm->buffer, size * sizeof (char));

      pm->size = size;
      pm->allocated = pm->size;
    }

  ierr = MPI_Pack (buf, count, type, pm->buffer, size,
		   &pm->position, pm->communicator);

  if (ierr != MPI_SUCCESS) vsg_mpi_error_output (ierr);

}
Example #18
File: ex3-7.c Project: liuzq09/MA6252_NUS
int main(int argc, char *argv[])
{
    int i,bufsize,N=1024*10;
    int myrank, nprocs,src, dest,tag;
    MPI_Status status;
    double A[N],B[N],sum;
    double *buf;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
    MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
    for(i=0;i<N;i++)
	A[i]=(double)myrank;

    MPI_Pack_size(N,MPI_DOUBLE, MPI_COMM_WORLD, &bufsize);
//    MPI_Type_size(MPI_DOUBLE,&bufsize);  
//    bufsize = N*bufsize;
    bufsize = MPI_BSEND_OVERHEAD+bufsize; // add the extra overhead required for buffered-mode sends
    buf=(double *)malloc(bufsize);
    MPI_Buffer_attach(buf,bufsize);
    src = myrank-1;
    if(src<0)
	src=nprocs-1;
    dest = myrank+1;
    if(dest>=nprocs)
	dest = 0;
    
    tag =111;
    MPI_Bsend(A, N, MPI_DOUBLE, dest, tag, MPI_COMM_WORLD);
    MPI_Recv(B, N, MPI_DOUBLE, src,  tag, MPI_COMM_WORLD, &status);
    sum = 0.0;
    for (i=0;i<N;i++)
	sum =sum +B[i];

    printf("Process %d ,values = %f\n",myrank, (double)sum/N);
    MPI_Buffer_detach(&buf, &bufsize);
    free(buf);  
    MPI_Finalize();
    return 0;
}
Example #19
/* start_idx is the "zero" point for the unpack */
static int pack_and_check_expected(MPI_Datatype type, const char *name,
                                   int start_idx, int size,
                                   int *array, int *expected)
{
    int i;
    int err, errs = 0;
    int pack_size = -1;
    int *pack_buf = NULL;
    int pos;
    int type_size = -1;
    int sendbuf[8] = {0,1,2,3,4,5,6,7};

    err = MPI_Type_size(type, &type_size);
    check_err(MPI_Type_size);
    assert(sizeof(sendbuf) >= type_size);

    err = MPI_Pack_size(type_size/sizeof(int), MPI_INT, MPI_COMM_SELF, &pack_size);
    check_err(MPI_Pack_size);
    pack_buf = malloc(pack_size);
    assert(pack_buf!=NULL);

    pos = 0;
    err = MPI_Pack(&sendbuf[0], type_size/sizeof(int), MPI_INT, pack_buf, pack_size, &pos, MPI_COMM_SELF);
    check_err(MPI_Pack);
    pos = 0;
    err = MPI_Unpack(pack_buf, pack_size, &pos, &array[start_idx], 1, type, MPI_COMM_SELF);
    check_err(MPI_Unpack);
    free(pack_buf);

    /* check against expected */
    for (i = 0; i < size; ++i) {
        if (array[i] != expected[i]) {
            errs++;
            if (verbose)
                fprintf(stderr, "%s: array[%d]=%d, should be %d\n", name, i, array[i], expected[i]);
        }
    }

    return errs;
}
Example #20
File: mpi.c Project: Denis84/EPA-WorkBench
/* Function:  p7_oprofile_MPISend()
 * Synopsis:  Send an OPROFILE as an MPI work unit.
 * Incept:    MSF, Wed Oct 21, 2009 [Janelia]
 *
 * Purpose:   Sends an OPROFILE <om> as a work unit to MPI process
 *            <dest> (where <dest> ranges from 0..<nproc-1>), tagged
 *            with MPI tag <tag>, for MPI communicator <comm>, as 
 *            the sole workunit or result. 
 *            
 *            Work units are prefixed by a status code. If <hmm> is
 *            <non-NULL>, the work unit is an <eslOK> code followed by
 *            the packed HMM. If <hmm> is NULL, the work unit is an
 *            <eslEOD> code, which <p7_hmm_MPIRecv()> knows how to
 *            interpret; this is typically used for an end-of-data
 *            signal to cleanly shut down worker processes.
 *            
 *            In order to minimize alloc/free cycles in this routine,
 *            caller passes a pointer to a working buffer <*buf> of
 *            size <*nalloc> characters. If necessary (i.e. if <hmm> is
 *            too big to fit), <*buf> will be reallocated and <*nalloc>
 *            increased to the new size. As a special case, if <*buf>
 *            is <NULL> and <*nalloc> is 0, the buffer will be
 *            allocated appropriately, but the caller is still
 *            responsible for free'ing it.
 *            
 * Returns:   <eslOK> on success; <*buf> may have been reallocated and
 *            <*nalloc> may have been increased.
 * 
 * Throws:    <eslESYS> if an MPI call fails; <eslEMEM> if a malloc/realloc
 *            fails. In either case, <*buf> and <*nalloc> remain valid and useful
 *            memory (though the contents of <*buf> are undefined). 
 * 
 * Note:      Compare to p7_hmmfile_WriteBinary(). The two operations (sending
 *            an HMM via MPI, or saving it as a binary file to disk) are
 *            similar.
 */
int
p7_oprofile_MPISend(P7_OPROFILE *om, int dest, int tag, MPI_Comm comm, char **buf, int *nalloc)
{
  int   status;
  int   code;
  int   sz, n, pos;

  /* Figure out size */
  if (MPI_Pack_size(1, MPI_INT, comm, &n) != 0) ESL_XEXCEPTION(eslESYS, "mpi pack size failed");
  if (om != NULL) {
    if ((status = p7_oprofile_MPIPackSize(om, comm, &sz)) != eslOK) return status;
    n += sz;
  }

  /* Make sure the buffer is allocated appropriately */
  if (*buf == NULL || n > *nalloc) {
    void *tmp;
    ESL_RALLOC(*buf, tmp, sizeof(char) * n);
    *nalloc = n; 
  }

  /* Pack the status code and OPROFILE into the buffer */
  pos  = 0;
  code = (om == NULL) ? eslEOD : eslOK;
  if (MPI_Pack(&code, 1, MPI_INT, *buf, n, &pos, comm) != 0) ESL_EXCEPTION(eslESYS, "mpi pack failed");
  if (om != NULL) {
    if ((status = p7_oprofile_MPIPack(om, *buf, n, &pos, comm)) != eslOK) return status;
  }

  /* Send the packed OPROFILE to the destination. */
  if (MPI_Send(*buf, n, MPI_PACKED, dest, tag, comm) != 0)  ESL_EXCEPTION(eslESYS, "mpi send failed");
  return eslOK;

 ERROR:
  return status;
}
Example #21
/* pack_and_unpack()
 *
 * Perform packing and unpacking of a buffer for the purposes of checking
 * to see if we are processing a type correctly.  Zeros the buffer between
 * these two operations, so the data described by the type should be in
 * place upon return but all other regions of the buffer should be zero.
 *
 * Parameters:
 * typebuf - pointer to buffer described by datatype and count that
 *           will be packed and then unpacked into
 * count, datatype - description of typebuf
 * typebufsz - size of typebuf; used specifically to zero the buffer
 *             between the pack and unpack steps
 *
 */
static int pack_and_unpack(char *typebuf, int count, MPI_Datatype datatype, int typebufsz)
{
    char *packbuf;
    int err, errs = 0, pack_size, type_size, position;

    err = MPI_Type_size(datatype, &type_size);
    if (err != MPI_SUCCESS) {
        errs++;
        if (verbose) {
            fprintf(stderr, "error in MPI_Type_size call; aborting after %d errors\n", errs);
        }
        return errs;
    }

    type_size *= count;

    err = MPI_Pack_size(count, datatype, MPI_COMM_SELF, &pack_size);
    if (err != MPI_SUCCESS) {
        errs++;
        if (verbose) {
            fprintf(stderr, "error in MPI_Pack_size call; aborting after %d errors\n", errs);
        }
        return errs;
    }
    packbuf = (char *) malloc(pack_size);
    if (packbuf == NULL) {
        errs++;
        if (verbose) {
            fprintf(stderr, "error in malloc call; aborting after %d errors\n", errs);
        }
        return errs;
    }

    position = 0;
    err = MPI_Pack(typebuf, count, datatype, packbuf, type_size, &position, MPI_COMM_SELF);

    if (position != type_size) {
        errs++;
        if (verbose)
            fprintf(stderr, "position = %d; should be %d (pack)\n", position, type_size);
    }

    memset(typebuf, 0, typebufsz);
    position = 0;
    err = MPI_Unpack(packbuf, type_size, &position, typebuf, count, datatype, MPI_COMM_SELF);
    if (err != MPI_SUCCESS) {
        errs++;
        if (verbose) {
            fprintf(stderr, "error in MPI_Unpack call; aborting after %d errors\n", errs);
        }
        return errs;
    }
    free(packbuf);

    if (position != type_size) {
        errs++;
        if (verbose)
            fprintf(stderr, "position = %d; should be %d (unpack)\n", position, type_size);
    }

    return errs;
}
Example #22
File: pack.c Project: ACME-Climate/cime
FC_FUNC( mpi_pack_size, MPI_PACK_SIZE )(int * incount, int * datatype, 
                                          int * comm, long * size, int *ierr)
{
  *ierr = MPI_Pack_size(*incount, *datatype, *comm, size);
}
Example #23
F_VOID_FUNC ztrbr2d_(int *ConTxt, F_CHAR scope, F_CHAR top, F_CHAR uplo,
                     F_CHAR diag, int *m, int *n, double *A, int *lda,
                     int *rsrc, int *csrc)
#endif
/*
 *  -- V1.1 BLACS routine --
 *  University of Tennessee, May 1, 1996
 *  Written by Clint Whaley.
 *
 *  Purpose
 *  =======
 *  Broadcast/receive for trapezoidal double complex arrays.
 *
 *  Arguments
 *  =========
 *
 *  ConTxt  (input) Ptr to int
 *          Index into MyConTxts00 (my contexts array).
 *
 *  SCOPE   (input) Ptr to char
 *          Limit the scope of the operation.
 *          = 'R' :   Operation is performed by a process row.
 *          = 'C' :   Operation is performed by a process column.
 *          = 'A' :   Operation is performed by all processes in grid.
 *
 *  TOP     (input) Ptr to char
 *          Controls fashion in which messages flow within the operation.
 *
 *  UPLO    (input) Ptr to char
 *          Specifies the part of the matrix to be sent.
 *          = 'U':      Upper trapezoidal part
 *          ELSE :      Lower trapezoidal part
 *
 *  DIAG    (input) Ptr to char
 *          Specifies whether the matrix is unit diagonal or not.
 *          = 'U':      Matrix is unit diagonal, diagonal not communicated.
 *          ELSE :      Matrix is not unit diagonal, diagonal is communicated.
 *
 *  M       (input) Ptr to int
 *          The number of rows of the matrix A.  M >= 0.
 *
 *  N       (input) Ptr to int
 *          The number of columns of the matrix A.  N >= 0.
 *
 *  A       (output) Ptr to double complex two dimensional array
 *          The m by n matrix A.  Fortran77 (column-major) storage
 *          assumed.
 *          If UPLO = 'U', only the upper trapezoid is accessed;
 *          if UPLO = 'L', only the lower trapezoid is accessed.
 *
 *  LDA     (input) Ptr to int
 *          The leading dimension of the array A.  LDA >= M.
 *
 *
 *  RSRC    (input) Ptr to int
 *          The process row of the source of the matrix.
 *
 *  CSRC    (input) Ptr to int
 *          The process column of the source of the matrix.
 *
 *
 * ------------------------------------------------------------------------
 */
{
   void BI_ArgCheck(int, int, char *, char, char, char, int, int, int, int,
                    int *, int *);
   int BI_HypBR(BLACSCONTEXT *, BLACBUFF *, SDRVPTR, int);
   void BI_IdringBR(BLACSCONTEXT *, BLACBUFF *, SDRVPTR, int, int);
   void BI_SringBR(BLACSCONTEXT *, BLACBUFF *, SDRVPTR, int);
   void BI_MpathBR(BLACSCONTEXT *, BLACBUFF *, SDRVPTR, int, int);
   void BI_TreeBR(BLACSCONTEXT *, BLACBUFF *, SDRVPTR, int, int);
   void BI_UpdateBuffs(BLACBUFF *);
   BLACBUFF *BI_GetBuff(int);
   int BI_BuffIsFree(BLACBUFF *, int);
   MPI_Datatype BI_GetMpiTrType(BLACSCONTEXT *, char, char, int, int, int,
                                   MPI_Datatype, int *);
   void BI_Unpack(BLACSCONTEXT *, BVOID *, BLACBUFF *, MPI_Datatype);
   void BI_Ssend(BLACSCONTEXT *, int, int, BLACBUFF *);
   void BI_Asend(BLACSCONTEXT *, int, int, BLACBUFF *);

   BLACSCONTEXT *ctxt;
   BLACBUFF *bp=NULL;
   SDRVPTR send;
   MPI_Datatype MatTyp;
   int length, src, tlda, error, one=1;
   char ttop, tscope, tuplo, tdiag;
   extern BLACBUFF BI_AuxBuff, *BI_ActiveQ;

   MGetConTxt(Mpval(ConTxt), ctxt);
   ttop = F2C_CharTrans(top);
   ttop = Mlowcase(ttop);
   tscope = F2C_CharTrans(scope);
   tscope = Mlowcase(tscope);
   tdiag = F2C_CharTrans(diag);
   tdiag = Mlowcase(tdiag);
   tuplo = F2C_CharTrans(uplo);
   tuplo = Mlowcase(tuplo);
#if (BlacsDebugLvl > 0)
   BI_ArgCheck(Mpval(ConTxt), RT_BR, __FILE__, tscope, tuplo, tdiag, Mpval(m),
               Mpval(n), Mpval(lda), 1, Mpaddress(rsrc), Mpaddress(csrc));
#endif
/*
 *  If the user has set the default broadcast topology, use it instead of
 *  BLACS default
 */
#ifdef DefBSTop
   if (ttop == ' ') ttop = DefBSTop;
#endif
   if (Mpval(m) <= Mpval(lda)) tlda = Mpval(lda);
   else tlda = Mpval(m);

   switch(tscope)
   {
   case 'r':
      ctxt->scp = &ctxt->rscp;
      src = Mpval(csrc);
      break;
   case 'c':
      ctxt->scp = &ctxt->cscp;
      src = Mpval(rsrc);
      break;
   case 'a':
      ctxt->scp = &ctxt->ascp;
      src = Mvkpnum(ctxt, Mpval(rsrc), Mpval(csrc));
      break;
   default:
      BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown scope '%c'",
                  tscope);
   }

   MatTyp = BI_GetMpiTrType(ctxt, tuplo, tdiag, Mpval(m), Mpval(n), tlda,
                            MPI_DOUBLE_COMPLEX, &BI_AuxBuff.N);
/*
 * If using default topology, use MPI native broadcast
 */
   if (ttop == ' ')
   {
      error=MPI_Bcast(A, BI_AuxBuff.N, MatTyp, src, ctxt->scp->comm);
      error=MPI_Type_free(&MatTyp);
      if (BI_ActiveQ) BI_UpdateBuffs(NULL);
      return;
   }
/*
 * If MPI handles non-contiguous buffering well, always use MPI data types
 * instead of packing
 */
#ifdef MpiBuffGood
   send = BI_Ssend;
   BI_AuxBuff.Buff = (char *) A;
   BI_AuxBuff.dtype = MatTyp;
   bp = &BI_AuxBuff;
#else

   send = BI_Asend;
   MPI_Pack_size(one, MatTyp, ctxt->scp->comm, &length);
   bp = BI_GetBuff(length);
   bp->N = length;
   bp->dtype = MPI_PACKED;
#if ZeroByteTypeBug
   if (MatTyp == MPI_BYTE)
   {
      send = BI_Ssend;
      bp->N = 0;
      bp->dtype = MPI_BYTE;
   }
#endif

#endif

   switch(ttop)
   {
   case 'h':
      error = BI_HypBR(ctxt, bp, send, src);
      if (error == NPOW2) BI_TreeBR(ctxt, bp, send, src, 2);
      break;
   case '1':
   case '2':
   case '3':
   case '4':
   case '5':
   case '6':
   case '7':
   case '8':
   case '9':
      BI_TreeBR(ctxt, bp, send, src, ttop-47);
      break;
   case 't':
      BI_TreeBR(ctxt, bp, send, src, ctxt->Nb_bs);
      break;
   case 'i':
      BI_IdringBR(ctxt, bp, send, src, 1);
      break;
   case 'd':
      BI_IdringBR(ctxt, bp, send, src, -1);
      break;
   case 's':
      BI_SringBR(ctxt, bp, send, src);
      break;
   case 'm':
      BI_MpathBR(ctxt, bp, send, src, ctxt->Nr_bs);
      break;
   case 'f':
      BI_MpathBR(ctxt, bp, send, src, FULLCON);
      break;
   default :
      BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown topology '%c'",
                  ttop);
   }

#ifdef MpiBuffGood
   error=MPI_Type_free(&MatTyp);
   if (BI_ActiveQ) BI_UpdateBuffs(NULL);
#endif
#ifndef MpiBuffGood
   BI_Unpack(ctxt, (BVOID *) A, bp, MatTyp);
   BI_UpdateBuffs(bp);
#endif
}
Example #24
File: nestvec2.c Project: NexMirror/MPICH
int main(int argc, char **argv)
{
    int vcount, vstride;
    int32_t counts[2];
    int v2stride, typesize, packsize, i, position, errs = 0;
    double *outbuf, *outbuf2;
    double *vsource;
    MPI_Datatype vtype, stype;
    MPI_Aint lb, extent;
    double t0, t1;
    double tspack, tvpack, tmanual;
    int ntry;
    int blocklengths[2];
    MPI_Aint displacements[2];
    MPI_Datatype typesArray[2];

    MPI_Init(&argc, &argv);

    /* Create a struct consisting of a two 32-bit ints, followed by a
     * vector of stride 3 but count 128k (less than a few MB of data area) */
    vcount = 128000;
    vstride = 3;
    MPI_Type_vector(vcount, 1, vstride, MPI_DOUBLE, &vtype);

    vsource = (double *) malloc((vcount + 1) * (vstride + 1) * sizeof(double));
    if (!vsource) {
        fprintf(stderr, "Unable to allocate vsource\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    for (i = 0; i < vcount * vstride; i++) {
        vsource[i] = i;
    }
    blocklengths[0] = 2;
    MPI_Get_address(&counts[0], &displacements[0]);
    blocklengths[1] = 1;
    MPI_Get_address(vsource, &displacements[1]);
    if (verbose) {
        printf("%p = %p?\n", vsource, (void *) displacements[1]);
    }
    typesArray[0] = MPI_INT32_T;
    typesArray[1] = vtype;
    MPI_Type_create_struct(2, blocklengths, displacements, typesArray, &stype);
    MPI_Type_commit(&stype);
    MPI_Type_commit(&vtype);

#if defined(MPICH) && defined(PRINT_DATATYPE_INTERNALS)
    /* To use MPIR_Datatype_debug to print the datatype internals,
     * you must configure MPICH with --enable-g=log */
    if (verbose) {
        printf("Original struct datatype:\n");
        MPIR_Datatype_debug(stype, 10);
    }
#endif

    MPI_Pack_size(1, stype, MPI_COMM_WORLD, &packsize);
    outbuf = (double *) malloc(packsize);
    outbuf2 = (double *) malloc(packsize);
    if (!outbuf) {
        fprintf(stderr, "Unable to allocate %ld for outbuf\n", (long) packsize);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    if (!outbuf2) {
        fprintf(stderr, "Unable to allocate %ld for outbuf2\n", (long) packsize);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    position = 0;
    /* Warm up the code and data */
    MPI_Pack(MPI_BOTTOM, 1, stype, outbuf, packsize, &position, MPI_COMM_WORLD);

    tspack = 1e12;
    for (ntry = 0; ntry < 5; ntry++) {
        position = 0;
        t0 = MPI_Wtime();
        MPI_Pack(MPI_BOTTOM, 1, stype, outbuf, packsize, &position, MPI_COMM_WORLD);
        t1 = MPI_Wtime() - t0;
        if (t1 < tspack)
            tspack = t1;
    }
    MPI_Type_free(&stype);

    /* An equivalent packing, using the 2 ints and the vector separately */
    tvpack = 1e12;
    for (ntry = 0; ntry < 5; ntry++) {
        position = 0;
        t0 = MPI_Wtime();
        MPI_Pack(counts, 2, MPI_INT32_T, outbuf, packsize, &position, MPI_COMM_WORLD);
        MPI_Pack(vsource, 1, vtype, outbuf, packsize, &position, MPI_COMM_WORLD);
        t1 = MPI_Wtime() - t0;
        if (t1 < tvpack)
            tvpack = t1;
    }
    MPI_Type_free(&vtype);

    /* Note that we exploit the fact that the vector type contains vblock
     * instances of a contiguous type of size 24, or a single block of 24*vblock
     * bytes.
     */
    tmanual = 1e12;
    for (ntry = 0; ntry < 5; ntry++) {
        const double *restrict ppe = (const double *) vsource;
        double *restrict ppo = outbuf2;
        int j;
        t0 = MPI_Wtime();
        position = 0;
        *(int32_t *) ppo = counts[0];
        *(((int32_t *) ppo) + 1) = counts[1];
        ppo++;
        /* Some hand optimization because this file is not normally
         * compiled with optimization by the test suite */
        j = vcount;
        while (j) {
            *ppo++ = *ppe;
            ppe += vstride;
            *ppo++ = *ppe;
            ppe += vstride;
            *ppo++ = *ppe;
            ppe += vstride;
            *ppo++ = *ppe;
            ppe += vstride;
            j -= 4;
        }
        position += (1 + vcount);
        position *= sizeof(double);
        t1 = MPI_Wtime() - t0;
        if (t1 < tmanual)
            tmanual = t1;

        /* Check on correctness */
#ifdef PACK_IS_NATIVE
        if (memcmp(outbuf, outbuf2, position) != 0) {
            printf("Panic(manual) - pack buffers differ\n");
            for (j = 0; j < 8; j++) {
                printf("%d: %llx\t%llx\n", j, (long long unsigned) outbuf[j],
                       (long long unsigned) outbuf2[j]);
            }
        }
#endif
    }

    if (verbose) {
        printf("Bytes packed = %d\n", position);
        printf("MPI_Pack time = %e (struct), = %e (vector), manual pack time = %e\n",
               tspack, tvpack, tmanual);
    }

    if (4 * tmanual < tspack) {
        errs++;
        printf("MPI_Pack time using struct with vector = %e, manual pack time = %e\n", tspack,
               tmanual);
        printf("MPI_Pack time should be less than 4 times the manual time\n");
        printf("For most informative results, be sure to compile this test with optimization\n");
    }
    if (4 * tmanual < tvpack) {
        errs++;
        printf("MPI_Pack using vector = %e, manual pack time = %e\n", tvpack, tmanual);
        printf("MPI_Pack time should be less than 4 times the manual time\n");
        printf("For most informative results, be sure to compile this test with optimization\n");
    }
    if (4 * tvpack < tspack) {
        errs++;
        printf("MPI_Pack using a vector = %e, using a struct with vector = %e\n", tvpack, tspack);
        printf
            ("MPI_Pack time using vector should be about the same as the struct containing the vector\n");
        printf("For most informative results, be sure to compile this test with optimization\n");
    }

    if (errs) {
        printf(" Found %d errors\n", errs);
    }
    else {
        printf(" No Errors\n");
    }

    free(vsource);
    free(outbuf);
    free(outbuf2);

    MPI_Finalize();
    return 0;
}
Example #25
File: nestvec.c Project: jimmycao/mpi-test
int main( int argc, char **argv )
{
    int          vcount = 16, vblock = vcount*vcount/2, vstride=2*vcount*vblock;
    int          v2stride, typesize, packsize, i, position, errs = 0;
    char         *inbuf, *outbuf, *outbuf2;
    MPI_Datatype ft1type, ft2type, ft3type;
    MPI_Datatype ftopttype;
    MPI_Aint     lb, extent;
    double       t0, t1;
    double       tpack, tmanual, tpackopt;
    int          ntry;

    MPI_Init( &argc, &argv );
    
    MPI_Type_contiguous( 6, MPI_FLOAT, &ft1type );
    MPI_Type_size( ft1type, &typesize );
    v2stride = vcount * vcount * vcount * vcount * typesize;
    MPI_Type_vector( vcount, vblock, vstride, ft1type, &ft2type );
    MPI_Type_create_hvector( 2, 1, v2stride, ft2type, &ft3type );
    MPI_Type_commit( &ft3type );
    MPI_Type_free( &ft1type );
    MPI_Type_free( &ft2type );
#if defined(MPICH2) && defined(PRINT_DATATYPE_INTERNALS)
    /* To use MPIDU_Datatype_debug to print the datatype internals,
       you must configure MPICH2 with --enable-g=log */
    if (verbose) {
	printf( "Original datatype:\n" );
	MPIDU_Datatype_debug( ft3type, 10 );
    }
#endif
    /* The same type, but without using the contiguous type */
    MPI_Type_vector( vcount, 6*vblock, 6*vstride, MPI_FLOAT, &ft2type );
    MPI_Type_create_hvector( 2, 1, v2stride, ft2type, &ftopttype );
    MPI_Type_commit( &ftopttype );
    MPI_Type_free( &ft2type );
#if defined(MPICH2) && defined(PRINT_DATATYPE_INTERNALS)
    if (verbose) {
	printf( "\n\nMerged datatype:\n" );
	MPIDU_Datatype_debug( ftopttype, 10 );
    }
#endif

    MPI_Type_get_extent( ft3type, &lb, &extent );
    MPI_Type_size( ft3type, &typesize );

    MPI_Pack_size( 1, ft3type, MPI_COMM_WORLD, &packsize );

    inbuf   = (char *)malloc( extent );
    outbuf  = (char *)malloc( packsize );
    outbuf2 = (char *)malloc( packsize );
    if (!inbuf) {
	fprintf( stderr, "Unable to allocate %ld for inbuf\n", (long)extent );
	MPI_Abort( MPI_COMM_WORLD, 1 );
    }
    if (!outbuf) {
	fprintf( stderr, "Unable to allocate %ld for outbuf\n", (long)packsize );
	MPI_Abort( MPI_COMM_WORLD, 1 );
    }
    if (!outbuf2) {
	fprintf( stderr, "Unable to allocate %ld for outbuf2\n", (long)packsize );
	MPI_Abort( MPI_COMM_WORLD, 1 );
    }
    for (i=0; i<extent; i++) {
	inbuf[i] = i & 0x7f;
    }
    position = 0;
    /* Warm up the code and data */
    MPI_Pack( inbuf, 1, ft3type, outbuf, packsize, &position, MPI_COMM_WORLD );

    /* Pack using the vector of vector of contiguous */
    tpack = 1e12;
    for (ntry = 0; ntry < 5; ntry++) {
	position = 0;
	t0 = MPI_Wtime();
	MPI_Pack( inbuf, 1, ft3type, outbuf, packsize, &position, 
		  MPI_COMM_WORLD );
	t1 = MPI_Wtime() - t0;
	if (t1 < tpack) tpack = t1;
    }
    MPI_Type_free( &ft3type );

    /* Pack using vector of vector with big blocks (same type map) */
    tpackopt = 1e12;
    for (ntry = 0; ntry < 5; ntry++) {
	position = 0;
	t0 = MPI_Wtime();
	MPI_Pack( inbuf, 1, ftopttype, outbuf, packsize, &position, 
		  MPI_COMM_WORLD );
	t1 = MPI_Wtime() - t0;
	if (t1 < tpackopt) tpackopt = t1;
    }
    MPI_Type_free( &ftopttype );

    /* User (manual) packing code.
       Note that we exploit the fact that the vector type contains vblock 
       instances of a contiguous type of size 24, or equivalently a 
       single block of 24*vblock bytes.
    */
    tmanual = 1e12;
    for (ntry = 0; ntry < 5; ntry++) {
	const char *ppe = (const char *)inbuf;
	int k, j;
	t0 = MPI_Wtime();
	position = 0;
	for (k=0; k<2; k++) {  /* hvector count; blocksize is 1 */
	    const char *ptr = ppe;
	    for (j=0; j<vcount; j++) { /* vector count */
		memcpy( outbuf2 + position, ptr, 24*vblock );
		ptr      += vstride * 24;
		position += 24*vblock;
	    }
	    ppe += v2stride;
	}
	t1 = MPI_Wtime() - t0;
	if (t1 < tmanual) tmanual = t1;

	/* Check on correctness */
#ifdef PACK_IS_NATIVE
	if (memcmp( outbuf, outbuf2, position ) != 0) {
	    printf( "Panic - pack buffers differ\n" );
	}
#endif
    }

    if (verbose) {
	printf( "Bytes packed = %d\n", position );
	printf( "MPI_Pack time = %e, opt version = %e, manual pack time = %e\n", 
		tpack, tpackopt, tmanual );
    }

    /* A factor of 4 is extremely generous, especially since the test suite
       no longer builds any of the tests with optimization */
    if (4 * tmanual < tpack) {
	errs++;
	printf( "MPI_Pack time = %e, manual pack time = %e\n", tpack, tmanual );
	printf( "MPI_Pack time should be less than 4 times the manual time\n" );
	printf( "For most informative results, be sure to compile this test with optimization\n" );
    }
    if (4 * tmanual < tpackopt) {
	errs++;
	printf( "MPI_Pack with opt = %e, manual pack time = %e\n", tpackopt, 
		tmanual );
	printf( "MPI_Pack time should be less than 4 times the manual time\n" );
	printf( "For most informative results, be sure to compile this test with optimization\n" );
    }
    if (errs) {
        printf( " Found %d errors\n", errs );
    }
    else {
        printf( " No Errors\n" );
    } 
    
    free( inbuf );
    free( outbuf );
    free( outbuf2 );

    MPI_Finalize();
    return 0;
}
Example #26
int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
                                         struct ompi_datatype_t *sdtype,
                                         void* rbuf, int rcount,
                                         struct ompi_datatype_t *rdtype,
                                         struct ompi_communicator_t *comm)
{
    int i, k, line = -1;
    int rank, size;
    int sendto, recvfrom, distance, *displs=NULL, *blen=NULL;
    int maxpacksize, packsize, position;
    char * tmpbuf=NULL, *packbuf=NULL;
    ptrdiff_t lb, sext, rext;
    int err = 0;
    int weallocated = 0;
    MPI_Datatype iddt;

    size = ompi_comm_size(comm);
    rank = ompi_comm_rank(comm);

    OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_bruck rank %d", rank));

    err = ompi_ddt_get_extent (sdtype, &lb, &sext);
    if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }

    err = ompi_ddt_get_extent (rdtype, &lb, &rext);
    if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }


#ifdef blahblah
    /* try and SAVE memory by using the data segment hung off the communicator if possible */
    if (comm->c_coll_selected_data->mcct_num_reqs >= size) { 
        /* we have enought preallocated for displments and lengths */
        displs = (int*) comm->c_coll_basic_data->mcct_reqs;
        blen = (int *) (displs + size);
        weallocated = 0;
    } 
    else { /* allocate the buffers ourself */
#endif
        displs = (int *) malloc(size*sizeof(int));
        if (displs == NULL) { line = __LINE__; err = -1; goto err_hndl; }
        blen = (int *) malloc(size*sizeof(int));
        if (blen == NULL) { line = __LINE__; err = -1; goto err_hndl; }
        weallocated = 1;
#ifdef blahblah
    }
#endif


    /* Prepare for packing data */
    err = MPI_Pack_size( scount*size, sdtype, comm, &maxpacksize );
    if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;  }

    /* pack buffer allocation */
    packbuf = (char*) malloc((unsigned) maxpacksize);
    if (packbuf == NULL) { line = __LINE__; err = -1; goto err_hndl; }

    /* tmp buffer allocation for message data */
    tmpbuf = (char *) malloc(scount*size*sext);
    if (tmpbuf == NULL) { line = __LINE__; err = -1; goto err_hndl; }


    /* Step 1 - local rotation - shift up by rank */
    err = ompi_ddt_copy_content_same_ddt (sdtype, (int32_t) ((size-rank)*scount),
                                          tmpbuf, ((char*)sbuf)+rank*scount*sext);
    if (err<0) {
        line = __LINE__; err = -1; goto err_hndl;
    }

    if (rank != 0) {
        err = ompi_ddt_copy_content_same_ddt (sdtype, (int32_t) (rank*scount),
                                              tmpbuf+(size-rank)*scount*sext, (char*)sbuf);
        if (err<0) {
            line = __LINE__; err = -1; goto err_hndl;
        }
    }

    /* perform communication step */
    for (distance = 1; distance < size; distance<<=1) {

        /* send data to "sendto" */
        sendto = (rank+distance)%size;
        recvfrom = (rank-distance+size)%size;
        packsize = 0;
        k = 0;

        /* create indexed datatype */
        for (i = 1; i < size; i++) {
            if ((i&distance) == distance) {
                displs[k] = i*scount; blen[k] = scount;
                k++;
            }
        }
        /* Set indexes and displacements */
        err = MPI_Type_indexed(k, blen, displs, sdtype, &iddt);
        if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;  }
        /* Commit the new datatype */
        err = MPI_Type_commit(&iddt);
        if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;  }

        /* have the new distribution ddt, pack and exchange data */
        err = MPI_Pack(tmpbuf, 1, iddt, packbuf, maxpacksize, &packsize, comm);
        if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;  }

        /* Sendreceive */
        err = ompi_coll_tuned_sendrecv ( packbuf, packsize, MPI_PACKED, sendto, 
                                         MCA_COLL_BASE_TAG_ALLTOALL,
                                         rbuf, packsize, MPI_PACKED, recvfrom, 
                                         MCA_COLL_BASE_TAG_ALLTOALL,
                                         comm, MPI_STATUS_IGNORE, rank);
        if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }

        /* Unpack data from rbuf to tmpbuf */
        position = 0;
        err = MPI_Unpack(rbuf, packsize, &position,
                         tmpbuf, 1, iddt, comm);
        if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }

        /* free ddt */
        err = MPI_Type_free(&iddt);
        if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;  }
    } /* end of for (distance = 1... */

    /* Step 3 - local rotation - */
    for (i = 0; i < size; i++) {

        err = ompi_ddt_copy_content_same_ddt (rdtype, (int32_t) rcount,
                                              ((char*)rbuf)+(((rank-i+size)%size)*rcount*rext), 
                                              tmpbuf+i*rcount*rext);
        if (err<0) {
            line = __LINE__; err = -1; goto err_hndl;
        }
    }

    /* Step 4 - clean up */
    if (tmpbuf != NULL) free(tmpbuf);
    if (packbuf != NULL) free(packbuf);
    if (weallocated) {
        if (displs != NULL) free(displs);
        if (blen != NULL) free(blen);
    }
    return OMPI_SUCCESS;

 err_hndl:
    OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
    if (tmpbuf != NULL) free(tmpbuf);
    if (packbuf != NULL) free(packbuf);
    if (weallocated) {
        if (displs != NULL) free(displs);
        if (blen != NULL) free(blen);
    }
    return err;
}
Example #27
File: setubv.c Project: pratikmallya/AUTO
int 
setubv_mpi_wrapper(integer ndim, integer na, integer ncol, integer nint,
#ifdef MANIFOLD
       integer nalc,
#endif
       integer ncb, integer nrc, integer nra, integer nca,
       integer ndxloc, iap_type *iap, rap_type *rap, doublereal *par, integer *icp,
       doublereal **fa, doublereal *fc,
       doublereal *rldot, doublereal **ups, doublereal **uoldps, doublereal **udotps, doublereal **upoldp,
       doublereal *dtm, doublereal *thl, doublereal *thu,
       doublereal *wi, doublereal **wp, doublereal **wt)
{
  integer loop_start,loop_end,local_na;
  int i,comm_size;
  int *fa_counts,*fa_displacements;
  int *dtm_counts,*dtm_displacements;

  MPI_Comm_size(MPI_COMM_WORLD,&comm_size);
  fa_counts=(int *)malloc(sizeof(int)*comm_size);
  fa_displacements=(int *)malloc(sizeof(int)*comm_size);
  dtm_counts=(int *)malloc(sizeof(int)*comm_size);
  dtm_displacements=(int *)malloc(sizeof(int)*comm_size);
  fa_counts[0] = 0;
  fa_displacements[0] = 0;
  dtm_counts[0] = 0;
  dtm_displacements[0] = 0;
  
  for(i=1;i<comm_size;i++){
    
    /*Send message to get worker into setubv mode*/
    {
      int message=AUTO_MPI_SETUBV_MESSAGE;
      MPI_Send(&message,1,MPI_INT,i,0,MPI_COMM_WORLD);
    }
    loop_start = ((i-1)*na)/(comm_size - 1);
    loop_end = (i*na)/(comm_size - 1);
    fa_counts[i] = ndim*ncol*(loop_end-loop_start);
    fa_displacements[i] = ndim*ncol*loop_start;
    dtm_counts[i] = (loop_end-loop_start);
    dtm_displacements[i] = (loop_start);

    local_na = loop_end-loop_start;
    MPI_Send(&local_na ,1,MPI_LONG,i,0,MPI_COMM_WORLD);
    MPI_Send(&loop_start    ,1,MPI_LONG,i,0,MPI_COMM_WORLD);
  }

  {
    integer params[11];
    params[0]=ndim;
    params[1]=ncol;
    params[2]=nint;
    params[3]=ncb;
    params[4]=nrc;
    params[5]=nra;
    params[6]=nca;
    params[7]=ndxloc;
#ifdef MANIFOLD
    params[8]=nalc;
    MPI_Bcast(params     ,9,MPI_LONG,0,MPI_COMM_WORLD);
#else
    MPI_Bcast(params     ,8,MPI_LONG,0,MPI_COMM_WORLD);
#endif
  }    

  {
    int position=0;
    void *buffer;
    int bufsize;
    int size_int,size_double;
    int niap,nrap;
    /* Here we compute the number of elements in the iap and rap structures.
       Since each of the structures is homogeneous we just divide the total
       size by the size of the individual elements.*/
    niap = sizeof(iap_type)/sizeof(integer);
    nrap = sizeof(rap_type)/sizeof(doublereal);
    MPI_Pack_size(niap+NPARX,MPI_LONG,MPI_COMM_WORLD,&size_int);
    MPI_Pack_size(nrap+NPARX2+
		  ndxloc*ndim*ncol+
		  ndxloc*ndim*ncol+
		  (ncol + 1)*ncol+
		  (ncol + 1)*ncol+
		  (ncol + 1)+
		  ndxloc*ndim*ncol+
		  ndxloc*ndim*ncol+
		  ndim*8+
		  ncb+
		  NPARX,
		  MPI_DOUBLE,MPI_COMM_WORLD,&size_double);
    bufsize = size_int + size_double;
    buffer=malloc((unsigned)bufsize);

    MPI_Pack(iap    ,niap,MPI_LONG,buffer,bufsize,&position,MPI_COMM_WORLD);
    MPI_Pack(rap    ,nrap,MPI_DOUBLE,buffer,bufsize,&position,MPI_COMM_WORLD);
    /**********************************************/
    MPI_Pack(par    ,NPARX2,MPI_DOUBLE,buffer,bufsize,&position,MPI_COMM_WORLD);
    MPI_Pack(icp    ,NPARX,MPI_LONG,buffer,bufsize,&position,MPI_COMM_WORLD);
    MPI_Pack(ups[0] ,ndxloc*ndim*ncol,MPI_DOUBLE,buffer,bufsize,&position,MPI_COMM_WORLD);
    MPI_Pack(uoldps[0],ndxloc*ndim*ncol,MPI_DOUBLE,buffer,bufsize,&position,MPI_COMM_WORLD);
    MPI_Pack(wp[0]  ,(ncol + 1)*ncol,MPI_DOUBLE,buffer,bufsize,&position,MPI_COMM_WORLD);
    MPI_Pack(wt[0]  ,(ncol + 1)*ncol,MPI_DOUBLE,buffer,bufsize,&position,MPI_COMM_WORLD);
    MPI_Pack(wi     ,(ncol + 1),MPI_DOUBLE,buffer,bufsize,&position,MPI_COMM_WORLD);
    MPI_Pack(udotps[0],ndxloc*ndim*ncol,MPI_DOUBLE,buffer,bufsize,&position,MPI_COMM_WORLD);
    MPI_Pack(upoldp[0],ndxloc*ndim*ncol,MPI_DOUBLE,buffer,bufsize,&position,MPI_COMM_WORLD);

    MPI_Pack(thu    ,ndim*8,MPI_DOUBLE,buffer,bufsize,&position,MPI_COMM_WORLD);
    MPI_Pack(thl    ,ncb,MPI_DOUBLE,buffer,bufsize,&position,MPI_COMM_WORLD);
    MPI_Pack(rldot  ,NPARX,MPI_DOUBLE,buffer,bufsize,&position,MPI_COMM_WORLD);
    
    MPI_Bcast(buffer     ,position,MPI_PACKED,0,MPI_COMM_WORLD);
  }

  MPI_Scatterv(dtm        ,dtm_counts,dtm_displacements,MPI_DOUBLE,
	       NULL,0,MPI_DOUBLE,
	       0,MPI_COMM_WORLD);

  /* Worker runs here */

  MPI_Gatherv(NULL,0,MPI_DOUBLE,
	      fa[0],fa_counts,fa_displacements,MPI_DOUBLE,
	      0,MPI_COMM_WORLD);
  {
    /*I create a temporary send buffer for the MPI_Reduce
      command.  This is because there isn't an
      asymmetric version (like MPI_Scatterv).*/
    double *fctemp = malloc(nint*sizeof(doublereal));
    for(i=0;i<nint;i++)
      fctemp[i]=fc[i];
    MPI_Reduce(fctemp,fc,nint,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD);
    free(fctemp);
  }
  
  return 0;
}
Example #28
/*
 * This is a simple program that tests bsend.  It may be run as a single
 * process to simplify debugging; in addition, bsend allows send-to-self
 * programs.
 */
int main(int argc, char *argv[])
{
    MPI_Comm comm = MPI_COMM_WORLD;
    int dest = 0, src = 0, tag = 1;
    int s1, s2, s3;
    char *buf, *bbuf;
    char msg1[7], msg3[17];
    double msg2[2];
    char rmsg1[64], rmsg3[64];
    double rmsg2[64];
    int errs = 0, rank;
    int bufsize, bsize;

    MTest_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* According to the standard, we must use the PACK_SIZE length of each
     * message in the computation of the message buffer size */
    MPI_Pack_size(7, MPI_CHAR, comm, &s1);
    MPI_Pack_size(2, MPI_DOUBLE, comm, &s2);
    MPI_Pack_size(17, MPI_CHAR, comm, &s3);
    bufsize = 3 * MPI_BSEND_OVERHEAD + s1 + s2 + s3;
    buf = (char *) malloc(bufsize);
    MPI_Buffer_attach(buf, bufsize);

    strncpy(msg1, "012345", 7);
    strncpy(msg3, "0123401234012341", 17);
    msg2[0] = 1.23;
    msg2[1] = 3.21;

    if (rank == src) {
        /* These message sizes are chosen to expose any alignment problems */
        MPI_Bsend(msg1, 7, MPI_CHAR, dest, tag, comm);
        MPI_Bsend(msg2, 2, MPI_DOUBLE, dest, tag, comm);
        MPI_Bsend(msg3, 17, MPI_CHAR, dest, tag, comm);
    }

    if (rank == dest) {
        MPI_Recv(rmsg1, 7, MPI_CHAR, src, tag, comm, MPI_STATUS_IGNORE);
        MPI_Recv(rmsg2, 10, MPI_DOUBLE, src, tag, comm, MPI_STATUS_IGNORE);
        MPI_Recv(rmsg3, 17, MPI_CHAR, src, tag, comm, MPI_STATUS_IGNORE);

        if (strcmp(rmsg1, msg1) != 0) {
            errs++;
            fprintf(stderr, "message 1 (%s) should be %s\n", rmsg1, msg1);
        }
        if (rmsg2[0] != msg2[0] || rmsg2[1] != msg2[1]) {
            errs++;
            fprintf(stderr,
                    "message 2 incorrect, values are (%f,%f) but should be (%f,%f)\n",
                    rmsg2[0], rmsg2[1], msg2[0], msg2[1]);
        }
        if (strcmp(rmsg3, msg3) != 0) {
            errs++;
            fprintf(stderr, "message 3 (%s) should be %s\n", rmsg3, msg3);
        }
    }

    /* We can't guarantee that messages arrive until the detach */
    MPI_Buffer_detach(&bbuf, &bsize);

    MTest_Finalize(errs);

    MPI_Finalize();
    return 0;
}
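
The sizing rule above generalizes to any set of buffered sends. A minimal sketch of a hypothetical helper (bsend_buffer_size is not part of the test): reserve the packed payload size plus MPI_BSEND_OVERHEAD for each message that may be pending at once.

    static int bsend_buffer_size(int n, int count, MPI_Datatype type, MPI_Comm comm)
    {
        int packsize;
        /* upper bound on the packed size of one payload */
        MPI_Pack_size(count, type, comm, &packsize);
        /* one MPI_BSEND_OVERHEAD per buffered message */
        return n * (packsize + MPI_BSEND_OVERHEAD);
    }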
Example #29
static void
mpi_worker(const ESL_GETOPTS *go, struct cfg_s *cfg)
{
  int           xstatus = eslOK;
  int           status;
  int           type;
  P7_BUILDER   *bld         = NULL;
  ESL_MSA      *msa         = NULL;
  ESL_MSA      *postmsa     = NULL;
  ESL_MSA     **postmsa_ptr = (cfg->postmsafile != NULL) ? &postmsa : NULL;
  P7_HMM       *hmm         = NULL;
  P7_BG        *bg          = NULL;
  char         *wbuf        = NULL;	/* packed send/recv buffer  */
  void         *tmp;			/* for reallocation of wbuf */
  int           wn          = 0;	/* allocation size for wbuf */
  int           sz, n;		        /* size of a packed message */
  int           pos;
  char          errmsg[eslERRBUFSIZE];

  /* After master initialization: master broadcasts its status.
   */
  MPI_Bcast(&xstatus, 1, MPI_INT, 0, MPI_COMM_WORLD);
  if (xstatus != eslOK) return; /* master saw an error code; workers do an immediate normal shutdown. */
  ESL_DPRINTF2(("worker %d: sees that master has initialized\n", cfg->my_rank));
  
  /* Master now broadcasts worker initialization information (alphabet type) 
   * Workers return their status after initialization.
   * Initial allocation of wbuf must be large enough to guarantee that
   * we can pack an error result into it, because after initialization,
   * errors will be returned as packed (code, errmsg) messages.
   */
  MPI_Bcast(&type, 1, MPI_INT, 0, MPI_COMM_WORLD);
  if (xstatus == eslOK) { if ((cfg->abc = esl_alphabet_Create(type))      == NULL)    xstatus = eslEMEM; }
  if (xstatus == eslOK) { wn = 4096;  if ((wbuf = malloc(wn * sizeof(char))) == NULL) xstatus = eslEMEM; }
  if (xstatus == eslOK) { if ((bld = p7_builder_Create(go, cfg->abc))     == NULL)    xstatus = eslEMEM; }
  MPI_Reduce(&xstatus, &status, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); /* everyone sends xstatus back to master */
  if (xstatus != eslOK) {
    if (wbuf != NULL) free(wbuf);
    if (bld  != NULL) p7_builder_Destroy(bld);
    return; /* shutdown; we passed the error back for the master to deal with. */
  }

  bg = p7_bg_Create(cfg->abc);

  ESL_DPRINTF2(("worker %d: initialized\n", cfg->my_rank));

                      /* source = 0 (master); tag = 0 */
  while (esl_msa_MPIRecv(0, 0, MPI_COMM_WORLD, cfg->abc, &wbuf, &wn, &msa) == eslOK) 
    {
      /* Build the HMM */
      ESL_DPRINTF2(("worker %d: has received MSA %s (%d columns, %d seqs)\n", cfg->my_rank, msa->name, msa->alen, msa->nseq));
      if ((status = p7_Builder(bld, msa, bg, &hmm, NULL, NULL, NULL, postmsa_ptr)) != eslOK) { strcpy(errmsg, bld->errbuf); goto ERROR; }
      ESL_DPRINTF2(("worker %d: has produced an HMM %s\n", cfg->my_rank, hmm->name));

      /* Calculate upper bound on size of sending status, HMM, and optional postmsa; make sure wbuf can hold it. */
      n = 0;
      if (MPI_Pack_size(1,    MPI_INT, MPI_COMM_WORLD, &sz) != 0)     goto ERROR;   n += sz;
      if (p7_hmm_MPIPackSize( hmm,     MPI_COMM_WORLD, &sz) != eslOK) goto ERROR;   n += sz;
      if (esl_msa_MPIPackSize(postmsa, MPI_COMM_WORLD, &sz) != eslOK) goto ERROR;   n += sz;
      if (n > wn) { ESL_RALLOC(wbuf, tmp, sizeof(char) * n); wn = n; }
      ESL_DPRINTF2(("worker %d: has calculated that HMM will pack into %d bytes\n", cfg->my_rank, n));

      /* Send status, HMM, and optional postmsa back to the master */
      pos = 0;
      if (MPI_Pack       (&status, 1, MPI_INT, wbuf, wn, &pos, MPI_COMM_WORLD) != 0)     goto ERROR;
      if (p7_hmm_MPIPack (hmm,                 wbuf, wn, &pos, MPI_COMM_WORLD) != eslOK) goto ERROR;
      if (esl_msa_MPIPack(postmsa,             wbuf, wn, &pos, MPI_COMM_WORLD) != eslOK) goto ERROR;
      MPI_Send(wbuf, pos, MPI_PACKED, 0, 0, MPI_COMM_WORLD);
      ESL_DPRINTF2(("worker %d: has sent HMM to master in message of %d bytes\n", cfg->my_rank, pos));

      esl_msa_Destroy(msa);     msa     = NULL;
      esl_msa_Destroy(postmsa); postmsa = NULL;
      p7_hmm_Destroy(hmm);      hmm     = NULL;
    }

  if (wbuf != NULL) free(wbuf);
  p7_builder_Destroy(bld);
  return;

 ERROR:
  ESL_DPRINTF2(("worker %d: fails, is sending an error message, as follows:\n%s\n", cfg->my_rank, errmsg));
  pos = 0;
  MPI_Pack(&status, 1,                MPI_INT,  wbuf, wn, &pos, MPI_COMM_WORLD);
  MPI_Pack(errmsg,  eslERRBUFSIZE,    MPI_CHAR, wbuf, wn, &pos, MPI_COMM_WORLD);
  MPI_Send(wbuf, pos, MPI_PACKED, 0, 0, MPI_COMM_WORLD);
  if (wbuf != NULL) free(wbuf);
  if (msa  != NULL) esl_msa_Destroy(msa);
  if (hmm  != NULL) p7_hmm_Destroy(hmm);
  if (bld  != NULL) p7_builder_Destroy(bld);
  return;
}
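
A minimal sketch of what the matching master-side receive might look like (an assumption, since the hmmbuild master is not shown here; wbuf/wn mirror the worker's buffer variables): probe for the packed reply, size the buffer from the status, then unpack the status code to decide whether an HMM or an error message follows.

    MPI_Status mstatus;
    int        msgsize, wstatus, mpos = 0;
    MPI_Probe(MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &mstatus);
    MPI_Get_count(&mstatus, MPI_PACKED, &msgsize);
    if (msgsize > wn) { wbuf = realloc(wbuf, msgsize); wn = msgsize; }
    MPI_Recv(wbuf, wn, MPI_PACKED, mstatus.MPI_SOURCE, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    MPI_Unpack(wbuf, wn, &mpos, &wstatus, 1, MPI_INT, MPI_COMM_WORLD);
    /* if wstatus == eslOK, an HMM (and optional postmsa) follows in wbuf;
       otherwise a packed (code, errmsg) pair was sent instead */

Example #30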
void master(const struct fracInfo info)
{
    int ntasks, dest, msgsize;
    struct fracData *work = malloc(sizeof(*work));
    MPI_Status status;
    int rowsTaken = 0;

    MPI_Comm_size(MPI_COMM_WORLD, &ntasks);    

    size_t size = sizeof(unsigned char) * (unsigned long)info.nCols * (unsigned long)info.nRows;
    unsigned char *fractal = (unsigned char*)malloc(size);
    if(!fractal) {
        printf("fractal allocation failed, %lu bytes\n", size);
        exit(1);
    }

    // Allocate buffer
    int membersize, emptysize, fullsize;
    int position;
    char *buffer;
    MPI_Pack_size(1, MPI_INT, MPI_COMM_WORLD, &membersize);
    emptysize = membersize;
    MPI_Pack_size(1, MPI_INT, MPI_COMM_WORLD, &membersize);
    emptysize += membersize;
    MPI_Pack_size(get_max_work_size(&info), MPI_UNSIGNED_CHAR, MPI_COMM_WORLD, &membersize);
    fullsize = emptysize + membersize;

    buffer = malloc(fullsize);    
    if(!buffer) {
        printf("buffer allocation failed, %d bytes\n",fullsize);
        exit(1);
    }

    // Send initial data
    for (dest = 1; dest < ntasks; dest++) {
        //Get next work item
        get_work(&info,&rowsTaken,work);
        
        //pack and send work       
        position = 0;
        MPI_Pack(&work->startRow,1,MPI_INT,buffer,emptysize,&position,MPI_COMM_WORLD);
        MPI_Pack(&work->nRows,1,MPI_INT,buffer,emptysize,&position,MPI_COMM_WORLD);
        MPI_Send(buffer, position, MPI_PACKED, dest, WORKTAG, MPI_COMM_WORLD);
    }

    printf("sent initial work\n");
    //Get next work item
    get_work(&info,&rowsTaken,work);
    int startRow, nRows;
    while(work->nRows) {
        // Receive and unpack work
        MPI_Recv(buffer, fullsize, MPI_PACKED, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
        position = 0;
        MPI_Get_count(&status, MPI_PACKED, &msgsize);
        MPI_Unpack(buffer, msgsize, &position, &startRow,1,MPI_INT,MPI_COMM_WORLD);
        MPI_Unpack(buffer, msgsize, &position, &nRows,1,MPI_INT,MPI_COMM_WORLD);    
        MPI_Unpack(buffer, msgsize, &position, fractal+((unsigned long)startRow*info.nCols), nRows*info.nCols, MPI_UNSIGNED_CHAR, MPI_COMM_WORLD);

        //pack and send work       
        position = 0;
        MPI_Pack(&work->startRow,1,MPI_INT,buffer,emptysize,&position,MPI_COMM_WORLD);
        MPI_Pack(&work->nRows,1,MPI_INT,buffer,emptysize,&position,MPI_COMM_WORLD);
        MPI_Send(buffer, position, MPI_PACKED, status.MPI_SOURCE, WORKTAG, MPI_COMM_WORLD);

        //Get next work item
        get_work(&info,&rowsTaken,work);

        if(status.MPI_SOURCE==1)
            printf("%d\n",work->startRow);
    }

    // Receive all remaining work
    for (dest = 1; dest < ntasks; dest++) {
        // Receive and unpack work
        MPI_Recv(buffer, fullsize, MPI_PACKED, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
        position = 0;
        MPI_Get_count(&status, MPI_PACKED, &msgsize);

        MPI_Unpack(buffer, msgsize, &position, &startRow,1,MPI_INT,MPI_COMM_WORLD);
        MPI_Unpack(buffer, msgsize, &position, &nRows,1,MPI_INT,MPI_COMM_WORLD);
        // unpack pixel data
        MPI_Unpack(buffer, msgsize, &position, fractal+((unsigned long)startRow*info.nCols), nRows*info.nCols, MPI_UNSIGNED_CHAR, MPI_COMM_WORLD);

        // Kill slaves
        MPI_Send(NULL, 0, MPI_INT, dest, DIETAG, MPI_COMM_WORLD);
    }

    free(work);
    free(buffer);

    //Save image as TIFF
    unsigned int nx = info.nCols;
    unsigned int ny = info.nRows;
    char fileName[] = "/home/pi/Mandelbrot/Mandelbrot.tiff";
    TIFF *out = TIFFOpen(fileName, "w");
    uint32 tileDim = 256;
    tsize_t tileBytes = tileDim*tileDim*sizeof(char);
    unsigned char *buf = (unsigned char *)_TIFFmalloc(tileBytes);
    char description[1024];
    snprintf(description, sizeof(description),"xStart:%f yStart:%f spacing:%f AAx:%d",info.xStart,info.yStart,info.spacing,info.AA);
    TIFFSetField(out, TIFFTAG_IMAGEDESCRIPTION, description);
    TIFFSetField(out, TIFFTAG_IMAGEWIDTH, (uint32) nx);
    TIFFSetField(out, TIFFTAG_IMAGELENGTH, (uint32) ny);
    TIFFSetField(out, TIFFTAG_ORIENTATION, ORIENTATION_TOPLEFT);
    TIFFSetField(out, TIFFTAG_SAMPLESPERPIXEL, 1);
    TIFFSetField(out, TIFFTAG_BITSPERSAMPLE, 8);
    TIFFSetField(out, TIFFTAG_PLANARCONFIG, PLANARCONFIG_CONTIG);
    TIFFSetField(out, TIFFTAG_PHOTOMETRIC, PHOTOMETRIC_MINISBLACK);
    TIFFSetField(out, TIFFTAG_COMPRESSION, COMPRESSION_LZW);
    TIFFSetField(out, TIFFTAG_TILEWIDTH, tileDim);
    TIFFSetField(out, TIFFTAG_TILELENGTH,  tileDim);
//    TIFFSetField(out, TIFFTAG_PREDICTOR, PREDICTOR_HORIZONTAL);
//    TIFFSetField(out, TIFFTAG_XRESOLUTION, resolution);
//    TIFFSetField(out, TIFFTAG_YRESOLUTION, resolution);
//    TIFFSetField(out, TIFFTAG_RESOLUTIONUNIT, RESUNIT_INCH);    
    unsigned long x,y,i,j;
    unsigned long tileStart;
    // Iterate through and write tiles
    for(y=0; y<ny; y+=tileDim) {
        for(x=0; x<nx; x+=tileDim) {
            // Fill tile with fractal data
            tileStart = y*nx+x;
            for(i=0; i<tileDim; i++) {
                for(j=0; j<tileDim; j++) {
                    if(x+j < nx && y+i < ny)
                        buf[i*tileDim+j] = fractal[(y+i)*nx+(x+j)];
                    else
                        buf[i*tileDim+j] = (unsigned char)0;
                }
            }
            TIFFWriteTile(out, buf, x, y, 0, 0);
        }
    }
    
    TIFFClose(out);
    _TIFFfree(buf);
    free(fractal);
}
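
For completeness, a minimal sketch of the slave loop this master expects (an assumption, since the real slave is not shown): receive a packed (startRow, nRows) pair, exit on DIETAG, otherwise compute the rows and send them back packed behind the two ints. calcPixels() is a hypothetical stand-in for the actual fractal kernel; DIETAG, struct fracInfo, and get_max_work_size() come from the master above.

void slave(const struct fracInfo *info)
{
    MPI_Status status;
    int position, startRow, nRows;
    int maxPixels = get_max_work_size(info);
    unsigned char *pixels = malloc(maxPixels);

    // Buffer sized like the master's: two ints plus the largest pixel block
    int membersize, bufsize;
    MPI_Pack_size(2, MPI_INT, MPI_COMM_WORLD, &bufsize);
    MPI_Pack_size(maxPixels, MPI_UNSIGNED_CHAR, MPI_COMM_WORLD, &membersize);
    bufsize += membersize;
    char *buffer = malloc(bufsize);

    for (;;) {
        MPI_Recv(buffer, bufsize, MPI_PACKED, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
        if (status.MPI_TAG == DIETAG)
            break;
        position = 0;
        MPI_Unpack(buffer, bufsize, &position, &startRow, 1, MPI_INT, MPI_COMM_WORLD);
        MPI_Unpack(buffer, bufsize, &position, &nRows, 1, MPI_INT, MPI_COMM_WORLD);

        calcPixels(info, startRow, nRows, pixels);  // hypothetical: fills nRows*nCols bytes

        position = 0;
        MPI_Pack(&startRow, 1, MPI_INT, buffer, bufsize, &position, MPI_COMM_WORLD);
        MPI_Pack(&nRows, 1, MPI_INT, buffer, bufsize, &position, MPI_COMM_WORLD);
        MPI_Pack(pixels, nRows*info->nCols, MPI_UNSIGNED_CHAR, buffer, bufsize, &position, MPI_COMM_WORLD);
        MPI_Send(buffer, position, MPI_PACKED, 0, 0, MPI_COMM_WORLD);
    }
    free(pixels);
    free(buffer);
}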