Example #1
void DiamondTop::freeRequests()
{
	if(this->bufferMode == 1)	
		MPI_Request_free(&reqs[0]);
	else if(this->bufferMode == 2)
	{
		MPI_Request_free(&reqs[0]);
		MPI_Request_free(&reqs[1]);
	}
	else
		for(int i=0;i<levels;i++)
			MPI_Request_free(&reqs[i]);
	//printf("All MPI_Requests Were Cleared!!\n");
}
Example #2
int md_wrap_request_free(MPI_Request *request)

/*******************************************************************************

  Machine dependent wrapped request object deletion routine.

  Author:          Michael A. Heroux, SNL, 9214
  =======

  Return code:     int
  ============

  Parameter list:
  ===============

  request:           Pointer to an existing request object that will be freed.

*******************************************************************************/
{

  int err = 0;
  if (request != NULL)
    err = MPI_Request_free(request);

  return err;

} /* md_wrap_request_free */
Example #3
JNIEXPORT jlong JNICALL Java_mpi_Request_free(
        JNIEnv *env, jobject jthis, jlong handle)
{
    MPI_Request req = (MPI_Request)handle;
    int rc = MPI_Request_free(&req);
    ompi_java_exceptionCheck(env, rc);
    return (jlong)req;
}
Example #4
    ~Request()   {
#ifdef HAVE_MPI
        // explicitly free this request if not
        // done so already, otherwise this would
        // be a memory leak!
        if (_request != MPI_REQUEST_NULL)	MPI_Request_free(&_request);
#endif
    }
Example #5
int main( int argc, char *argv[] )
{
    MPI_Status status;
    MPI_Request request;
    int a[10], b[10];
    int buf[BUFSIZE], *bptr, bl, i, j, rank, size;
    int errs = 0;

    MTest_Init( 0, 0 );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    MPI_Comm_size( MPI_COMM_WORLD, &size );
    MPI_Buffer_attach( buf, BUFSIZE );

    for (j=0; j<10; j++) {
	MPI_Bsend_init( a, 10, MPI_INT, 0, 27+j, MPI_COMM_WORLD, &request );
	for (i=0; i<10; i++) {
	    a[i] = (rank + 10 * j) * size + i;
	}
	MPI_Start( &request );
	MPI_Wait( &request, &status );
	MPI_Request_free( &request );
    }
    if (rank == 0) {

	for (i=0; i<size; i++) {
	    for (j=0; j<10; j++) {
		int k;
		status.MPI_TAG = -10;
		status.MPI_SOURCE = -20;
		MPI_Recv( b, 10, MPI_INT, i, 27+j, MPI_COMM_WORLD, &status );
    
		if (status.MPI_TAG != 27+j) {
		    errs++;
		    printf( "Wrong tag = %d\n", status.MPI_TAG );
		}
		if (status.MPI_SOURCE != i) {
		    errs++;
		    printf( "Wrong source = %d\n", status.MPI_SOURCE );
		}
		for (k=0; k<10; k++) {
		    if (b[k] != (i + 10 * j) * size + k) {
			errs++;
			printf( "received b[%d] = %d from %d tag %d\n",
				k, b[k], i, 27+j );
		    }
		}
	    }
	}
    }
    MPI_Buffer_detach( &bptr, &bl );
    
    MTest_Finalize( errs );
    MPI_Finalize();
    return 0;
}
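The test above relies on BUFSIZE, defined outside the excerpt, for its attach buffer. A minimal sketch, not taken from the test, of sizing such a buffer explicitly: MPI_Pack_size reports the packed payload size and MPI_BSEND_OVERHEAD covers the per-message bookkeeping (the count argument and the helper name are placeholders).

#include <mpi.h>
#include <stdlib.h>

/* Attach a buffer big enough for one buffered send of 'count' ints:
   the packed payload size plus MPI_BSEND_OVERHEAD bytes of bookkeeping. */
static void *attach_bsend_buffer(int count, int *size_out)
{
    int payload;
    MPI_Pack_size(count, MPI_INT, MPI_COMM_WORLD, &payload);
    *size_out = payload + MPI_BSEND_OVERHEAD;
    void *buf = malloc(*size_out);
    MPI_Buffer_attach(buf, *size_out);
    return buf;  /* detach with MPI_Buffer_detach() and free() when done */
}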
Example #6
void amps_FreePackage(amps_Package package)
{
  int i;

  if (package)
  {
    if (package->commited)
    {
      for (i = 0; i < package->num_recv; i++)
      {
        if (package->recv_invoices[i]->mpi_type != MPI_DATATYPE_NULL)
        {
          MPI_Type_free(&(package->recv_invoices[i]->mpi_type));
        }


        MPI_Request_free(&package->recv_requests[i]);
      }

      for (i = 0; i < package->num_send; i++)
      {
        if (package->send_invoices[i]->mpi_type != MPI_DATATYPE_NULL)
        {
          MPI_Type_free(&package->send_invoices[i]->mpi_type);
        }

        MPI_Request_free(&package->send_requests[i]);
      }

      if (package->num_send + package->num_recv)
      {
        free(package->recv_requests);
        free(package->status);
      }

      package->commited = FALSE;
    }

    free(package);
  }
}
Example #7
/*
 * This example causes the IBM SP2 MPI version to generate the message
 * ERROR: 0032-158 Persistent request already active  (2) in MPI_Startall, task 0
 * in the SECOND set of MPI_Startall (after the MPI_Request_free).
 */
int main( int argc, char **argv )
{
    MPI_Request r[4];
    MPI_Status  statuses[4];
    double sbuf1[10], sbuf2[10];
    double rbuf1[10], rbuf2[10];
    int size, rank, up_nbr, down_nbr, i;

    MPI_Init( &argc, &argv );
    MPI_Comm_size( MPI_COMM_WORLD, &size );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );

    up_nbr = (rank + 1) % size;
    down_nbr = (size + rank - 1) % size;

    MPI_Recv_init( rbuf1, 10, MPI_DOUBLE, down_nbr, 0, MPI_COMM_WORLD, &r[0] );
    MPI_Recv_init( rbuf2, 10, MPI_DOUBLE, up_nbr, 1, MPI_COMM_WORLD, &r[1] );
    MPI_Send_init( sbuf1, 10, MPI_DOUBLE, up_nbr, 0, MPI_COMM_WORLD, &r[2] );
    MPI_Send_init( sbuf2, 10, MPI_DOUBLE, down_nbr, 1, MPI_COMM_WORLD, &r[3] );
    MPI_Startall( 4, r );
    MPI_Waitall( 4, r, statuses );

    for (i=0; i<4; i++) {
	MPI_Request_free( &r[i] );
	}

    MPI_Recv_init( rbuf1, 10, MPI_DOUBLE, down_nbr, 0, MPI_COMM_WORLD, &r[0] );
    MPI_Recv_init( rbuf2, 10, MPI_DOUBLE, up_nbr, 1, MPI_COMM_WORLD, &r[1] );
    MPI_Send_init( sbuf1, 10, MPI_DOUBLE, up_nbr, 0, MPI_COMM_WORLD, &r[2] );
    MPI_Send_init( sbuf2, 10, MPI_DOUBLE, down_nbr, 1, MPI_COMM_WORLD, &r[3] );
    MPI_Startall( 4, r );
    MPI_Waitall( 4, r, statuses );

    for (i=0; i<4; i++) {
	MPI_Request_free( &r[i] );
	}

    if (rank == 0) printf( "No errors\n" );
    MPI_Finalize();
    return 0;
}
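The header comment above describes a vendor bug: after MPI_Request_free, the second round of MPI_Recv_init/MPI_Send_init followed by MPI_Startall is perfectly legal, yet the IBM implementation still treated the old requests as active. Stripped down to a single send request, the intended lifecycle looks roughly like this (peer and the buffer are placeholders):

int peer = 0;                       /* placeholder destination rank         */
double buf[10];
MPI_Request req;

MPI_Send_init(buf, 10, MPI_DOUBLE, peer, 0, MPI_COMM_WORLD, &req);
MPI_Start(&req);                    /* activate the persistent request      */
MPI_Wait(&req, MPI_STATUS_IGNORE);  /* back to inactive, still allocated    */
MPI_Start(&req);                    /* may be restarted any number of times */
MPI_Wait(&req, MPI_STATUS_IGNORE);
MPI_Request_free(&req);             /* req is now MPI_REQUEST_NULL; call
                                       MPI_Send_init again before any
                                       further MPI_Start                    */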
Example #8
void mpi_request_free_f(MPI_Fint *request, MPI_Fint *ierr)
{
    int err;

    MPI_Request c_req = MPI_Request_f2c( *request ); 
    err = MPI_Request_free(&c_req);
    *ierr = OMPI_INT_2_FINT(err);

    if (MPI_SUCCESS == err) {
        *request = OMPI_INT_2_FINT(MPI_REQUEST_NULL->req_f_to_c_index);
    }
}
Example #9
// Register a wakeup callback for the communication thread
void flow_t::post_wakeup(line_details_t& line, const wakeup_block_t b) {
#if PENTAGO_MPI_FUNNEL
  requests.add_immediate(curry(&flow_t::wakeup,this,&line,b));
#else
  static_assert(sizeof(line_details_t*)==sizeof(long long int),"");
  // Send a pointer to ourselves to the communication thread
  MPI_Request request;
  CHECK(MPI_Isend((void*)&line.self,1,MPI_LONG_LONG_INT,0,wakeup_tag(b),comms.wakeup_comm,&request));
  // Since requests_t::free is not thread safe, we're forced to use MPI_Request_free here.
  // This is bad; see http://blogs.cisco.com/performance/mpi_request_free-is-evil for why.
  CHECK(MPI_Request_free(&request));
#endif
}
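The comment above (and the linked article) explains why freeing an active request is discouraged: the sender loses any way to observe completion or to know when the buffer may be reused. A minimal sketch of the usual alternative, assuming the surrounding code can hold the request until a convenient completion point (payload, destination and tag are placeholders):

long long payload = 0;              /* placeholder for the value being sent  */
MPI_Request request;
MPI_Isend(&payload, 1, MPI_LONG_LONG_INT, /*dest=*/0, /*tag=*/0,
          MPI_COMM_WORLD, &request);
/* ... overlap other work here ... */
MPI_Wait(&request, MPI_STATUS_IGNORE);  /* completion is observable and the
                                           send buffer is safe to reuse     */

When many such sends are in flight, the same effect is usually achieved by collecting the requests and completing them in batches with MPI_Waitall or MPI_Testsome.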
Example #10
void _amps_wait_exchange(amps_Handle handle)
{
  int notdone;
  int i;

  MPI_Status *status;

  if(handle -> package -> num_recv + handle -> package -> num_send)
  {
     status = (MPI_Status*)calloc((handle -> package -> num_recv + 
		      handle -> package -> num_send), sizeof(MPI_Status));

     MPI_Waitall(handle -> package -> num_recv + handle -> package -> num_send,
		 handle -> package -> requests,
		 status);

     free(status);

     for(i = 0; i < handle -> package -> num_recv; i++)
     {
	if( handle -> package -> recv_invoices[i] -> mpi_type != MPI_DATATYPE_NULL )
	{
	   MPI_Type_free(&(handle -> package -> recv_invoices[i] -> mpi_type));   
	}

	MPI_Request_free(&handle -> package -> requests[i]);
     }
     
     for(i = 0; i < handle -> package -> num_send; i++)
     {
	if( handle -> package -> send_invoices[i] -> mpi_type != MPI_DATATYPE_NULL ) 
	{
	   MPI_Type_free(&handle -> package -> send_invoices[i] -> mpi_type);
	}

	MPI_Request_free(&handle -> package -> requests[handle -> package -> num_recv + i]);
     }
  }
}
Example #11
void thorium_mpi1_request_destroy(struct thorium_mpi1_request *self)
{
    if (self->request != MPI_REQUEST_NULL) {
        MPI_Request_free(&self->request);

        self->request = MPI_REQUEST_NULL;
    }

    self->source = -1;
    self->tag = -1;
    self->buffer = NULL;
    self->worker = -1;
    self->count = -1;
}
Example #12
/*!
    Cancels the receive associated with the specified rank.

    \param rank is the rank associated with the receive to cancel
*/
void DataCommunicator::cancelRecv(int rank)
{
    if (m_recvIds.count(rank) == 0) {
        return;
    }

    int id = m_recvIds[rank];
    if (m_recvRequests[id] == MPI_REQUEST_NULL) {
        return;
    }

    MPI_Cancel(&m_recvRequests[id]);
    MPI_Request_free(&m_recvRequests[id]);
}
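cancelRecv pairs MPI_Cancel with MPI_Request_free, which releases the request without reporting whether the cancellation actually took effect. A hedged sketch of the chattier variant, completing the request and querying the outcome (recv_request is a placeholder handle):

MPI_Status status;
int cancelled;

MPI_Cancel(&recv_request);                /* ask for cancellation            */
MPI_Wait(&recv_request, &status);         /* complete the request either way */
MPI_Test_cancelled(&status, &cancelled);
if (!cancelled) {
    /* the receive matched before the cancel; the buffer holds real data */
}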
Example #13
void memheap_oob_destruct(void)
{
    int i;
    oob_comm_request_t *r;

    opal_progress_unregister(oshmem_mkey_recv_cb);

    for (i = 0; i < MEMHEAP_RECV_REQS_MAX; i++) { 
        r = &memheap_oob.req_pool[i];
        MPI_Cancel(&r->recv_req);
        MPI_Request_free(&r->recv_req);
    }

    OBJ_DESTRUCT(&memheap_oob.req_list);
    OBJ_DESTRUCT(&memheap_oob.lck);
    OBJ_DESTRUCT(&memheap_oob.cond);
}
Example #14
int
SAMRAI_MPI::Request_free(
   Request* request)
{
#ifndef HAVE_MPI
   NULL_USE(request);
#endif
   int rval = MPI_SUCCESS;
   if (!s_mpi_is_initialized) {
      TBOX_ERROR("SAMRAI_MPI::Request_free is a no-op without run-time MPI!");
   }
#ifdef HAVE_MPI
   else {
      rval = MPI_Request_free(request);
   }
#endif
   return rval;
}
Example #15
EXPORT_MPI_API void FORTRAN_API mpi_request_free_( MPI_Fint *request, MPI_Fint *__ierr )
{
    MPI_Request lrequest = MPI_Request_f2c(*request);
    *__ierr = MPI_Request_free( &lrequest );

#ifdef OLD_POINTER
/* 
   We actually need to remove the pointer from the mapping if the ref
   count is zero.  We do that by checking to see if lrequest was set to
   NULL.
 */
    if (!lrequest) {
	MPIR_RmPointer( *(int*)request );
	*(int*)request = 0;
    }
#endif
    *request = MPI_Request_c2f(lrequest);

}
Example #16
/**
 * @brief Responds with no_work to pending work requests.
 *
 * Answers any pending work requests in case a rank is blocking,
 * waiting for a response.
 */
static void CIRCLE_cleanup_mpi_messages(CIRCLE_state_st* sptr)
{
    int i = 0;
    int j = 0;

    /* TODO: this is O(N^2)... need a better way at large scale */
    /* Make sure that all pending work requests are answered. */
    for(j = 0; j < sptr->size; j++) {
        for(i = 0; i < sptr->size; i++) {
            if(i != sptr->rank) {
                sptr->request_flag[i] = 0;

                if(MPI_Test(&sptr->mpi_state_st->request_request[i], \
                            &sptr->request_flag[i], \
                            &sptr->mpi_state_st->request_status[i]) \
                        != MPI_SUCCESS) {

                    MPI_Abort(*sptr->mpi_state_st->work_comm, \
                              LIBCIRCLE_MPI_ERROR);
                }

                if(sptr->request_flag[i]) {
                    MPI_Start(&sptr->mpi_state_st->request_request[i]);
                    CIRCLE_send_no_work(i);
                }
            }
        }
    }

    /* free off persistent requests */
    for(i = 0; i < sptr->size; i++) {
        if(i != sptr->rank) {
            MPI_Request_free(&sptr->mpi_state_st->request_request[i]);
        }
    }

    return;
}
Example #17
File: red.c Project: arkuzmin/ppp
int main (int argc,char **argv)
{
	MPI_Status status;

	int rank, size;
	struct
	{
		int value;
		int rank;
	} num, max, rcvd;

	MPI_Init(&argc,&argv);
	MPI_Comm_rank (MPI_COMM_WORLD,&rank);
	MPI_Comm_size (MPI_COMM_WORLD,&size);
	
	char *tracefile = getenv("TVTRACE");

	if( tracefile != NULL ){
		printf( "tv tracefile=%s\n", tracefile );
		MPI_Pcontrol(TRACEFILES, NULL, tracefile, 0);      
	}
	else{
		MPI_Pcontrol(TRACEFILES, NULL, "trace", 0);
	}
	MPI_Pcontrol(TRACELEVEL, 1, 1, 1);
	MPI_Pcontrol(TRACENODE, 1000000, 1, 1);

	num.value = my_random(rank);
	num.rank = rank;
	printf("Node %d: value = %d\n", num.rank, num.value);

	double sTime, eTime;
	sTime = MPI_Wtime();
	MPI_Pcontrol(TRACEEVENT, "entry", 2, 0, "");
	
	MPI_Reduce(&num, &max, 1, MPI_2INT, MPI_MAXLOC, 0, MPI_COMM_WORLD);
	
	MPI_Pcontrol(TRACEEVENT, "exit", 2, 0, "");
	eTime = MPI_Wtime();
	
	MPI_Barrier( MPI_COMM_WORLD );
	
	MPI_Pcontrol(TRACEEVENT, "entry", 1, 0, "");
	if (rank == 0)
	{
		print_result("MPI_Reduce", max.rank, max.value, eTime - sTime);
		sTime = MPI_Wtime();
		max.value = num.value;
		max.rank = num.rank;
		int i;
		for(i = 1; i < size; i++)
		{
			MPI_Recv(&rcvd, 1, MPI_2INT, i, TAG, MPI_COMM_WORLD, &status);
			if (rcvd.value > max.value)
			{
				max.value = rcvd.value;
				max.rank = rcvd.rank;
			} 
		}
		eTime = MPI_Wtime();
		print_result("Send-receive", max.rank, max.value, eTime - sTime);
	}
	else
	{
		MPI_Ssend(&num, 1, MPI_2INT, 0, TAG, MPI_COMM_WORLD);
	}
	MPI_Pcontrol(TRACEEVENT, "exit", 1, 0, "");

#if 0


	if( !rank ){
		double *a,*b,*c, *c0;
		int i,i1,j,k;
		int ann;
		MPI_Status *st;
		MPI_Request *rq,rq1;
		rq = (MPI_Request*) malloc( (size-1)*sizeof(MPI_Request) );
		st = (MPI_Status*) malloc( (size-1)*sizeof(MPI_Status) );


		ann=an/size+((an%size)?1:0);
		//      printf("[%d]ann=%d\n", rank, ann );

		a=(double*) malloc(am*an*sizeof(double));
		b=(double*) malloc(am*bm*sizeof(double));
		c=(double*) malloc(an*bm*sizeof(double));
		for(i=0;i<am*an;i++)
		a[i]=rand()%301;
		for(i=0;i<am*bm;i++)
		b[i]=rand()%251;
		printf( "Data ready [%d]\n", rank );
		
		c0 = (double*)malloc(an*bm*sizeof(double));

		
		time = MPI_Wtime();  
		for (i=0; i<an; i++)
		for (j=0; j<bm; j++)
		{
			double s = 0.0;
			for (k=0; k<am; k++)
			s+= a[i*am+k]*b[k*bm+j];
			c0[i*bm+j] = s;
		} 
		time = MPI_Wtime() - time;
		printf("Time seq[%d] = %lf\n", rank, time );
		time_seq = time;

		MPI_Barrier( MPI_COMM_WORLD );
		time=MPI_Wtime();

		MPI_Bcast( b, am*bm, MPI_DOUBLE, 0, MPI_COMM_WORLD);
		printf( "Data Bcast [%d]\n", rank );

		for( i1=0, j=1; j<size; j++, i1+=ann*am ){
			printf( "Data to Send [%d] %016x[%4d] =>> %d\n", rank, a+i1, i1, j );
			MPI_Isend( a+i1, ann*am, MPI_DOUBLE, j, 101, MPI_COMM_WORLD, &rq1 );
			MPI_Request_free( &rq1 ); 
			printf( "Data Send [%d] =>> %d\n", rank, j );
		}
		printf( "Data Send [%d]\n", rank );
		
		MPI_Isend( a+i1, 1, MPI_DOUBLE, 0, 101, MPI_COMM_WORLD, &rq1 );
		MPI_Request_free( &rq1 ); 
		
		printf( "Data Send [%d] =>> %d\n", rank, j );


		for(i=(i1/am);i<an;i++)
		for(j=0;j<bm;j++){
			double s=0.0;
			for(k=0;k<am;k++)
			s+=a[i*am+k]*b[k*bm+j];
			c[i*bm+j]=s;
		}

		printf( "Job done  [%d]\n", rank );
		for( i1=0, j=1; j<size; j++, i1+=(ann*bm) ){
			printf( "Data to Recv [%d] %016x[%4d] =>> %d\n", rank, c+i1, i1/bm, j );
			MPI_Irecv( c+i1, ann*am, MPI_DOUBLE, j, 102, MPI_COMM_WORLD, rq+(j-1) );
		}         
		MPI_Waitall( size-1, rq, st );
		
		time=MPI_Wtime()-time;
		printf("time [%d]=%12.8lf\n",rank,time);
		time_par = time;

		printf( "Data collected [%d]\n", rank );
		
		time=MPI_Wtime();
		int ok = 1;
		for(i=0;i<an*bm;i++)
		if( c[i] != c0[i] ){
			ok = 0;
			printf( "Fail [%d %d] %lf != %lf\n", i/bm, i%bm, c[i], c0[i] );
			break;
		}
		time=MPI_Wtime()-time;
		if( ok ){
			printf( "Data verifeid [%d] time = %lf\n", rank, time );
			printf( "SpeedUp S(%d) = %14.10lf\n", size, time_seq/time_par );
			printf( "Efitncy E(%d) = %14.10lf\n", size, time_seq/(time_par*size) );
		}
		
	}
	else
	{
		int ann;
		double *a,*b,*c;
		MPI_Status st;
		int i,j,k;

		MPI_Pcontrol(TRACEEVENT, "entry", 0, 0, "");

		ann= an/size + ((an%size)?1:0);
		//      if(rank==1)
		//        printf("[%d]ann=%d = %d / %d \n", rank, ann, an, size );
		
		a=(double*)malloc(ann*am*sizeof(double));
		b=(double*)malloc(bm*am*sizeof(double));
		c=(double*)malloc(ann*bm*sizeof(double));
		printf( "Mem allocated [%d]\n", rank );

		
		MPI_Barrier( MPI_COMM_WORLD );
		MPI_Pcontrol(TRACEEVENT, "exit", 0, 0, "");
		time = MPI_Wtime();


		MPI_Pcontrol(TRACEEVENT, "entry", 1, 0, "");
		
		MPI_Bcast(b,am*bm,MPI_DOUBLE,0,MPI_COMM_WORLD);
		printf( "Data Bcast [%d]\n", rank );
		
		MPI_Recv( a, ann*am, MPI_DOUBLE, 0, 101, MPI_COMM_WORLD, &st);
		printf( "Data Recv [%d]\n", rank );
		
		MPI_Pcontrol(TRACEEVENT, "exit", 1, 0, "");
		
		MPI_Pcontrol(TRACEEVENT, "entry", 2, 0, "");
		for( i=0; i<ann; i++ )
		for(j=0;j<bm;j++){
			double s=0.0;
			
			for( k=0; k<am; k++ ){
				s+=a[i*am+k]*b[k*bm+j];
			}
			/*    
			if(1==rank){
			if(0==j){
				printf( "c[%d<%d %d] = %lf\n", i,ann,j, s );
			}
			}
*/
			c[i*bm+j]=s;
		}
		printf( "Job done  [%d]\n", rank );
		MPI_Pcontrol(TRACEEVENT, "exit", 2, 0, "");

		MPI_Pcontrol(TRACEEVENT, "entry", 3, 0, "");
		MPI_Send( c, ann*bm,  MPI_DOUBLE, 0, 102, MPI_COMM_WORLD);
		printf( "Data returned [%d]\n", rank );
		MPI_Pcontrol(TRACEEVENT, "exit", 3, 0, "");

		time=MPI_Wtime()-time;
		printf("time [%d]=%12.8lf\n",rank,time);
	}

#endif

	MPI_Finalize();
	return 0;
}
Example #18
int main( int argc, char **argv )
{
	int rank, size, loop, max_loop = DEFAULT_LOOP, max_req = DEFAULT_REQ;
	int buf_len = DEFAULT_LEN;
	int i, j, errs = 0, toterrs;
	MPI_Request r;
	MPI_Status  status;
	int *(b[MAX_REQ]);
	MPI_Datatype dtype;
	int sendrank = 0, recvrank = 1;

	MPI_Init( &argc, &argv );

	dtype = MPI_INT;

	MPI_Comm_rank( MPI_COMM_WORLD, &rank );

	/*
	The following check lets this test run on small-memory systems
	that support the sysconf call interface; it keeps the test from
	becoming swap-bound, for example on an old Linux system or a
	Sony Playstation 2 (really!).
	*/
#if defined(HAVE_SYSCONF) && defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
	if (rank == sendrank) 
	{ 
		long n_pages, pagesize;
		int  msglen_max = max_req * buf_len * sizeof(int);
		n_pages  = sysconf( _SC_PHYS_PAGES );
		pagesize = sysconf( _SC_PAGESIZE );
		/* printf( "Total mem = %ld\n", n_pages * pagesize ); */
		/* We want to avoid integer overflow in the size calculation.
		The best way is to avoid computing any products (such
		as total memory = n_pages * pagesize) and instead
		compute a msglen_max that fits within 1/4 of the available 
		pages */
		if (n_pages > 0 && pagesize > 0) {
			/* Recompute msglen_max */
			int msgpages = 4 * ((msglen_max + pagesize - 1)/ pagesize);
			while (n_pages < msgpages) { 
				msglen_max /= 2; msgpages /= 2; buf_len /= 2; 
			}
		}
	}
#else
	/* printf( "No sysconf\n" ); */
#endif

	/* Check command line args (allow usage even with one processor) */
	argv++;
	argc--;
	while (argc--) {
		if (strcmp( "-loop" , *argv ) == 0) {
			argv++; argc--;
			max_loop = atoi( *argv );
		}
		else if (strcmp( "-req", *argv ) == 0) {
			argv++; argc--;
			max_req = atoi( *argv );
		}
		else if (strcmp( "-len", *argv ) == 0) {
			argv++; argc--;
			buf_len = atoi( *argv );
		}
		else {
			fprintf( stdout, 
				"Usage: reqfree [ -loop n ] [ -req n ] [ -len n ]\n" );
			fflush(stdout);
			fprintf( stderr, "[%i] Aborting\n",rank );fflush(stderr);
			MPI_Abort( MPI_COMM_WORLD, 1 );
		}
		argv++;
	}

	MPI_Comm_size( MPI_COMM_WORLD, &size );
/*	if (size != 2) {
		fprintf( stderr, "This program requires two processes\n" );
		MPI_Abort( MPI_COMM_WORLD, 1 );
	}*/

	/* Assume only processor 0 has the command line */
	MPI_Bcast( &max_loop, 1, MPI_INT, 0, MPI_COMM_WORLD );
	MPI_Bcast( &max_req, 1, MPI_INT, 0, MPI_COMM_WORLD );
	MPI_Bcast( &buf_len, 1, MPI_INT, 0, MPI_COMM_WORLD );

	if (rank <= 1)
	{
		/* Allocate buffers */
		for (i=0; i<max_req; i++) {
			b[i] = (int *) malloc(buf_len * sizeof(int) );
			if (!b[i]) {
				fprintf( stderr, "Could not allocate %dth block of %d ints\n", 
					i, buf_len );
				fprintf( stderr, "[%i] Aborting\n",rank );fflush(stderr);
				MPI_Abort( MPI_COMM_WORLD, 2 );
			}
			if (rank != sendrank) break;
			for (j=0; j<buf_len; j++) {
				b[i][j] = i * buf_len + j;
			}
		}

		/* Loop several times to capture resource leaks */
		for (loop=0; loop<max_loop; loop++) {
			if (rank == sendrank) {
				for (i=0; i<max_req; i++) {
					MPI_Isend( b[i], buf_len, dtype, recvrank, 0, 
						MPI_COMM_WORLD, &r );
					MPI_Request_free( &r ); 
				}
				MPI_Barrier( MPI_COMM_WORLD );
				MPI_Barrier( MPI_COMM_WORLD );
			}
			else {
				MPI_Barrier( MPI_COMM_WORLD );
				for (i=0; i<max_req; i++) {
					MPI_Recv( b[0], buf_len, dtype, sendrank, 0, MPI_COMM_WORLD, 
						&status );
					for (j=0; j<buf_len; j++) {
						if (b[0][j] != i * buf_len + j) {
							errs++;
							fprintf( stdout, 
								"at %d in %dth message, got %d expected %d\n",
								j, i, b[0][j], i * buf_len + j );
							break;
						}
					}
				}
				MPI_Barrier( MPI_COMM_WORLD );
			}
		}//loop
	}
	else
	{
		/* more than two processes just do common barrier*/
		for (loop=0; loop<max_loop; loop++) {
			MPI_Barrier( MPI_COMM_WORLD );
			MPI_Barrier( MPI_COMM_WORLD );
		}
	}

	MPI_Allreduce( &errs, &toterrs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
	if (rank == 0) {
		if (toterrs == 0) printf( " No Errors\n" );
		else              printf( "Found %d errors\n", toterrs );
	}

	MPI_Finalize( );
	return 0;
}
Example #19
int main(int argc, char *argv[])
{
	unsigned char str[154];
	unsigned int arr[] = {9,2,5,8,4,2,4,1,6,9,1,8,9,9,6,1,5,7,0,7,7,4,3,7,6,3,9,5,4,2,3,0,4,4,1,5,3,3,7,2,3,3,7,0,9,4,5,2,8,4,6,\
                              2,1,3,4,1,4,2,6,0,8,5,1,7,3,1,4,4,7,0,5,3,4,4,8,9,1,1,9,8,3,5,1,8,3,4,4,8,3,2,8,1,2,8,7,4,1,8,1,8,0,4,\
                              8,4,2,4,4,5,4,9,1,8,3,4,9,5,6,3,3,1,4,6,4,1,0,2,0,2,5,1,4,8,5,9,9,6,9,4,0,3,6,5,5,9,5,4,2,2,3,7,8,5,9,7};

	long i;

	double t1, t2, Itime;
	int provided;

	/* Allocation */

	v1 = (vector *) malloc (VLEN * sizeof (vector));
	v2 = (vector *) malloc (VLEN * sizeof (vector));
	v3 = (vector *) malloc (VLEN * sizeof (vector));

	fin_sum = (mp_limb_t *) malloc ((2*LIMBS+1) * sizeof (mp_limb_t));
	result = (mp_limb_t *) malloc ((2*LIMBS+1) * sizeof (mp_limb_t));
	q = (mp_limb_t *) malloc (LIMBS * sizeof (mp_limb_t));

	MPI_Init_thread (&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
	MPI_Comm_rank (MPI_COMM_WORLD, &id);
	MPI_Comm_size (MPI_COMM_WORLD, &p);

	MPI_Type_contiguous (2*LIMBS+1, MPI_UNSIGNED_LONG_LONG, &mpntype0);
	MPI_Type_commit (&mpntype0);

	MPI_Type_contiguous (LIMBS, MPI_UNSIGNED_LONG_LONG, &mpntype1);
	MPI_Type_commit (&mpntype1);

	MPI_Op_create ((MPI_User_function *)addmpn, 1, &mpn_sum);

	for (i=0; i<154; ++i)	str[i] = (unsigned char)arr[i];
	mpn_set_str (q, str, 154, 10);
	//if (!id) gmp_printf ("Modulus: %Nd\n", q, LIMBS);

	MPI_Barrier (MPI_COMM_WORLD);

	/* Setting limits for 2 MPI nodes */

	VOffset = BLOCK_LOW(id,p,VLEN);
	VChunk  = BLOCK_SIZE(id,p,VLEN);

	/* Setting limits for NCORES-1 threads */

	for (i=0; i<NCORES-1; ++i)
	{
		VStart[i] = VOffset + BLOCK_LOW(i,NCORES-1,VChunk);
		VEnd[i]   = VOffset + BLOCK_HIGH(i,NCORES-1,VChunk);
	}

	for (i=0; i<VLEN; ++i)	mpn_random (v1[i], LIMBS);
	for (i=0; i<VLEN; ++i)	mpn_random (v2[i], LIMBS);
	for (i=BLOCK_LOW(id,p,VLEN); i<=BLOCK_HIGH(id,p,VLEN); ++i)	mpn_random (v3[i], LIMBS);
		
	MPI_Barrier (MPI_COMM_WORLD);

	t1 = MPI_Wtime ();

	for (i=0; i<NCORES; ++i)
		pthread_create(&threads[i], &attr, VectMul, (void *) i);

	for (i=0; i<NCORES; ++i)
		pthread_join (threads[i], NULL);

	t2 = MPI_Wtime ();
	Itime = t2 - t1;
	if (!id) printf ("Total time taken: %lf\n",Itime);
	
	if (!id) gmp_printf ("Result: %Nd\n", cnum, LIMBS);

	MPI_Op_free(&mpn_sum);
	MPI_Request_free (&Rrqst);
	MPI_Request_free (&Srqst);
	MPI_Finalize ();

	return 0;	
}
Example #20
int
main (int argc, char **argv)
{
  int nprocs = -1;
  int rank = -1;
  int comm = MPI_COMM_WORLD;
  char processor_name[128];
  int namelen = 128;
  int buf[BUF_SIZE * 2];
  int i, j, k, index, outcount, flag;
  int indices[2];
  MPI_Request aReq[2];
  MPI_Status aStatus[2];

  /* init */
  MPI_Init (&argc, &argv);
  MPI_Comm_size (comm, &nprocs);
  MPI_Comm_rank (comm, &rank);
  MPI_Get_processor_name (processor_name, &namelen);
  printf ("(%d) is alive on %s\n", rank, processor_name);
  fflush (stdout);

  if (rank == 0) {
    /* set up persistent sends... */
    MPI_Send_init (&buf[0], BUF_SIZE, MPI_INT, 1, 0, comm, &aReq[0]);
    MPI_Send_init (&buf[BUF_SIZE], BUF_SIZE, MPI_INT, 1, 1, comm, &aReq[1]);

    /* initialize the send buffers */
    for (i = 0; i < BUF_SIZE; i++) {
      buf[i] = i;
      buf[BUF_SIZE + i] = BUF_SIZE - 1 - i;
    }
  }

  for (k = 0; k < 4; k++) {
    if (rank == 1) {
      /* zero out the receive buffers */
      bzero (buf, sizeof(int) * BUF_SIZE * 2);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0) {
      /* start the persistent sends... */
      if (k % 2) {
	MPI_Startall (2, &aReq[0]);
      }
      else {
	for (j = 0; j < 2; j++) {
	  MPI_Start (&aReq[j]);
	}
      }
    
      /* complete the sends */
      if (k < 2) {
	/* use MPI_Waitany */
	for (j = 0; j < 2; j++)
	  MPI_Waitany (2, aReq, &index, aStatus);
      }
      else {
	/* use MPI_Waitsome */
	j = 0;
	while (j < 2) {
	  MPI_Waitsome (2, aReq, &outcount, indices, aStatus);
	  j += outcount;
	}
      }
    }
    else if (rank == 1) {
      /* set up receives for all of the sends */
      for (j = 0; j < 2; j++) {
	MPI_Irecv (&buf[j * BUF_SIZE], BUF_SIZE, 
		   MPI_INT, 0, j, comm, &aReq[j]);
      }
      /* complete all of the receives... */
      MPI_Waitall (2, aReq, aStatus);
    }
  }

  MPI_Barrier(MPI_COMM_WORLD);

  if (rank == 0) {
    /* free the persistent requests */
    for (i = 0 ; i < 2; i++) {
      MPI_Request_free (&aReq[i]);
    }
  }

  MPI_Finalize ();
  printf ("(%d) Finished normally\n", rank);
}
Example #21
int main (int argc, char *argv[])
{

  MPI_Init (&argc, &argv);
  
  int nProc, iProc;

  MPI_Comm_rank (MPI_COMM_WORLD, &iProc);
  MPI_Comm_size (MPI_COMM_WORLD, &nProc);

  // number of threads
  const int NTHREADS = 6;

  // number of buffers
  const int NWAY     = 2;

  // left neighbour
  const int left  = LEFT(iProc, nProc);

  // right neighbour
  const int right = RIGHT(iProc, nProc);

  // allocate array of for local vector, left halo and right halo
  double* array = malloc (NWAY * (NTHREADS+2) * 2 * VLEN * sizeof (double));
  ASSERT (array != 0);

  // initial buffer id
  int buffer_id = 0;

  // initialize data
  data_init (NTHREADS, iProc, buffer_id, array);
  
  omp_set_num_threads (NTHREADS);

  MPI_Barrier (MPI_COMM_WORLD);

  double time = -now();

#pragma omp parallel default (shared) firstprivate (buffer_id)
  {
    const int tid = omp_get_thread_num();

    for (int k = 0; k < NITER; ++k)
    {
      for ( int i = 0; i < nProc * NTHREADS; ++i )
      {

	const int slice_id    = tid + 1;
	const int left_halo   = 0;
	const int right_halo  = NTHREADS+1;

	if (tid == 0)
	  {

	    MPI_Request send_req[2];
	    MPI_Request recv_req[2];

	    // post recv
	    MPI_Irecv ( &array_ELEM_right (buffer_id, left_halo, 0), VLEN, MPI_DOUBLE
		       , left, i, MPI_COMM_WORLD, &recv_req[0]);

	    // post recv
	    MPI_Irecv ( &array_ELEM_left (buffer_id, right_halo, 0), VLEN, MPI_DOUBLE
		       , right, i, MPI_COMM_WORLD, &recv_req[1]);

	    // issue send
	    MPI_Isend ( &array_ELEM_right (buffer_id, right_halo - 1, 0), VLEN, MPI_DOUBLE
			 , right, i, MPI_COMM_WORLD, &send_req[0]);

	    // issue send
	    MPI_Isend ( &array_ELEM_left (buffer_id, left_halo + 1, 0), VLEN, MPI_DOUBLE
			 , left, i, MPI_COMM_WORLD, &send_req[1]);

	    // free send request
	    MPI_Request_free(&send_req[0]);
	    
	    MPI_Request_free(&send_req[1]);

	    // wait for Irecv, Isend
	    MPI_Waitall (2, recv_req, MPI_STATUSES_IGNORE);

	  }

#pragma omp barrier

	// compute data, read from id "buffer_id", write to id "1 - buffer_id"
	data_compute (NTHREADS, array, 1 - buffer_id, buffer_id, slice_id);

#pragma omp barrier

	// alternate the buffer
	buffer_id = 1 - buffer_id;

      }
    }
  }
  time += now();

  data_verify (NTHREADS, iProc, ( NITER * nProc * NTHREADS ) % NWAY, array);

  printf ("# mpi %s nProc %d vlen %i niter %d nthreads %i nway %i time %g\n"
         , argv[0], nProc, VLEN, NITER, NTHREADS, NWAY, time
         );
  
  MPI_Finalize();

  free (array);

  return EXIT_SUCCESS;
}
Example #22
HYPRE_Int
hypre_MPI_Request_free( hypre_MPI_Request *request )
{
   return (HYPRE_Int) MPI_Request_free(request);
}
Example #23
void _XMP_reflect_pcopy_sched_dim(_XMP_array_t *adesc, int target_dim,
				  int lwidth, int uwidth, int is_periodic, int shadow_comm_type){

  if (lwidth == 0 && uwidth == 0) return;

  _XMP_array_info_t *ai = &(adesc->info[target_dim]);
  _XMP_array_info_t *ainfo = adesc->info;
  _XMP_ASSERT(ai->align_manner == _XMP_N_ALIGN_BLOCK);
  _XMP_ASSERT(ai->is_shadow_comm_member);

  if (lwidth > ai->shadow_size_lo || uwidth > ai->shadow_size_hi){
    _XMP_fatal("reflect width is larger than shadow width.");
  }

  _XMP_reflect_sched_t *reflect = ai->reflect_sched;

  int target_tdim = ai->align_template_index;
  _XMP_nodes_info_t *ni = adesc->align_template->chunk[target_tdim].onto_nodes_info;

  if (ni->size == 1 && !is_periodic) return;

  int ndims = adesc->dim;

  // 0-origin
  int my_pos = ni->rank;
  int lb_pos = _XMP_get_owner_pos(adesc, target_dim, ai->ser_lower);
  int ub_pos = _XMP_get_owner_pos(adesc, target_dim, ai->ser_upper);

  int lo_pos = (my_pos == lb_pos) ? ub_pos : my_pos - 1;
  int hi_pos = (my_pos == ub_pos) ? lb_pos : my_pos + 1;

  MPI_Comm *comm = adesc->align_template->onto_nodes->comm;
  int my_rank = adesc->align_template->onto_nodes->comm_rank;

  int lo_rank = my_rank + (lo_pos - my_pos) * ni->multiplier;
  int hi_rank = my_rank + (hi_pos - my_pos) * ni->multiplier;

  int count = 0, blocklength = 0;
  long long stride = 0;

  int type_size = adesc->type_size;
  void *array_addr = adesc->array_addr_p;

  void *lo_send_array = NULL, *lo_recv_array = NULL;
  void *hi_send_array = NULL, *hi_recv_array = NULL;

  void *lo_send_buf = NULL;
  void *lo_recv_buf = NULL;
  void *hi_send_buf = NULL;
  void *hi_recv_buf = NULL;

  int lo_buf_size = 0;
  int hi_buf_size = 0;

  if (reflect->prev_pcopy_sched_type &&
      lwidth == reflect->lo_width &&
      uwidth == reflect->hi_width &&
      is_periodic == reflect->is_periodic){
    if ((adesc->order == MPI_ORDER_FORTRAN && target_dim != ndims - 1) ||
	(adesc->order == MPI_ORDER_C && target_dim != 0)){
      goto init_comm;
    }
    else if (reflect->prev_pcopy_sched_type != shadow_comm_type){
      count = reflect->count;
      blocklength = reflect->blocklength;
      stride = reflect->stride;
      goto alloc_buf;
    }
  }
  
  //
  // setup data_type
  //

  if (adesc->order == MPI_ORDER_FORTRAN){ /* for XMP/F */

    count = 1;
    blocklength = type_size;
    stride = ainfo[0].alloc_size * type_size;

    for (int i = ndims - 2; i >= target_dim; i--){
      count *= ainfo[i+1].alloc_size;
    }

    for (int i = 1; i <= target_dim; i++){
      blocklength *= ainfo[i-1].alloc_size;
      stride *= ainfo[i].alloc_size;
    }

  }
  else if (adesc->order == MPI_ORDER_C){ /* for XMP/C */

    count = 1;
    blocklength = type_size;
    stride = ainfo[ndims-1].alloc_size * type_size;

    for (int i = 1; i <= target_dim; i++){
      count *= ainfo[i-1].alloc_size;
    }

    for (int i = ndims - 2; i >= target_dim; i--){
      blocklength *= ainfo[i+1].alloc_size;
      stride *= ainfo[i].alloc_size;
    }

  }
  else {
    _XMP_fatal("cannot determin the base language.");
  }

  //
  // calculate base address
  //

 alloc_buf:
  
  // for lower reflect

  if (lwidth){

    lo_send_array = array_addr;
    lo_recv_array = array_addr;

    for (int i = 0; i < ndims; i++) {

      int lb_send, lb_recv;
      unsigned long long dim_acc;

      if (i == target_dim) {
	lb_send = ainfo[i].local_upper - lwidth + 1;
	lb_recv = ainfo[i].shadow_size_lo - lwidth;
      }
      else {
	// Note: including shadow area
	lb_send = 0;
	lb_recv = 0;
      }

      dim_acc = ainfo[i].dim_acc;

      lo_send_array = (void *)((char *)lo_send_array + lb_send * dim_acc * type_size);
      lo_recv_array = (void *)((char *)lo_recv_array + lb_recv * dim_acc * type_size);

    }

  }

  // for upper reflect

  if (uwidth){

    hi_send_array = array_addr;
    hi_recv_array = array_addr;

    for (int i = 0; i < ndims; i++) {

      int lb_send, lb_recv;
      unsigned long long dim_acc;

      if (i == target_dim) {
	lb_send = ainfo[i].local_lower;
	lb_recv = ainfo[i].local_upper + 1;
      }
      else {
	// Note: including shadow area
	lb_send = 0;
	lb_recv = 0;
      }

      dim_acc = ainfo[i].dim_acc;

      hi_send_array = (void *)((char *)hi_send_array + lb_send * dim_acc * type_size);
      hi_recv_array = (void *)((char *)hi_recv_array + lb_recv * dim_acc * type_size);

    }

  }

  //
  // Allocate buffers
  //

  if (reflect->prev_pcopy_sched_type == _XMP_COMM_REFLECT &&
      ((adesc->order == MPI_ORDER_FORTRAN && target_dim == ndims - 1) ||
       (adesc->order == MPI_ORDER_C && target_dim == 0))){
    ;
  }
  else {
    _XMP_free(reflect->lo_send_buf);
    _XMP_free(reflect->lo_recv_buf);
    _XMP_free(reflect->hi_send_buf);
    _XMP_free(reflect->hi_recv_buf);
  }

  // for lower reflect

  if (lwidth){

    lo_buf_size = lwidth * blocklength * count;

    if (shadow_comm_type == _XMP_COMM_REFLECT &&
	((adesc->order == MPI_ORDER_FORTRAN && target_dim == ndims - 1) ||
	 (adesc->order == MPI_ORDER_C && target_dim == 0))){
      lo_send_buf = lo_send_array;
      lo_recv_buf = lo_recv_array;
    }
    else {
      _XMP_TSTART(t0);
      lo_send_buf = _XMP_alloc(lo_buf_size);
      lo_recv_buf = _XMP_alloc(lo_buf_size);
      _XMP_TEND2(xmptiming_.t_mem, xmptiming_.tdim_mem[target_dim], t0);
    }

  }

  // for upper reflect

  if (uwidth){

    hi_buf_size = uwidth * blocklength * count;

    if (shadow_comm_type == _XMP_COMM_REFLECT &&
	((adesc->order == MPI_ORDER_FORTRAN && target_dim == ndims - 1) ||
	 (adesc->order == MPI_ORDER_C && target_dim == 0))){
      hi_send_buf = hi_send_array;
      hi_recv_buf = hi_recv_array;
    }
    else {
      _XMP_TSTART(t0);
      hi_send_buf = _XMP_alloc(hi_buf_size);
      hi_recv_buf = _XMP_alloc(hi_buf_size);
      _XMP_TEND2(xmptiming_.t_mem, xmptiming_.tdim_mem[target_dim], t0);
    }

  }

  //
  // cache schedule
  //

  reflect->count = count;
  reflect->blocklength = blocklength;
  reflect->stride = stride;

  reflect->lo_send_array = lo_send_array;
  reflect->lo_recv_array = lo_recv_array;
  reflect->hi_send_array = hi_send_array;
  reflect->hi_recv_array = hi_recv_array;

  reflect->lo_send_buf = lo_send_buf;
  reflect->lo_recv_buf = lo_recv_buf;
  reflect->hi_send_buf = hi_send_buf;
  reflect->hi_recv_buf = hi_recv_buf;

  //
  // initialize communication
  //

  int src, dst;

 init_comm:
  
  if (!is_periodic && my_pos == lb_pos){ // no periodic
    lo_rank = MPI_PROC_NULL;
  }

  if (!is_periodic && my_pos == ub_pos){ // no periodic
    hi_rank = MPI_PROC_NULL;
  }

  lo_buf_size = lwidth * reflect->blocklength * reflect->count;
  hi_buf_size = uwidth * reflect->blocklength * reflect->count;

  // for lower shadow

  if (lwidth){
    src = lo_rank;
    dst = hi_rank;
  }
  else {
    src = MPI_PROC_NULL;
    dst = MPI_PROC_NULL;
  }

  if (shadow_comm_type == _XMP_COMM_REDUCE_SHADOW){
    if (reflect->req_reduce[0] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req_reduce[0]);
    }
	
    if (reflect->req_reduce[1] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req_reduce[1]);
    }

    MPI_Send_init(reflect->lo_recv_buf, lo_buf_size, MPI_BYTE, src,
		  _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req_reduce[0]);
    MPI_Recv_init(reflect->lo_send_buf, lo_buf_size, MPI_BYTE, dst,
		  _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req_reduce[1]);
  }
  else {
    if (reflect->req[0] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req[0]);
    }
	
    if (reflect->req[1] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req[1]);
    }

    MPI_Recv_init(reflect->lo_recv_buf, lo_buf_size, MPI_BYTE, src,
		  _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req[0]);
    MPI_Send_init(reflect->lo_send_buf, lo_buf_size, MPI_BYTE, dst,
		  _XMP_N_MPI_TAG_REFLECT_LO, *comm, &reflect->req[1]);
  }
  
  // for upper shadow

  if (uwidth){
    src = hi_rank;
    dst = lo_rank;
  }
  else {
    src = MPI_PROC_NULL;
    dst = MPI_PROC_NULL;
  }

  if (shadow_comm_type == _XMP_COMM_REDUCE_SHADOW){
    if (reflect->req_reduce[2] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req_reduce[2]);
    }
	
    if (reflect->req_reduce[3] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req_reduce[3]);
    }

    MPI_Send_init(reflect->hi_recv_buf, hi_buf_size, MPI_BYTE, src,
		  _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req_reduce[2]);
    MPI_Recv_init(reflect->hi_send_buf, hi_buf_size, MPI_BYTE, dst,
		  _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req_reduce[3]);
  }
  else {
    if (reflect->req[2] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req[2]);
    }
	
    if (reflect->req[3] != MPI_REQUEST_NULL){
      MPI_Request_free(&reflect->req[3]);
    }

    MPI_Recv_init(reflect->hi_recv_buf, hi_buf_size, MPI_BYTE, src,
		  _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req[2]);
    MPI_Send_init(reflect->hi_send_buf, hi_buf_size, MPI_BYTE, dst,
		  _XMP_N_MPI_TAG_REFLECT_HI, *comm, &reflect->req[3]);
  }
  
  reflect->prev_pcopy_sched_type = shadow_comm_type;
  
  reflect->lo_rank = lo_rank;
  reflect->hi_rank = hi_rank;

}
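Every persistent request in the schedule above is replaced the same way: free the previous handle if one exists, then build a new one with MPI_Recv_init/MPI_Send_init. Boiled down to a single request, the pattern is roughly (req, buf, size, peer, tag and comm stand in for the cached schedule fields):

if (req != MPI_REQUEST_NULL)
    MPI_Request_free(&req);   /* the cached request is inactive at this point */
MPI_Recv_init(buf, size, MPI_BYTE, peer, tag, comm, &req);
/* later: MPI_Start(&req); ... MPI_Wait(&req, MPI_STATUS_IGNORE); */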
Example #24
dart_ret_t dart_waitall(
  dart_handle_t * handle,
  size_t          n)
{
  int i, r_n;
  int num_handles = (int)n;
  DART_LOG_DEBUG("dart_waitall()");
  if (n == 0) {
    DART_LOG_ERROR("dart_waitall > number of handles = 0");
    return DART_OK;
  }
  if (n > INT_MAX) {
    DART_LOG_ERROR("dart_waitall ! number of handles > INT_MAX");
    return DART_ERR_INVAL;
  }
  DART_LOG_DEBUG("dart_waitall: number of handles: %d", num_handles);
  if (*handle) {
    MPI_Status  *mpi_sta;
    MPI_Request *mpi_req;
    mpi_req = (MPI_Request *) malloc(num_handles * sizeof(MPI_Request));
    mpi_sta = (MPI_Status *)  malloc(num_handles * sizeof(MPI_Status));
    /*
     * copy requests from DART handles to MPI request array:
     */
    DART_LOG_TRACE("dart_waitall: copying DART handles to MPI request array");
    r_n = 0;
    for (i = 0; i < num_handles; i++) {
      if (handle[i] != NULL) {
        DART_LOG_DEBUG("dart_waitall: -- handle[%d](%p): "
                       "dest:%d win:%"PRIu64" req:%"PRIu64"",
                       i, (void*)handle[i],
                       handle[i]->dest,
                       (uint64_t)handle[i]->win,
                       (uint64_t)handle[i]->request);
        mpi_req[r_n] = handle[i]->request;
        r_n++;
      }
    }
    /*
     * wait for communication of MPI requests:
     */
    DART_LOG_DEBUG("dart_waitall: MPI_Waitall, %d requests from %d handles",
                   r_n, num_handles);
    /* From the MPI 3.1 standard:
     *
     * The i-th entry in array_of_statuses is set to the return
     * status of the i-th operation. Active persistent requests
     * are marked inactive.
     * Requests of any other type are deallocated and the
     * corresponding handles in the array are set to
     * MPI_REQUEST_NULL.
     * The list may contain null or inactive handles.
     * The call sets to empty the status of each such entry.
     */
    if (r_n > 0) {
      if (MPI_Waitall(r_n, mpi_req, mpi_sta) == MPI_SUCCESS) {
        DART_LOG_DEBUG("dart_waitall: MPI_Waitall completed");
      } else {
        DART_LOG_ERROR("dart_waitall: MPI_Waitall failed");
        DART_LOG_TRACE("dart_waitall: free MPI_Request temporaries");
        free(mpi_req);
        DART_LOG_TRACE("dart_waitall: free MPI_Status temporaries");
        free(mpi_sta);
        return DART_ERR_INVAL;
      }
    } else {
      DART_LOG_DEBUG("dart_waitall > number of requests = 0");
      return DART_OK;
    }
    /*
     * copy MPI requests back to DART handles:
     */
    DART_LOG_TRACE("dart_waitall: copying MPI requests back to DART handles");
    r_n = 0;
    for (i = 0; i < num_handles; i++) {
      if (handle[i]) {
        if (mpi_req[r_n] == MPI_REQUEST_NULL) {
          DART_LOG_TRACE("dart_waitall: -- mpi_req[%d] = MPI_REQUEST_NULL",
                         r_n);
        } else {
          DART_LOG_TRACE("dart_waitall: -- mpi_req[%d] = %d",
                         r_n, mpi_req[r_n]);
        }
        DART_LOG_TRACE("dart_waitall: -- mpi_sta[%d].MPI_SOURCE: %d",
                       r_n, mpi_sta[r_n].MPI_SOURCE);
        DART_LOG_TRACE("dart_waitall: -- mpi_sta[%d].MPI_ERROR:  %d:%s",
                       r_n,
                       mpi_sta[r_n].MPI_ERROR,
                       DART__MPI__ERROR_STR(mpi_sta[r_n].MPI_ERROR));
        handle[i]->request = mpi_req[r_n];
        r_n++;
      }
    }
    /*
     * wait for completion of MPI requests at origins and targets:
     */
    DART_LOG_DEBUG("dart_waitall: waiting for remote completion");
    for (i = 0; i < num_handles; i++) {
      if (handle[i]) {
        if (handle[i]->request == MPI_REQUEST_NULL) {
          DART_LOG_TRACE("dart_waitall: -- handle[%d] done (MPI_REQUEST_NULL)",
                         i);
        } else {
          DART_LOG_DEBUG("dart_waitall: -- MPI_Win_flush(handle[%d]: %p))",
                         i, (void*)handle[i]);
          DART_LOG_TRACE("dart_waitall:      handle[%d]->dest: %d",
                         i, handle[i]->dest);
          DART_LOG_TRACE("dart_waitall:      handle[%d]->win:  %"PRIu64"",
                         i, (uint64_t)handle[i]->win);
          DART_LOG_TRACE("dart_waitall:      handle[%d]->req:  %"PRIu64"",
                         i, (uint64_t)handle[i]->request);
          /*
           * MPI_Win_flush to wait for remote completion:
           */
          if (MPI_Win_flush(handle[i]->dest, handle[i]->win) != MPI_SUCCESS) {
            DART_LOG_ERROR("dart_waitall: MPI_Win_flush failed");
            DART_LOG_TRACE("dart_waitall: free MPI_Request temporaries");
            free(mpi_req);
            DART_LOG_TRACE("dart_waitall: free MPI_Status temporaries");
            free(mpi_sta);
            return DART_ERR_INVAL;
          }
          DART_LOG_TRACE("dart_waitall: -- MPI_Request_free");
          if (MPI_Request_free(&handle[i]->request) != MPI_SUCCESS) {
            DART_LOG_ERROR("dart_waitall: MPI_Request_free failed");
            DART_LOG_TRACE("dart_waitall: free MPI_Request temporaries");
            free(mpi_req);
            DART_LOG_TRACE("dart_waitall: free MPI_Status temporaries");
            free(mpi_sta);
            return DART_ERR_INVAL;
          }
        }
      }
    }
    /*
     * free memory:
     */
    DART_LOG_DEBUG("dart_waitall: free handles");
    for (i = 0; i < num_handles; i++) {
      if (handle[i]) {
        /* Free handle resource */
        DART_LOG_TRACE("dart_waitall: -- free handle[%d]: %p",
                       i, (void*)(handle[i]));
        free(handle[i]);
        handle[i] = NULL;
      }
    }
    DART_LOG_TRACE("dart_waitall: free MPI_Request temporaries");
    free(mpi_req);
    DART_LOG_TRACE("dart_waitall: free MPI_Status temporaries");
    free(mpi_sta);
  }
  DART_LOG_DEBUG("dart_waitall > finished");
  return DART_OK;
}
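The MPI 3.1 excerpt quoted inside dart_waitall is the key to mixing request kinds safely: MPI_Waitall deallocates ordinary requests and overwrites their handles with MPI_REQUEST_NULL, whereas persistent requests only become inactive and still need an explicit MPI_Request_free. A small sketch of the difference (peer and the receive buffers are placeholders):

int peer = 0;                       /* placeholder source rank */
int a, b;
MPI_Request reqs[2];

MPI_Irecv(&a, 1, MPI_INT, peer, 0, MPI_COMM_WORLD, &reqs[0]);     /* ordinary   */
MPI_Recv_init(&b, 1, MPI_INT, peer, 1, MPI_COMM_WORLD, &reqs[1]); /* persistent */
MPI_Start(&reqs[1]);

MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
/* reqs[0] == MPI_REQUEST_NULL: already deallocated by MPI_Waitall       */
/* reqs[1] != MPI_REQUEST_NULL: inactive persistent request, free it     */
MPI_Request_free(&reqs[1]);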
Example #25
void
pzgstrs(int_t n, LUstruct_t *LUstruct, 
	ScalePermstruct_t *ScalePermstruct,
	gridinfo_t *grid, doublecomplex *B,
	int_t m_loc, int_t fst_row, int_t ldb, int nrhs,
	SOLVEstruct_t *SOLVEstruct,
	SuperLUStat_t *stat, int *info)
{
/*
 * Purpose
 * =======
 *
 * PZGSTRS solves a system of distributed linear equations
 * A*X = B with a general N-by-N matrix A using the LU factorization
 * computed by PZGSTRF.
 * If the equilibration, and row and column permutations were performed,
 * the LU factorization was performed for A1 where
 *     A1 = Pc*Pr*diag(R)*A*diag(C)*Pc^T = L*U
 * and the linear system solved is
 *     A1 * Y = Pc*Pr*B1, where B was overwritten by B1 = diag(R)*B, and
 * the permutation to B1 by Pc*Pr is applied internally in this routine.
 * 
 * Arguments
 * =========
 *
 * n      (input) int (global)
 *        The order of the system of linear equations.
 *
 * LUstruct (input) LUstruct_t*
 *        The distributed data structures storing L and U factors.
 *        The L and U factors are obtained from PZGSTRF for
 *        the possibly scaled and permuted matrix A.
 *        See superlu_zdefs.h for the definition of 'LUstruct_t'.
 *        A may be scaled and permuted into A1, so that
 *        A1 = Pc*Pr*diag(R)*A*diag(C)*Pc^T = L*U
 *
 * grid   (input) gridinfo_t*
 *        The 2D process mesh. It contains the MPI communicator, the number
 *        of process rows (NPROW), the number of process columns (NPCOL),
 *        and my process rank. It is an input argument to all the
 *        parallel routines.
 *        Grid can be initialized by subroutine SUPERLU_GRIDINIT.
 *        See superlu_defs.h for the definition of 'gridinfo_t'.
 *
 * B      (input/output) doublecomplex*
 *        On entry, the distributed right-hand side matrix of the possibly
 *        equilibrated system. That is, B may be overwritten by diag(R)*B.
 *        On exit, the distributed solution matrix Y of the possibly
 *        equilibrated system if info = 0, where Y = Pc*diag(C)^(-1)*X,
 *        and X is the solution of the original system.
 *
 * m_loc  (input) int (local)
 *        The local row dimension of matrix B.
 *
 * fst_row (input) int (global)
 *        The row number of B's first row in the global matrix.
 *
 * ldb    (input) int (local)
 *        The leading dimension of matrix B.
 *
 * nrhs   (input) int (global)
 *        Number of right-hand sides.
 * 
 * SOLVEstruct (output) SOLVEstruct_t* (global)
 *        Contains the information for the communication during the
 *        solution phase.
 *
 * stat   (output) SuperLUStat_t*
 *        Record the statistics about the triangular solves.
 *        See util.h for the definition of 'SuperLUStat_t'.
 *
 * info   (output) int*
 * 	   = 0: successful exit
 *	   < 0: if info = -i, the i-th argument had an illegal value
 *        
 */
    Glu_persist_t *Glu_persist = LUstruct->Glu_persist;
    LocalLU_t *Llu = LUstruct->Llu;
    doublecomplex alpha = {1.0, 0.0};
    doublecomplex zero = {0.0, 0.0};
    doublecomplex *lsum;  /* Local running sum of the updates to B-components */
    doublecomplex *x;     /* X component at step k. */
		    /* NOTE: x and lsum are of same size. */
    doublecomplex *lusup, *dest;
    doublecomplex *recvbuf, *tempv;
    doublecomplex *rtemp; /* Result of full matrix-vector multiply. */
    int_t  **Ufstnz_br_ptr = Llu->Ufstnz_br_ptr;
    int_t  *Urbs, *Urbs1; /* Number of row blocks in each block column of U. */
    Ucb_indptr_t **Ucb_indptr;/* Vertical linked list pointing to Uindex[] */
    int_t  **Ucb_valptr;      /* Vertical linked list pointing to Unzval[] */
    int_t  iam, kcol, krow, mycol, myrow;
    int_t  i, ii, il, j, jj, k, lb, ljb, lk, lptr, luptr;
    int_t  nb, nlb, nub, nsupers;
    int_t  *xsup, *supno, *lsub, *usub;
    int_t  *ilsum;    /* Starting position of each supernode in lsum (LOCAL)*/
    int_t  Pc, Pr;
    int    knsupc, nsupr;
    int    ldalsum;   /* Number of lsum entries locally owned. */
    int    maxrecvsz, p, pi;
    int_t  **Lrowind_bc_ptr;
    doublecomplex **Lnzval_bc_ptr;
    MPI_Status status;
#ifdef ISEND_IRECV
    MPI_Request *send_req, recv_req;
#endif
    pxgstrs_comm_t *gstrs_comm = SOLVEstruct->gstrs_comm;

    /*-- Counts used for L-solve --*/
    int_t  *fmod;         /* Modification count for L-solve --
                             Count the number of local block products to
                             be summed into lsum[lk]. */
    int_t  **fsendx_plist = Llu->fsendx_plist;
    int_t  nfrecvx = Llu->nfrecvx; /* Number of X components to be recv'd. */
    int_t  *frecv;        /* Count of lsum[lk] contributions to be received
                             from processes in this row. 
                             It is only valid on the diagonal processes. */
    int_t  nfrecvmod = 0; /* Count of total modifications to be recv'd. */
    int_t  nleaf = 0, nroot = 0;

    /*-- Counts used for U-solve --*/
    int_t  *bmod;         /* Modification count for U-solve. */
    int_t  **bsendx_plist = Llu->bsendx_plist;
    int_t  nbrecvx = Llu->nbrecvx; /* Number of X components to be recv'd. */
    int_t  *brecv;        /* Count of modifications to be recv'd from
			     processes in this row. */
    int_t  nbrecvmod = 0; /* Count of total modifications to be recv'd. */
    double t;
#if ( DEBUGlevel>=2 )
    int_t Ublocks = 0;
#endif

    t = SuperLU_timer_();

    /* Test input parameters. */
    *info = 0;
    if ( n < 0 ) *info = -1;
    else if ( nrhs < 0 ) *info = -9;
    if ( *info ) {
	pxerbla("PZGSTRS", grid, -*info);
	return;
    }
	
    /*
     * Initialization.
     */
    iam = grid->iam;
    Pc = grid->npcol;
    Pr = grid->nprow;
    myrow = MYROW( iam, grid );
    mycol = MYCOL( iam, grid );
    xsup = Glu_persist->xsup;
    supno = Glu_persist->supno;
    nsupers = supno[n-1] + 1;
    Lrowind_bc_ptr = Llu->Lrowind_bc_ptr;
    Lnzval_bc_ptr = Llu->Lnzval_bc_ptr;
    nlb = CEILING( nsupers, Pr ); /* Number of local block rows. */

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC(iam, "Enter pzgstrs()");
#endif

    stat->ops[SOLVE] = 0.0;
    Llu->SolveMsgSent = 0;

    /* Save the count to be altered so it can be used by
       subsequent call to PDGSTRS. */
    if ( !(fmod = intMalloc_dist(nlb)) )
	ABORT("Calloc fails for fmod[].");
    for (i = 0; i < nlb; ++i) fmod[i] = Llu->fmod[i];
    if ( !(frecv = intMalloc_dist(nlb)) )
	ABORT("Malloc fails for frecv[].");
    Llu->frecv = frecv;

#ifdef ISEND_IRECV
    k = SUPERLU_MAX( Llu->nfsendx, Llu->nbsendx ) + nlb;
    if ( !(send_req = (MPI_Request*) SUPERLU_MALLOC(k*sizeof(MPI_Request))) )
	ABORT("Malloc fails for send_req[].");
#endif

#ifdef _CRAY
    ftcs1 = _cptofcd("L", strlen("L"));
    ftcs2 = _cptofcd("N", strlen("N"));
    ftcs3 = _cptofcd("U", strlen("U"));
#endif


    /* Obtain ilsum[] and ldalsum for process column 0. */
    ilsum = Llu->ilsum;
    ldalsum = Llu->ldalsum;

    /* Allocate working storage. */
    knsupc = sp_ienv_dist(3);
    maxrecvsz = knsupc * nrhs + SUPERLU_MAX( XK_H, LSUM_H );
    if ( !(lsum = doublecomplexCalloc_dist(((size_t)ldalsum)*nrhs + nlb*LSUM_H)) )
	ABORT("Calloc fails for lsum[].");
    if ( !(x = doublecomplexMalloc_dist(ldalsum * nrhs + nlb * XK_H)) )
	ABORT("Malloc fails for x[].");
    if ( !(recvbuf = doublecomplexMalloc_dist(maxrecvsz)) )
	ABORT("Malloc fails for recvbuf[].");
    if ( !(rtemp = doublecomplexCalloc_dist(maxrecvsz)) )
	ABORT("Malloc fails for rtemp[].");

    
    /*---------------------------------------------------
     * Forward solve Ly = b.
     *---------------------------------------------------*/
    /* Redistribute B into X on the diagonal processes. */
    pzReDistribute_B_to_X(B, m_loc, nrhs, ldb, fst_row, ilsum, x, 
			  ScalePermstruct, Glu_persist, grid, SOLVEstruct);

    /* Set up the headers in lsum[]. */
    ii = 0;
    for (k = 0; k < nsupers; ++k) {
	knsupc = SuperSize( k );
	krow = PROW( k, grid );
	if ( myrow == krow ) {
	    lk = LBi( k, grid );   /* Local block number. */
	    il = LSUM_BLK( lk );
	    lsum[il - LSUM_H].r = k;/* Block number prepended in the header.*/
	    lsum[il - LSUM_H].i = 0;
	}
	ii += knsupc;
    }

    /*
     * Compute frecv[] and nfrecvmod counts on the diagonal processes.
     */
    {
	superlu_scope_t *scp = &grid->rscp;

	for (k = 0; k < nsupers; ++k) {
	    krow = PROW( k, grid );
	    if ( myrow == krow ) {
		lk = LBi( k, grid );    /* Local block number. */
		kcol = PCOL( k, grid ); /* Root process in this row scope. */
		if ( mycol != kcol && fmod[lk] )
		    i = 1;  /* Contribution from non-diagonal process. */
		else i = 0;
		MPI_Reduce( &i, &frecv[lk], 1, mpi_int_t,
			   MPI_SUM, kcol, scp->comm );
		if ( mycol == kcol ) { /* Diagonal process. */
		    nfrecvmod += frecv[lk];
		    if ( !frecv[lk] && !fmod[lk] ) ++nleaf;
#if ( DEBUGlevel>=2 )
		    printf("(%2d) frecv[%4d]  %2d\n", iam, k, frecv[lk]);
		    assert( frecv[lk] < Pc );
#endif
		}
	    }
	}
    }

    /* ---------------------------------------------------------
       Solve the leaf nodes first by all the diagonal processes.
       --------------------------------------------------------- */
#if ( DEBUGlevel>=2 )
    printf("(%2d) nleaf %4d\n", iam, nleaf);
#endif
    for (k = 0; k < nsupers && nleaf; ++k) {
	krow = PROW( k, grid );
	kcol = PCOL( k, grid );
	if ( myrow == krow && mycol == kcol ) { /* Diagonal process */
	    knsupc = SuperSize( k );
	    lk = LBi( k, grid );
	    if ( frecv[lk]==0 && fmod[lk]==0 ) {
		fmod[lk] = -1;  /* Do not solve X[k] in the future. */
		ii = X_BLK( lk );
		lk = LBj( k, grid ); /* Local block number, column-wise. */
		lsub = Lrowind_bc_ptr[lk];
		lusup = Lnzval_bc_ptr[lk];
		nsupr = lsub[1];
#ifdef _CRAY
		CTRSM(ftcs1, ftcs1, ftcs2, ftcs3, &knsupc, &nrhs, &alpha,
		      lusup, &nsupr, &x[ii], &knsupc);
#elif defined (USE_VENDOR_BLAS)
		ztrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, 
		       lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1);
#else
		ztrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, 
		       lusup, &nsupr, &x[ii], &knsupc);
#endif
		stat->ops[SOLVE] += 4 * knsupc * (knsupc - 1) * nrhs
		    + 10 * knsupc * nrhs; /* complex division */
		--nleaf;
#if ( DEBUGlevel>=2 )
		printf("(%2d) Solve X[%2d]\n", iam, k);
#endif
		
		/*
		 * Send Xk to process column Pc[k].
		 */
		for (p = 0; p < Pr; ++p) {
		    if ( fsendx_plist[lk][p] != EMPTY ) {
			pi = PNUM( p, kcol, grid );
#ifdef ISEND_IRECV
			MPI_Isend( &x[ii - XK_H], knsupc * nrhs + XK_H,
				   SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm,
                                   &send_req[Llu->SolveMsgSent++]);
#else
			MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H,
				 SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm );
#endif
#if ( DEBUGlevel>=2 )
			printf("(%2d) Sent X[%2.0f] to P %2d\n",
			       iam, x[ii-XK_H], pi);
#endif
		    }
		}
		/*
		 * Perform local block modifications: lsum[i] -= L_i,k * X[k]
		 */
		nb = lsub[0] - 1;
		lptr = BC_HEADER + LB_DESCRIPTOR + knsupc;
		luptr = knsupc; /* Skip diagonal block L(k,k). */
		
		zlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, knsupc, k,
			   fmod, nb, lptr, luptr, xsup, grid, Llu, 
			   send_req, stat);
	    }
	} /* if diagonal process ... */
    } /* for k ... */

    /* -----------------------------------------------------------
       Compute the internal nodes asynchronously by all processes.
       ----------------------------------------------------------- */
#if ( DEBUGlevel>=2 )
    printf("(%2d) nfrecvx %4d,  nfrecvmod %4d,  nleaf %4d\n",
	   iam, nfrecvx, nfrecvmod, nleaf);
#endif

    while ( nfrecvx || nfrecvmod ) { /* While not finished. */

	/* Receive a message. */
#ifdef ISEND_IRECV
	/* -MPI- FATAL: Remote protocol queue full */
	MPI_Irecv( recvbuf, maxrecvsz, SuperLU_MPI_DOUBLE_COMPLEX,
                 MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &recv_req );
	MPI_Wait( &recv_req, &status );
#else
	MPI_Recv( recvbuf, maxrecvsz, SuperLU_MPI_DOUBLE_COMPLEX,
                  MPI_ANY_SOURCE, MPI_ANY_TAG, grid->comm, &status );
#endif

        k = (*recvbuf).r;

#if ( DEBUGlevel>=2 )
	printf("(%2d) Recv'd block %d, tag %2d\n", iam, k, status.MPI_TAG);
#endif
	
	switch ( status.MPI_TAG ) {
	  case Xk:
	      --nfrecvx;
	      lk = LBj( k, grid ); /* Local block number, column-wise. */
	      lsub = Lrowind_bc_ptr[lk];
	      lusup = Lnzval_bc_ptr[lk];
	      if ( lsub ) {
		  nb   = lsub[0];
		  lptr = BC_HEADER;
		  luptr = 0;
		  knsupc = SuperSize( k );

		  /*
		   * Perform local block modifications: lsum[i] -= L_i,k * X[k]
		   */
		  zlsum_fmod(lsum, x, &recvbuf[XK_H], rtemp, nrhs, knsupc, k,
			     fmod, nb, lptr, luptr, xsup, grid, Llu, 
			     send_req, stat);
	      } /* if lsub */

	      break;

	  case LSUM: /* Receiver must be a diagonal process */
	      --nfrecvmod;
	      lk = LBi( k, grid ); /* Local block number, row-wise. */
	      ii = X_BLK( lk );
	      knsupc = SuperSize( k );
	      tempv = &recvbuf[LSUM_H];
	      RHS_ITERATE(j) {
		  for (i = 0; i < knsupc; ++i)
		      z_add(&x[i + ii + j*knsupc],
			    &x[i + ii + j*knsupc],
			    &tempv[i + j*knsupc]);
	      }

	      if ( (--frecv[lk])==0 && fmod[lk]==0 ) {
		  fmod[lk] = -1; /* Do not solve X[k] in the future. */
		  lk = LBj( k, grid ); /* Local block number, column-wise. */
		  lsub = Lrowind_bc_ptr[lk];
		  lusup = Lnzval_bc_ptr[lk];
		  nsupr = lsub[1];
#ifdef _CRAY
		  CTRSM(ftcs1, ftcs1, ftcs2, ftcs3, &knsupc, &nrhs, &alpha,
			lusup, &nsupr, &x[ii], &knsupc);
#elif defined (USE_VENDOR_BLAS)
		  ztrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, 
			 lusup, &nsupr, &x[ii], &knsupc, 1, 1, 1, 1);
#else
		  ztrsm_("L", "L", "N", "U", &knsupc, &nrhs, &alpha, 
			 lusup, &nsupr, &x[ii], &knsupc);
#endif
		  stat->ops[SOLVE] += 4 * knsupc * (knsupc - 1) * nrhs
		      + 10 * knsupc * nrhs; /* complex division */
#if ( DEBUGlevel>=2 )
		  printf("(%2d) Solve X[%2d]\n", iam, k);
#endif
		
		  /*
		   * Send Xk to process column Pc[k].
		   */
		  kcol = PCOL( k, grid );
		  for (p = 0; p < Pr; ++p) {
		      if ( fsendx_plist[lk][p] != EMPTY ) {
			  pi = PNUM( p, kcol, grid );
#ifdef ISEND_IRECV
			  MPI_Isend( &x[ii-XK_H], knsupc * nrhs + XK_H,
                                     SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm,
                                     &send_req[Llu->SolveMsgSent++]);
#else
			  MPI_Send( &x[ii - XK_H], knsupc * nrhs + XK_H,
				    SuperLU_MPI_DOUBLE_COMPLEX, pi, Xk, grid->comm );
#endif
#if ( DEBUGlevel>=2 )
			  printf("(%2d) Sent X[%2.0f] to P %2d\n",
				 iam, x[ii-XK_H], pi);
#endif
		      }
                  }
		  /*
		   * Perform local block modifications.
		   */
		  nb = lsub[0] - 1;
		  lptr = BC_HEADER + LB_DESCRIPTOR + knsupc;
		  luptr = knsupc; /* Skip diagonal block L(k,k). */

		  zlsum_fmod(lsum, x, &x[ii], rtemp, nrhs, knsupc, k,
			     fmod, nb, lptr, luptr, xsup, grid, Llu,
			     send_req, stat);
	      } /* if */

	      break;

#if ( DEBUGlevel>=2 )
	    default:
	      printf("(%2d) Recv'd wrong message tag %4d\n", status.MPI_TAG);
	      break;
#endif
	  } /* switch */

    } /* while not finished ... */


#if ( PRNTlevel>=2 )
    t = SuperLU_timer_() - t;
    if ( !iam ) printf(".. L-solve time\t%8.2f\n", t);
    t = SuperLU_timer_();
#endif

#if ( DEBUGlevel==2 )
    {
      printf("(%d) .. After L-solve: y =\n", iam);
      for (i = 0, k = 0; k < nsupers; ++k) {
	  krow = PROW( k, grid );
	  kcol = PCOL( k, grid );
	  if ( myrow == krow && mycol == kcol ) { /* Diagonal process */
	      knsupc = SuperSize( k );
	      lk = LBi( k, grid );
	      ii = X_BLK( lk );
	      for (j = 0; j < knsupc; ++j)
		printf("\t(%d)\t%4d\t%.10f\n", iam, xsup[k]+j, x[ii+j]);
	      fflush(stdout);
	  }
	  MPI_Barrier( grid->comm );
      }
    }
#endif

    SUPERLU_FREE(fmod);
    SUPERLU_FREE(frecv);
    SUPERLU_FREE(rtemp);

#ifdef ISEND_IRECV
    for (i = 0; i < Llu->SolveMsgSent; ++i) MPI_Request_free(&send_req[i]);
    Llu->SolveMsgSent = 0;
#endif


    /*---------------------------------------------------
     * Back solve Ux = y.
     *
     * The Y components from the forward solve are already
     * on the diagonal processes.
     *---------------------------------------------------*/

    /* Save the count to be altered so it can be used by a
       subsequent call to PZGSTRS. */
    if ( !(bmod = intMalloc_dist(nlb)) )
	ABORT("Malloc fails for bmod[].");
    for (i = 0; i < nlb; ++i) bmod[i] = Llu->bmod[i];
    if ( !(brecv = intMalloc_dist(nlb)) )
	ABORT("Malloc fails for brecv[].");
    Llu->brecv = brecv;

    /*
     * Compute brecv[] and nbrecvmod counts on the diagonal processes.
     */
    {
	superlu_scope_t *scp = &grid->rscp;

	for (k = 0; k < nsupers; ++k) {
	    krow = PROW( k, grid );
	    if ( myrow == krow ) {
		lk = LBi( k, grid );    /* Local block number. */
		kcol = PCOL( k, grid ); /* Root process in this row scope. */
		if ( mycol != kcol && bmod[lk] )
		    i = 1;  /* Contribution from non-diagonal process. */
		else i = 0;
		MPI_Reduce( &i, &brecv[lk], 1, mpi_int_t,
			   MPI_SUM, kcol, scp->comm );
		if ( mycol == kcol ) { /* Diagonal process. */
		    nbrecvmod += brecv[lk];
		    if ( !brecv[lk] && !bmod[lk] ) ++nroot;
#if ( DEBUGlevel>=2 )
		    printf("(%2d) brecv[%4d]  %2d\n", iam, k, brecv[lk]);
		    assert( brecv[lk] < Pc );
#endif
		}
	    }
	}
    }

    /* Re-initialize lsum to zero. Each block header is already in place. */
    for (k = 0; k < nsupers; ++k) {
	krow = PROW( k, grid );
	if ( myrow == krow ) {
	    knsupc = SuperSize( k );
	    lk = LBi( k, grid );
	    il = LSUM_BLK( lk );
	    dest = &lsum[il];
	    RHS_ITERATE(j) {
		for (i = 0; i < knsupc; ++i) dest[i + j*knsupc] = zero;
	    }
	}
    }
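
The driving loop above drains the message queue with a wildcard receive and dispatches on status.MPI_TAG (Xk vs. LSUM). A minimal standalone sketch of that dispatch pattern, with hypothetical tag values TAG_X and TAG_LSUM standing in for the library's Xk/LSUM constants:

#include <mpi.h>
#include <stdio.h>

#define TAG_X    1   /* hypothetical stand-in for Xk   */
#define TAG_LSUM 2   /* hypothetical stand-in for LSUM */

/* Receive `expected` messages of either kind and dispatch on the tag. */
static void drain_messages(int expected, double *buf, int maxlen, MPI_Comm comm)
{
    MPI_Status status;
    while (expected > 0) {
        MPI_Recv(buf, maxlen, MPI_DOUBLE, MPI_ANY_SOURCE, MPI_ANY_TAG,
                 comm, &status);
        switch (status.MPI_TAG) {
        case TAG_X:
            /* ... apply an X-block update ... */
            --expected;
            break;
        case TAG_LSUM:
            /* ... accumulate a partial sum ... */
            --expected;
            break;
        default:
            fprintf(stderr, "unexpected tag %d from rank %d\n",
                    status.MPI_TAG, status.MPI_SOURCE);
            --expected;
            break;
        }
    }
}
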
Пример #26
0
dart_ret_t dart_waitall_local(
  dart_handle_t * handle,
  size_t          num_handles)
{
  size_t i, r_n = 0;
  DART_LOG_DEBUG("dart_waitall_local()");
  if (num_handles == 0) {
    DART_LOG_DEBUG("dart_waitall_local > number of handles = 0");
    return DART_OK;
  }
  if (num_handles > INT_MAX) {
    DART_LOG_ERROR("dart_waitall_local ! number of handles > INT_MAX");
    return DART_ERR_INVAL;
  }
  if (*handle != NULL) {
    MPI_Status  *mpi_sta;
    MPI_Request *mpi_req;
    mpi_req = (MPI_Request *) malloc(num_handles * sizeof(MPI_Request));
    mpi_sta = (MPI_Status  *) malloc(num_handles * sizeof(MPI_Status));
    for (i = 0; i < num_handles; i++)  {
      if (handle[i] != NULL) {
        DART_LOG_TRACE("dart_waitall_local: -- handle[%d]: %p)",
                       i, (void*)handle[i]);
        DART_LOG_TRACE("dart_waitall_local:    handle[%d]->dest: %d",
                       i, handle[i]->dest);
        DART_LOG_TRACE("dart_waitall_local:    handle[%d]->win:  %d",
                       i, handle[i]->win);
        DART_LOG_TRACE("dart_waitall_local:    handle[%d]->req:  %d",
                       i, handle[i]->request);
        mpi_req[r_n] = handle[i]->request;
        r_n++;
      }
    }
    /*
     * Wait for local completion of MPI requests:
     */
    DART_LOG_DEBUG("dart_waitall_local: "
                   "MPI_Waitall, %d requests from %zu handles",
                   r_n, num_handles);
    if (r_n > 0) {
      if (MPI_Waitall(r_n, mpi_req, mpi_sta) == MPI_SUCCESS) {
        DART_LOG_DEBUG("dart_waitall_local: MPI_Waitall completed");
      } else {
        DART_LOG_ERROR("dart_waitall_local: MPI_Waitall failed");
        DART_LOG_TRACE("dart_waitall_local: free MPI_Request temporaries");
        free(mpi_req);
        DART_LOG_TRACE("dart_waitall_local: free MPI_Status temporaries");
        free(mpi_sta);
        return DART_ERR_INVAL;
      }
    } else {
      DART_LOG_DEBUG("dart_waitall_local > number of requests = 0");
      return DART_OK;
    }
    /*
     * copy MPI requests back to DART handles:
     */
    DART_LOG_TRACE("dart_waitall_local: "
                   "copying MPI requests back to DART handles");
    r_n = 0;
    for (i = 0; i < num_handles; i++) {
      if (handle[i]) {
        DART_LOG_TRACE("dart_waitall_local: -- mpi_sta[%d].MPI_SOURCE: %d",
                       r_n, mpi_sta[r_n].MPI_SOURCE);
        DART_LOG_TRACE("dart_waitall_local: -- mpi_sta[%d].MPI_ERROR:  %d:%s",
                       r_n,
                       mpi_sta[r_n].MPI_ERROR,
                       DART__MPI__ERROR_STR(mpi_sta[r_n].MPI_ERROR));
        if (mpi_req[r_n] != MPI_REQUEST_NULL) {
          if (mpi_sta[r_n].MPI_ERROR == MPI_SUCCESS) {
            DART_LOG_TRACE("dart_waitall_local: -- MPI_Request_free");
            if (MPI_Request_free(&mpi_req[r_n]) != MPI_SUCCESS) {
              DART_LOG_TRACE("dart_waitall_local ! MPI_Request_free failed");
              free(mpi_req);
              free(mpi_sta);
              return DART_ERR_INVAL;
            }
          } else {
            DART_LOG_TRACE("dart_waitall_local: cannot free request %d "
                           "mpi_sta[%d] = %d (%s)",
                           r_n,
                           r_n,
                           mpi_sta[r_n].MPI_ERROR,
                           DART__MPI__ERROR_STR(mpi_sta[r_n].MPI_ERROR));
          }
        }
        DART_LOG_DEBUG("dart_waitall_local: free handle[%d] %p",
                       i, (void*)(handle[i]));
        free(handle[i]);
        handle[i] = NULL;
        r_n++;
      }
    }
    DART_LOG_TRACE("dart_waitall_local: free MPI_Request temporaries");
    free(mpi_req);
    DART_LOG_TRACE("dart_waitall_local: free MPI_Status temporaries");
    free(mpi_sta);
  }
  DART_LOG_DEBUG("dart_waitall_local > finished");
  return DART_OK;
}
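
One detail worth noting in the example above: a request that MPI_Waitall has completed successfully is already deallocated and reset to MPI_REQUEST_NULL, so the explicit MPI_Request_free branch only ever fires for handles that were not completed. A minimal sketch of that rule for ordinary (non-persistent) requests; the helper name is illustrative and not part of DART:

#include <assert.h>
#include <stdlib.h>
#include <mpi.h>

static int waitall_releases(int n, MPI_Request *reqs)
{
    MPI_Status *sta = (MPI_Status *) malloc(n * sizeof(MPI_Status));
    int rc = MPI_Waitall(n, reqs, sta);
    if (rc == MPI_SUCCESS) {
        /* MPI_Waitall has already freed each non-persistent request
         * and set the handle to MPI_REQUEST_NULL; no further
         * MPI_Request_free is required. */
        for (int i = 0; i < n; ++i)
            assert(reqs[i] == MPI_REQUEST_NULL);
    }
    free(sta);
    return rc;
}
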
Пример #27
0
int
main (int argc, char **argv)
{
  int nprocs = -1;
  int rank = -1;
  char processor_name[128];
  int namelen = 128;
  int buf0[buf_size];
  int buf1[buf_size];
  MPI_Request aReq[2];
  MPI_Status aStatus[2];

  MPI_Status status;

  /* init */
  MPI_Init (&argc, &argv);
  MPI_Comm_size (MPI_COMM_WORLD, &nprocs);
  MPI_Comm_rank (MPI_COMM_WORLD, &rank);
  MPI_Get_processor_name (processor_name, &namelen);
  printf ("(%d) is alive on %s\n", rank, processor_name);
  fflush (stdout);

  MPI_Barrier (MPI_COMM_WORLD);

  if (nprocs < 2) {
      printf ("not enough tasks\n");
  }
  else {
    if (rank == 0) {
      memset (buf0, 0, buf_size);

      MPI_Send_init (buf0, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &aReq[0]);
      MPI_Recv_init (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &aReq[1]);

      MPI_Start (&aReq[0]);
      MPI_Start (&aReq[1]);

      MPI_Waitall (2, aReq, aStatus);

      memset (buf0, 1, buf_size);

      MPI_Startall (2, aReq);

      MPI_Waitall (2, aReq, aStatus);
    }
    else if (rank == 1) {
      memset (buf1, 1, buf_size);

      MPI_Recv_init (buf0, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &aReq[0]);
      MPI_Send_init (buf1, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &aReq[1]);

      MPI_Start (&aReq[0]);
      MPI_Start (&aReq[1]);

      MPI_Waitall (2, aReq, aStatus);

      memset (buf1, 0, buf_size);

      MPI_Startall (2, aReq);

      MPI_Waitall (2, aReq, aStatus);
    }
  }

  MPI_Barrier (MPI_COMM_WORLD);

  if (nprocs >= 2 && rank <= 1) {
    /* only ranks 0 and 1 created the persistent requests above */
    MPI_Request_free (&aReq[0]);
    MPI_Request_free (&aReq[1]);
  }

  MPI_Finalize ();
  printf ("(%d) Finished normally\n", rank);
  return 0;
}
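
The example above exercises the full persistent-request lifecycle. The key point for MPI_Request_free is that MPI_Wait/MPI_Waitall only mark a persistent request inactive; it still has to be freed explicitly once it will not be started again. A condensed sketch of that lifecycle (self-send on a single rank, illustrative only):

#include <mpi.h>

int main(int argc, char **argv)
{
    int sbuf = 42, rbuf = 0;
    MPI_Request req[2];
    MPI_Init(&argc, &argv);

    /* Set up once ... */
    MPI_Send_init(&sbuf, 1, MPI_INT, 0, 7, MPI_COMM_SELF, &req[0]);
    MPI_Recv_init(&rbuf, 1, MPI_INT, 0, 7, MPI_COMM_SELF, &req[1]);

    /* ... start and complete as many times as needed ... */
    for (int iter = 0; iter < 3; ++iter) {
        MPI_Startall(2, req);
        MPI_Waitall(2, req, MPI_STATUSES_IGNORE);  /* requests become inactive, not null */
    }

    /* ... and free explicitly when done. */
    MPI_Request_free(&req[0]);
    MPI_Request_free(&req[1]);

    MPI_Finalize();
    return 0;
}
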
Пример #28
0
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <vector>
#include <mpi.h>

/* Next(), Prev() and CalculateValue() are defined elsewhere in the
   original source (ring-neighbour ranks and the finite-difference
   update, respectively); they are assumed to be available here. */

int main (int argc, char** argv) {
  assert(argc == 2);
  int coordinate_steps = atoi(argv[1]);

  double length = 1.0;
  double total_time = 0.6;

  double sigma = 0.3;
  double lambda = 1.0;

  double boundary_value = 0.0;

  double coordinate_step = length / coordinate_steps;
  double time_step = sigma * coordinate_step / lambda;

  int time_steps = total_time / time_step;

  int rv = MPI_Init(&argc, &argv);
  assert(rv == MPI_SUCCESS);

  int size;
  rv = MPI_Comm_size(MPI_COMM_WORLD, &size);
  assert(rv == MPI_SUCCESS);

  int rank;
  rv = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  assert(rv == MPI_SUCCESS);

  MPI_Request request;
  MPI_Status status;

  std::vector<double> current_values(coordinate_steps, 0.0);
  std::vector<double> previous_values(coordinate_steps, 0.0);
  double start_time = MPI_Wtime();

  if (rank == 0) {
    for (int s = 0; s <= coordinate_steps; s++) {
      double coordinate = static_cast<double>(s) / coordinate_steps * length;
      if (coordinate >= 0.1 && coordinate <= 0.2) {
        current_values[s] = 1.0;
      }
    }
  }

  if (rank == 0) {
    for (int s = 0; s <= coordinate_steps; s++) {
      rv = MPI_Isend(&current_values[s], 1, MPI_DOUBLE, Next(rank, size), s, MPI_COMM_WORLD, &request);
      assert(rv == MPI_SUCCESS);
      rv = MPI_Request_free(&request);
      assert(rv == MPI_SUCCESS);
    }
  }

  for (int i = (rank == 0) ? 1 : 0; i * size + rank <= time_steps; i++) {
    for (int s = 0; s <= coordinate_steps; s++) {
      rv = MPI_Recv(&previous_values[s], 1, MPI_DOUBLE, Prev(rank, size), s, MPI_COMM_WORLD, &status);
      assert(rv == MPI_SUCCESS);

      double left_value = (s == 0) ? boundary_value : current_values[s - 1];
      double left_down_value = (s == 0) ? boundary_value : previous_values[s - 1];
      double down_value = previous_values[s];

      current_values[s] = CalculateValue(sigma, left_value, down_value, left_down_value);

      int receiver = (i * size + rank == time_steps) ? 0 : Next(rank, size);
      rv = MPI_Isend(&current_values[s], 1, MPI_DOUBLE, receiver, s, MPI_COMM_WORLD, &request);
      assert(rv == MPI_SUCCESS);
      rv = MPI_Request_free(&request);
      assert(rv == MPI_SUCCESS);
    }
  }

  if (rank == 0) {
    for (int s = 0; s <= coordinate_steps; s++) {
      int sender = time_steps % size;
      rv = MPI_Recv(&previous_values[s], 1, MPI_DOUBLE, sender, s, MPI_COMM_WORLD, &status);
      assert(rv == MPI_SUCCESS);
    }
  }

  if (rank == 0 && coordinate_steps <= 100) {
    for (int s = 0; s <= coordinate_steps; s++) {
      double coordinate = static_cast<double>(s) / coordinate_steps * length;
      printf("%lf %lf\n", coordinate, previous_values[s]);
    }
  }

  if (rank == 0) {
    double current_time = MPI_Wtime();
    printf("%d %lf\n", size, current_time - start_time);
  }

  rv = MPI_Finalize();
  assert(rv == MPI_SUCCESS);

  return EXIT_SUCCESS;
}
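
The example above posts each MPI_Isend and immediately hands the request to MPI_Request_free. That is legal: freeing an active send request only drops the handle, and the transfer itself still completes. The cost is that completion can no longer be observed, so the send buffer must not be modified until delivery is guaranteed by other means (here, the lock-step ring of matching receives). A minimal sketch of the idiom, with that caveat spelled out; the helper name is illustrative:

#include <mpi.h>

/* "Fire and forget": post a nonblocking send and release the handle.
 * The message is still delivered, but the caller must not touch *value
 * until the protocol guarantees the receiver has taken it. */
static void send_and_forget(const double *value, int dest, int tag, MPI_Comm comm)
{
    MPI_Request req;
    MPI_Isend((void *) value, 1, MPI_DOUBLE, dest, tag, comm, &req);
    MPI_Request_free(&req);   /* freed internally once the send completes */
}
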
Пример #29
0
int main(int argc, char *argv[])
{
    MPI_Request r;
    MPI_Status s;
    // int flag;
    int buf[10];
    int rbuf[10];
    int tag = 27;
    int dest = 0;
    int rank, size;

    MPI_Init( &argc, &argv );
    MPI_Comm_size( MPI_COMM_WORLD, &size );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );

    /* Create a persistent send request */
    // every rank prepares a send to rank 0
    MPI_Send_init( buf, 10, MPI_INT, dest, tag, MPI_COMM_WORLD, &r );

    /* Use that request */
    if (rank == 0) {
	// allocate an array of `size` requests for the Irecvs
	MPI_Request *rr = (MPI_Request *)malloc(size * sizeof(MPI_Request));
	for (int i=0; i<size; i++) {
	    // rank 0 will receive from everyone
	    MPI_Irecv( rbuf, 10, MPI_INT, i, tag, MPI_COMM_WORLD, &rr[i] );
	}
	// rank 0 will send to rank 0
	MPI_Start( &r );
	// rank 0 sends to rank 0
	MPI_Wait( &r, &s );
	// rank 0 receives from everyone
	MPI_Waitall( size, rr, MPI_STATUSES_IGNORE );
	free(rr);
    }
    else {
	// non-zero ranks will send to rank 0
	MPI_Start( &r );
	// non-zero ranks send to rank 0
	MPI_Wait( &r, &s );
    }

    MPI_Request_free( &r );


    // if (rank == 0)
    // 	{
    // 	    MPI_Request sr;
    // 	    /* Create a persistent receive request */
    // 	    // rank 0 prepares to receive from everyone
    // 	    MPI_Recv_init( rbuf, 10, MPI_INT, MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &r );
    // 	    // rank 0 will send to rank 0
    // 	    MPI_Isend( buf, 10, MPI_INT, 0, tag, MPI_COMM_WORLD, &sr );
    // 	    for (int i=0; i<size; i++) {
    // 		// rank 0 will receive from everyone
    // 		MPI_Start( &r );
    // 		// rank 0 receives from everyone
    // 		MPI_Wait( &r, &s );
    // 	    }
    // 	    // rank 0 sends to rank 0
    // 	    MPI_Wait( &sr, &s );
    // 	    MPI_Request_free( &r );
    // 	}
    // else {
    // 	// non-zero ranks send to rank 0
    // 	MPI_Send( buf, 10, MPI_INT, 0, tag, MPI_COMM_WORLD );
    // }

    MPI_Finalize();
    return 0;
}
Пример #30
0
void _amps_wait_exchange(amps_Handle handle)
{
  int i;
  int num;

  num = handle -> package -> num_send + handle -> package -> num_recv;

  if(num)
  {
     if(handle -> package -> num_recv) 
     {
	for(i = 0; i <  handle -> package -> num_recv; i++)
	{
	   AMPS_CLEAR_INVOICE(handle -> package -> recv_invoices[i]);
	}
     }
	
     MPI_Waitall(num, handle -> package -> recv_requests, 
		 handle -> package -> status);
  }

#ifdef AMPS_MPI_PACKAGE_LOWSTORAGE

  /* Needed by the DEC's; need better memory allocation strategy */
  /* Need to uncommit packages when not in use */
  /* amps_Commit followed by amps_UnCommit ????? */
  if(handle -> package -> commited) 
  {
     for(i = 0; i < handle -> package -> num_recv; i++)
     {
	if( handle -> package -> recv_invoices[i] -> mpi_type != MPI_DATATYPE_NULL )
	{
	   MPI_Type_free(&(handle -> package -> recv_invoices[i] -> mpi_type));   
	}
    
	MPI_Request_free(&(handle -> package -> recv_requests[i]));
     }
    
     for(i = 0; i < handle -> package -> num_send; i++)
     {
	if( handle -> package -> send_invoices[i] -> mpi_type != MPI_DATATYPE_NULL ) 
	{
	   MPI_Type_free(&handle -> package -> send_invoices[i] -> mpi_type);
	}

	MPI_Request_free(&(handle -> package -> send_requests[i]));
     }
    
     if(handle -> package -> recv_requests)
     {
	free(handle -> package -> recv_requests);
	handle -> package -> recv_requests = NULL;
     }
     if(handle -> package -> status) 
     {
	free(handle -> package -> status);
	handle -> package -> status = NULL;
     }
    
     handle -> package -> commited = FALSE;
  }
#endif
}