Code Example #1
File: collectives.hpp Project: SINTEFMedtek/VTK
 // Starts a nonblocking broadcast of x and returns a request that the caller completes later.
 static request ibroadcast(const communicator& comm, T& x, int root)
 {
   request r;
   MPI_Ibcast(Datatype::address(x),
              Datatype::count(x),
              Datatype::datatype(), root, comm, &r.r);
   return r;
 }
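
The wrapper above only starts the broadcast; completing it is left to whoever holds the returned request. Below is a minimal plain-C sketch of that pattern (buffer and variable names are illustrative and not part of the project above); it polls the request with MPI_Test instead of blocking right away.

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int rank, done = 0;
    double x = 0.0;                 /* the object being broadcast */
    MPI_Request r;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank == 0)
        x = 3.14;                   /* root supplies the value */

    /* Start the nonblocking broadcast; the request is what the
     * ibroadcast() wrapper above hands back to its caller. */
    MPI_Ibcast(&x, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD, &r);

    /* Poll for completion instead of blocking immediately. */
    while (!done)
        MPI_Test(&r, &done, MPI_STATUS_IGNORE);

    printf("rank %d received %g\n", rank, x);
    MPI_Finalize();
    return 0;
}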
Code Example #2
File: nonblocking.c Project: Niharikareddy/mpich
int main(int argc, char **argv)
{
    int errs = 0;
    int i;
    int rank, size;
    int *sbuf = NULL;
    int *rbuf = NULL;
    int *scounts = NULL;
    int *rcounts = NULL;
    int *sdispls = NULL;
    int *rdispls = NULL;
    MPI_Datatype *types = NULL;
    MPI_Comm comm;
    MPI_Request req;

    /* intentionally not using MTest_Init/MTest_Finalize in order to make it
     * easy to take this test and use it as an NBC sanity test outside of the
     * MPICH test suite */
    MPI_Init(&argc, &argv);

    comm = MPI_COMM_WORLD;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

    /* enough space for every process to contribute at least NUM_INTS ints to any
     * collective operation */
    sbuf = malloc(NUM_INTS * size * sizeof(int));
    my_assert(sbuf);
    rbuf = malloc(NUM_INTS * size * sizeof(int));
    my_assert(rbuf);
    scounts = malloc(size * sizeof(int));
    my_assert(scounts);
    rcounts = malloc(size * sizeof(int));
    my_assert(rcounts);
    sdispls = malloc(size * sizeof(int));
    my_assert(sdispls);
    rdispls = malloc(size * sizeof(int));
    my_assert(rdispls);
    types = malloc(size * sizeof(MPI_Datatype));
    my_assert(types);

    for (i = 0; i < size; ++i) {
        sbuf[2 * i] = i;
        sbuf[2 * i + 1] = i;
        rbuf[2 * i] = i;
        rbuf[2 * i + 1] = i;
        scounts[i] = NUM_INTS;
        rcounts[i] = NUM_INTS;
        sdispls[i] = i * NUM_INTS;
        rdispls[i] = i * NUM_INTS;
        types[i] = MPI_INT;
    }

    MPI_Ibarrier(comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ibcast(sbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Igather(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (0 == rank)
        MPI_Igather(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    else
        MPI_Igather(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Igatherv(sbuf, NUM_INTS, MPI_INT, rbuf, rcounts, rdispls, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (0 == rank)
        MPI_Igatherv(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, rcounts, rdispls, MPI_INT, 0, comm,
                     &req);
    else
        MPI_Igatherv(sbuf, NUM_INTS, MPI_INT, rbuf, rcounts, rdispls, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iscatter(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (0 == rank)
        MPI_Iscatter(sbuf, NUM_INTS, MPI_INT, MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, 0, comm, &req);
    else
        MPI_Iscatter(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iscatterv(sbuf, scounts, sdispls, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (0 == rank)
        MPI_Iscatterv(sbuf, scounts, sdispls, MPI_INT, MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, 0, comm,
                      &req);
    else
        MPI_Iscatterv(sbuf, scounts, sdispls, MPI_INT, rbuf, NUM_INTS, MPI_INT, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallgather(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallgather(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, NUM_INTS, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallgatherv(sbuf, NUM_INTS, MPI_INT, rbuf, rcounts, rdispls, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallgatherv(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, rcounts, rdispls, MPI_INT, comm,
                    &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoall(sbuf, NUM_INTS, MPI_INT, rbuf, NUM_INTS, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoall(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, rbuf, NUM_INTS, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoallv(sbuf, scounts, sdispls, MPI_INT, rbuf, rcounts, rdispls, MPI_INT, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoallv(MPI_IN_PLACE, NULL, NULL, MPI_DATATYPE_NULL, rbuf, rcounts, rdispls, MPI_INT,
                   comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoallw(sbuf, scounts, sdispls, types, rbuf, rcounts, rdispls, types, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ialltoallw(MPI_IN_PLACE, NULL, NULL, NULL, rbuf, rcounts, rdispls, types, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ireduce(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (0 == rank)
        MPI_Ireduce(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, 0, comm, &req);
    else
        MPI_Ireduce(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, 0, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallreduce(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iallreduce(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ireduce_scatter(sbuf, rbuf, rcounts, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ireduce_scatter(MPI_IN_PLACE, rbuf, rcounts, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ireduce_scatter_block(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Ireduce_scatter_block(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iscan(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iscan(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iexscan(sbuf, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    MPI_Iexscan(MPI_IN_PLACE, rbuf, NUM_INTS, MPI_INT, MPI_SUM, comm, &req);
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    if (sbuf)
        free(sbuf);
    if (rbuf)
        free(rbuf);
    if (scounts)
        free(scounts);
    if (rcounts)
        free(rcounts);
    if (sdispls)
        free(sdispls);
    if (rdispls)
        free(rdispls);
    if (types)
        free(types);

    if (rank == 0) {
        if (errs)
            fprintf(stderr, "Found %d errors\n", errs);
        else
            printf(" No errors\n");
    }
    MPI_Finalize();
    return 0;
}
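
The test above completes each collective with its own MPI_Wait. Since MPI allows several nonblocking collectives to be outstanding on the same communicator as long as every process starts them in the same order, they can also be batched and completed together. A minimal sketch under that assumption, using illustrative buffer sizes rather than the test's NUM_INTS:

#include <mpi.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    int rank, size;
    MPI_Request reqs[2];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int *bcast_buf = malloc(16 * sizeof(int));        /* broadcast payload */
    int *sbuf = malloc(16 * sizeof(int));             /* allgather input   */
    int *rbuf = malloc(16 * size * sizeof(int));      /* allgather output  */
    for (int i = 0; i < 16; ++i) {
        bcast_buf[i] = (rank == 0) ? i : -1;
        sbuf[i] = rank;
    }

    /* Start two independent nonblocking collectives, in the same order on every rank... */
    MPI_Ibcast(bcast_buf, 16, MPI_INT, 0, MPI_COMM_WORLD, &reqs[0]);
    MPI_Iallgather(sbuf, 16, MPI_INT, rbuf, 16, MPI_INT, MPI_COMM_WORLD, &reqs[1]);

    /* ...and complete both with a single call. */
    MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);

    free(bcast_buf);
    free(sbuf);
    free(rbuf);
    MPI_Finalize();
    return 0;
}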
Code Example #3
File: main.c Project: parisbre56/c-workspace
int main(int argc, char **argv)
{
	MPI_Comm cartComm; //Cartesian communicator
	
	int tid; //Thread id
	int nthreads; //Number of threads
	
	double time_initial; //Start time
	double time_end; //End time
	
	int n; //N is the size of the matrix
	
	//Wrap around
	int wrapAround=1;
	
	#if defined(USE_BROADCAST_ASYNC) //If asynchronous broadcast is enabled, keep a request for testing if the data can be safely modified after being sent
		MPI_Request bcastRequest;
	#endif
	
	//Initialize the MPI environment
	if(MPI_Init(NULL,NULL)!=MPI_SUCCESS) {
		//cerr<<"ERROR"<<endl;
	}
	
	//Get number of threads
	if(MPI_Comm_size(MPI_COMM_WORLD, &nthreads)!=MPI_SUCCESS) {
		//cerr<<"ERROR"<<endl;
	}
	
	//Create a one-dimensional Cartesian grouping that is ring-shaped (wrapAround=1, so it wraps around)
	//Messages are forwarded around the ring and stop once the next process would be the original sender
	if(MPI_Cart_create(MPI_COMM_WORLD,1,&nthreads,&wrapAround,1,&cartComm)!=MPI_SUCCESS) {
		//cerr<<"ERROR"<<endl;
	}
	
	//Get number of threads
	if(MPI_Comm_size(cartComm, &nthreads)!=MPI_SUCCESS) {
		//cerr<<"ERROR"<<endl;
	}
	
	//Get thread id
	if(MPI_Comm_rank(cartComm, &tid)!=MPI_SUCCESS) {
		//cerr<<"ERROR"<<endl;
	}
	int destinationN;
	int destinationP;
	MPI_Cart_shift(cartComm,0,1,&destinationP,&destinationN);
	
	//Set the size of the matrix
	n=kappa*nthreads;
	
	//Initialize rand
	srand(time(NULL)+tid);
	 
	//Create the matrix and split it among the threads
	double ** matrPart; //Holds this thread's part of the matrix
	int partSize;
	
	createMatrix(cartComm,tid,nthreads,n,&matrPart,&partSize);
	
	#ifdef __DEBUG__MODE_EX1__
	if(tid==0) {
		////cout<<"------------------------------------------"<<endl;
	}
	printMatrix(cartComm,tid,nthreads,n,&matrPart,partSize);
	if(tid==0) {
		//cout<<"------------------------------------------"<<endl;
	}
	#endif
	
	//Create a cache for optimization
	//This ensures that there is no difference due to how expensive the functions used to find the right column or processor are
	#ifdef __QuestionExtra__
		int colnum = n*2;
	#else
		int colnum = n+1;
	#endif
	int* thrForCol=malloc(sizeof(int)*colnum); //Tells us which thread each column belongs to
	for (int i=0;i<colnum;++i) {
		thrForCol[i]=threadForCollumn(nthreads,n,i);
	}
	
	bool* colValidForThr=malloc(sizeof(bool)*colnum); //Tells us if the column selected is valid for the current thread
	for (int i=0;i<colnum;++i) {
		colValidForThr[i]=(thrForCol[i]==tid);
	}
	
	int* glColToPartCol=malloc(sizeof(int)*colnum);//Holds the part column for the global column given (-1 if invalid)
	for (int i=0;i<colnum;++i) {
		if(colValidForThr[i]) {
			glColToPartCol[i]=globColToPartCol(tid,nthreads,n,i);
		}
		else {
			glColToPartCol[i]=-1;
		}
	}
	
	int* ptColToGlobCol=malloc(sizeof(int)*partSize);//Holds the global column for the part column given
	for (int i=0;i<partSize;++i) {
		ptColToGlobCol[i]=partColToGlobCol(tid,nthreads,n,i);
	}
	
	//If this is computing the inverse matrix
	#ifdef __QuestionExtra__ 
		bool* inInverseMatrix=malloc(sizeof(bool)*partSize); //True if in the inverse matrix
		for (int i=0;i<partSize;++i) {
			inInverseMatrix[i]=(ptColToGlobCol[i]>=n);
		}
	#endif
	
	//Set the active diagonal to 0
	int k=0;
	int kapOwner;
	
	if(tid==0) {
		//Get the start time
		time_initial  = MPI_Wtime();
	}
	
	//Start solving
	while(k<n) {
		kapOwner=thrForCol[k];
		//If this is the owner of kappa
		if(tid==kapOwner) {
			//Get the column you need
			int curCol=glColToPartCol[k];
			//For row k, divide it so that it becomes 1 and send what you divided it with to the other rows
			//First send what we need to do to it to the other threads (which is [k,k])
			//(Data sent is number to divide with (the other threads should have the correct k and sender))
			#ifndef __SingleProc__
				#ifdef USE_BROADCAST
					MPI_Bcast(&(matrPart[curCol][k]),1,MPI_DOUBLE,kapOwner,cartComm);
				#elif defined(USE_BROADCAST_ASYNC)
					MPI_Ibcast(&(matrPart[curCol][k]),1,MPI_DOUBLE,kapOwner,cartComm, &bcastRequest);
				#else //if not defined USE_BROADCAST
					MPI_Send(&(matrPart[curCol][k]),1,MPI_DOUBLE,destinationN,COL_TAG,cartComm);
				#endif
			#endif
			//Then divide with that number
			for(int jj=curCol+1;jj<partSize;++jj) {
				matrPart[jj][k]=matrPart[jj][k]/matrPart[curCol][k];
			}
			#if !defined(__SingleProc__) && defined(USE_BROADCAST_ASYNC)
				//Wait for the buffer to be read if sending asynchronously, to avoid race conditions
				MPI_Wait(&bcastRequest, MPI_STATUS_IGNORE);
			#endif
			matrPart[curCol][k]=1; //No need to do a real division for the first element
			
			//Then for all rows, subtract and send what we are multiplying to subtract to the other threads
			for(int i=k+1;i<n;++i) {
				//First send
				#ifndef __SingleProc__
					#ifdef USE_BROADCAST
						MPI_Bcast(&(matrPart[curCol][i]),1,MPI_DOUBLE,kapOwner,cartComm);
					#elif defined(USE_BROADCAST_ASYNC)
						MPI_Ibcast(&(matrPart[curCol][i]),1,MPI_DOUBLE,kapOwner,cartComm, &bcastRequest);
					#else //if not defined USE_BROADCAST
						MPI_Send(&(matrPart[curCol][i]),1,MPI_DOUBLE,destinationN,COL_TAG,cartComm);
					#endif
				#endif
				//For all part columns, check to see if we can subtract anything
				//(their global col must be greater than k and the current column)
				for(int jj=curCol+1;jj<partSize;++jj) {
					matrPart[jj][i]=matrPart[jj][i]-matrPart[jj][k]*matrPart[curCol][i];
				}
				#if !defined(__SingleProc__) && defined(USE_BROADCAST_ASYNC)
					//Wait for the buffer to be read if sending asynchronously, to avoid race conditions
					MPI_Wait(&bcastRequest, MPI_STATUS_IGNORE);
				#endif
				//Then subtract
				matrPart[curCol][i]=0; //No need to do real subtraction for the first element
			}
		}
		//Else, if this is not the owner of kappa
		else {
			//Used for optimisation
			bool isValid=false;
			bool isValidArr[partSize];
			for(int j=0;j<partSize;++j) {
				if(ptColToGlobCol[j]>k) {
					isValid=true;
					isValidArr[j]=true;
				}
				else {
					isValidArr[j]=false;
				}
			}
			//First receive the number you need to divide row k by and forward it to the next one
			//(unless next one is sender)
			double recD;
			#ifdef USE_BROADCAST
				MPI_Bcast(&recD,1,MPI_DOUBLE,kapOwner,cartComm);
			#elif defined(USE_BROADCAST_ASYNC)
				MPI_Ibcast(&recD,1,MPI_DOUBLE,kapOwner,cartComm, &bcastRequest);
				MPI_Wait(&bcastRequest, MPI_STATUS_IGNORE);
			#else //if not defined USE_BROADCAST
				MPI_Recv(&recD,1,MPI_DOUBLE,destinationP,MPI_ANY_TAG,cartComm,MPI_STATUS_IGNORE);
				if(destinationN!=kapOwner) {
					MPI_Send(&recD,1,MPI_DOUBLE,destinationN,COL_TAG,cartComm);
				}
			#endif
			//Then divide k row if necessary
			if(isValid) {
				for(int j=0;j<partSize;++j) {
					if(isValidArr[j]) {
						matrPart[j][k]=matrPart[j][k]/recD;
					}
				}
			}
			//Then for all rows below k row, receive what we need to multiply the subtraction with
			//and do that if necessary
			for(int i=k+1;i<n;++i) {
				#ifdef USE_BROADCAST
					MPI_Bcast(&recD,1,MPI_DOUBLE,kapOwner,cartComm);
				#elif defined(USE_BROADCAST_ASYNC)
					MPI_Ibcast(&recD,1,MPI_DOUBLE,kapOwner,cartComm, &bcastRequest);
					MPI_Wait(&bcastRequest, MPI_STATUS_IGNORE);
				#else //if not defined USE_BROADCAST
					MPI_Recv(&recD,1,MPI_DOUBLE,destinationP,MPI_ANY_TAG,cartComm,MPI_STATUS_IGNORE);
					if(destinationN!=kapOwner) {
						MPI_Send(&recD,1,MPI_DOUBLE,destinationN,COL_TAG,cartComm);
					}
				#endif
				if(isValid) {
					for(int j=0;j<partSize;++j) {
						if(isValidArr[j]) {
							matrPart[j][i]=matrPart[j][i]-recD*matrPart[j][k];
						}
					}
				}
			}
		}
		//Finally, increment k
		++k;
		
		#ifdef __DEBUG__MODE_EX1__
		printMatrix(cartComm,tid,nthreads,n,&matrPart,partSize);
		if(tid==0) {
			//cout<<"------------------------------------------"<<endl;
		}
		#endif
	}
	
	k=n-1;
	
	#ifdef __QuestionExtra__ //IF THIS IS COMPUTING THE INVERSE MATRIX
		while(k>0) {
			kapOwner=thrForCol[k];
			
			//If this is the owner of kappa
			if(tid==kapOwner) {
				//Get the column you need
				int curCol=glColToPartCol[k];
				for(int i=k-1;i>=0;--i) {
					#ifndef __SingleProc__
						#ifdef USE_BROADCAST
							MPI_Bcast(&(matrPart[curCol][i]),1,MPI_DOUBLE,kapOwner,cartComm);
						#elif defined(USE_BROADCAST_ASYNC)
							MPI_Ibcast(&(matrPart[curCol][i]),1,MPI_DOUBLE,kapOwner,cartComm, &bcastRequest);
						#else //if not defined USE_BROADCAST
							MPI_Send(&(matrPart[curCol][i]),1,MPI_DOUBLE,destinationN,COL_TAG,cartComm);
						#endif
					#endif
					for(int j=curCol+1;j<partSize;++j) {
						//If this is in the inverse matrix
						if(inInverseMatrix[j]) { 
							matrPart[j][i]=matrPart[j][i]-matrPart[j][k]*matrPart[curCol][i];
						}
					}
					#if !defined(__SingleProc__) && defined(USE_BROADCAST_ASYNC)
						//Wait for the buffer to be read if sending asynchronously, to avoid race conditions
						MPI_Wait(&bcastRequest, MPI_STATUS_IGNORE);
					#endif
					matrPart[curCol][i]=0; //No need to do real subtraction.
				}
			}
			//Else, if this is not the owner of kappa
			else {
				//for all rows above k row, receive what we need to multiply the subtraction with
				//and do that if necessary
				double recD;
				for(int i=k-1;i>=0;--i) {
					#ifdef USE_BROADCAST
						MPI_Bcast(&recD,1,MPI_DOUBLE,kapOwner,cartComm);
					#elif defined(USE_BROADCAST_ASYNC)
						MPI_Ibcast(&recD,1,MPI_DOUBLE,kapOwner,cartComm, &bcastRequest);
						MPI_Wait(&bcastRequest, MPI_STATUS_IGNORE);
					#else //if not defined USE_BROADCAST
						MPI_Recv(&recD,1,MPI_DOUBLE,destinationP,MPI_ANY_TAG,cartComm,MPI_STATUS_IGNORE);
						if(destinationN!=kapOwner) { //Pass it along to the next thread
							MPI_Send(&recD,1,MPI_DOUBLE,destinationN,COL_TAG,cartComm);
						}
					#endif
					//For all columns
					for(int j=0;j<partSize;++j) {
						//If this is in the inverse matrix
						if(inInverseMatrix[j]) {
							matrPart[j][i]=matrPart[j][i]-recD*matrPart[j][k];
						}
					}
				}
			}
			//Finally, decrement kappa
			--k;
			
			#ifdef __DEBUG__MODE_EX1__
				printMatrix(cartComm,tid,nthreads,n,&matrPart,partSize);
				if(tid==0) {
					//cout<<"------------------------------------------"<<endl;
				}
			#endif
		}
	#else //If this is not computing the inverse matrix but doing elimination
		while(k>0) {
			//Used for optimisation
			int endCol;
			bool isValid=colValidForThr[n];
			if(isValid) {
				endCol=glColToPartCol[n];
			}
			
			kapOwner=thrForCol[k];
			//If this is the owner of kappa
			if(tid==kapOwner) {
				//Get the column you need
				int curCol=glColToPartCol[k];
				for(int i=k-1;i>=0;--i) {
					#ifndef __SingleProc__
						#ifdef USE_BROADCAST
							MPI_Bcast(&(matrPart[curCol][i]),1,MPI_DOUBLE,kapOwner,cartComm);
						#elif defined(USE_BROADCAST_ASYNC)
							MPI_Ibcast(&(matrPart[curCol][i]),1,MPI_DOUBLE,kapOwner,cartComm, &bcastRequest);
						#else //if not defined USE_BROADCAST
							MPI_Send(&(matrPart[curCol][i]),1,MPI_DOUBLE,destinationN,COL_TAG,cartComm);
						#endif
					#endif
					if(isValid) {
						matrPart[endCol][i]=matrPart[endCol][i]-matrPart[endCol][k]*matrPart[curCol][i];
					}
					#if !defined(__SingleProc__) && defined(USE_BROADCAST_ASYNC)
						//Wait for the buffer to be read if sending asynchronously, to avoid race conditions
						MPI_Wait(&bcastRequest, MPI_STATUS_IGNORE);
					#endif
					matrPart[curCol][i]=0; //No need to do real subtraction.
				}
			}
			//Else, if this is not the owner of kappa
			else {
				//for all rows above k row, receive what we need to multiply the subtraction with
				//and do that if necessary
				double recD;
				for(int i=k-1;i>=0;--i) {
					#ifdef USE_BROADCAST
						MPI_Bcast(&recD,1,MPI_DOUBLE,kapOwner,cartComm);
					#elif defined(USE_BROADCAST_ASYNC)
						MPI_Ibcast(&recD,1,MPI_DOUBLE,kapOwner,cartComm, &bcastRequest);
						MPI_Wait(&bcastRequest, MPI_STATUS_IGNORE);
					#else //if not defined USE_BROADCAST
						MPI_Recv(&recD,1,MPI_DOUBLE,destinationP,MPI_ANY_TAG,cartComm,MPI_STATUS_IGNORE);
						if(destinationN!=kapOwner) {
							MPI_Send(&recD,1,MPI_DOUBLE,destinationN,COL_TAG,cartComm);
						}
					#endif
					if(isValid) {
						matrPart[endCol][i]=matrPart[endCol][i]-recD*matrPart[endCol][k];
					}
				}
			}
			//Finally, decrement kappa
			--k;
			
			#ifdef __DEBUG__MODE_EX1__
				printMatrix(cartComm,tid,nthreads,n,&matrPart,partSize);
				if(tid==0) {
					//cout<<"------------------------------------------"<<endl;
				}
			#endif
		}
	#endif
	
	if(tid==0) {
		//Get the end time
		time_end = MPI_Wtime();
	}
	
	#ifdef __DEBUG__MODE_EX1__
		//Print the solution
		printMatrix(cartComm,tid,nthreads,n,&matrPart,partSize);
	#endif
	
	if(tid==0) {
		#ifdef __DEBUG__MODE_EX1__
			//Write some info
			//cout<<"Solved in "<<(time_end-time_initial)<<" seconds in "<<nthreads<<" threads using configuration ";
			#ifdef __Question1__
			//cout<<"1:\"serial\""<<endl;
			#endif
			#ifdef __Question2__
			//cout<<"2:\"shuffle\""<<endl;
			#endif
		#else
			/*if(isnan(matrPart[0][0])) {
				//cout<<"INVALID MATRIX: NAN"<<endl;
			}
			else {*/
				printf("%.20f",(time_end-time_initial));
				////cout<<fixed<<setprecision(20)<<(time_end-time_initial)<<endl;
			//}
		#endif
	}
	
	//Delete data
	for(int j=0;j<partSize;++j) {
		free(matrPart[j]);
	}
	free(matrPart);
	
	//Delete cache
	free(thrForCol);
	free(colValidForThr);
	free(glColToPartCol);
	free(ptColToGlobCol);
	#ifdef __QuestionExtra__ 
		free(inInverseMatrix);
	#endif
	
	//Finalize the MPI environment
	if(MPI_Finalize()!=MPI_SUCCESS) {
		////cerr<<tid<<" ERROR"<<endl;
	}
	
	//Exit
	return EXIT_SUCCESS;
}
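
The USE_BROADCAST_ASYNC path above exists so that the pivot value can be in flight while local work that does not depend on it continues. A minimal standalone sketch of that overlap pattern (the pivot value and the local array are illustrative, not taken from the program above):

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int rank;
    double pivot = 0.0;
    double local[4] = {2.0, 4.0, 6.0, 8.0};
    MPI_Request req;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank == 0)
        pivot = 2.0;                      /* the value the owner broadcasts */

    /* Start the broadcast without blocking... */
    MPI_Ibcast(&pivot, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD, &req);

    /* ...do work that does not touch 'pivot' while it is in flight... */
    double partial = 0.0;
    for (int i = 0; i < 4; ++i)
        partial += local[i];

    /* ...and wait before reading the broadcast value. */
    MPI_Wait(&req, MPI_STATUS_IGNORE);

    for (int i = 0; i < 4; ++i)
        local[i] /= pivot;

    printf("rank %d: partial=%g first=%g\n", rank, partial, local[0]);
    MPI_Finalize();
    return 0;
}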
Code Example #4
File: nonblocking3.c Project: R7R8/simgrid
/* Starts a "random" operation on "comm" corresponding to "rndnum" and returns
 * in (*req) a request handle corresponding to that operation.  This call should
 * be considered collective over comm (with a consistent value for "rndnum"),
 * even though the operation may only be a point-to-point request. */
static void start_random_nonblocking(MPI_Comm comm, unsigned int rndnum, MPI_Request *req, struct laundry *l)
{
    int i, j;
    int rank, size;
    int *buf = NULL;
    int *recvbuf = NULL;
    int *sendcounts = NULL;
    int *recvcounts = NULL;
    int *sdispls = NULL;
    int *rdispls = NULL;
    int *sendtypes = NULL;   /* int is used for datatype handles here; this assumes MPI_Datatype is an int-sized handle (true for MPICH-derived MPIs) */
    int *recvtypes = NULL;
    signed char *buf_alias = NULL;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    *req = MPI_REQUEST_NULL;

    l->case_num = -1;
    l->comm = comm;

    l->buf        = buf        = malloc(COUNT*size*sizeof(int));
    l->recvbuf    = recvbuf    = malloc(COUNT*size*sizeof(int));
    l->sendcounts = sendcounts = malloc(size*sizeof(int));
    l->recvcounts = recvcounts = malloc(size*sizeof(int));
    l->sdispls    = sdispls    = malloc(size*sizeof(int));
    l->rdispls    = rdispls    = malloc(size*sizeof(int));
    l->sendtypes  = sendtypes  = malloc(size*sizeof(MPI_Datatype));
    l->recvtypes  = recvtypes  = malloc(size*sizeof(MPI_Datatype));

#define NUM_CASES (21)
    l->case_num = rand_range(rndnum, 0, NUM_CASES);
    switch (l->case_num) {
        case 0: /* MPI_Ibcast */
            for (i = 0; i < COUNT; ++i) {
                if (rank == 0) {
                    buf[i] = i;
                }
                else {
                    buf[i] = 0xdeadbeef;
                }
            }
            MPI_Ibcast(buf, COUNT, MPI_INT, 0, comm, req);
            break;

        case 1: /* MPI_Ibcast (again, but designed to stress scatter/allgather impls) */
            /* FIXME fiddle with PRIME and buffer allocation s.t. PRIME is much larger (1021?) */
            buf_alias = (signed char *)buf;
            my_assert(COUNT*size*sizeof(int) > PRIME); /* sanity */
            for (i = 0; i < PRIME; ++i) {
                if (rank == 0)
                    buf_alias[i] = i;
                else
                    buf_alias[i] = 0xdb;
            }
            for (i = PRIME; i < COUNT * size * sizeof(int); ++i) {
                buf_alias[i] = 0xbf;
            }
            MPI_Ibcast(buf_alias, PRIME, MPI_SIGNED_CHAR, 0, comm, req);
            break;

        case 2: /* MPI_Ibarrier */
            MPI_Ibarrier(comm, req);
            break;

        case 3: /* MPI_Ireduce */
            for (i = 0; i < COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Ireduce(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, 0, comm, req);
            break;

        case 4: /* same again, use a user op and free it before the wait */
            {
                MPI_Op op = MPI_OP_NULL;
                MPI_Op_create(sum_fn, /*commute=*/1, &op);
                for (i = 0; i < COUNT; ++i) {
                    buf[i] = rank + i;
                    recvbuf[i] = 0xdeadbeef;
                }
                MPI_Ireduce(buf, recvbuf, COUNT, MPI_INT, op, 0, comm, req);
                MPI_Op_free(&op);
            }
            break;

        case 5: /* MPI_Iallreduce */
            for (i = 0; i < COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Iallreduce(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req);
            break;

        case 6: /* MPI_Ialltoallv (a weak test, neither irregular nor sparse) */
            for (i = 0; i < size; ++i) {
                sendcounts[i] = COUNT;
                recvcounts[i] = COUNT;
                sdispls[i] = COUNT * i;
                rdispls[i] = COUNT * i;
                for (j = 0; j < COUNT; ++j) {
                    buf[i*COUNT+j] = rank + (i * j);
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
            }
            MPI_Ialltoallv(buf, sendcounts, sdispls, MPI_INT, recvbuf, recvcounts, rdispls, MPI_INT, comm, req);
            break;

        case 7: /* MPI_Igather */
            for (i = 0; i < size*COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Igather(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, 0, comm, req);
            break;

        case 8: /* same test again, just use a dup'ed datatype and free it before the wait */
            {
                MPI_Datatype type = MPI_DATATYPE_NULL;
                MPI_Type_dup(MPI_INT, &type);
                for (i = 0; i < size*COUNT; ++i) {
                    buf[i] = rank + i;
                    recvbuf[i] = 0xdeadbeef;
                }
                MPI_Igather(buf, COUNT, MPI_INT, recvbuf, COUNT, type, 0, comm, req);
                MPI_Type_free(&type); /* should cause implementations that don't refcount
                                         correctly to blow up or hang in the wait */
            }
            break;

        case 9: /* MPI_Iscatter */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    if (rank == 0)
                        buf[i*COUNT+j] = i + j;
                    else
                        buf[i*COUNT+j] = 0xdeadbeef;
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
            }
            MPI_Iscatter(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, 0, comm, req);
            break;

        case 10: /* MPI_Iscatterv */
            for (i = 0; i < size; ++i) {
                /* weak test, just test the regular case where all counts are equal */
                sendcounts[i] = COUNT;
                sdispls[i] = i * COUNT;
                for (j = 0; j < COUNT; ++j) {
                    if (rank == 0)
                        buf[i*COUNT+j] = i + j;
                    else
                        buf[i*COUNT+j] = 0xdeadbeef;
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
            }
            MPI_Iscatterv(buf, sendcounts, sdispls, MPI_INT, recvbuf, COUNT, MPI_INT, 0, comm, req);
            break;

        case 11: /* MPI_Ireduce_scatter */
            for (i = 0; i < size; ++i) {
                recvcounts[i] = COUNT;
                for (j = 0; j < COUNT; ++j) {
                    buf[i*COUNT+j] = rank + i;
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
            }
            MPI_Ireduce_scatter(buf, recvbuf, recvcounts, MPI_INT, MPI_SUM, comm, req);
            break;

        case 12: /* MPI_Ireduce_scatter_block */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    buf[i*COUNT+j] = rank + i;
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
            }
            MPI_Ireduce_scatter_block(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req);
            break;

        case 13: /* MPI_Igatherv */
            for (i = 0; i < size*COUNT; ++i) {
                buf[i] = 0xdeadbeef;
                recvbuf[i] = 0xdeadbeef;
            }
            for (i = 0; i < COUNT; ++i) {
                buf[i] = rank + i;
            }
            for (i = 0; i < size; ++i) {
                recvcounts[i] = COUNT;
                rdispls[i] = i * COUNT;
            }
            MPI_Igatherv(buf, COUNT, MPI_INT, recvbuf, recvcounts, rdispls, MPI_INT, 0, comm, req);
            break;

        case 14: /* MPI_Ialltoall */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    buf[i*COUNT+j] = rank + (i * j);
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
            }
            MPI_Ialltoall(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, comm, req);
            break;

        case 15: /* MPI_Iallgather */
            for (i = 0; i < size*COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Iallgather(buf, COUNT, MPI_INT, recvbuf, COUNT, MPI_INT, comm, req);
            break;

        case 16: /* MPI_Iallgatherv */
            for (i = 0; i < size; ++i) {
                for (j = 0; j < COUNT; ++j) {
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
                recvcounts[i] = COUNT;
                rdispls[i] = i * COUNT;
            }
            for (i = 0; i < COUNT; ++i)
                buf[i] = rank + i;
            MPI_Iallgatherv(buf, COUNT, MPI_INT, recvbuf, recvcounts, rdispls, MPI_INT, comm, req);
            break;

        case 17: /* MPI_Iscan */
            for (i = 0; i < COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Iscan(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req);
            break;

        case 18: /* MPI_Iexscan */
            for (i = 0; i < COUNT; ++i) {
                buf[i] = rank + i;
                recvbuf[i] = 0xdeadbeef;
            }
            MPI_Iexscan(buf, recvbuf, COUNT, MPI_INT, MPI_SUM, comm, req);
            break;

        case 19: /* MPI_Ialltoallw (a weak test, neither irregular nor sparse) */
            for (i = 0; i < size; ++i) {
                sendcounts[i] = COUNT;
                recvcounts[i] = COUNT;
                sdispls[i] = COUNT * i * sizeof(int);
                rdispls[i] = COUNT * i * sizeof(int);
                sendtypes[i] = MPI_INT;
                recvtypes[i] = MPI_INT;
                for (j = 0; j < COUNT; ++j) {
                    buf[i*COUNT+j] = rank + (i * j);
                    recvbuf[i*COUNT+j] = 0xdeadbeef;
                }
            }
            MPI_Ialltoallw(buf, sendcounts, sdispls, sendtypes, recvbuf, recvcounts, rdispls, recvtypes, comm, req);
            break;

        case 20: /* basic pt2pt MPI_Isend/MPI_Irecv pairing */
            /* even ranks send to odd ranks, but only if we have a full pair */
            if ((rank % 2 != 0) || (rank != size-1)) {
                for (j = 0; j < COUNT; ++j) {
                    buf[j] = j;
                    recvbuf[j] = 0xdeadbeef;
                }
                if (rank % 2 == 0)
                    MPI_Isend(buf, COUNT, MPI_INT, rank+1, 5, comm, req);
                else
                    MPI_Irecv(recvbuf, COUNT, MPI_INT, rank-1, 5, comm, req);
            }
            break;

        default:
            fprintf(stderr, "unexpected value for l->case_num=%d\n", (l->case_num));
            MPI_Abort(comm, 1);
            exit(1);
            break;
    }
}
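
The header comment above requires every process to pass a consistent rndnum, otherwise ranks would start mismatched operations. A hypothetical driver sketch for deriving that shared value is shown below; the call to start_random_nonblocking and the laundry cleanup are only indicated in comments, since struct laundry is not shown here.

#include <mpi.h>
#include <stdio.h>
#include <time.h>

int main(int argc, char **argv)
{
    int rank;
    unsigned int rndnum = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank == 0)
        rndnum = (unsigned int) time(NULL);   /* any agreed-upon value works */

    /* All ranks must end up with the same rndnum, otherwise they would
     * start different (and therefore mismatched) collectives. */
    MPI_Bcast(&rndnum, 1, MPI_UNSIGNED, 0, MPI_COMM_WORLD);

    printf("rank %d uses rndnum=%u\n", rank, rndnum);

    /* start_random_nonblocking(MPI_COMM_WORLD, rndnum, &req, &l);
     * ...complete the request and free the laundry buffers (omitted). */

    MPI_Finalize();
    return 0;
}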
Code Example #5
File: IMB_bcast.c Project: Cai900205/test
void IMB_ibcast_pure(struct comm_info* c_info,
                     int size,
                     struct iter_schedule* ITERATIONS,
                     MODES RUN_MODE,
                     double* time)
/*


                      MPI-NBC benchmark kernel
                      Benchmarks MPI_Ibcast



Input variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information


-size                 (type int)
                      Basic message size in bytes

-ITERATIONS           (type struct iter_schedule *)
                      Repetition scheduling

-RUN_MODE             (type MODES)
                      (only MPI-2 case: see [1])


Output variables:

-time                 (type double*)
                      Timing result per sample


*/
{
    int         i = 0,
                root = 0;
    Type_Size   s_size;
    int         s_num = 0;
    void*       bc_buf = NULL;
    MPI_Request request;
    MPI_Status  status;
    double      t_pure = 0.;

#ifdef CHECK
    defect = 0.;
#endif
    ierr = 0;

    /* GET SIZE OF DATA TYPE */
    MPI_Type_size(c_info->s_data_type, &s_size);
    if (s_size != 0) {
        s_num = size / s_size;
    }

    if(c_info->rank != -1) {
        root = 0;
        for (i = 0; i < N_BARR; i++) {
            MPI_Barrier(c_info->communicator);
        }

        t_pure = MPI_Wtime();
        for(i = 0; i < ITERATIONS->n_sample; i++)
        {
            bc_buf = (root == c_info->rank)
                   ? c_info->s_buffer
                   : c_info->r_buffer;

            ierr = MPI_Ibcast((char*)bc_buf + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                              s_num,
                              c_info->s_data_type,
                              root,
                              c_info->communicator,
                              &request);
            MPI_ERRHAND(ierr);
            MPI_Wait(&request, &status);
            CHK_DIFF("Ibcast_pure", c_info,
                     (char*)bc_buf + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs,
                     0, size, size, 1, put, 0, ITERATIONS->n_sample, i, root, &defect);
            root = (root + 1) % c_info->num_procs;
        }
        t_pure = (MPI_Wtime() - t_pure) / ITERATIONS->n_sample;
    }

    time[0] = t_pure;
}
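
For comparison, the same pure-timing pattern can be reproduced outside of IMB: start MPI_Ibcast, wait on it immediately, average over the iterations, and reduce the per-rank averages with MPI_MAX so the slowest rank defines the reported time. The sketch below assumes an illustrative message size and iteration count rather than IMB's buffer handling and scheduling.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    const int n_sample = 1000;
    const int count = 1024;
    int rank, size;
    double t, t_max = 0.0;
    MPI_Request req;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int *buf = malloc(count * sizeof(int));
    for (int i = 0; i < count; ++i)
        buf[i] = rank;

    MPI_Barrier(MPI_COMM_WORLD);
    t = MPI_Wtime();
    for (int i = 0; i < n_sample; ++i) {
        int root = i % size;          /* rotate the root, as the kernel above does */
        MPI_Ibcast(buf, count, MPI_INT, root, MPI_COMM_WORLD, &req);
        MPI_Wait(&req, MPI_STATUS_IGNORE);
    }
    t = (MPI_Wtime() - t) / n_sample;

    MPI_Reduce(&t, &t_max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    if (rank == 0)
        printf("MPI_Ibcast (pure): %.3f usec per call\n", t_max * 1e6);

    free(buf);
    MPI_Finalize();
    return 0;
}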