Exemplo n.º 1
0
/*
 * Driver of the MPI-distributed SPIKE solver.
 *
 * Rank `master` (0):
 *   - loads the system Ax = f and asks spike_solve_analysis() for a schedule
 *     with (size - 1) partitions;
 *   - for every partition p sends the schedule, the diagonal block and the
 *     RHS / spike blocks to rank p+1 and collects the tips that are added to
 *     the reduced system;
 *   - solves the reduced system, then sends each worker the pieces it needs
 *     for the backward substitution and gathers the per-partition solutions.
 *
 * Every other rank (worker) handles partition p = rank - 1: it factorizes the
 * received diagonal block, solves for the RHS and the V/W spikes, returns the
 * tips, waits on the barrier while the master solves the reduced system, and
 * finally computes and returns its part of the solution.
 *
 * The order of sendBlock()/recvBlock() calls on both sides is a protocol:
 * master and workers must issue them in matching order.
 *
 * Returns 0.
 */
int main(int argc, char *argv[])
{
	//MPI initialize
	MPI_Init (&argc, &argv);
	int rank, size, master = 0;
	MPI_Comm_rank (MPI_COMM_WORLD, &rank);
	MPI_Comm_size (MPI_COMM_WORLD, &size);
	/* MPI_Errhandler_set() was removed in MPI-3.0; this is its replacement. */
	MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);


	CheckPreprocessorMacros();

	/* -------------------------------------------------------------------- */
	/* .. Local variables. */
	/* -------------------------------------------------------------------- */
	timer_t start_t, end_t;
	const integer_t nrhs = 1;

	if(rank == master){

		fprintf(stderr, "\nShared Memory Spike Solver.\n");


		/* -------------------------------------------------------------------- */
		/* .. Load and initialize the system Ax=f. */
		/* -------------------------------------------------------------------- */
		matrix_t* A = matrix_LoadCSR("../Tests/spike/penta_15.bin");
		//matrix_t* A = matrix_LoadCSR("../Tests/pentadiagonal/large.bin");
		//matrix_t* A = matrix_LoadCSR("../Tests/dummy/tridiagonal.bin");
		matrix_PrintAsDense( A, "Original coeffient matrix" );

		/* Allocate the solution (x) and right-hand side (f) blocks */
		block_t*  x = block_CreateEmptyBlock( A->n, nrhs, 0, 0, _RHS_BLOCK_, _WHOLE_SECTION_ );
		block_t*  f = block_CreateEmptyBlock( A->n, nrhs, 0, 0, _RHS_BLOCK_, _WHOLE_SECTION_ );

		block_InitializeToValue( x, __zero  ); // solution of the system
		block_InitializeToValue( f, __punit ); // rhs of the system

		start_t = GetReferenceTime();

		/* compute an optimal solving strategy: one partition per worker rank */
		sm_schedule_t* S = spike_solve_analysis( A, nrhs, size-1 );

		/* create the reduced system in advance, based on the solving strategy */
		matrix_t* R  = matrix_CreateEmptyReducedSystem ( S->p, S->n, S->ku, S->kl);
		block_t*  xr = block_CreateReducedRHS( S->p, S->ku, S->kl, nrhs );

		/* -------------------------------------------------------------------- */
		/* .. Factorization Phase. */
		/* -------------------------------------------------------------------- */
		for(integer_t p=0; p < S->p; p++)
		{
			/* partition p is handled by rank p+1 */
			sendSchedulePacked(S, p+1);
			const integer_t r0 = S->n[p];
			const integer_t rf = S->n[p+1];

			matrix_t* Aij = matrix_ExtractMatrix(A, r0, rf, r0, rf);
			sendMatrix(Aij, p+1);

			block_t*  fi  = block_ExtractBlock( f, r0, rf );
			block_t*  yi  = block_CreateEmptyBlock( rf - r0, nrhs, 0, 0, _RHS_BLOCK_, _WHOLE_SECTION_ );
			block_SetBandwidthValues( fi, A->ku, A->kl );
			block_SetBandwidthValues( yi, A->ku, A->kl );

			sendBlock(fi, p+1);
			sendBlock(yi, p+1);

			/* Add the tips of the yi block to the reduced RHS */
			block_t* yit = recvBlock(p+1);
			block_t* yib = recvBlock(p+1);
			block_AddTipTOReducedRHS( p, S->ku, S->kl, xr, yit );
			block_AddTipTOReducedRHS( p, S->ku, S->kl, xr, yib );

			/* clean up */
			block_Deallocate (fi );
			block_Deallocate (yi );
			block_Deallocate (yit);
			block_Deallocate (yib);

			if(p == 0){
				/* first partition: only the V spike (coupling with the next block) */
				block_t* Vi = block_CreateEmptyBlock ( rf - r0, A->ku, A->ku, A->kl, _V_BLOCK_, _WHOLE_SECTION_ );
				block_t* Bi = matrix_ExtractBlock    ( A, r0, rf, rf, rf + A->ku, _V_BLOCK_ );

				sendBlock(Vi, p+1);
				sendBlock(Bi, p+1);

				block_t* Vit = recvBlock(p+1);
				block_t* Vib = recvBlock(p+1);

				matrix_AddTipToReducedMatrix( S->p, p, S->n, S->ku, S->kl, R, Vit );
				matrix_AddTipToReducedMatrix( S->p, p, S->n, S->ku, S->kl, R, Vib );

				block_Deallocate( Bi );
				block_Deallocate( Vi );
				block_Deallocate( Vit);
				block_Deallocate( Vib);
			}
			else if (p == ( S->p -1)){
				/* last partition: only the W spike (coupling with the previous block) */
				block_t* Wi = block_CreateEmptyBlock( rf - r0, A->kl, A->ku, A->kl, _W_BLOCK_, _WHOLE_SECTION_ );
				block_t* Ci = matrix_ExtractBlock(A, r0, rf, r0 - A->kl, r0, _W_BLOCK_ );
				sendBlock(Wi, p+1);
				sendBlock(Ci, p+1);

				block_t* Wit = recvBlock(p+1);
				block_t* Wib = recvBlock(p+1);
				matrix_AddTipToReducedMatrix( S->p, p, S->n, S->ku, S->kl, R, Wit );
				matrix_AddTipToReducedMatrix( S->p, p, S->n, S->ku, S->kl, R, Wib );

				block_Deallocate( Ci );
				block_Deallocate( Wi );
				block_Deallocate( Wit);
				block_Deallocate( Wib);
			}
			else{
				/* interior partition: V spike first, then W spike */
				block_t* Vi = block_CreateEmptyBlock ( rf - r0, A->ku, A->ku, A->kl, _V_BLOCK_, _WHOLE_SECTION_ );
				block_t* Bi = matrix_ExtractBlock    ( A, r0, rf, rf, rf + A->ku, _V_BLOCK_ );
				sendBlock(Vi, p+1);
				sendBlock(Bi, p+1);

				block_t* Vit = recvBlock(p+1);
				block_t* Vib = recvBlock(p+1);
				matrix_AddTipToReducedMatrix( S->p, p, S->n, S->ku, S->kl, R, Vit );
				matrix_AddTipToReducedMatrix( S->p, p, S->n, S->ku, S->kl, R, Vib );

				block_Deallocate( Bi );
				block_Deallocate( Vi );
				block_Deallocate( Vit);
				block_Deallocate( Vib);

				block_t* Wi = block_CreateEmptyBlock( rf - r0, A->kl, A->ku, A->kl, _W_BLOCK_, _WHOLE_SECTION_ );
				block_t* Ci = matrix_ExtractBlock(A, r0, rf, r0 - A->kl, r0, _W_BLOCK_ );
				sendBlock(Wi, p+1);
				sendBlock(Ci, p+1);

				block_t* Wit = recvBlock(p+1);
				block_t* Wib = recvBlock(p+1);
				matrix_AddTipToReducedMatrix( S->p, p, S->n, S->ku, S->kl, R, Wit );
				matrix_AddTipToReducedMatrix( S->p, p, S->n, S->ku, S->kl, R, Wib );

				block_Deallocate( Ci );
				block_Deallocate( Wi );
				block_Deallocate( Wit);
				block_Deallocate( Wib);
			}
			matrix_Deallocate( Aij);
		}

		/* Workers wait on the matching barrier until all tips were collected */
		MPI_Barrier(MPI_COMM_WORLD);
		/* -------------------------------------------------------------------- */
		/* .. Solution of the reduced system.                                   */
		/* -------------------------------------------------------------------- */

		block_t* yr = block_CreateEmptyBlock( xr->n, xr->m, 0, 0, _RHS_BLOCK_, _WHOLE_SECTION_ );
		fprintf(stderr, "\nSolving reduced linear system\n");
		system_solve ( R->colind, R->rowptr, R->aij, yr->aij, xr->aij, R->n, xr->m);
		block_Print(yr, "Solucion del sistema reducido");


		/* Free some memory, xr and R are not needed anymore */
		block_Deallocate ( xr );
		matrix_Deallocate( R  );

		/* -------------------------------------------------------------------- */
		/* .. Backward substitution phase. */
		/* -------------------------------------------------------------------- */
		for(integer_t p=0; p < S->p; p++)
		{
			fprintf(stderr, "Processing backward solution for the %d-th block\n", p);

			/* compute the limits of the blocks */
			const integer_t obs = S->n[p];        		/* original system starting row */
			const integer_t obe = S->n[p+1];	  		/* original system ending row   */
			const integer_t rbs = S->r[p];		  		/* reduced system starting row  */
			const integer_t rbe = S->r[p+1];			/* reduced system ending row    */

			/* extract xi sub-block */
			block_t*  xi  = block_ExtractBlock(x, obs, obe );
			sendBlock(xi, p+1);

			/* extract fi sub-block */
			block_t*  fi  = block_ExtractBlock(f, obs, obe );
			sendBlock(fi, p+1);
			printf("Lets go %d\n", p);

			if ( p == 0 ){

				block_t* xt_next = block_ExtractBlock ( yr, rbe, rbe + S->ku[p+1]);
				sendBlock(xt_next, p+1);
				block_Deallocate (xt_next);
			}

			else if ( p == ( S->p -1)){

				block_t* xb_prev = block_ExtractBlock ( yr, rbs - S->kl[p], rbs );
				sendBlock(xb_prev, p+1);
				block_Deallocate (xb_prev);
			}

			else{
				block_t* xt_next = block_ExtractBlock ( yr, rbe, rbe + S->ku[p+1]);
				sendBlock(xt_next, p+1);
				block_Deallocate (xt_next);

				block_t* xb_prev = block_ExtractBlock ( yr, rbs - S->kl[p], rbs );
				sendBlock(xb_prev, p+1);
				block_Deallocate (xb_prev);
			}

			/* The worker's answer arrives in a new block; keep it in its own
			 * pointer so the block extracted above can still be released
			 * (reusing `xi` for the result leaked the extracted block). */
			block_t* xi_solved = recvBlock(p+1);
			block_AddBlockToRHS(x, xi_solved, obs, obe);
			block_Deallocate    ( xi_solved );
			block_Deallocate    ( xi );
			block_Deallocate 	( fi );
		}
		schedule_Destroy( S );
		block_Deallocate( yr);

		end_t = GetReferenceTime();

		fprintf(stderr, "\nSPIKE solver took %.6lf seconds", end_t - start_t);
		block_Print( x, "Solution of the linear system");

		ComputeResidualOfLinearSystem( A->colind, A->rowptr, A->aij, x->aij, f->aij, A->n, nrhs);

		fprintf(stderr, "\nPARDISO REFERENCE SOLUTION...\n");
		SolveOriginalSystem( A, x, f);

		/* -------------------------------------------------------------------- */
		/* .. Clean up. */
		/* -------------------------------------------------------------------- */
		matrix_Deallocate ( A );
		block_Deallocate  ( x );
		block_Deallocate  ( f );

		fprintf(stderr, "\nProgram finished\n");

		debug("Number of malloc() calls %d, number of free() calls %d\n", cnt_alloc, cnt_free );
	}

	else{ //WORKERS

		/* -------------------------------------------------------------------- */
		/* .. Factorization Phase. */
		/* -------------------------------------------------------------------- */

		sm_schedule_t* S = recvSchedulePacked(master);

		/* this rank handles partition p = rank - 1 */
		integer_t p = rank -1;
		const integer_t ni  = S->n[p+1] - S->n[p]; 	/* number of rows in the block  */

		/* Zero-initialized so that the call below which receives it never reads
		 * indeterminate values.
		 * NOTE(review): nothing in this function fills this array with real
		 * settings; confirm whether directSolver_ApplyFactorToRHS() needs it. */
		MKL_INT pardiso_conf[64] = {0};

		/* create and configure the direct solver handler */
		DirectSolverHander_t *handler = directSolver_CreateHandler();

		directSolver_Configure( handler );

		/* factorize matrix */
		matrix_t* Aij = recvMatrix(master);

		directSolver_Factorize( handler,
			Aij->n,
			Aij->nnz,
			Aij->colind,
			Aij->rowptr,
			Aij->aij, Aij->n);

		/* -------------------------------------------------------------------- */
		/* .. Solve Ai * yi = fi                                                */
		/* Receives the fi portion of f and an empty yi block used as container */
		/* for the solution of the system. Then solves the system.              */
		/* -------------------------------------------------------------------- */

		/* solve the system for the RHS value */
		block_t*  fi = recvBlock(master);
		block_t*  yi = recvBlock(master);

		/* solve Ai * yi = fi */
		directSolver_SolveForRHS( handler, nrhs, yi->aij, fi->aij );

		/* Extract the tips of the yi block */
		block_t* yit = block_ExtractTip( yi, _TOP_SECTION_   , _COLMAJOR_ );
		block_t* yib = block_ExtractTip( yi, _BOTTOM_SECTION_, _COLMAJOR_ );
		sendBlock(yit, master);
		sendBlock(yib, master);

		/* clean up */
		block_Deallocate (fi );
		block_Deallocate (yi );
		block_Deallocate (yit);
		block_Deallocate (yib);

		if ( rank == 1 ){
			/* first partition: V spike only */

			block_t* Vi = recvBlock(master);
			block_t* Bi = recvBlock(master);

			/* solve Ai * Vi = Bi */
			directSolver_SolveForRHS( handler, Vi->m, Vi->aij, Bi->aij );

			block_t* Vit = block_ExtractTip( Vi, _TOP_SECTION_, _ROWMAJOR_ );
			block_t* Vib = block_ExtractTip( Vi, _BOTTOM_SECTION_, _ROWMAJOR_ );
			sendBlock(Vit, master);
			sendBlock(Vib, master);

			/* keep the bottom tip of Bi: it is needed after the barrier */
			block_t* Bib = block_ExtractTip( Bi, _BOTTOM_SECTION_, _COLMAJOR_ );

			block_Deallocate( Bi );
			block_Deallocate( Vi);
			block_Deallocate( Vit);
			block_Deallocate( Vib);

			/* The master solves the reduced system after this barrier */
			MPI_Barrier(MPI_COMM_WORLD);

			block_t* xi = recvBlock(master);
			block_t* fi = recvBlock(master);
			block_t* xt_next = recvBlock(master);

			/* Backward substitution, implicit scheme:
			 * fi[bottom rows] = __nunit * Bib * xt_next + __punit * fi[bottom rows] */
			cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
				Bib->n,    						/* m - number of rows of A    */
				xt_next->m, 					/* n - number of columns of B */
				Bib->m,    						/* k - number of columns of A */
				__nunit,						/* alpha                      */
				Bib->aij, 						/* A block                    */
				Bib->n,    						/* lda - first dimension of A */
				xt_next->aij, 					/* B block                    */
				xt_next->n,    					/* ldb - first dimension of B */
				__punit,						/* beta                       */
				&fi->aij[ni - S->ku[p]], 		/* C block                    */
				ni ); 					 		/* ldc - first dimension of C */

			/* solve Ai * xi = fi */
			directSolver_SolveForRHS( handler, xi->m, xi->aij, fi->aij );

			sendBlock(xi, master);

			block_Deallocate ( Bib );
			block_Deallocate ( xt_next);
			block_Deallocate ( xi );
			block_Deallocate ( fi );
		}

		else if ( rank == size -1){
			/* last partition: W spike only */

			block_t* Wi = recvBlock(master);
			block_t* Ci = recvBlock(master);

			/* solve Ai * Wi = Ci */
			directSolver_SolveForRHS( handler, Wi->m, Wi->aij, Ci->aij );


			block_t* Wit = block_ExtractTip( Wi, _TOP_SECTION_, _ROWMAJOR_ );
			block_t* Wib = block_ExtractTip( Wi, _BOTTOM_SECTION_, _ROWMAJOR_ );
			sendBlock(Wit, master);
			sendBlock(Wib, master);

			/* keep the top tip of Ci: it is needed after the barrier */
			block_t* Cit = block_ExtractTip(Ci, _TOP_SECTION_, _COLMAJOR_ );

			block_Deallocate( Ci );
			block_Deallocate( Wi );
			block_Deallocate( Wit);
			block_Deallocate( Wib);

			/* The master solves the reduced system after this barrier */
			MPI_Barrier(MPI_COMM_WORLD);

			block_t* xi = recvBlock(master);
			block_t* fi = recvBlock(master);
			block_t* xb_prev = recvBlock(master);

			/* Backward substitution, implicit scheme:
			 * fi[top rows] = __nunit * Cit * xb_prev + __punit * fi[top rows] */
			cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
				Cit->n,    						/* m - number of rows of A    */
				xb_prev->m, 					/* n - number of columns of B */
				Cit->m,    						/* k - number of columns of A */
				__nunit,						/* alpha                      */
				Cit->aij, 						/* A block                    */
				Cit->n,    						/* lda - first dimension of A */
				xb_prev->aij, 					/* B block                    */
				xb_prev->n,    					/* ldb - first dimension of B */
				__punit,						/* beta                       */
				fi->aij, 			 		    /* C block                    */
				ni );		 					/* ldc - first dimension of C */

			/* solve Ai * xi = fi */
			directSolver_SolveForRHS( handler, xi->m, xi->aij, fi->aij );

			sendBlock(xi, master);

			block_Deallocate ( Cit );
			block_Deallocate ( xb_prev);
			block_Deallocate ( xi );
			block_Deallocate ( fi );
		}

		else{
			/* interior partition: V spike first, then W spike */
			block_t* Vi = recvBlock(master);
			block_t* Bi = recvBlock(master);

			/* solve Ai * Vi = Bi */
			directSolver_SolveForRHS( handler, Vi->m, Vi->aij, Bi->aij );

			block_t* Vit = block_ExtractTip( Vi, _TOP_SECTION_, _ROWMAJOR_ );
			block_t* Vib = block_ExtractTip( Vi, _BOTTOM_SECTION_, _ROWMAJOR_ );
			sendBlock(Vit, master);
			sendBlock(Vib, master);

			/* keep the bottom tip of Bi: it is needed after the barrier */
			block_t* Bib = block_ExtractTip( Bi, _BOTTOM_SECTION_, _COLMAJOR_ );

			block_Deallocate( Bi );
			block_Deallocate( Vi );
			block_Deallocate( Vit);
			block_Deallocate( Vib);

			block_t* Wi = recvBlock(master);
			block_t* Ci = recvBlock(master);

			/* solve Ai * Wi = Ci */
			directSolver_SolveForRHS( handler, Wi->m, Wi->aij, Ci->aij );

			block_t* Wit = block_ExtractTip( Wi, _TOP_SECTION_, _ROWMAJOR_ );
			block_t* Wib = block_ExtractTip( Wi, _BOTTOM_SECTION_, _ROWMAJOR_ );
			sendBlock(Wit, master);
			sendBlock(Wib, master);

			/* keep the top tip of Ci: it is needed after the barrier */
			block_t* Cit = block_ExtractTip(Ci, _TOP_SECTION_, _COLMAJOR_ );

			block_Deallocate( Ci );
			block_Deallocate( Wi );
			block_Deallocate( Wit);
			block_Deallocate( Wib);

			/* The master solves the reduced system after this barrier */
			MPI_Barrier(MPI_COMM_WORLD);

			block_t* xi = recvBlock(master);
			block_t* fi = recvBlock(master);
			block_t* xt_next = recvBlock(master);

			/* Backward substitution, implicit scheme:
			 * fi[bottom rows] = __nunit * Bib * xt_next + __punit * fi[bottom rows] */
			cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
				Bib->n,    						/* m - number of rows of A    */
				xt_next->m, 					/* n - number of columns of B */
				Bib->m,    						/* k - number of columns of A */
				__nunit,						/* alpha                      */
				Bib->aij, 						/* A block                    */
				Bib->n,    						/* lda - first dimension of A */
				xt_next->aij, 					/* B block                    */
				xt_next->n,    					/* ldb - first dimension of B */
				__punit,						/* beta                       */
				&fi->aij[ni - S->ku[p]], 		/* C block                    */
				ni ); 					 		/* ldc - first dimension of C */

			/* NOTE(review): the other two branches do not make this call, and xi
			 * is solved for again below once fi is fully updated; this call and
			 * the solve right after it look like leftovers - confirm before
			 * removing. */
			directSolver_ApplyFactorToRHS( Aij->colind, Aij->rowptr, Aij->aij, xi->aij, fi->aij, Aij->n, xi->m, &pardiso_conf );

			/* solve Ai * xi = fi */
			directSolver_SolveForRHS( handler, xi->m, xi->aij, fi->aij );

			block_Deallocate ( Bib );
			block_Deallocate ( xt_next);

			block_t* xb_prev = recvBlock(master);

			/* Backward substitution, implicit scheme:
			 * fi[top rows] = __nunit * Cit * xb_prev + __punit * fi[top rows] */
			cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
				Cit->n,    						/* m - number of rows of A    */
				xb_prev->m, 					/* n - number of columns of B */
				Cit->m,    						/* k - number of columns of A */
				__nunit,						/* alpha                      */
				Cit->aij, 						/* A block                    */
				Cit->n,    						/* lda - first dimension of A */
				xb_prev->aij, 					/* B block                    */
				xb_prev->n,    					/* ldb - first dimension of B */
				__punit,						/* beta                       */
				fi->aij, 			 		    /* C block                    */
				ni );		 					/* ldc - first dimension of C */

			/* solve Ai * xi = fi */
			directSolver_SolveForRHS( handler, xi->m, xi->aij, fi->aij );

			sendBlock(xi, master);

			block_Deallocate ( Cit );
			block_Deallocate ( xb_prev);
			block_Deallocate ( xi );
			block_Deallocate ( fi );

		}
		/* Show statistics and clean up solver internal memory */
		directSolver_ShowStatistics(handler);
		directSolver_Finalize(handler);

		schedule_Destroy  ( S );
		matrix_Deallocate(Aij);

		debug("Number of malloc() calls %d, number of free() calls %d\n", cnt_alloc, cnt_free );
	}
	debug("Rank %d Finished!\n", rank);
	MPI_Barrier(MPI_COMM_WORLD);
	MPI_Finalize();
	return 0;
}
Exemplo n.º 2
0
/*
 * Matrix-multiplication server.
 *
 * Usage: <program> <port>
 *
 * Parses the port from argv[1] and then loops forever: acquires a client
 * socket via getClientSocket(), reads two matrices from it, multiplies them
 * and sends the product back. Resources of the previous iteration (matrices
 * and client socket) are released at the top of each iteration, so every
 * `continue` below also performs the clean-up.
 *
 * Returns 1 on a missing or unparsable port argument; otherwise never returns.
 */
int main(int argc, const char *argv[])
{
    struct Matrix *mat1, *mat2, *prod;
    int err;
    unsigned short port;
    int sd = INVALID_SOCKET;
/* #ifdef _WIN32
    WSADATA wsaData;
#endif
 */
    if (argc < 2)
    {
        fprintf(stderr, "Usage: %s <port>\n", argv[0]);
        return 1;
    }
    
    if (sscanf(argv[1], "%hu", &port) != 1)
    {
        /* `port` was not assigned when the conversion fails, so report the
           raw argument instead of reading an uninitialized variable. */
        fprintf(stderr, "Invalid port (%s)\n", argv[1]);
        return 1;
    }

/* #ifdef _WIN32
    if ((err = WSAStartup(MAKEWORD(2, 2), &wsaData)))
    {
        fprintf(stderr, "Could not initialize winsock: %d\n", err);
        return 1;
    }
#endif */

    mat1 = mat2 = prod = NULL;

    /* The server main loop */
    for(;;)
    {
        /* Release whatever the previous iteration left behind. */
        freeMatrix(mat1);
        freeMatrix(mat2);
        freeMatrix(prod);
        mat1 = mat2 = prod = NULL;

        if (sd != INVALID_SOCKET)
        {
            /* This prevents potential connection-reset errors in clients on
               windows... */
            if (shutdown(sd, SD_SEND) == SOCKET_ERROR)
                fprintf(stderr, "error in TCP send shutdown: %d\n",
                    WSAGetLastError());

            if (closesocket(sd) == SOCKET_ERROR)
                fprintf(stderr, "error closing client socket: %d\n",
                    WSAGetLastError());

            sd = INVALID_SOCKET;
        }

        if ((err = getClientSocket(port, &sd)) != 0)
        {
            fprintf(stderr, "Error acquiring client connection: %d\n",
                err);
            /* in case we are repeatedly unable to get a client connection
               for some reason, don't spam. */
            Sleep(1000);
            continue;
        }

        /* Any failure below abandons this client; clean-up happens at the
           top of the next iteration. */
        mat1 = readMatrix(sd);
        if (!mat1)
            continue;

        mat2 = readMatrix(sd);
        if (!mat2)
            continue;

        prod = multiply(mat1, mat2);
        if (!prod)
            continue;

        /* printMatrix(prod); */
        sendMatrix(sd, prod);
    }
}