Example #1
void testLUSolver(){
 
 /* Solve a 3x3 test system A x = b with the LU solver and print the solution. */
 double **A, *x;
 int n = 3;
 double b[n];
 
 A=allocateDoubleMatrix(n, n);
 

 A[0][0]=6.0;  A[0][1]=0.0;  A[0][2]=2.0;
 A[1][0]=24.0;  A[1][1]=1.0;  A[1][2]=8.0;
 A[2][0]=-12.0;  A[2][1]=1.0;  A[2][2]=-3.0;
 
 b[0]=4.0;
 b[1]=19.0;
 b[2]=-6.0;

 x=LUSolver(A,b,n);
 
 fprintf(stdout,"LU solver U:\n");
 printVector(x,n);

}
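
For reference, the 3x3 system in this test has the exact solution x = (1, 3, -1), so printVector should print values close to these. A minimal residual check that could be appended before the closing brace is sketched below; it assumes LUSolver leaves A and b untouched and returns a heap-allocated solution of length n, and it needs <math.h> for fabs.

 /* Hypothetical residual check (requires <math.h>): verify A*x is close to b. */
 double maxErr = 0.0;
 int i, j;
 for (i = 0; i < n; i++){
   double ri = -b[i];                /* r_i = (A*x - b)_i */
   for (j = 0; j < n; j++)
     ri += A[i][j] * x[j];
   if (fabs(ri) > maxErr) maxErr = fabs(ri);
 }
 fprintf(stdout, "max |A*x - b| = %e (exact solution is 1, 3, -1)\n", maxErr);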
Example #2
int main (int argc, char *argv[])
{

	int	taskid;              // a task identifier 
	int	numtasks;              // number of tasks in partition
	MPI_Comm comm;
	int m; // size of the matrix
	int	local_m;                  // columns of A handled by each rank (A is stored column-major)
	double *A, *b,*exact_x, *x;
	double *temp_1, *temp_2;
	double *local_A,  *local_v,*local_u;
	double *local_M = NULL; // diagonal preconditioner from the original template (unused here; kept NULL so free(local_M) below is a no-op)
	int i,j,k;


	MPI_Init(&argc,&argv);
	comm=MPI_COMM_WORLD;
	MPI_Comm_rank(comm,&taskid);
	MPI_Comm_size(comm,&numtasks);
	if(taskid==MASTER){ // initialization: A and b
		/* start modification 1: read A and b from mtx files in node 0 */
		////m=64; // size of the matrix
		////A=(double *)malloc(sizeof(double)*(m*m));
		////// !!! A is in col-major
		////for(j=0;j<m;j++)
		////	for (i=0;i<m;i++){
		////		if(i==j)
		////			*(A+j*m+i)=m*100.0;
		////		else
		////			*(A+j*m+i)=i+1.0;
		////	}
		////exact_x=(double *)malloc(sizeof(double)*m);
		////for (i=0;i<m;i++)
		////	*(exact_x+i)=1.0;
		////b=(double *)malloc(sizeof(double)*m);
		////// b=A*ones(n,1)
		////cblas_dgemv(CblasColMajor, CblasNoTrans, m, m, 1.0, A, m, exact_x, 1, 0.0, b, 1); 	
		/////* end modification 1*/
	  mtxBLU mtxA(argv[1], NPROC);   // read the matrix A from the .mtx file given on the command line
	  mtxBLU mtxb(argv[2]);          // read the right-hand side b
	  m = mtxA.m;
	  A = (double*)malloc(sizeof(double)*(m*m));
	  b = (double*)malloc(sizeof(double)*m);
	  for(i=0; i<m*m; i++)
	    A[i] = mtxA.arr[i];          // copy into the flat array (the template assumes A is column-major)
	  for(i=0; i<m; i++)
	    b[i] = mtxb.arr[i];
	}

	MPI_Bcast(&m,1,MPI_INT, MASTER, comm); // send m from node MASTER to all other nodes.
	local_m=m/numtasks;
	local_A=(double *)malloc(sizeof(double)*(local_m*m));
	local_u=(double *)malloc(sizeof(double)*(local_m));
	local_v=(double *)malloc(sizeof(double)*m);
	//	partition A into blocks of local_m columns (A is column-major) and send block i to local_A on rank i
	MPI_Scatter(A,local_m*m, MPI_DOUBLE, local_A, local_m*m, MPI_DOUBLE, MASTER, comm);

	if(taskid==MASTER){
		free(A);
		//free(exact_x);
		// do not free b; it will be used for GMRES
	}


	/* start modification 2: generate preconditioner M
	 * In this example, the TA chose the diagonal elements of A as the preconditioner.
	 * In HW3 part b, you should generate L and U here.
	 */
	////local_M=(double *)malloc(sizeof(double)*local_m);
	////for(i=0;i<local_m;i++)
	////	*(local_M+i)=*(local_A+taskid*local_m+i*m+i);
  int mm = m / numtasks;            // block size (same as local_m)
  double *U, *L, *UU, *LL;
  U  = new double[numtasks*mm*mm];  // U factor in block-column storage ("block col domain")
  L  = new double[numtasks*mm*mm];  // L factor in block-column storage ("block col domain")
  UU = new double[numtasks*mm*mm];  // U repacked into full column-major storage ("full col domain")
  LL = new double[numtasks*mm*mm];  // L repacked into full column-major storage ("full col domain")
  std::fill(L, L+numtasks*mm*mm, 0.0);
  std::fill(U, U+numtasks*mm*mm, 0.0);
  for(i=0; i<mm; i++)
    L[taskid*mm*mm + i + i*mm] = 1.0;  // L_ii = I (mm x mm identity block)
  
  for(i=0; i<numtasks*mm*mm; i++) //backup A into U
    U[i]=local_A[i];

  blockLU(L, U, mm, taskid);
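  // Repack L and U from block-contiguous storage (block k at offset k*mm*mm,
  // column-major within each mm x mm block) into LL/UU with full column-major
  // layout (leading dimension m = numtasks*mm), presumably the layout expected
  // by the distributed LUSolver used in the preconditioning step below.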
  for(k=0; k<numtasks; k++)
    for(j=0; j<mm; j++)
      for(i=0; i<mm; i++){
        UU[k*mm+i+j*mm*numtasks]= U[k*mm*mm+j*mm+i];
        LL[k*mm+i+j*mm*numtasks]= L[k*mm*mm+j*mm+i];
      }

   
  /* end modification 2 */


	/*---------------------------------------------------------------------------
	 * GMRES: Allocate storage for the ?par parameters and the solution vectors
	 *---------------------------------------------------------------------------*/
	MKL_INT RCI_request;
	int RCI_flag;
	double dvar;
	int flag=0;

	MKL_INT ipar[128]; //specifies the integer set of data for the RCI FGMRES computations
	double dpar[128]; // specifies the double precision set of data
	double *tmp; // double precision temporary workspace for the RCI FGMRES computations
	double *computed_solution;
	double *residual;
	double *f;
	MKL_INT itercount, ierr=0;
	MKL_INT  ivar;
	double b_2norm;
	char cvar='N';
	MKL_INT incx=1;
	if (taskid==MASTER){
		ipar[14]=RESTART; // restart iteration number
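		// workspace size for a restart length of ipar[14], following the MKL RCI FGMRES examples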
		int n_tmp = (2 * ipar[14] + 1) * m + ipar[14] * (ipar[14] + 9) / 2 + 1;
		tmp=(double *)malloc(sizeof(double)*n_tmp);
		computed_solution=(double *)malloc(sizeof(double)*m);
		residual=(double *)malloc(sizeof(double)*m);
		f=(double *)malloc(sizeof(double)*m);

		ivar=m;
		/*---------------------------------------------------------------------------
		 * Initialize the initial guess
		 *---------------------------------------------------------------------------*/
		for (i = 0; i < m; i++)
		{
			computed_solution[i] = 0.5;
		}

		b_2norm = cblas_dnrm2 (ivar, b, incx);
	//	printf("b_2norm=%f\n",b_2norm);
		/*---------------------------------------------------------------------------
		 * Initialize the solver
		 *---------------------------------------------------------------------------*/
		dfgmres_init (&ivar, computed_solution,b, &RCI_request, ipar, dpar, tmp);
		RCI_flag=RCI_request;
	}
	MPI_Bcast(&RCI_flag,1,MPI_INT, MASTER, comm);
	if (RCI_flag != 0)
		goto FAILED;

	if(taskid==MASTER){
		/*---------------------------------------------------------------------------
		 * GMRES: Set the desired parameters:
		 *---------------------------------------------------------------------------*/
		ipar[14] = RESTART; // restart iteration number
		ipar[7] = 1; //do the stopping test
		ipar[10] = 1; // use preconditioner
		dpar[0] = TOL;
		/*---------------------------------------------------------------------------
		 * Check the correctness and consistency of the newly set parameters
		 *---------------------------------------------------------------------------*/
		dfgmres_check (&ivar, computed_solution, b, &RCI_request, ipar, dpar,
				tmp);
		RCI_flag=RCI_request;
	}

	MPI_Bcast(&RCI_flag,1,MPI_INT, MASTER, comm);
	if (RCI_flag != 0)
		goto FAILED;

	if (taskid==MASTER){
		/*---------------------------------------------------------------------------
		 * Print the info about the RCI FGMRES method
		 *---------------------------------------------------------------------------*/
		printf ("Some info about the current run of RCI FGMRES method:\n\n");
		if (ipar[7])
		{
			printf ("As ipar[7]=%d, the automatic test for the maximal number of ", ipar[7]);
			printf ("iterations will be\nperformed\n");
		}
		else
		{
			printf ("As ipar[7]=%d, the automatic test for the maximal number of ", ipar[7]);
			printf ("iterations will be\nskipped\n");
		}
		printf ("+++\n");
		if (ipar[8])
		{
			printf ("As ipar[8]=%d, the automatic residual test will be performed\n", ipar[8]);
		}
		else
		{
			printf ("As ipar[8]=%d, the automatic residual test will be skipped\n", ipar[8]);
		}
		printf ("+++\n");
		if (ipar[9])
		{
			printf ("As ipar[9]=%d, the user-defined stopping test will be ", ipar[9]);
			printf ("requested via\nRCI_request=2\n");
		}
		else
		{
			printf ("As ipar[9]=%d, the user-defined stopping test will not be ", ipar[9]);
			printf ("requested, thus,\nRCI_request will not take the value 2\n");
		}
		printf ("+++\n");
		if (ipar[10])
		{
			printf ("As ipar[10]=%d, the Preconditioned FGMRES iterations will be ", ipar[10]);
			printf ("performed, thus,\nthe preconditioner action will be requested via ");
			printf ("RCI_request=3\n");
		}
		else
		{
			printf ("As ipar[10]=%d, the Preconditioned FGMRES iterations will not ", ipar[10]);
			printf ("be performed,\nthus, RCI_request will not take the value 3\n");
		}
		printf ("+++\n");
		if (ipar[11])
		{
			printf ("As ipar[11]=%d, the automatic test for the norm of the next ", ipar[11]);
			printf ("generated vector is\nnot equal to zero up to rounding and ");
			printf ("computational errors will be performed,\nthus, RCI_request will not ");
			printf ("take the value 4\n");
		}
		else
		{
			printf ("As ipar[11]=%d, the automatic test for the norm of the next ", ipar[11]);
			printf ("generated vector is\nnot equal to zero up to rounding and ");
			printf ("computational errors will be skipped,\nthus, the user-defined test ");
			printf ("will be requested via RCI_request=4\n");
		}
		printf ("+++\n\n");
	}
	/*---------------------------------------------------------------------------
	 * Compute the solution by RCI (P)FGMRES solver with preconditioning
	 * Reverse Communication starts here
	 *---------------------------------------------------------------------------*/
ONE:
	if(taskid==MASTER){
		dfgmres (&ivar, computed_solution,b, &RCI_request, ipar, dpar, tmp);
		RCI_flag=RCI_request;  
	}
	MPI_Bcast(&RCI_flag,1,MPI_INT, MASTER, comm); // send RCI_request from node MASTER to all other nodes.
	
	/*---------------------------------------------------------------------------
	 * If RCI_request=0, then the solution was found with the required precision
	 *---------------------------------------------------------------------------*/
	if (RCI_flag == 0)
		goto COMPLETE;
	/*---------------------------------------------------------------------------
	 * If RCI_request=1, then compute the vector A*tmp[ipar[21]-1]
	 * and put the result in vector tmp[ipar[22]-1]
	 *---------------------------------------------------------------------------
	 * NOTE that ipar[21] and ipar[22] contain FORTRAN style addresses,
	 * therefore, in C code it is required to subtract 1 from them to get C style
	 * addresses
	 *---------------------------------------------------------------------------*/
	if (RCI_flag == 1)
	{
		if (taskid==MASTER){
			temp_1=&tmp[ipar[21] - 1];
			temp_2=&tmp[ipar[22] - 1];
		}

		mpi_dgemv(m,local_m,local_A,temp_1, temp_2,local_u,local_v,taskid, comm);

		goto ONE;
	}
	/*---------------------------------------------------------------------------
	 * If RCI_request=2, then do the user-defined stopping test
	 * The residual stopping test for the computed solution is performed here
	 *---------------------------------------------------------------------------
	 */
	if (RCI_flag == 2)
	{
		/* Request that dfgmres_get put the current iterate into f via ipar[12]=1
		   --------------------------------------------------------------------------------
		   WARNING: calling dfgmres_get with ipar[12]=0 at this stage may destroy the
		   convergence of the FGMRES method; only advanced users should exploit this
		   option, and with care */
		if (taskid==MASTER){
			ipar[12] = 1;
			/* Get the current FGMRES solution in the vector f */
			dfgmres_get (&ivar, computed_solution, f, &RCI_request, ipar, dpar, tmp, &itercount);
			temp_1=f;
			temp_2=residual;
		} 
		/* Compute the current true residual via mpi mat_vec multiplication */
		mpi_dgemv(m,local_m,local_A,temp_1,temp_2,local_u,local_v,taskid, comm);

		if(taskid==MASTER){
			dvar = -1.0E0;
			cblas_daxpy (ivar, dvar, b, incx, residual, incx);
			dvar = cblas_dnrm2 (ivar, residual, incx);
			printf("iteration %d, relative residual:%e\n",itercount, dvar);
		}

		MPI_Bcast(&dvar,1,MPI_DOUBLE, MASTER, comm); 
		if (dvar < TOL){
			goto COMPLETE;
		}
		else
			goto ONE;
	}
	/*---------------------------------------------------------------------------
	 * If RCI_request=3, then apply the preconditioner on the vector
	 * tmp[ipar[21]-1] and put the result in vector tmp[ipar[22]-1]
	 *---------------------------------------------------------------------------
	 * NOTE that ipar[21] and ipar[22] contain FORTRAN style addresses,
	 * therefore, in C code it is required to subtract 1 from them to get C style
	 * addresses
	 *---------------------------------------------------------------------------*/
	if (RCI_flag == 3)
	{
		if (taskid==MASTER){
			temp_1=&tmp[ipar[21] - 1];
			temp_2=&tmp[ipar[22] - 1];
		}
		/* start modification 3: apply the preconditioner, i.e. solve L U temp_2 = temp_1 */
		////mpi_preconditioner_solver(m,local_m,local_M,temp_1, temp_2,local_u,taskid,comm);
		// method 1 (unused): copy temp_1 into a separate rhs buffer and solve into that
		//double *rhs = (double*) malloc(sizeof(double)*m);
		//for(i=0; i<m; i++)
		//  rhs[i]=temp_1[i];
		// method 2: solve in place, with temp_2 initialized to the right-hand side temp_1
		for(i=0; i<m; i++)
			temp_2[i]=temp_1[i];
		LUSolver(LL, UU, temp_2, m, mm, taskid, NPROC);
		//free(rhs);
    
    /* end modification 3 */
		goto ONE;
	}
	/*---------------------------------------------------------------------------
	 * If RCI_request=4, then check if the norm of the next generated vector is
	 * not zero up to rounding and computational errors. The norm is contained
	 * in dpar[6] parameter
	 *---------------------------------------------------------------------------*/
	if (RCI_flag == 4)	
	{
		if(taskid==MASTER)
			dvar=dpar[6];
		MPI_Bcast(&dvar,1,MPI_DOUBLE, MASTER, comm); 
		if (dvar <1.0E-12 ){
			goto COMPLETE;
		}
		else
			goto ONE;
	}
	/*---------------------------------------------------------------------------
	 * If RCI_request=anything else, then dfgmres subroutine failed
	 * to compute the solution vector: computed_solution[N]
	 *---------------------------------------------------------------------------*/
	else
	{
		goto FAILED;
	}
	/*---------------------------------------------------------------------------
	 * Reverse Communication ends here
	 * Get the current iteration number and the FGMRES solution (DO NOT FORGET to
	 * call dfgmres_get routine as computed_solution is still containing
	 * the initial guess!). Request to dfgmres_get to put the solution
	 * into vector computed_solution[N] via ipar[12]
	 *---------------------------------------------------------------------------*/
COMPLETE:
	if(taskid==MASTER){
			 ipar[12] = 0;
			 dfgmres_get (&ivar, computed_solution,b, &RCI_request, ipar, dpar, tmp, &itercount);
			 /*---------------------------------------------------------------------------
			  * Print solution vector: computed_solution[N] and the number of iterations: itercount
			  *---------------------------------------------------------------------------*/
			 printf ("The system has been solved in %d iterations \n", itercount);
			 printf ("The following solution has been obtained (first 4 elements): \n");
			 for (i = 0; i < 4; i++)
			 {
				 printf ("computed_solution[%d]=", i);
				 printf ("%e\n", computed_solution[i]);
			 }

			 /*-------------------------------------------------------------------------*/
			 /* Release internal MKL memory that might be used for computations         */
			 /* NOTE: It is important to call the routine below to avoid memory leaks   */
			 /* unless you disable MKL Memory Manager                                   */
			 /*-------------------------------------------------------------------------*/
			 MKL_Free_Buffers ();
			 temp_1=computed_solution;
			 temp_2=residual;
		 }
		// compute the relative residual
		 mpi_dgemv(m,local_m,local_A,temp_1,temp_2,local_u,local_v,taskid, comm);
		 if(taskid==MASTER){
			 dvar = -1.0E0;
			 cblas_daxpy (ivar, dvar, b, incx, residual, incx);
			 dvar = cblas_dnrm2 (ivar, residual, incx);
			 printf("relative residual:%e\n",dvar/b_2norm);

			 if(itercount<MAXIT && dvar<TOL)
				 flag=0; //success
			 else 
				 flag=1; //fail

		 }

		 MPI_Bcast(&flag,1,MPI_INT, MASTER, comm); 
		
		free(local_A);
		free(local_M);
		free(local_u);
		free(local_v);
		if(taskid==MASTER){
			free(tmp);
			free(b);
			free(computed_solution);
			free(residual);
		}
		
		if(flag==0){
			 MPI_Finalize();
			 return 0;
		 }
		 else{
			 MPI_Finalize();
			 return 1;
		 }
		 /*-------------------------------------------------------------------------*/
		 /* Release internal MKL memory that might be used for computations         */
		 /* NOTE: It is important to call the routine below to avoid memory leaks   */
		 /* unless you disable MKL Memory Manager                                   */
		 /*-------------------------------------------------------------------------*/
FAILED:
		 if(taskid==MASTER){
			 printf ("\nThis example FAILED as the solver has returned the ERROR code %d", RCI_request);
			 MKL_Free_Buffers ();
		 }
		free(local_A);
		free(local_M);
		free(local_u);
		free(local_v);
		if(taskid==MASTER){
			free(tmp);
			free(b);
			free(computed_solution);
			free(residual);
		}

		 MPI_Finalize();
		 return 1;





}
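
Note: the helpers mtxBLU, blockLU, LUSolver, and mpi_dgemv are defined elsewhere and are not part of this listing. For orientation only, a minimal sketch of what mpi_dgemv could look like is shown below; it is an assumption based on the buffer sizes used above (local_u of length local_m, local_v of length m) and on the block-column distribution of the column-major matrix, not the actual implementation.

/* Hypothetical sketch of mpi_dgemv: computes y = A*x, where A is distributed as
 * blocks of local_m columns (column-major, leading dimension m) in local_A.
 * x and y are only significant on MASTER. Assumes <mpi.h> and <mkl.h> (CBLAS)
 * are included and MASTER is defined, as in the example above. */
void mpi_dgemv(int m, int local_m, double *local_A, double *x, double *y,
               double *local_u, double *local_v, int taskid, MPI_Comm comm)
{
	(void)taskid; /* kept only to match the call sites above */

	/* give each rank the slice of x that multiplies its column block */
	MPI_Scatter(x, local_m, MPI_DOUBLE, local_u, local_m, MPI_DOUBLE, MASTER, comm);

	/* local_v = local_A (m x local_m) * local_u */
	cblas_dgemv(CblasColMajor, CblasNoTrans, m, local_m, 1.0,
	            local_A, m, local_u, 1, 0.0, local_v, 1);

	/* sum the partial products into y on MASTER */
	MPI_Reduce(local_v, y, m, MPI_DOUBLE, MPI_SUM, MASTER, comm);
}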