/*
 * lis_idr1 - iterative solve driven entirely by the state held in `solver`.
 * Presumably the IDR(s) method with the shadow-space size fixed to s = 1
 * (inferred from the name, from `s = 1` below and from lis_idrs_orth) -
 * TODO confirm.
 *
 * Read from `solver`:  A, b, x, options[MAXITER|OUTPUT|CONV_COND], tol and
 *                      the work vectors work[0..6], used as
 *                      r, t, v, av, P[0], dX[0], dR[0].
 * Written to `solver`: the solution vector (through the alias `x`),
 *                      retcode, iter, resid, ptimes (success exits only)
 *                      and residual[] when LIS_PRINT_MEM is requested.
 *
 * Returns LIS_SUCCESS when lis_solver_get_initial_residual() reports a
 * non-zero status or as soon as `tol >= nrm2`; returns LIS_MAXITER once
 * `iter` exceeds `maxiter`.
 *
 * NOTE(review): dX[0] and dR[0] are only produced by the PRE_RIGHT branch
 * of the initial step. The PRE_BOTH branch fills t, av and v instead (and
 * prints av), and with neither macro defined nothing here writes them
 * before they are read - confirm which configuration is actually built.
 * NOTE(review): neither `om / h` nor `m / M` is guarded against a zero
 * denominator, unlike the breakdown tests in the other solvers.
 * NOTE(review): every pass of the while loop advances `iter` twice, so
 * residual[] can be indexed up to maxiter+2 - verify the allocation size.
 */
LIS_INT lis_idr1(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,t,v,av,*dX,*dR,*P;
	LIS_SCALAR om, h;
	LIS_SCALAR M,m,c;
	LIS_REAL   bnrm2, nrm2, tol;
	LIS_REAL   angle;
	LIS_INT i,j,k,s,oldest;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes,tim;
    unsigned long init[4]={0x123, 0x234, 0x345, 0x456}, length=4;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	s       = 1;
	ptimes  = 0.0;

	/* work vectors; with s = 1 the three arrays are work[4], work[5], work[6] */
	r       = solver->work[0];
	t       = solver->work[1];
	v       = solver->work[2];
	av      = solver->work[3];
	P       = &solver->work[4];
	dX      = &solver->work[4+s];
	dR      = &solver->work[4+2*s];

	/* only referenced by the commented-out lis_idrs_omega() calls below */
	angle   = 0.7;


	/* Initial residual; a non-zero status ends the solve immediately */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	/* fill P[0] from the generator seeded with the fixed `init` array,
	   so every run starts from the same seed */
	init_by_array(init, length);
		for(i=0;i<n;i++)
		{
			P[0]->value[i] = genrand_real1();
		}
		/*
	lis_vector_copy(r,P[0]);
		*/
	lis_idrs_orth(s,P);

		/* initial step: the first direction pair comes from the residual */
		#ifdef PRE_RIGHT
			times = lis_wtime();
			lis_psolve(solver, r, dX[0]);
			ptimes += lis_wtime()-times;
			LIS_MATVEC(A,dX[0],dR[0]);
		#else
		#ifdef PRE_BOTH
			/* NOTE(review): this branch leaves dX[0]/dR[0] untouched and
			   dumps av through lis_vector_print() */
			times = lis_wtime();
			lis_psolve_right(solver, r, t);
			ptimes += lis_wtime()-times;
			LIS_MATVEC(A,t,av);
			lis_vector_print(av);
			times = lis_wtime();
			lis_psolve_left(solver, av, v);
			ptimes += lis_wtime()-times;
		#endif
		#endif

			/*
		lis_idrs_omega(dR[k],r,angle,&om);
			*/
		/* om = <dR,r> / <dR,dR>, then dX = om*dX and dR = -om*dR */
		lis_vector_dot(dR[0],dR[0],&h);
		lis_vector_dot(dR[0],r,&om);
		om = om / h;
		lis_vector_scale(om,dX[0]);
		lis_vector_scale(-om,dR[0]);

		/* x = x + dX, r = r + dR */
		lis_vector_axpy(1.0,dX[0],x);
		lis_vector_axpy(1.0,dR[0],r);


		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);

		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[1] =
nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 )
printf("iter: %5d  residual = %e\n", 1, nrm2);
		}

		if( tol >= nrm2 )
		{

			solver->retcode    = LIS_SUCCESS;
			solver->iter       = 1;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* M = <P,dR> */
		lis_vector_dot(P[0],dR[0],&M);

	/* the initial step counts as iteration s (= 1) */
	iter = s;
	oldest = 0;
	/* m = <P,r> */
	lis_vector_dot(P[0],r,&m);

	/* each pass performs two updates and therefore increments iter twice */
	while( iter<=maxiter )
	{
		tim = lis_wtime();

		/* ---- first update of the pass ---- */

		/* solve Mc=m (scalar case) */
		c = m/M;

		/* v = r - c*dR */
		for(i=0;i<n;i++)
		{
			v->value[i] = r->value[i] + -c*dR[0]->value[i];
		}
		/*
		lis_vector_copy(r,v);
		lis_vector_axpy(-c,dR[0],v);
		*/

			/* t = A * (preconditioned v); av keeps the preconditioned vector
			   in the PRE_RIGHT build */
			#ifdef PRE_RIGHT
				times = lis_wtime();
				lis_psolve(solver, v, av);
				ptimes += lis_wtime()-times;
				LIS_MATVEC(A,av,t);
			#else
			#ifdef PRE_BOTH
				times = lis_wtime();
				lis_psolve_right(solver, v, t);
				ptimes += lis_wtime()-times;
				LIS_MATVEC(A,t,av);
				times = lis_wtime();
				lis_psolve_left(solver, av, t);
				ptimes += lis_wtime()-times;
			#endif
			#endif

				/*
			lis_idrs_omega(t,v,angle,&om);
			lis_vector_dot(t,t,&h);
			lis_vector_dot(t,v,&om);
				*/
			/* om = <t,v> / <t,t>, both sums accumulated in one local loop
			   over the n entries held in value[] */
			h  = t->value[0]*t->value[0];
			om = t->value[0]*v->value[0];
			for(i=1;i<n;i++)
			{
				h  += t->value[i]*t->value[i];
				om += t->value[i]*v->value[i];
			}
			om = om / h;
			/*
			printf("i=%d om = %lf\n",iter,om);
			*/
			/* the disabled variant below is the general-s form (c indexed
			   per direction); the active branch is its s = 1 specialisation */
			#if 0
				lis_vector_scale(-om,t);
				for(j=0;j<s;j++)
				{
					lis_vector_axpy(-c[j],dR[j],t);
				}
				lis_vector_copy(t,dR[oldest]);
				lis_vector_scale(om,av);
				for(j=0;j<s;j++)
				{
					lis_vector_axpy(-c[j],dX[j],av);
				}
				lis_vector_copy(av,dX[oldest]);
			#else
				/* dX = om*av - c*dX ; dR = -om*t - c*dR (h is reused as a
				   per-element temporary here) */
				for(i=0;i<n;i++)
				{
					h = om*av->value[i];
					h -= dX[0]->value[i] * c;
					dX[0]->value[i] = h;
					h = -om*t->value[i];
					h -= dR[0]->value[i] * c;
					dR[0]->value[i] = h;
				}
			#endif

		/* r = r + dR, x = x + dX */
		lis_vector_axpy(1.0,dR[0],r);
		lis_vector_axpy(1.0,dX[0],x);

		iter++;

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);

		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter]
= nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 )
printf("iter: %5d  residual = %e\n", iter, nrm2);
		}

		if( tol >= nrm2 )
		{

			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* h = <P,dR>; m accumulates it and M is replaced by it */
		lis_vector_dot(P[0],dR[0],&h);
		m += h;
		M = h;




		/* ---- second update of the pass: reuses om from the first ---- */

		/* solve Mc=m */
		c = m/M;

		/* v = r - c*dR */
		for(i=0;i<n;i++)
		{
			v->value[i] = r->value[i] + -c*dR[0]->value[i];
		}
		/*
		lis_vector_copy(r,v);
		lis_vector_axpy(-c,dR[0],v);
		*/

			/* NOTE(review): av is refreshed only in the PRE_RIGHT build;
			   otherwise the value from the first update is reused */
			#ifdef PRE_RIGHT
				times = lis_wtime();
				lis_psolve(solver, v, av);
				ptimes += lis_wtime()-times;
			#endif

			#if 0
				lis_vector_scale(om,av);
				for(j=0;j<s;j++)
				{
					lis_vector_axpy(-c[j],dX[j],av);
				}
				lis_vector_copy(av,dX[oldest]);
			#else
				/* dX = om*av - c*dX */
				for(i=0;i<n;i++)
				{
					h = om*av->value[i];
					h -= dX[0]->value[i] * c;
					dX[0]->value[i] = h;
				}
			#endif

			/* dR = -A*dX */
			LIS_MATVEC(A,dX[0],dR[0]);
			lis_vector_scale(-1.0,dR[0]);

		/* r = r + dR, x = x + dX */
		lis_vector_axpy(1.0,dR[0],r);
		lis_vector_axpy(1.0,dX[0],x);

		iter++;

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);

		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter]
= nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 )
printf("iter: %5d  residual = %e\n", iter, nrm2);
		}

		if( tol >= nrm2 )
		{

			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* h = <P,dR>; m accumulates it and M is replaced by it */
		lis_vector_dot(P[0],dR[0],&h);
		m += h;
		M = h;

		/* elapsed time of this pass; only used by the disabled printf */
		tim = lis_wtime() - tim;
		/*
		printf("update m,M: %e\n",tim);
		*/
	}
	/* iteration budget exhausted (ptimes is not stored on this path) */
	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_cgs_switch - two-phase CGS iteration driven by the state in `solver`.
 *
 * Phase 1 runs at most options[SWITCH_MAXITER] iterations using the plain
 * vector routines on the `.hi` parts of the scalars, with uhat, p and phat
 * tagged LIS_PRECISION_DEFAULT, and stops as soon as nrm2 <= tol_switch.
 * Phase 2 re-tags those vectors LIS_PRECISION_QUAD, recomputes the residual
 * from the current x and continues with the `*ex_*` / lis_quad_* routines
 * until tol > nrm2 or the iteration counter passes options[MAXITER].
 *
 * Written to `solver`: x, xx, retcode, iter (total count), iter2 (phase-1
 * count), resid, ptimes (phase-1 exit and success exit only), residual[]
 * when LIS_PRINT_MEM is set, and options[INITGUESS_ZEROS] (forced to
 * LIS_FALSE before phase 2).
 *
 * Returns LIS_SUCCESS when the initial residual check reports a non-zero
 * status or when phase 2 converges, LIS_BREAKDOWN when rho or <rtld,vhat>
 * is exactly zero, LIS_MAXITER otherwise.
 */
LIS_INT lis_cgs_switch(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,rtld, p,phat, q, qhat, u, uhat, vhat;
	LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1, one;
	LIS_REAL   bnrm2, nrm2, tol, tol2;
	LIS_INT iter,maxiter,n,output,conv;
	LIS_INT iter2,maxiter2;
	double times,ptimes;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	M       = solver->precon;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter  = solver->options[LIS_OPTIONS_MAXITER];
	maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
	output   = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	tol      = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	tol2     = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
	ptimes  = 0.0;

	/*
	 * u shares work[5] with qhat and vhat shares work[6] with uhat. Within
	 * one iteration u is last read before qhat is written, and vhat is last
	 * read before uhat is written, so the pairs never hold live data at the
	 * same time.
	 */
	r       = solver->work[0];
	rtld    = solver->work[1];
	p       = solver->work[2];
	phat    = solver->work[3];
	q       = solver->work[4];
	qhat    = solver->work[5];
	u       = solver->work[5];
	uhat    = solver->work[6];
	vhat    = solver->work[6];

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
	LIS_QUAD_SCALAR_MALLOC(one,6,1);
	rho_old.hi[0] = 1.0;
	rho_old.lo[0] = 0.0;
	alpha.hi[0]   = 1.0;
	alpha.lo[0]   = 0.0;
	one.hi[0]   = 1.0;
	one.lo[0]   = 0.0;

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol2     = solver->tol_switch;

	/*
	 * Seed nrm2 from the initial residual. Without this, a breakdown in the
	 * very first iteration (or running both loops zero times) stored an
	 * uninitialised value in solver->resid.
	 */
	lis_solver_get_residual[conv](r,solver,&nrm2);

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0, q);
	lis_vector_set_allex_nm(0.0, p);

	/* ---- phase 1: default precision ---- */
	uhat->precision = LIS_PRECISION_DEFAULT;
	p->precision = LIS_PRECISION_DEFAULT;
	phat->precision = LIS_PRECISION_DEFAULT;

	for( iter=1; iter<=maxiter2; iter++ )
	{
			/* rho = <rtld,r> */
			lis_vector_dot(rtld,r,&rho.hi[0]);

			/* test breakdown */
			if( rho.hi[0]==0.0 )
			{
				solver->retcode   = LIS_BREAKDOWN;
				solver->iter      = iter;
				solver->iter2     = iter;
				solver->resid     = nrm2;
				LIS_DEBUG_FUNC_OUT;
				return LIS_BREAKDOWN;
			}

			/* beta = (rho / rho_old) */
			beta.hi[0] = (rho.hi[0] / rho_old.hi[0]);

			/* u = r + beta*q */
			lis_vector_axpyz(beta.hi[0],q,r,u);

			/* p = u + beta*(q + beta*p) */
			lis_vector_xpay(q,beta.hi[0],p);
			lis_vector_xpay(u,beta.hi[0],p);

			/* phat = M^-1 * p */
			times = lis_wtime();
			lis_psolve(solver, p, phat);
			ptimes += lis_wtime()-times;

			/* v = A * phat */
			LIS_MATVEC(A,phat,vhat);

			/* tmpdot1 = <rtld,vhat> */
			lis_vector_dot(rtld,vhat,&tmpdot1.hi[0]);
			/* test breakdown */
			if( tmpdot1.hi[0]==0.0 )
			{
				solver->retcode   = LIS_BREAKDOWN;
				solver->iter      = iter;
				solver->iter2     = iter;
				solver->resid     = nrm2;
				LIS_DEBUG_FUNC_OUT;
				return LIS_BREAKDOWN;
			}

			/* alpha = rho / tmpdot1 */
			alpha.hi[0] = rho.hi[0] / tmpdot1.hi[0];

			/* q = u - alpha*vhat */
			lis_vector_axpyz(-alpha.hi[0],vhat,u,q);

			/* phat = u + q          */
			/* uhat = M^-1 * (u + q) */
			lis_vector_axpyz(1.0,u,q,phat);
			times = lis_wtime();
			lis_psolve(solver, phat, uhat);
			ptimes += lis_wtime()-times;

			/* x = x + alpha*uhat */
			lis_vector_axpy(alpha.hi[0],uhat,x);

			/* qhat = A * uhat */
			LIS_MATVEC(A,uhat,qhat);

			/* r = r - alpha*qhat */
			lis_vector_axpy(-alpha.hi[0],qhat,r);

			/* convergence check */
			lis_solver_get_residual[conv](r,solver,&nrm2);
			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
			}

			/* switch threshold reached: leave phase 1, keep iter as is */
			if( nrm2 <= tol2 )
			{
				solver->iter       = iter;
				solver->iter2     = iter;
				solver->ptimes     = ptimes;
				break;
			}

			rho_old.hi[0] = rho.hi[0];
	}

	/* ---- phase 2: quad precision, restarted from the current x ---- */
	uhat->precision = LIS_PRECISION_QUAD;
	p->precision = LIS_PRECISION_QUAD;
	phat->precision = LIS_PRECISION_QUAD;

	/* keep the phase-1 solution as the starting guess */
	solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
	lis_vector_copyex_mn(x,solver->xx);
	rho_old.hi[0] = 1.0;

	lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
	tol     = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0, q);
	lis_vector_set_allex_nm(0.0, p);


	/* the counter continues after the phase-1 count */
	for( iter2=iter+1; iter2<=maxiter; iter2++ )
	{
			/* rho = <rtld,r> */
			lis_vector_dotex_mmm(rtld,r,&rho);

			/* test breakdown */
			if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
			{
				solver->retcode   = LIS_BREAKDOWN;
				solver->iter       = iter2;
				solver->iter2      = iter;
				solver->resid     = nrm2;
				LIS_DEBUG_FUNC_OUT;
				return LIS_BREAKDOWN;
			}

			/* beta = (rho / rho_old) */
			lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);

			/* u = r + beta*q */
			lis_vector_axpyzex_mmmm(beta,q,r,u);

			/* p = u + beta*(q + beta*p) */
			lis_vector_xpayex_mmm(q,beta,p);
			lis_vector_xpayex_mmm(u,beta,p);

			/* phat = M^-1 * p */
			times = lis_wtime();
			lis_psolve(solver, p, phat);
			ptimes += lis_wtime()-times;

			/* v = A * phat */
			LIS_MATVEC(A,phat,vhat);

			/* tmpdot1 = <rtld,vhat> */
			lis_vector_dotex_mmm(rtld,vhat,&tmpdot1);
			/* test breakdown */
			if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
			{
				solver->retcode   = LIS_BREAKDOWN;
				solver->iter       = iter2;
				solver->iter2      = iter;
				solver->resid     = nrm2;
				LIS_DEBUG_FUNC_OUT;
				return LIS_BREAKDOWN;
			}

			/* alpha = rho / tmpdot1 */
			lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);

			/* q = u - alpha*vhat  (alpha is negated in place: now -alpha) */
			lis_quad_minus((LIS_QUAD *)alpha.hi);
			lis_vector_axpyzex_mmmm(alpha,vhat,u,q);

			/* phat = u + q          */
			/* uhat = M^-1 * (u + q) */
			lis_vector_axpyzex_mmmm(one,u,q,phat);
			times = lis_wtime();
			lis_psolve(solver, phat, uhat);
			ptimes += lis_wtime()-times;

			/* x = x + alpha*uhat  (sign flipped back: now +alpha) */
			lis_quad_minus((LIS_QUAD *)alpha.hi);
			lis_vector_axpyex_mmm(alpha,uhat,x);

			/* qhat = A * uhat */
			LIS_MATVEC(A,uhat,qhat);

			/* r = r - alpha*qhat  (negated again; alpha is recomputed next pass) */
			lis_quad_minus((LIS_QUAD *)alpha.hi);
			lis_vector_axpyex_mmm(alpha,qhat,r);

			/* convergence check */
			lis_solver_get_residual[conv](r,solver,&nrm2);
			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter2, nrm2);
			}

			if( tol > nrm2 )
			{
				solver->retcode    = LIS_SUCCESS;
				solver->iter       = iter2;
				solver->iter2      = iter;
				solver->resid      = nrm2;
				solver->ptimes     = ptimes;
				LIS_DEBUG_FUNC_OUT;
				return LIS_SUCCESS;
			}

			rho_old.hi[0] = rho.hi[0];
			rho_old.lo[0] = rho.lo[0];
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter2;
	solver->iter2     = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/* Example #3 */
/* 0 */
/*
 * lis_orthomin_quad - restarted/truncated Orthomin-style iteration using the
 * quad-precision (`*ex_*`, lis_quad_*) routines, driven by the state held
 * in `solver`.
 *
 * The direction vectors p[], ap[] and aptld[] are circular buffers of
 * m+1 slots each (m = options[RESTART]) laid out back to back starting at
 * work[2]. dotsave[l] caches 1/<aptld,aptld> of the direction that is l+1
 * steps old, so each new direction is orthogonalised against at most m
 * previous ones.
 *
 * Written to `solver`: x, retcode, iter, resid, ptime (success exit only)
 * and rhistory[] when LIS_PRINT_MEM is set.
 *
 * Returns LIS_SUCCESS when the initial residual check reports a non-zero
 * status or when tol > nrm2, LIS_BREAKDOWN when <aptld,aptld> is exactly
 * zero, LIS_MAXITER otherwise. The dotsave buffer is released on every
 * exit path.
 */
LIS_INT lis_orthomin_quad(LIS_SOLVER solver)
{
	LIS_Comm comm;
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR x;
	LIS_VECTOR r, rtld, *p, *ap, *aptld;
	LIS_QUAD *dotsave;
	LIS_QUAD_PTR alpha, beta, tmp, one;

	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,output,conv;
	double time,ptime;

	LIS_INT m,l,lmax,ip,ip0;

	LIS_DEBUG_FUNC_IN;

	comm = LIS_COMM_WORLD;

	A       = solver->A;
	M       = solver->precon;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	m       = solver->options[LIS_OPTIONS_RESTART];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptime   = 0.0;

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,3,1);
	LIS_QUAD_SCALAR_MALLOC(one,4,1);

	r       = solver->work[0];
	rtld    = solver->work[1];
	p       = &solver->work[2];
	ap      = &solver->work[  (m+1)+2];
	aptld   = &solver->work[2*(m+1)+2];

	one.hi[0] = 1.0;
	one.lo[0] = 0.0;

	dotsave = (LIS_QUAD *)lis_malloc( sizeof(LIS_QUAD) * (m+1),"lis_orthomin_quad::dotsave" );

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,M,r,rtld,&bnrm2) )
	{
		/* dotsave was leaked on this path before */
		lis_free(dotsave);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	/*
	 * Seed nrm2 from the initial residual so that a first-iteration
	 * breakdown, or maxiter < 1, does not store an uninitialised value in
	 * solver->resid.
	 */
	lis_solver_get_residual[conv](r,solver,&nrm2);


	iter=1;
	while( iter<=maxiter )
	{
		/* slot of the circular buffers used by this iteration */
		ip = (iter-1) % (m+1);

		/* p[ip] = rtld */
		lis_vector_copyex_mm(rtld,p[ip]);

		/* ap[ip]    = A*p[ip] */
		/* aptld[ip] = M^-1 ap[ip] */
		lis_matvec(A,p[ip],ap[ip]);
		time = lis_wtime();
		lis_psolve(solver, ap[ip], aptld[ip]);
		ptime += lis_wtime()-time;

		/* orthogonalise against the (at most m) previous directions */
		lmax = _min(m,iter-1);
		for(l=1;l<=lmax;l++)
		{
			/* slot of the direction that is l steps old */
			ip0 = (ip+m+1-l) % (m+1);
			/* beta = -<Ar[ip],Ap[ip0]> / <Ap[ip0],Ap[ip0]> */
			lis_vector_dotex_mmm(aptld[ip],aptld[ip0],&beta);
			lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,&dotsave[l-1]);
			lis_quad_minus((LIS_QUAD *)beta.hi);

			lis_vector_axpyex_mmm(beta,p[ip0]    ,p[ip]);
			lis_vector_axpyex_mmm(beta,ap[ip0]   ,ap[ip]);
			lis_vector_axpyex_mmm(beta,aptld[ip0],aptld[ip]);
		}
		/* age the cached reciprocals by one step to free dotsave[0] */
		for(l=m-1;l>0;l--)
		{
			dotsave[l] = dotsave[l-1];
		}

		lis_vector_dotex_mmm(aptld[ip],aptld[ip],&tmp);
		dotsave[0].hi = tmp.hi[0];
		dotsave[0].lo = tmp.lo[0];
		/* test breakdown */
		if( tmp.hi[0]==0.0 && tmp.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			lis_free(dotsave);
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}
		/* dotsave[0] = 1 / <aptld[ip],aptld[ip]> */
		lis_quad_div(&dotsave[0],(LIS_QUAD *)one.hi,&dotsave[0]);

		/* alpha = <rtld,Aptld[ip]> / <Aptld[ip],Aptld[ip]> */
		lis_vector_dotex_mmm(rtld,aptld[ip],&alpha);
		lis_quad_mul((LIS_QUAD *)alpha.hi,(LIS_QUAD *)alpha.hi,&dotsave[0]);

		/* x += alpha*p ; r -= alpha*ap ; rtld -= alpha*aptld */
		lis_vector_axpyex_mmm( alpha,p[ip],x);
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,ap[ip],r);
		lis_vector_axpyex_mmm(alpha,aptld[ip],rtld);
		lis_quad_minus((LIS_QUAD *)alpha.hi);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT ) lis_print_rhistory(comm,iter,nrm2);
		}

		if( tol > nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			lis_free(dotsave);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		iter++;
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	lis_free(dotsave);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/* Example #4 */
/* 0 */
/*
 * lis_gmres_switch - two-phase restarted GMRES driven by the state held in
 * `solver`.
 *
 * Phase 1 runs at most options[SWITCH_MAXITER] iterations with the plain
 * vector routines and stops once the residual estimate reaches
 * tol_switch. Phase 2 recomputes the residual from the current x and
 * repeats the same scheme with the `*ex_*` / lis_quad_* routines until the
 * estimate reaches solver->tol or options[MAXITER] is exhausted. The
 * restart length is m = options[RESTART].
 *
 * Storage: `h` holds the Hessenberg columns (column i at offset i*h_dim)
 * followed by the rotation cosines at offset `cs` and sines at offset
 * `sn`. Phase 1 addresses the same buffer through `hd` as plain
 * LIS_SCALARs; phase 2 uses it as LIS_QUAD entries.
 *
 * Written to `solver`: x, xx, retcode, iter (total count), iter2 (phase-1
 * count), resid, ptime (phase-1 exit and success exit only), rhistory[]
 * when LIS_PRINT_MEM is set, and options[INITGUESS_ZEROS] (forced to
 * LIS_FALSE before phase 2).
 *
 * Returns LIS_SUCCESS when the initial residual check reports a non-zero
 * status or when phase 2 converges, LIS_MAXITER otherwise. `h` is freed
 * on every return path.
 *
 * NOTE(review): phase 1 replaces a zero rotation norm by 1.0e-17; phase 2
 * has no equivalent guard before dividing by rr.
 */
LIS_INT lis_gmres_switch(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,s,z,*v;
	LIS_QUAD *h;
	LIS_SCALAR *hd;
	LIS_QUAD_PTR aa,bb,rr,a2,b2,t,one,tmp;
	LIS_QUAD_PTR rnorm;
	LIS_REAL bnrm2,nrm2,tol,tol2;
	LIS_INT iter,maxiter,n,output;
	LIS_INT iter2,maxiter2;
	double time,ptime;

	LIS_INT i,j,k,m;
	LIS_INT ii,i1,iiv,i1v,iih,jj;
	LIS_INT h_dim;
	LIS_INT cs,sn;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter  = solver->options[LIS_OPTIONS_MAXITER];
	maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
	output   = solver->options[LIS_OPTIONS_OUTPUT];
	tol      = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	tol2     = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
	m        = solver->options[LIS_OPTIONS_RESTART];
	h_dim    = m+1;
	ptime    = 0.0;

	s       = solver->work[0];
	r       = solver->work[1];
	z       = solver->work[2];
	v       = &solver->work[3];

	LIS_QUAD_SCALAR_MALLOC(aa,0,1);
	LIS_QUAD_SCALAR_MALLOC(bb,1,1);
	LIS_QUAD_SCALAR_MALLOC(rr,2,1);
	LIS_QUAD_SCALAR_MALLOC(a2,3,1);
	LIS_QUAD_SCALAR_MALLOC(b2,4,1);
	LIS_QUAD_SCALAR_MALLOC(t,5,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
	LIS_QUAD_SCALAR_MALLOC(one,7,1);
	LIS_QUAD_SCALAR_MALLOC(rnorm,8,1);

	h       = (LIS_QUAD *)lis_malloc( sizeof(LIS_QUAD)*(h_dim+1)*(h_dim+2),"lis_gmres_switch::h" );
	hd      = (LIS_SCALAR *)h;
	/* offsets of the rotation cosines / sines behind the Hessenberg columns */
	cs      = (m+1)*h_dim;
	sn      = (m+2)*h_dim;
	one.hi[0]   = 1.0;
	one.lo[0]   = 0.0;

	/* ---- phase 1: default precision ---- */
	z->precision = LIS_PRECISION_DEFAULT;

	/* r = M^-1 * (b - A * x) */
	lis_matvec(A,x,z);
	lis_vector_xpay(b,-1.0,z);
	lis_psolve(solver,z,v[0]);

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
	{
		lis_free(h);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol2     = solver->tol_switch;


	iter=0;
	while( iter<maxiter2 )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_nrm2(v[0],&rnorm.hi[0]);
		lis_vector_scale(1.0/rnorm.hi[0],v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_all(0,s);
		s->value[0] = rnorm.hi[0];

		/* one restart cycle: at most m inner steps */
		i = 0;
		do
		{
			iter++;
			i++;
			ii  = i-1;
			i1  = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;


			/* z = M^-1 * v */
			time = lis_wtime();
			lis_psolve(solver,v[iiv],z);
			ptime += lis_wtime()-time;

			/* w = A * z */
			lis_matvec(A,z, v[i1v]);

			for(k=0;k<i;k++)
			{
				/* h[k,i]   = <w,v[k]>          */
				/* w        = w - h[k,i] * v[k] */
				lis_vector_dot(v[i1v],v[k],&t.hi[0]);
				hd[k+iih] = t.hi[0];
				lis_vector_axpy(-t.hi[0],v[k],v[i1v]);
			}
			/* h[i+1,i] = ||w||          */
			/* v[i+1]   = w / h[i+1,i]   */
			lis_vector_nrm2(v[i1v],&t.hi[0]);
			hd[i1+iih] = t.hi[0];
			lis_vector_scale(1.0/t.hi[0],v[i1v]);

			/* apply the rotations of the previous steps to the new column */
			for(k=1;k<=ii;k++)
			{
				jj        = k-1;
				t.hi[0]   =  hd[jj+iih];
				aa.hi[0]  =  hd[jj+cs]*t.hi[0];
				aa.hi[0] +=  hd[jj+sn]*hd[k+iih];
				bb.hi[0]  = -hd[jj+sn]*t.hi[0];
				bb.hi[0] +=  hd[jj+cs]*hd[k+iih];
				hd[jj+iih] = aa.hi[0];
				hd[k+iih] = bb.hi[0];
			}
			/* new rotation that zeroes the sub-diagonal entry */
			aa.hi[0] = hd[ii+iih];
			bb.hi[0] = hd[i1+iih];
			a2.hi[0] = aa.hi[0]*aa.hi[0];
			b2.hi[0] = bb.hi[0]*bb.hi[0];
			rr.hi[0] = sqrt(a2.hi[0]+b2.hi[0]);
			if( rr.hi[0]==0.0 ) rr.hi[0]=1.0e-17;
			hd[ii+cs] = aa.hi[0]/rr.hi[0];
			hd[ii+sn] = bb.hi[0]/rr.hi[0];
			s->value[i1] = -hd[ii+sn]*s->value[ii];
			s->value[ii] =  hd[ii+cs]*s->value[ii];

			aa.hi[0]  =  hd[ii+cs]*hd[ii+iih];
			aa.hi[0] +=  hd[ii+sn]*hd[i1+iih];
			hd[ii+iih] = aa.hi[0];

			/* convergence check */
			nrm2 = fabs(s->value[i1])*bnrm2;

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
			}

			if( tol2 >= nrm2 ) break;
		} while( i<m && iter <maxiter2 );

		/* Solve H * Y = S for upper Hessenberg matrix H */
		s->value[ii] = s->value[ii]/hd[ii+iih];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t.hi[0]  = s->value[jj];
			for(j=jj+1;j<=ii;j++)
			{
				t.hi[0] -= hd[jj+j*h_dim]*s->value[j];
			}
			s->value[jj] = t.hi[0]/hd[jj+jj*h_dim];
		}
		/* z = z + y * v */
		for(k=0;k<n;k++)
		{
			z->value[k] = s->value[0]*v[0]->value[k];
		}
		for(j=1;j<=ii;j++)
		{
			lis_vector_axpy(s->value[j],v[j],z);
		}
		/* r = M^-1 * z */
		time = lis_wtime();
		lis_psolve(solver,z,r);
		ptime += lis_wtime()-time;

		/* x = x + r */
		lis_vector_axpy(1,r,x);

		/* switch threshold reached: leave phase 1 */
		if( tol2 >= nrm2 )
		{
			solver->iter       = iter;
			solver->iter2      = iter;
			solver->ptime      = ptime;
			break;
		}

		/* rebuild the residual for the next restart cycle into v[0] */
		for(j=1;j<=i;j++)
		{
			jj = i1-j+1;
			s->value[jj-1] = -hd[jj-1+sn]*s->value[jj];
			s->value[jj]   =  hd[jj-1+cs]*s->value[jj];
		}

		for(j=0;j<=i1;j++)
		{
			t.hi[0] = s->value[j];
			if( j==0 ) t.hi[0] = t.hi[0]-1.0;
			lis_vector_axpy(t.hi[0],v[j],v[0]);
		}
	}

	/* ---- phase 2: quad precision, restarted from the current x ---- */
	/* Initial Residual */
	z->precision = LIS_PRECISION_QUAD;

	solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
	lis_vector_copyex_mn(x,solver->xx);

	lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2);
	tol     = solver->tol;


	/* the counter continues from the phase-1 count */
	iter2=iter;
	while( iter2<maxiter )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_nrm2ex_mm(v[0],&rnorm);
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)one.hi,(LIS_QUAD *)rnorm.hi);
		lis_vector_scaleex_mm(tmp,v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_allex_nm(0.0,s);
		s->value[0]    = rnorm.hi[0];
		s->value_lo[0] = rnorm.lo[0];

		i = 0;
		do
		{
			iter2++;
			i++;
			ii  = i-1;
			i1  = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;


			/* z = M^-1 * v */
			time = lis_wtime();
			lis_psolve(solver,v[iiv],z);
			ptime += lis_wtime()-time;

			/* w = A * z */
			lis_matvec(A,z, v[i1v]);

			for(k=0;k<i;k++)
			{
				/* h[k,i]   = <w,v[k]>          */
				/* w        = w - h[k,i] * v[k] */
				lis_vector_dotex_mmm(v[i1v],v[k],&t);
				h[k+iih].hi = t.hi[0];
				h[k+iih].lo = t.lo[0];
				lis_quad_minus((LIS_QUAD *)t.hi);
				lis_vector_axpyex_mmm(t,v[k],v[i1v]);
			}
			/* h[i+1,i] = ||w||          */
			/* v[i+1]   = w / h[i+1,i]   */
			lis_vector_nrm2ex_mm(v[i1v],&t);
			h[i1+iih].hi = t.hi[0];
			h[i1+iih].lo = t.lo[0];
			lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)one.hi,(LIS_QUAD *)t.hi);
			lis_vector_scaleex_mm(tmp,v[i1v]);

			/* apply the rotations of the previous steps to the new column */
			for(k=1;k<=ii;k++)
			{
				jj  = k-1;
				t.hi[0]   =  h[jj+iih].hi;
				t.lo[0]   =  h[jj+iih].lo;
				lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)t.hi);
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)&h[k+iih]);
				lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
				lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)t.hi);
				lis_quad_minus((LIS_QUAD *)bb.hi);
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)&h[k+iih]);
				lis_quad_add((LIS_QUAD *)bb.hi,(LIS_QUAD *)bb.hi,(LIS_QUAD *)tmp.hi);
				h[jj+iih].hi = aa.hi[0];
				h[jj+iih].lo = aa.lo[0];
				h[k+iih].hi = bb.hi[0];
				h[k+iih].lo = bb.lo[0];
			}
			/* new rotation that zeroes the sub-diagonal entry */
			aa.hi[0] = h[ii+iih].hi;
			aa.lo[0] = h[ii+iih].lo;
			bb.hi[0] = h[i1+iih].hi;
			bb.lo[0] = h[i1+iih].lo;
			lis_quad_sqr((LIS_QUAD *)a2.hi,(LIS_QUAD *)aa.hi);
			lis_quad_sqr((LIS_QUAD *)b2.hi,(LIS_QUAD *)bb.hi);
			lis_quad_add((LIS_QUAD *)rr.hi,(LIS_QUAD *)a2.hi,(LIS_QUAD *)b2.hi);
			lis_quad_sqrt((LIS_QUAD *)rr.hi,(LIS_QUAD *)rr.hi);
			lis_quad_div((LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)aa.hi,(LIS_QUAD *)rr.hi);
			lis_quad_div((LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)bb.hi,(LIS_QUAD *)rr.hi);
			tmp.hi[0] = s->value[ii];
			tmp.lo[0] = s->value_lo[ii];
			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)tmp.hi);
			lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)tmp.hi);
			lis_quad_minus((LIS_QUAD *)aa.hi);
			s->value[i1] = aa.hi[0];
			s->value_lo[i1] = aa.lo[0];
			s->value[ii] = bb.hi[0];
			s->value_lo[ii] = bb.lo[0];

			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)&h[ii+iih]);
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)&h[i1+iih]);
			lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
			h[ii+iih].hi = aa.hi[0];
			h[ii+iih].lo = aa.lo[0];

			/* convergence check */
			nrm2 = fabs(s->value[i1])*bnrm2;

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->rhistory[iter2] = nrm2;
				/* print the running counter iter2; the frozen phase-1 count
				   `iter` was printed here before, so every line of phase 2
				   showed the same iteration number */
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter2,nrm2);
			}

			if( tol >= nrm2 ) break;
		} while( i<m && iter2 <maxiter );

		/* Solve H * Y = S for upper Hessenberg matrix H */
		tmp.hi[0] = s->value[ii];
		tmp.lo[0] = s->value_lo[ii];
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii+iih]);
		s->value[ii] = tmp.hi[0];
		s->value_lo[ii] = tmp.lo[0];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t.hi[0]  = s->value[jj];
			t.lo[0]  = s->value_lo[jj];
			for(j=jj+1;j<=ii;j++)
			{
				tmp.hi[0] = s->value[j];
				tmp.lo[0] = s->value_lo[j];
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+j*h_dim]);
				lis_quad_sub((LIS_QUAD *)t.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)tmp.hi);
			}
			lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)&h[jj+jj*h_dim]);
			s->value[jj] = tmp.hi[0];
			s->value_lo[jj] = tmp.lo[0];
		}
		/* z = z + y * v */
		for(k=0;k<n;k++)
		{
			aa.hi[0] = s->value[0];
			aa.lo[0] = s->value_lo[0];
			bb.hi[0] = v[0]->value[k];
			bb.lo[0] = v[0]->value_lo[k];
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)bb.hi);
			z->value[k] = tmp.hi[0];
			z->value_lo[k] = tmp.lo[0];
		}
		for(j=1;j<=ii;j++)
		{
			aa.hi[0] = s->value[j];
			aa.lo[0] = s->value_lo[j];
			lis_vector_axpyex_mmm(aa,v[j],z);
		}
		/* r = M^-1 * z */
		time = lis_wtime();
		lis_psolve(solver,z,r);
		ptime += lis_wtime()-time;

		/* x = x + r */
		lis_vector_axpyex_mmm(one,r,x);

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter2;
			solver->iter2      = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			lis_free(h);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* rebuild the residual for the next restart cycle into v[0] */
		for(j=1;j<=i;j++)
		{
			jj = i1-j+1;
			tmp.hi[0] = s->value[jj];
			tmp.lo[0] = s->value_lo[jj];
			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj-1+sn]);
			lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj-1+cs]);
			lis_quad_minus((LIS_QUAD *)aa.hi);
			s->value[jj-1] = aa.hi[0];
			s->value_lo[jj-1] = aa.lo[0];
			s->value[jj] = bb.hi[0];
			s->value_lo[jj] = bb.lo[0];
		}

		for(j=0;j<=i1;j++)
		{
			t.hi[0] = s->value[j];
			t.lo[0] = s->value_lo[j];
			if( j==0 )
			{
				lis_quad_sub((LIS_QUAD *)t.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)one.hi);
			}
			lis_vector_axpyex_mmm(t,v[j],v[0]);
		}
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter       = iter2+1;
	solver->iter2      = iter;
	solver->resid     = nrm2;
	lis_free(h);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/* Example #5 */
/* 0 */
/*
 * lis_crs - preconditioned iteration driven by the state held in `solver`;
 * presumably the Conjugate Residual Squared method (inferred from the
 * name - TODO confirm). The shadow vector rtld is A^T applied to the
 * shadow residual.
 *
 * Work vectors are shared where lifetimes allow: z, u and uq all live in
 * work[3]; q and ap in work[4]; map and auq in work[5].
 *
 * Written to `solver`: x, retcode, iter, resid, ptime (success exit only)
 * and rhistory[] when LIS_PRINT_MEM is set.
 *
 * Returns LIS_SUCCESS when the initial residual check reports a non-zero
 * status or when tol >= nrm2, LIS_BREAKDOWN when rho or <rtld,map> is
 * exactly zero, LIS_MAXITER otherwise.
 */
LIS_INT lis_crs(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR r,rtld, p, q, u, z, ap, map, uq, auq;
	LIS_SCALAR alpha, beta, rho, rho_old, tmpdot1;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,output,conv;
	double time,ptime;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptime   = 0.0;

	r       = solver->work[0];
	rtld    = solver->work[1];
	p       = solver->work[2];
	z       = solver->work[3];
	u       = solver->work[3];
	uq      = solver->work[3];
	q       = solver->work[4];
	ap      = solver->work[4];
	map     = solver->work[5];
	auq     = solver->work[5];


	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	/*
	 * Seed nrm2 from the initial residual so that a first-iteration
	 * breakdown, or maxiter < 1, does not store an uninitialised value in
	 * solver->resid.
	 */
	lis_solver_get_residual[conv](r,solver,&nrm2);

	/* p temporarily holds the shadow residual; rtld = A^T * p */
	lis_solver_set_shadowresidual(solver,r,p);

	lis_matvect(A,p,rtld);
	rho_old = 1.0;
	lis_vector_set_all(0,q);
	lis_vector_set_all(0,p);


	for( iter=1; iter<=maxiter; iter++ )
	{
		/* z   = M^-1 * r  */
		/* rho = <rtld,z>  */
		time = lis_wtime();
		lis_psolve(solver, r, z);
		ptime += lis_wtime()-time;
		lis_vector_dot(rtld,z,&rho);

		/* test breakdown */
		if( rho==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta    = rho / rho_old         */
		/* u       = z + beta*q            */
		/* p       = u + beta*(q + beta*p) */
		/* ap      = A * p                 */
		/* map     = M^-1 * ap             */
		/* tmpdot1 = <rtld,map>            */
		beta = rho / rho_old;
		lis_vector_axpyz(beta,q,z,u);
		lis_vector_xpay(q,beta,p);
		lis_vector_xpay(u,beta,p);
		/* ap overwrites q's storage; q is rebuilt from u and map below */
		lis_matvec(A,p,ap);
		time = lis_wtime();
		lis_psolve(solver, ap, map);
		ptime += lis_wtime()-time;
		lis_vector_dot(rtld,map,&tmpdot1);
		/* test breakdown */
		if( tmpdot1==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* alpha = rho / tmpdot1 */
		/* q     = u - alpha*map */
		/* uq    = u + q         */
		/* auq   = A * uq        */
		/* x     = x + alpha*uq  */
		/* r     = r - alpha*auq */
		alpha = rho / tmpdot1;
		lis_vector_axpyz(-alpha,map,u,q);
		lis_vector_axpyz(1,u,q,uq);
		lis_matvec(A,uq,auq);
		lis_vector_axpy(alpha,uq,x);
		lis_vector_axpy(-alpha,auq,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		rho_old = rho;
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * Preconditioned BiCG run in two phases ("precision switch").
 *
 * Phase 1 iterates with the plain vector kernels (only the .hi part of the
 * scalars is used) for at most LIS_OPTIONS_SWITCH_MAXITER iterations or until
 * the residual drops to solver->tol_switch.  Phase 2 flags the work vectors
 * as LIS_PRECISION_QUAD, recomputes the residual from the current x and
 * restarts the recurrence with the "ex" (hi/lo pair) kernels until the
 * residual is below solver->tol or LIS_OPTIONS_MAXITER is reached.
 *
 * Every exit after the initial residual check stores the status in
 * solver->retcode (except the phase-1 convergence break, which only falls
 * through to phase 2), the running iteration count in solver->iter, the
 * phase-1 iteration count in solver->iter2 and the last residual in
 * solver->resid.
 *
 * Returns LIS_SUCCESS, LIS_BREAKDOWN or LIS_MAXITER.
 *
 * NOTE(review): a breakdown inside phase 1 returns while r, rtld, p and ptld
 * are still flagged LIS_PRECISION_DEFAULT - confirm callers do not rely on
 * the flag being restored.
 */
LIS_INT lis_bicg_switch(LIS_SOLVER solver)
{
  LIS_MATRIX A,At;
  LIS_PRECON M;
  LIS_VECTOR b,x;
  LIS_VECTOR r,rtld, z,ztld,p, ptld, q, qtld;
  LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1;
  LIS_REAL   bnrm2, nrm2, tol, tol2;
  LIS_INT iter,maxiter,n,output,conv;
  LIS_INT iter2,maxiter2;
  double times,ptimes;

  LIS_DEBUG_FUNC_IN;


  A       = solver->A;
  At      = solver->A;   /* same handle; the A^T product is done through LIS_MATVECT */
  M        = solver->precon;
  b        = solver->b;
  x        = solver->x;
  n        = A->n;
  maxiter  = solver->options[LIS_OPTIONS_MAXITER];
  maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
  output   = solver->options[LIS_OPTIONS_OUTPUT];
  conv    = solver->options[LIS_OPTIONS_CONV_COND];
  tol      = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
  tol2     = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
  ptimes   = 0.0;

  r        = solver->work[0];
  rtld     = solver->work[1];
  z        = solver->work[2];
  ztld     = solver->work[3];
  p        = solver->work[4];
  ptld     = solver->work[5];
  /* q/qtld share storage with z/ztld: z and ztld are folded into p and ptld
     before the matrix-vector products overwrite them. */
  q        = solver->work[2];
  qtld     = solver->work[3];

  LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
  LIS_QUAD_SCALAR_MALLOC(beta,1,1);
  LIS_QUAD_SCALAR_MALLOC(rho,2,1);
  LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
  LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
  rho_old.hi[0] = 1.0;
  rho_old.lo[0] = 0.0;


  /* Initial Residual */
  if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
  {
    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
  }
  tol2     = solver->tol_switch;

  /* Seed nrm2 with the residual of the starting vector so that an exit taken
     before the first in-loop convergence check (breakdown on the first
     iteration, or no iteration at all) reports a defined value. */
  lis_solver_get_residual[conv](r,solver,&nrm2);

  lis_solver_set_shadowresidual(solver,r,rtld);

  lis_vector_set_allex_nm(0.0, p);
  lis_vector_set_allex_nm(0.0, ptld);

  r->precision = LIS_PRECISION_DEFAULT;
  rtld->precision = LIS_PRECISION_DEFAULT;
  p->precision = LIS_PRECISION_DEFAULT;
  ptld->precision = LIS_PRECISION_DEFAULT;

  /* ---------------- phase 1: plain kernels ---------------- */
  for( iter=1; iter<=maxiter2; iter++ )
  {
    /* z    = M^-1 * r */
    /* ztld = M^-T * rtld */
    times = lis_wtime();
    lis_psolve(solver, r, z);
    lis_psolvet(solver, rtld, ztld);
    ptimes += lis_wtime()-times;

    /* rho = <z,rtld> */
    lis_vector_dot(z,rtld,&rho.hi[0]);

    /* test breakdown */
    if( rho.hi[0]==0.0 )
    {
      solver->retcode   = LIS_BREAKDOWN;
      solver->iter      = iter;
      solver->iter2     = iter;
      solver->resid     = nrm2;
      LIS_DEBUG_FUNC_OUT;
      return LIS_BREAKDOWN;
    }

    /* beta = (rho / rho_old) */
    beta.hi[0] = rho.hi[0] / rho_old.hi[0];

    /* p    = z    + beta*p    */
    /* q    = A   * p          */
    lis_vector_xpay(z,beta.hi[0],p);
    LIS_MATVEC(A,p,q);

    /* ptld = ztld + beta*ptld */
    /* qtld = A^T * ptld       */
    lis_vector_xpay(ztld,beta.hi[0],ptld);
    LIS_MATVECT(At,ptld,qtld);


    /* tmpdot1 = <ptld,q> */
    lis_vector_dot(ptld,q,&tmpdot1.hi[0]);

    /* test breakdown */
    if( tmpdot1.hi[0]==0.0 )
    {
      solver->retcode   = LIS_BREAKDOWN;
      solver->iter      = iter;
      solver->iter2     = iter;
      solver->resid     = nrm2;
      LIS_DEBUG_FUNC_OUT;
      return LIS_BREAKDOWN;
    }

    /* alpha = rho / tmpdot1 */
    alpha.hi[0] = rho.hi[0] / tmpdot1.hi[0];

    /* x = x + alpha*p */
    lis_vector_axpy(alpha.hi[0],p,x);

    /* r    = r    - alpha*q    */
    lis_vector_axpy(-alpha.hi[0],q,r);

    /* convergence check */
    lis_solver_get_residual[conv](r,solver,&nrm2);

    if( output )
    {
      if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
      if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
    }

    /* Reaching the switch tolerance only ends phase 1; the final verdict is
       left to phase 2. */
    if( nrm2 <= tol2 )
    {
      solver->iter       = iter;
      solver->iter2     = iter;
      solver->ptimes     = ptimes;
      break;
    }

    /* rtld = rtld - alpha*qtld */
    lis_vector_axpy(-alpha.hi[0],qtld,rtld);

    rho_old.hi[0] = rho.hi[0];
  }

  /* ---------------- phase 2: "ex" (hi/lo) kernels ---------------- */
  r->precision = LIS_PRECISION_QUAD;
  rtld->precision = LIS_PRECISION_QUAD;
  p->precision = LIS_PRECISION_QUAD;
  ptld->precision = LIS_PRECISION_QUAD;

/*  solver->precon->precon_type = 0;*/
  /* Restart from the x reached in phase 1 rather than from a zero guess. */
  solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
  lis_vector_copyex_mn(x,solver->xx);
  rho_old.hi[0] = 1.0;
  lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
  tol     = solver->tol;

  lis_solver_set_shadowresidual(solver,r,rtld);

  lis_vector_set_allex_nm(0.0, p);
  lis_vector_set_allex_nm(0.0, ptld);

  /* iter keeps the phase-1 count; iter2 continues the global numbering. */
  for( iter2=iter+1; iter2<=maxiter; iter2++ )
  {
    /* z    = M^-1 * r */
    /* ztld = M^-T * rtld */
    times = lis_wtime();
    lis_psolve(solver, r, z);
    lis_psolvet(solver, rtld, ztld);
    ptimes += lis_wtime()-times;

    /* rho = <z,rtld> */
    lis_vector_dotex_mmm(z,rtld,&rho);

    /* test breakdown */
    if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
    {
      solver->retcode   = LIS_BREAKDOWN;
      solver->iter      = iter2;
      solver->iter2     = iter;
      solver->resid     = nrm2;
      LIS_DEBUG_FUNC_OUT;
      return LIS_BREAKDOWN;
    }

    /* beta = (rho / rho_old) */
    lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);

    /* p    = z    + beta*p    */
    /* q    = A   * p          */
    lis_vector_xpayex_mmm(z,beta,p);
    LIS_MATVEC(A,p,q);

    /* ptld = ztld + beta*ptld */
    /* qtld = A^T * ptld       */
    lis_vector_xpayex_mmm(ztld,beta,ptld);
    LIS_MATVECT(At,ptld,qtld);


    /* tmpdot1 = <ptld,q> */
    lis_vector_dotex_mmm(ptld,q,&tmpdot1);

    /* test breakdown */
    if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
    {
      solver->retcode   = LIS_BREAKDOWN;
      solver->iter      = iter2;
      solver->iter2     = iter;
      solver->resid     = nrm2;
      LIS_DEBUG_FUNC_OUT;
      return LIS_BREAKDOWN;
    }

    /* alpha = rho / tmpdot1 */
    lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);

    /* x = x + alpha*p */
    lis_vector_axpyex_mmm(alpha,p,x);

    /* r    = r    - alpha*q    */
    /* alpha is negated in place and stays negated for the rtld update below;
       it is recomputed at the next iteration. */
    lis_quad_minus((LIS_QUAD *)alpha.hi);
    lis_vector_axpyex_mmm(alpha,q,r);

    /* convergence check */
    lis_solver_get_residual[conv](r,solver,&nrm2);
    if( output )
    {
      if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2;
      /* print the running (phase-2) iteration number, matching the index
         used for the stored history just above */
      if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter2,nrm2);
    }

    if( tol > nrm2 )
    {
      solver->retcode    = LIS_SUCCESS;
      solver->iter       = iter2;
      solver->iter2      = iter;
      solver->resid      = nrm2;
      solver->ptimes     = ptimes;
      LIS_DEBUG_FUNC_OUT;
      return LIS_SUCCESS;
    }

    /* rtld = rtld - alpha*qtld   (alpha already holds -alpha) */
    lis_vector_axpyex_mmm(alpha,qtld,rtld);

    rho_old.hi[0] = rho.hi[0];
    rho_old.lo[0] = rho.lo[0];
  }

  solver->retcode   = LIS_MAXITER;
  solver->iter      = iter2;
  solver->iter2     = iter;
  solver->resid     = nrm2;
  LIS_DEBUG_FUNC_OUT;
  return LIS_MAXITER;
}
/* Example #7 */
/* 0 */
/*
 * Restarted flexible GMRES(m) whose recurrences go through the "ex"
 * (hi/lo pair) vector kernels and the lis_quad_* scalar routines.
 *
 * Work-vector layout taken from solver->work:
 *   work[0]      s  right-hand side of the small least-squares problem
 *   work[1]      r  assigned but not referenced again in this routine
 *   work[2..]    z  preconditioned directions, z[j] = M^-1 * v[j]
 *   work[m+2..]  v  Krylov basis vectors
 *
 * h is a single heap array: Hessenberg column j starts at j*h_dim
 * (h_dim = m+1); the rotation coefficients are stored behind the columns
 * at offsets cs and sn.  h is released on every return path.
 *
 * Returns LIS_SUCCESS when lis_solver_get_initial_residual returns nonzero
 * or when the residual estimate reaches solver->tol, LIS_MAXITER otherwise.
 */
LIS_INT lis_fgmres_quad(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,s,*z,*v;
	LIS_QUAD *h;
	LIS_QUAD_PTR aa,bb,rr,a2,b2,t,one,tmp;

	LIS_REAL bnrm2,nrm2,tol;
	LIS_INT iter,maxiter,n,output;
	double time,ptime;

	LIS_REAL rnorm;
	LIS_INT i,j,k,m;
	LIS_INT ii,i1,iiv,i1v,iih,jj;
	LIS_INT h_dim;
	LIS_INT cs,sn;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	m       = solver->options[LIS_OPTIONS_RESTART];
	h_dim   = m+1;
	ptime   = 0.0;

	s       = solver->work[0];
	r       = solver->work[1];
	z       = &solver->work[2];
	v       = &solver->work[m+2];

	/* NOTE(review): the result of lis_malloc is used without a NULL check. */
	h       = (LIS_QUAD *)lis_malloc( sizeof(LIS_QUAD)*(h_dim+1)*(h_dim+2),"lis_fgmres_quad::h" );
	/* offsets of the rotation cosines / sines inside h */
	cs      = (m+1)*h_dim;
	sn      = (m+2)*h_dim;

	LIS_QUAD_SCALAR_MALLOC(aa,0,1);
	LIS_QUAD_SCALAR_MALLOC(bb,1,1);
	LIS_QUAD_SCALAR_MALLOC(rr,2,1);
	LIS_QUAD_SCALAR_MALLOC(a2,3,1);
	LIS_QUAD_SCALAR_MALLOC(b2,4,1);
	LIS_QUAD_SCALAR_MALLOC(t,5,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
	LIS_QUAD_SCALAR_MALLOC(one,7,1);

	one.hi[0]   = 1.0;
	one.lo[0]   = 0.0;

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
	{
		lis_free(h);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;
	/* bnrm2 is used below as the scale factor applied to v[0]; rnorm is its
	   reciprocal and becomes the first entry of s. */
	rnorm   = 1.0/bnrm2;


	iter=0;
	/* outer (restart) loop */
	while( iter<maxiter )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_scaleex_nm(bnrm2,v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_allex_nm(0.0,s);
		s->value[0]    = rnorm;
		s->value_lo[0] = 0.0;

		i = 0;
		/* inner loop: extend the basis by one vector per pass */
		do
		{
			iter++;
			i++;
			/* ii/iiv: index of the current column and basis vector,
			   i1/i1v: index of the next one, iih: start of column ii in h */
			ii  = i-1;
			i1  = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;


			/* z = M^-1 * v */
			time = lis_wtime();
			lis_psolve(solver,v[iiv],z[iiv]);
			ptime += lis_wtime()-time;

			/* w = A * z */
			lis_matvec(A,z[iiv], v[i1v]);

			/* orthogonalise w against v[0..i-1], one vector at a time */
			for(k=0;k<i;k++)
			{
				/* h[k,i]   = <w,v[k]>          */
				/* w        = w - h[k,i] * v[k] */
				lis_vector_dotex_mmm(v[i1v],v[k],&t);
				h[k+iih].hi = t.hi[0];
				h[k+iih].lo = t.lo[0];
				lis_quad_minus((LIS_QUAD *)t.hi);
				lis_vector_axpyex_mmm(t,v[k],v[i1v]);
			}
			/* h[i+1,i] = ||w||          */
			/* v[i+1]   = w / h[i+1,i]   */
			/* NOTE(review): no guard against ||w|| == 0 is visible before
			   the division one / t. */
			lis_vector_nrm2ex_mm(v[i1v],&t);
			h[i1+iih].hi = t.hi[0];
			h[i1+iih].lo = t.lo[0];
			lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)one.hi,(LIS_QUAD *)t.hi);
			lis_vector_scaleex_mm(tmp,v[i1v]);

			/* apply the rotations stored for columns 0..ii-1 to the new column:
			   aa =  cs*h[jj] + sn*h[k],  bb = -sn*h[jj] + cs*h[k] */
			for(k=1;k<=ii;k++)
			{
				jj  = k-1;
				t.hi[0]   =  h[jj+iih].hi;
				t.lo[0]   =  h[jj+iih].lo;
				lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)t.hi);
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)&h[k+iih]);
				lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
				lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)t.hi);
				lis_quad_minus((LIS_QUAD *)bb.hi);
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)&h[k+iih]);
				lis_quad_add((LIS_QUAD *)bb.hi,(LIS_QUAD *)bb.hi,(LIS_QUAD *)tmp.hi);
				h[jj+iih].hi = aa.hi[0];
				h[jj+iih].lo = aa.lo[0];
				h[k+iih].hi = bb.hi[0];
				h[k+iih].lo = bb.lo[0];
			}
			/* new rotation from the diagonal (aa) and subdiagonal (bb) entries:
			   rr = sqrt(aa^2 + bb^2), cs = aa/rr, sn = bb/rr */
			aa.hi[0] = h[ii+iih].hi;
			aa.lo[0] = h[ii+iih].lo;
			bb.hi[0] = h[i1+iih].hi;
			bb.lo[0] = h[i1+iih].lo;
			lis_quad_sqr((LIS_QUAD *)a2.hi,(LIS_QUAD *)aa.hi);
			lis_quad_sqr((LIS_QUAD *)b2.hi,(LIS_QUAD *)bb.hi);
			lis_quad_add((LIS_QUAD *)rr.hi,(LIS_QUAD *)a2.hi,(LIS_QUAD *)b2.hi);
			lis_quad_sqrt((LIS_QUAD *)rr.hi,(LIS_QUAD *)rr.hi);
			/* keep the two divisions below away from an exact zero */
			if( rr.hi[0]==0.0 )
			{
				rr.hi[0]=1.0e-17;
				rr.lo[0]=0.0;
			}
			lis_quad_div((LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)aa.hi,(LIS_QUAD *)rr.hi);
			lis_quad_div((LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)bb.hi,(LIS_QUAD *)rr.hi);
			/* rotate the right-hand side: s[i1] = -sn*s[ii], s[ii] = cs*s[ii] */
			tmp.hi[0] = s->value[ii];
			tmp.lo[0] = s->value_lo[ii];
			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)tmp.hi);
			lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)tmp.hi);
			lis_quad_minus((LIS_QUAD *)aa.hi);
			s->value[i1] = aa.hi[0];
			s->value_lo[i1] = aa.lo[0];
			s->value[ii] = bb.hi[0];
			s->value_lo[ii] = bb.lo[0];

			/* rotated diagonal entry: h[ii,ii] = cs*h[ii,ii] + sn*h[i1,ii] */
			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)&h[ii+iih]);
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)&h[i1+iih]);
			lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
			h[ii+iih].hi = aa.hi[0];
			h[ii+iih].lo = aa.lo[0];

			/* convergence check */
			/* the residual estimate is the magnitude of the high part of s[i1] */
			nrm2 = fabs(s->value[i1]);

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
			}

			if( tol >= nrm2 ) break;
		} while( i<m && iter <maxiter );

		/* Solve H * Y = S for upper Hessenberg matrix H */
		/* back substitution over rows ii..0; the solution overwrites s */
		tmp.hi[0] = s->value[ii];
		tmp.lo[0] = s->value_lo[ii];
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii+iih]);
		s->value[ii] = tmp.hi[0];
		s->value_lo[ii] = tmp.lo[0];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t.hi[0]  = s->value[jj];
			t.lo[0]  = s->value_lo[jj];
			for(j=jj+1;j<=ii;j++)
			{
				tmp.hi[0] = s->value[j];
				tmp.lo[0] = s->value_lo[j];
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+j*h_dim]);
				lis_quad_sub((LIS_QUAD *)t.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)tmp.hi);
			}
			lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)&h[jj+jj*h_dim]);
			s->value[jj] = tmp.hi[0];
			s->value_lo[jj] = tmp.lo[0];
		}
		/* x = x + y * z */
		for(j=0;j<=ii;j++)
		{
			aa.hi[0] = s->value[j];
			aa.lo[0] = s->value_lo[j];
			lis_vector_axpyex_mmm(aa,z[j],x);
		}

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			lis_free(h);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* restart: v[0] = b - A*x computed with the plain kernels, its low
		   part cleared, and the scale factor for the next cycle refreshed */
		lis_matvec(A,x,v[0]);
		lis_vector_xpay(b,-1.0,v[0]);
		memset(v[0]->value_lo,0,n*sizeof(LIS_SCALAR));
		lis_vector_nrm2(v[0],&rnorm);
		bnrm2 = 1.0/rnorm;
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter+1;
	solver->resid     = nrm2;
	lis_free(h);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/* Example #8 */
/* 0 */
/*
 * BiCGSafe run in two phases ("precision switch").
 *
 * Phase 1 iterates with the plain vector kernels (only the .hi part of the
 * scalars is used) for at most LIS_OPTIONS_SWITCH_MAXITER iterations or until
 * the residual drops to solver->tol_switch.  Phase 2 flags the work vectors
 * as LIS_PRECISION_QUAD, recomputes the residual from the current x, resets
 * the recurrence state and continues with the "ex" (hi/lo pair) kernels until
 * the residual is below solver->tol or LIS_OPTIONS_MAXITER is reached.
 *
 * Exits from phase 2 store the running iteration count in solver->iter and
 * the phase-1 iteration count in solver->iter2; solver->resid receives the
 * last residual.
 *
 * Returns LIS_SUCCESS, LIS_BREAKDOWN or LIS_MAXITER.
 *
 * NOTE(review): z (work[14]) is never cleared here; it is multiplied by
 * eta == 0 on the first iteration of each phase, which assumes it holds
 * finite values on entry.
 */
LIS_INT lis_bicgsafe_switch(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR r, rtld, rhat, p, ptld, phat;
	LIS_VECTOR t, ttld, that, t0, t0hat;
	LIS_VECTOR y, w, u, z;
	LIS_QUAD_PTR alpha, beta, rho, rho_old;
	LIS_QUAD_PTR qsi, eta, one;
	LIS_QUAD_PTR tmp, tmpdot[5];
	LIS_REAL bnrm2, nrm2, tol, tol2;
	LIS_INT iter,maxiter,output,conv;
	LIS_INT iter2,maxiter2;
	double time,ptime;


	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	x       = solver->x;
	maxiter  = solver->options[LIS_OPTIONS_MAXITER];
	maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
	output   = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	tol      = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	tol2     = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
	ptime   = 0.0;

	rtld    = solver->work[0];
	r       = solver->work[1];
	rhat    = solver->work[2];
	p       = solver->work[3];
	ptld    = solver->work[4];
	phat    = solver->work[5];
	t       = solver->work[6];
	ttld    = solver->work[7];
	that    = solver->work[8];
	t0      = solver->work[9];
	t0hat   = solver->work[10];
	y       = solver->work[11];
	w       = solver->work[12];
	u       = solver->work[13];
	z       = solver->work[14];

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(qsi,4,1);
	LIS_QUAD_SCALAR_MALLOC(eta,5,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[0],7,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[1],8,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[2],9,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[3],10,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[4],11,1);
	LIS_QUAD_SCALAR_MALLOC(one,13,1);

	rho_old.hi[0] = 1.0;
	rho_old.lo[0] = 0.0;
	alpha.hi[0] = 1.0;
	alpha.lo[0] = 0.0;
	qsi.hi[0] = 1.0;
	qsi.lo[0] = 0.0;
	/* despite its name, "one" holds -1; it is flipped to +1 only around the
	   x update in phase 2 */
	one.hi[0] = -1.0;
	one.lo[0] = 0.0;


	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol2     = solver->tol_switch;

	/* Seed nrm2 with the residual of the starting vector so that an exit taken
	   before the first in-loop convergence check (breakdown on the first
	   iteration, or no iteration at all) reports a defined value. */
	lis_solver_get_residual[conv](r,solver,&nrm2);

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0, ttld);
	lis_vector_set_allex_nm(0.0, ptld);
	lis_vector_set_allex_nm(0.0, p);
	lis_vector_set_allex_nm(0.0, u);
	lis_vector_set_allex_nm(0.0, t);
	lis_vector_set_allex_nm(0.0, t0);

	/* ---------------- phase 1: plain kernels ---------------- */
	for( iter=1; iter<=maxiter2; iter++ )
	{
		/* rho = <rtld,r> */
		lis_vector_dot(rtld,r,&rho.hi[0]);

		/* test breakdown */
		if( rho.hi[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->iter2     = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) * (alpha / qsi) */
		beta.hi[0] = (rho.hi[0] / rho_old.hi[0]) * (alpha.hi[0] / qsi.hi[0]);

		/* w = ttld + beta*ptld */
		lis_vector_axpyz(beta.hi[0],ptld,ttld,w);

		/* rhat = M^-1 * r */
		time = lis_wtime();
		lis_psolve(solver, r, rhat);
		ptime += lis_wtime()-time;

		/* p = rhat + beta*(p - u) */
		lis_vector_axpy(-1,u,p);
		lis_vector_xpay(rhat,beta.hi[0],p);

		/* ptld = A * p */
		lis_matvec(A,p,ptld);

		/* tmpdot[0] = <rtld,ptld> */
		lis_vector_dot(rtld,ptld,&tmpdot[0].hi[0]);
		/* NOTE(review): no breakdown test on tmpdot[0] before the division. */

		/* alpha = rho / tmpdot[0] */
		alpha.hi[0] = rho.hi[0] / tmpdot[0].hi[0];

		/* y = t - r + alpha*(-w + ptld) */
		lis_vector_axpyz(-1,w,ptld,y);
		lis_vector_xpay(t,alpha.hi[0],y);
		lis_vector_axpy(-1,r,y);

		/* t = r - alpha*ptld */
		lis_vector_axpyz(-alpha.hi[0],ptld,r,t);

		/* that  = M^-1 * t */
		/* phat  = M^-1 * ptld */
		/* t0hat = M^-1 * t0 */
		time = lis_wtime();
		lis_psolve(solver, t, that);
		lis_psolve(solver, ptld, phat);
		lis_psolve(solver, t0, t0hat);
		ptime += lis_wtime()-time;

		/* ttld = A * that */
		lis_matvec(A,that,ttld);

		/* tmpdot[0] = <y,y>       */
		/* tmpdot[1] = <ttld,t>    */
		/* tmpdot[2] = <y,t>       */
		/* tmpdot[3] = <ttld,y>    */
		/* tmpdot[4] = <ttld,ttld> */
		lis_vector_dot(y,y,&tmpdot[0].hi[0]);
		lis_vector_dot(ttld,t,&tmpdot[1].hi[0]);
		lis_vector_dot(y,t,&tmpdot[2].hi[0]);
		lis_vector_dot(ttld,y,&tmpdot[3].hi[0]);
		lis_vector_dot(ttld,ttld,&tmpdot[4].hi[0]);
		if(iter==1)
		{
			/* first iteration: one-parameter step, eta = 0 */
			qsi.hi[0] = tmpdot[1].hi[0] / tmpdot[4].hi[0];
			eta.hi[0] = 0.0;
		}
		else
		{
			/* two-parameter step: qsi and eta from the 2x2 system built
			   from the five dot products */
			tmp.hi[0] = tmpdot[4].hi[0]*tmpdot[0].hi[0]  - tmpdot[3].hi[0]*tmpdot[3].hi[0];
			qsi.hi[0] = (tmpdot[0].hi[0]*tmpdot[1].hi[0] - tmpdot[2].hi[0]*tmpdot[3].hi[0]) / tmp.hi[0];
			eta.hi[0] = (tmpdot[4].hi[0]*tmpdot[2].hi[0] - tmpdot[3].hi[0]*tmpdot[1].hi[0]) / tmp.hi[0];
		}

		/* u = qsi*phat + eta*(t0hat - rhat + beta*u) */
		lis_vector_xpay(t0hat,beta.hi[0],u);
		lis_vector_axpy(-1,rhat,u);
		lis_vector_scale(eta.hi[0],u);
		lis_vector_axpy(qsi.hi[0],phat,u);

		/* z = qsi*rhat + eta*z - alpha*u */
		lis_vector_scale(eta.hi[0],z);
		lis_vector_axpy(qsi.hi[0],rhat,z);
		lis_vector_axpy(-alpha.hi[0],u,z);

		/* x = x + alpha*p + z */
		lis_vector_axpy(alpha.hi[0],p,x);
		lis_vector_axpy(1,z,x);

		/* r = t - eta*y - qsi*ttld */
		lis_vector_axpyz(-eta.hi[0],y,t,r);
		lis_vector_axpy(-qsi.hi[0],ttld,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		/* Reaching the switch tolerance only ends phase 1; the final verdict
		   is left to phase 2. */
		if( tol2 >= nrm2 )
		{
			solver->iter       = iter;
			solver->iter2      = iter;
			solver->ptime      = ptime;
			break;
		}

		lis_vector_copy(t,t0);
		rho_old.hi[0] = rho.hi[0];
	}

	/* ---------------- phase 2: "ex" (hi/lo) kernels ---------------- */
	r->precision = LIS_PRECISION_QUAD;
	p->precision = LIS_PRECISION_QUAD;
	t->precision = LIS_PRECISION_QUAD;
	t0->precision = LIS_PRECISION_QUAD;
	ptld->precision = LIS_PRECISION_QUAD;
	that->precision = LIS_PRECISION_QUAD;

	/* Restart from the x reached in phase 1 rather than from a zero guess. */
	solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
	lis_vector_copyex_mn(x,solver->xx);

	rho_old.hi[0] = 1.0;
	alpha.hi[0] = 1.0;
	qsi.hi[0] = 1.0;
	one.hi[0] = -1.0;

	/* Initial Residual */
	lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
	tol     = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0, ttld);
	lis_vector_set_allex_nm(0.0, ptld);
	lis_vector_set_allex_nm(0.0, p);
	lis_vector_set_allex_nm(0.0, u);
	lis_vector_set_allex_nm(0.0, t);
	lis_vector_set_allex_nm(0.0, t0);

	/* iter keeps the phase-1 count; iter2 continues the global numbering. */
	for( iter2=iter+1; iter2<=maxiter; iter2++ )
	{
		/* rho = <rtld,r> */
		lis_vector_dotex_mmm(rtld,r,&rho);

		/* test breakdown */
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter2;
			solver->iter2     = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) * (alpha / qsi) */
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)qsi.hi);
		lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmp.hi);

		/* w = ttld + beta*ptld */
		lis_vector_axpyzex_mmmm(beta,ptld,ttld,w);

		/* rhat = M^-1 * r */
		time = lis_wtime();
		lis_psolve(solver, r, rhat);
		ptime += lis_wtime()-time;

		/* p = rhat + beta*(p - u) */
		lis_vector_axpyex_mmm(one,u,p);
		lis_vector_xpayex_mmm(rhat,beta,p);

		/* ptld = A * p */
		lis_matvec(A,p,ptld);

		/* tmpdot[0] = <rtld,ptld> */
		lis_vector_dotex_mmm(rtld,ptld,&tmpdot[0]);
		/* NOTE(review): no breakdown test on tmpdot[0] before the division. */

		/* alpha = rho / tmpdot[0] */
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot[0].hi);

		/* y = t - r + alpha*(-w + ptld) */
		lis_vector_axpyzex_mmmm(one,w,ptld,y);
		lis_vector_xpayex_mmm(t,alpha,y);
		lis_vector_axpyex_mmm(one,r,y);

		/* t = r - alpha*ptld */
		/* alpha is negated in place here and stays negated until the x
		   update further down flips it back */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyzex_mmmm(alpha,ptld,r,t);

		/* that  = M^-1 * t */
		/* phat  = M^-1 * ptld */
		/* t0hat = M^-1 * t0 */
		time = lis_wtime();
		lis_psolve(solver, t, that);
		lis_psolve(solver, ptld, phat);
		lis_psolve(solver, t0, t0hat);
		ptime += lis_wtime()-time;

		/* ttld = A * that */
		lis_matvec(A,that,ttld);

		/* tmpdot[0] = <y,y>       */
		/* tmpdot[1] = <ttld,t>    */
		/* tmpdot[2] = <y,t>       */
		/* tmpdot[3] = <ttld,y>    */
		/* tmpdot[4] = <ttld,ttld> */
		lis_vector_dotex_mmm(y,y,&tmpdot[0]);
		lis_vector_dotex_mmm(ttld,t,&tmpdot[1]);
		lis_vector_dotex_mmm(y,t,&tmpdot[2]);
		lis_vector_dotex_mmm(ttld,y,&tmpdot[3]);
		lis_vector_dotex_mmm(ttld,ttld,&tmpdot[4]);
		/* The recurrence was reset before this loop, so the one-parameter
		   (eta = 0) step belongs to the first phase-2 iteration.  Testing the
		   phase-1 counter "iter" here would either never select it or select
		   it on every pass. */
		if(iter2==iter+1)
		{
			lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[1].hi,(LIS_QUAD *)tmpdot[4].hi);
			eta.hi[0] = 0.0;
			eta.lo[0] = 0.0;
		}
		else
		{
			/* tmp = <ttld,ttld>*<y,y> - <ttld,y>^2   (qsi used as scratch) */
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[0].hi);
			lis_quad_sqr((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[3].hi);
			lis_quad_sub((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)qsi.hi);

			/* qsi = (<y,y>*<ttld,t> - <y,t>*<ttld,y>) / tmp   (eta used as scratch) */
			lis_quad_mul((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[1].hi);
			lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[2].hi,(LIS_QUAD *)tmpdot[3].hi);
			lis_quad_sub((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)eta.hi);
			lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmp.hi);

			/* eta = (<ttld,ttld>*<y,t> - <ttld,y>*<ttld,t>) / tmp   (tmpdot[0] used as scratch) */
			lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[2].hi);
			lis_quad_mul((LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[3].hi,(LIS_QUAD *)tmpdot[1].hi);
			lis_quad_sub((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[0].hi);
			lis_quad_div((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmp.hi);
		}

		/* u = qsi*phat + eta*(t0hat - rhat + beta*u) */
		lis_vector_xpayex_mmm(t0hat,beta,u);
		lis_vector_axpyex_mmm(one,rhat,u);
		lis_vector_scaleex_mm(eta,u);
		lis_vector_axpyex_mmm(qsi,phat,u);

		/* z = qsi*rhat + eta*z - alpha*u   (alpha currently holds -alpha) */
		lis_vector_scaleex_mm(eta,z);
		lis_vector_axpyex_mmm(qsi,rhat,z);
		lis_vector_axpyex_mmm(alpha,u,z);

		/* x = x + alpha*p + z */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_quad_minus((LIS_QUAD *)one.hi);
		lis_vector_axpyex_mmm(alpha,p,x);
		lis_vector_axpyex_mmm(one,z,x);
		lis_quad_minus((LIS_QUAD *)one.hi);

		/* r = t - eta*y - qsi*ttld */
		/* eta and qsi are negated for the update and restored afterwards */
		lis_quad_minus((LIS_QUAD *)eta.hi);
		lis_quad_minus((LIS_QUAD *)qsi.hi);
		lis_vector_axpyzex_mmmm(eta,y,t,r);
		lis_vector_axpyex_mmm(qsi,ttld,r);
		lis_quad_minus((LIS_QUAD *)eta.hi);
		lis_quad_minus((LIS_QUAD *)qsi.hi);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter2] = nrm2;
			/* print the running (phase-2) iteration number, matching the
			   index used for the stored history just above */
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter2,nrm2);
		}

		if( tol > nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter2;
			solver->iter2      = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		lis_vector_copyex_mm(t,t0);
		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}
	/* Same convention as the success and breakdown exits above: iter is the
	   running count, iter2 the phase-1 count. */
	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter2;
	solver->iter2     = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * Preconditioned BiCGSTAB.
 *
 * Each iteration applies the preconditioner twice (to p and to s) and the
 * matrix twice, with an early convergence test on the intermediate residual
 * s before the second half-step.  s shares storage with r (work[1]), so
 * "s = r - alpha*v" is an in-place update of r.
 *
 * Every exit after the initial residual check stores the status in
 * solver->retcode, the iteration count in solver->iter and the last
 * evaluated residual in solver->resid.
 *
 * Returns LIS_SUCCESS, LIS_BREAKDOWN (a zero rho, <rtld,v>, <t,t> or omega)
 * or LIS_MAXITER.
 */
LIS_INT lis_bicgstab(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,rtld, t,p,v, s, phat, shat;
	LIS_SCALAR alpha, beta, omega, rho, rho_old, tmpdot1, tmpdot2;
	LIS_REAL   bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	M       = solver->precon;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptimes  = 0.0;

	rtld    = solver->work[0];
	r       = solver->work[1];
	s       = solver->work[1];   /* alias of r */
	t       = solver->work[2];
	p       = solver->work[3];
	v       = solver->work[4];
	phat    = solver->work[5];
	shat    = solver->work[6];
	alpha   = (LIS_SCALAR)1.0;
	omega   = (LIS_SCALAR)1.0;
	rho_old = (LIS_SCALAR)1.0;

	lis_vector_set_all(0.0,p);
	lis_vector_set_all(0.0,phat);
	lis_vector_set_all(0.0,s);
	lis_vector_set_all(0.0,shat);

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	/* Seed nrm2 with the residual of the starting vector so that an exit taken
	   before the first in-loop convergence check (breakdown on the first
	   iteration, or no iteration at all) reports a defined value. */
	lis_solver_get_residual[conv](r,solver,&nrm2);

	lis_solver_set_shadowresidual(solver,r,rtld);


	for( iter=1; iter<=maxiter; iter++ )
	{
		/* rho = <rtld,r> */
		lis_vector_dot(rtld,r,&rho);

		/* test breakdown */
		if( rho==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		if( iter==1 )
		{
			lis_vector_copy(r,p);
		}
		else
		{
			/* beta = (rho / rho_old) * (alpha / omega) */
			beta = (rho / rho_old) * (alpha / omega);

			/* p = r + beta*(p - omega*v) */
			lis_vector_axpy(-omega,v,p);
			lis_vector_xpay(r,beta,p);
		}

		/* phat = M^-1 * p */
		times = lis_wtime();
		lis_psolve(solver, p, phat);
		ptimes += lis_wtime()-times;

		/* v = A * phat */
		LIS_MATVEC(A,phat,v);

		/* tmpdot1 = <rtld,v> */
		lis_vector_dot(rtld,v,&tmpdot1);

		/* test breakdown: alpha would be rho/0.  x and r have not been touched
		   in this iteration, so nrm2 still describes the current iterate. */
		if( tmpdot1==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* alpha = rho / tmpdot1 */
		alpha = rho / tmpdot1;

		/* s = r - alpha*v   (in place, s aliases r) */
		lis_vector_axpy(-alpha,v,r);

		/* Early check for tolerance */
		lis_solver_get_residual[conv](s,solver,&nrm2);
/*		lis_vector_nrm2(s,&nrm2);
		nrm2 = nrm2 * bnrm2;*/
		if( nrm2 <= tol )
		{
			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
			}

			/* only the first half-step is applied to x */
			lis_vector_axpy(alpha,phat,x);
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* shat = M^-1 * s */
		times = lis_wtime();
		lis_psolve(solver, s, shat);
		ptimes += lis_wtime()-times;

		/* t = A * shat */
		LIS_MATVEC(A,shat,t);

		/* tmpdot1 = <t,s> */
		/* tmpdot2 = <t,t> */
		lis_vector_dot(t,s,&tmpdot1);
		lis_vector_dot(t,t,&tmpdot2);

		/* test breakdown: omega would be tmpdot1/0.  Apply the half-step that
		   belongs to s first, so that x matches the residual reported in
		   nrm2 (the norm of s from the early check). */
		if( tmpdot2==0.0 )
		{
			lis_vector_axpy(alpha,phat,x);
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* omega   = tmpdot1 / tmpdot2 */
		omega   = tmpdot1 / tmpdot2;

		/* x = x + alpha*phat + omega*shat */
		lis_vector_axpy(alpha,phat,x);
		lis_vector_axpy(omega,shat,x);

		/* r = s - omega*t */
		lis_vector_axpy(-omega,t,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
/*		lis_vector_nrm2(r,&nrm2);
		nrm2 = nrm2 * bnrm2;*/

		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* omega appears as a divisor in the next beta */
		if( omega==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}
		rho_old = rho;
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/* Example #10 */
/* 0 */
/*
 * Preconditioned BiCGSafe.
 *
 * Before the loop the routine forms mr = M^-1*r, amr = A*mr and starts the
 * search direction from them (p = mr, ap = amr).  Each iteration then needs
 * one further preconditioner application and matrix product for the
 * auxiliary direction u, plus one of each to refresh mr/amr for the next
 * iteration.
 *
 * Every exit after the initial residual check stores the status in
 * solver->retcode, the iteration count in solver->iter and the last
 * evaluated residual in solver->resid.
 *
 * Returns LIS_SUCCESS, LIS_BREAKDOWN (a zero rho or <rtld,ap>) or
 * LIS_MAXITER.
 *
 * NOTE(review): y, u and z are read before this routine writes them; on the
 * first iteration they are only multiplied by eta == 0 / beta == 0, which
 * assumes they hold finite values on entry - confirm against the work-vector
 * allocation.
 */
LIS_INT lis_bicgsafe(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR r, rtld, mr, amr, t, mt, p, ap;
	LIS_VECTOR y, u, au, z;
	LIS_SCALAR alpha, beta;
	LIS_REAL rho, rho_old;
	LIS_SCALAR qsi, eta;
	LIS_SCALAR tmp, tmpdot[5];
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,output,conv;
	double time,ptime;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptime   = 0.0;

	rtld    = solver->work[0];
	r       = solver->work[1];
	mr      = solver->work[2];
	amr     = solver->work[3];
	p       = solver->work[4];
	ap      = solver->work[5];
	t       = solver->work[6];
	mt      = solver->work[7];
	y       = solver->work[8];
	u       = solver->work[9];
	z       = solver->work[10];
	au      = solver->work[11];


	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	/* Seed nrm2 with the residual of the starting vector so that an exit taken
	   before the first in-loop convergence check (breakdown on the first
	   iteration, or no iteration at all) reports a defined value. */
	lis_solver_get_residual[conv](r,solver,&nrm2);

	lis_solver_set_shadowresidual(solver,r,rtld);

	time = lis_wtime();
	lis_psolve(solver, r, mr);
	ptime += lis_wtime()-time;
	lis_matvec(A,mr,amr);
	lis_vector_dot(rtld,r,&rho_old);
	lis_vector_copy(amr,ap);
	lis_vector_copy(mr,p);
	beta = 0.0;


	for( iter=1; iter<=maxiter; iter++ )
	{
		/* tmpdot[0] = <rtld,ap> */
		lis_vector_dot(rtld,ap,&tmpdot[0]);

		/* test breakdown: rho_old is the divisor of the next beta and
		   tmpdot[0] the divisor of alpha.  rho_old can only be zero here on
		   the first iteration, because later values pass the rho test at the
		   bottom of the loop.  x and r are untouched at this point, so nrm2
		   still describes the current iterate. */
		if( rho_old==0.0 || tmpdot[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* alpha = rho_old / tmpdot[0] */
		alpha = rho_old / tmpdot[0];


		/* tmpdot[0] = <y,y>           */
		/* tmpdot[1] = <amr,r>         */
		/* tmpdot[2] = <y,r>           */
		/* tmpdot[3] = <amr,y>         */
		/* tmpdot[4] = <amr,amr>       */
		lis_vector_dot(y,y,&tmpdot[0]);
		lis_vector_dot(amr,r,&tmpdot[1]);
		lis_vector_dot(y,r,&tmpdot[2]);
		lis_vector_dot(amr,y,&tmpdot[3]);
		lis_vector_dot(amr,amr,&tmpdot[4]);
		if(iter==1)
		{
			/* first iteration: one-parameter step, eta = 0 */
			qsi = tmpdot[1] / tmpdot[4];
			eta = 0.0;
		}
		else
		{
			/* two-parameter step: qsi and eta from the 2x2 system built
			   from the five dot products */
			tmp = tmpdot[4]*tmpdot[0] - tmpdot[3]*tmpdot[3];
			qsi = (tmpdot[0]*tmpdot[1] - tmpdot[2]*tmpdot[3]) / tmp;
			eta = (tmpdot[4]*tmpdot[2] - tmpdot[3]*tmpdot[1]) / tmp;
		}

		/* t = qsi*ap + eta*y */
		lis_vector_copy(y,t);
		lis_vector_scale(eta,t);
		lis_vector_axpy(qsi,ap,t);

		/* mt  = M^-1 * t */
		time = lis_wtime();
		lis_psolve(solver, t, mt);
		ptime += lis_wtime()-time;

		/* u    = mt + eta*beta*u */
		/* au = A * u             */
		lis_vector_xpay(mt,eta*beta,u);
		lis_matvec(A,u,au);

		/* z = qsi*mr + eta*z - alpha*u */
		lis_vector_scale(eta,z);
		lis_vector_axpy(qsi,mr,z);
		lis_vector_axpy(-alpha,u,z);

		/* y = qsi*amr + eta*y - alpha*au */
		lis_vector_scale(eta,y);
		lis_vector_axpy(qsi,amr,y);
		lis_vector_axpy(-alpha,au,y);

		/* x = x + alpha*p + z */
		lis_vector_axpy(alpha,p,x);
		lis_vector_axpy(1.0,z,x);

		/* r = r - alpha*ap - y */
		lis_vector_axpy(-alpha,ap,r);
		lis_vector_axpy(-1.0,y,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* rho = <rtld,r> */
		lis_vector_dot(rtld,r,&rho);
		if( rho==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) * (alpha / qsi) */
		beta = (rho / rho_old) * (alpha / qsi);

		/* mr  = M^-1 * r */
		/* amr = A * mr   */
		time = lis_wtime();
		lis_psolve(solver, r, mr);
		ptime += lis_wtime()-time;
		lis_matvec(A,mr,amr);

		/* p  = mr + beta*(p - u)    */
		/* ap = amr + beta*(ap - au) */
		lis_vector_axpy(-1.0,u,p);
		lis_vector_xpay(mr,beta,p);
		lis_vector_axpy(-1.0,au,ap);
		lis_vector_xpay(amr,beta,ap);

		rho_old = rho;
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/* Example #11 */
/* 0 */
/*
 * lis_bicgsafe_quad
 *
 * Quad-precision variant (per the function name) of the BiCGSafe iteration.
 * It works on solver->A and solver->x; every scalar of the recurrence
 * (alpha, beta, rho, qsi, eta, ...) is a LIS_QUAD_PTR hi/lo pair combined
 * with lis_quad_*(), and the vector updates use the "...ex_mm / ...ex_mmm"
 * kernels.  Twelve work vectors are taken from solver->work[0..11].
 *
 * Return value
 *   LIS_SUCCESS   - lis_solver_get_initial_residual() returned non-zero, or
 *                   the value produced by lis_solver_get_residual[conv]
 *                   dropped strictly below tol (tol > nrm2).
 *   LIS_BREAKDOWN - rho = <rtld,r> is exactly zero in both hi and lo parts.
 *   LIS_MAXITER   - the loop finished without meeting the tolerance.
 *
 * On the exits taken inside or after the loop, solver->retcode, solver->iter
 * and solver->resid are stored; solver->ptime (time spent in lis_psolve) is
 * stored only on the converged exit.
 */
LIS_INT lis_bicgsafe_quad(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR r, rtld, rhat, p, ptld;
	LIS_VECTOR t, ttld;
	LIS_VECTOR y, v, u, utld, z;
	LIS_QUAD_PTR alpha, beta, rho, rho_old;
	LIS_QUAD_PTR qsi, eta;
	LIS_QUAD_PTR tmp, tmpdot[5],one;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,output,conv;
	double time,ptime;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptime   = 0.0;


	/* Work-vector assignment */
	rtld    = solver->work[0];
	r       = solver->work[1];
	rhat    = solver->work[2];
	p       = solver->work[3];
	ptld    = solver->work[4];
	t       = solver->work[5];
	ttld    = solver->work[6];
	y       = solver->work[7];
	v       = solver->work[8];
	u       = solver->work[9];
	z       = solver->work[10];
	utld    = solver->work[11];

	/* Bind each quad scalar to its slot (second macro argument is the slot index) */
	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(qsi,4,1);
	LIS_QUAD_SCALAR_MALLOC(eta,5,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[0],7,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[1],8,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[2],9,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[3],10,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[4],11,1);
	LIS_QUAD_SCALAR_MALLOC(one,13,1);

	/* rho_old = alpha = qsi = 1 so that the first beta evaluates to rho */
	rho_old.hi[0] = 1.0;
	rho_old.lo[0] = 0.0;
	alpha.hi[0] = 1.0;
	alpha.lo[0] = 0.0;
	qsi.hi[0] = 1.0;
	qsi.lo[0] = 0.0;
	/* NOTE: despite its name, "one" holds -1.0.  It is flipped with
	   lis_quad_minus() only around the "x += z" update in the loop and
	   flipped back immediately afterwards. */
	one.hi[0] = -1.0;
	one.lo[0] = 0.0;


	/* Initial Residual */
	/* A non-zero return is treated as "nothing left to do" and reported as success. */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0,p);
	lis_vector_set_allex_nm(0.0,u);
	lis_vector_set_allex_nm(0.0,ptld);
	lis_vector_set_allex_nm(0.0,utld);
	
	for( iter=1; iter<=maxiter; iter++ )
	{
		/* rho = <rtld,r> */
		lis_vector_dotex_mmm(rtld,r,&rho);

		/* test breakdown */
		/* NOTE(review): when this fires on iter==1, nrm2 has not been
		   assigned yet, so solver->resid receives an indeterminate value. */
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) * (alpha / qsi) */
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)qsi.hi);
		lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmp.hi);

		/* rhat = M^-1 * r */
		/* v    = A * rhat */
		time = lis_wtime();
		lis_psolve(solver, r, rhat);
		ptime += lis_wtime()-time;
		lis_matvec(A,rhat,v);

		/* p = rhat + beta*(p - u) */
		/* ("one" is -1 here, so the axpy subtracts u) */
		lis_vector_axpyex_mmm(one,u,p);
		lis_vector_xpayex_mmm(rhat,beta,p);
		
		/* ptld = v + beta*(ptld - utld) */
		lis_vector_axpyex_mmm(one,utld,ptld);
		lis_vector_xpayex_mmm(v,beta,ptld);

		/* tmpdot[0] = <rtld,ptld> */
		lis_vector_dotex_mmm(rtld,ptld,&tmpdot[0]);
		/* test breakdown */
		/* */
		/* NOTE(review): the breakdown test announced above is not implemented;
		   tmpdot[0] is used as a divisor without a zero check. */
		
		/* alpha = rho / tmpdot[0] */
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot[0].hi);


		/* tmpdot[0] = <y,y>       */
		/* tmpdot[1] = <v,r>       */
		/* tmpdot[2] = <y,r>       */
		/* tmpdot[3] = <v,y>       */
		/* tmpdot[4] = <v,v>       */
		/* NOTE(review): y (and z further down) are read on the first pass
		   before this function has written them; presumably the work vectors
		   start out zeroed - verify against the allocation code. */
		lis_vector_dotex_mmm(y,y,&tmpdot[0]);
		lis_vector_dotex_mmm(v,r,&tmpdot[1]);
		lis_vector_dotex_mmm(y,r,&tmpdot[2]);
		lis_vector_dotex_mmm(v,y,&tmpdot[3]);
		lis_vector_dotex_mmm(v,v,&tmpdot[4]);
		if(iter==1)
		{
			/* first step: qsi = <v,r>/<v,v>, eta = 0 */
			lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[1].hi,(LIS_QUAD *)tmpdot[4].hi);
			eta.hi[0] = 0.0;
			eta.lo[0] = 0.0;
		}
		else
		{
			/* tmp = <v,v>*<y,y> - <v,y>^2   (qsi is used as scratch for the square) */
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[0].hi);
			lis_quad_sqr((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[3].hi);
			lis_quad_sub((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)qsi.hi);

			/* qsi = (<y,y>*<v,r> - <y,r>*<v,y>) / tmp   (eta is used as scratch) */
			lis_quad_mul((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[1].hi);
			lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[2].hi,(LIS_QUAD *)tmpdot[3].hi);
			lis_quad_sub((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)eta.hi);
			lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmp.hi);

			/* eta = (<v,v>*<y,r> - <v,y>*<v,r>) / tmp   (tmpdot[0] is overwritten as scratch) */
			lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[2].hi);
			lis_quad_mul((LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[3].hi,(LIS_QUAD *)tmpdot[1].hi);
			lis_quad_sub((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[0].hi);
			lis_quad_div((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmp.hi);
		}

		/* t = qsi*ptld + eta*y */
		lis_vector_copyex_mm(y,t);
		lis_vector_scaleex_mm(eta,t);
		lis_vector_axpyex_mmm(qsi,ptld,t);

		/* ttld  = M^-1 * t */
		time = lis_wtime();
		lis_psolve(solver, t, ttld);
		ptime += lis_wtime()-time;

		/* u    = ttld + eta*beta*u */
		/* utld = A * u             */
		lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)beta.hi);
		lis_vector_xpayex_mmm(ttld,tmp,u);
		lis_matvec(A,u,utld);

		/* z = qsi*rhat + eta*z - alpha*u */
		/* alpha is negated in place here and stays negative through the y update */
		lis_vector_scaleex_mm(eta,z);
		lis_vector_axpyex_mmm(qsi,rhat,z);
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,u,z);

		/* y = qsi*v + eta*y - alpha*utld */
		lis_vector_scaleex_mm(eta,y);
		lis_vector_axpyex_mmm(qsi,v,y);
		lis_vector_axpyex_mmm(alpha,utld,y);
		lis_quad_minus((LIS_QUAD *)alpha.hi);

		/* x = x + alpha*p + z */
		/* "one" is flipped to +1 for the z term and restored to -1 right after */
		lis_vector_axpyex_mmm(alpha,p,x);
		lis_quad_minus((LIS_QUAD *)one.hi);
		lis_vector_axpyex_mmm(one,z,x);
		lis_quad_minus((LIS_QUAD *)one.hi);
		
		/* r = r - alpha*ptld - y */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,ptld,r);
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(one,y,r);
		
		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		/* strict comparison: nrm2 == tol does not count as converged here */
		if( tol > nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}

	/* iteration budget exhausted (iter is maxiter+1 at this point) */
	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
示例#12
0
/*
 * lis_bicrsafe_quad
 *
 * Quad-precision variant (per the function name) of the BiCRSafe iteration.
 * It works on solver->A and solver->x with thirteen work vectors taken from
 * solver->work[0..12]; scalars are LIS_QUAD_PTR hi/lo pairs.
 *
 * Before the loop it forms artld = A^T*rtld (lis_matvect), mr = M^-1*r,
 * amr = A*mr, rho_old = <rtld,amr>, and copies amr/mr into ap/p.
 *
 * Return value
 *   LIS_SUCCESS   - lis_solver_get_initial_residual() returned non-zero, or
 *                   tol >= nrm2 after an iteration.
 *   LIS_BREAKDOWN - rho = <rtld,amr> is exactly zero in both hi and lo parts.
 *   LIS_MAXITER   - the loop finished without meeting the tolerance.
 *
 * solver->retcode, solver->iter and solver->resid are stored on the exits
 * taken inside or after the loop; solver->ptime only on the converged exit.
 */
LIS_INT lis_bicrsafe_quad(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR r, rtld, artld, mr, amr, p, ap, map;
	LIS_VECTOR y, my, u, au, z;
	LIS_QUAD_PTR alpha, beta, rho, rho_old;
	LIS_QUAD_PTR qsi, eta, one;
	LIS_QUAD_PTR tmp, tmpdot[5];
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,output,conv;
	double time,ptime;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptime   = 0.0;

	/* Work-vector assignment */
	rtld    = solver->work[0];
	r       = solver->work[1];
	mr      = solver->work[2];
	amr     = solver->work[3];
	p       = solver->work[4];
	ap      = solver->work[5];
	map     = solver->work[6];
	my      = solver->work[7];
	y       = solver->work[8];
	u       = solver->work[9];
	z       = solver->work[10];
	au      = solver->work[11];
	artld   = solver->work[12];

	/* Bind each quad scalar to its slot (second macro argument is the slot index) */
	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(qsi,4,1);
	LIS_QUAD_SCALAR_MALLOC(eta,5,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[0],7,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[1],8,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[2],9,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[3],10,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[4],11,1);
	LIS_QUAD_SCALAR_MALLOC(one,13,1);


	/* Initial Residual */
	/* A non-zero return is treated as "nothing left to do" and reported as success. */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	/* artld = A^T * rtld, mr = M^-1 * r, amr = A * mr, rho_old = <rtld,amr> */
	lis_matvect(A,rtld,artld);
	time = lis_wtime();
	lis_psolve(solver, r, mr);
	ptime += lis_wtime()-time;
	lis_matvec(A,mr,amr);
	lis_vector_dotex_mmm(rtld,amr,&rho_old);
	lis_vector_copyex_mm(amr,ap);
	lis_vector_copyex_mm(mr,p);
	/* NOTE: despite its name, "one" holds -1.0 outside the short window
	   around the "x += z" update in the loop. */
	one.hi[0] = -1.0;
	one.lo[0] = 0.0;

	
	for( iter=1; iter<=maxiter; iter++ )
	{
		/* map  = M^-1 * ap */
		time = lis_wtime();
		lis_psolve(solver, ap, map);
		ptime += lis_wtime()-time;

		/* tmpdot[0] = <artld,map> */
		/* alpha = rho_old / tmpdot[0] */
		lis_vector_dotex_mmm(artld,map,&tmpdot[0]);
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho_old.hi,(LIS_QUAD *)tmpdot[0].hi);


		/* tmpdot[0] = <y,y>           */
		/* tmpdot[1] = <amr,r>         */
		/* tmpdot[2] = <y,r>           */
		/* tmpdot[3] = <amr,y>         */
		/* tmpdot[4] = <amr,amr>       */
		/* NOTE(review): y (and u, z, my further down) are read on the first
		   pass before this function has written them; presumably the work
		   vectors start out zeroed - verify against the allocation code. */
		lis_vector_dotex_mmm(y,y,&tmpdot[0]);
		lis_vector_dotex_mmm(amr,r,&tmpdot[1]);
		lis_vector_dotex_mmm(y,r,&tmpdot[2]);
		lis_vector_dotex_mmm(amr,y,&tmpdot[3]);
		lis_vector_dotex_mmm(amr,amr,&tmpdot[4]);
		if(iter==1)
		{
			/* first step: qsi = <amr,r>/<amr,amr>, eta = 0 */
			lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[1].hi,(LIS_QUAD *)tmpdot[4].hi);
			eta.hi[0] = 0.0;
			eta.lo[0] = 0.0;
		}
		else
		{
			/* tmp = <amr,amr>*<y,y> - <amr,y>^2   (qsi is used as scratch for the square) */
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[0].hi);
			lis_quad_sqr((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[3].hi);
			lis_quad_sub((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)qsi.hi);

			/* qsi = (<y,y>*<amr,r> - <y,r>*<amr,y>) / tmp   (eta is used as scratch) */
			lis_quad_mul((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[1].hi);
			lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[2].hi,(LIS_QUAD *)tmpdot[3].hi);
			lis_quad_sub((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)eta.hi);
			lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmp.hi);

			/* eta = (<amr,amr>*<y,r> - <amr,y>*<amr,r>) / tmp   (tmpdot[0] is overwritten as scratch) */
			lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[2].hi);
			lis_quad_mul((LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[3].hi,(LIS_QUAD *)tmpdot[1].hi);
			lis_quad_sub((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[0].hi);
			lis_quad_div((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmp.hi);
		}

		/* u    = qsi*map + eta*my + eta*beta*u */
		/* au   = A * u                         */
		/* NOTE(review): on iter==1 beta has not been assigned anywhere in this
		   function (it is first computed at the bottom of the loop).  eta is
		   zero then, so tmp = 0*beta, which is only harmless if beta's storage
		   holds a finite value - confirm what LIS_QUAD_SCALAR_MALLOC provides. */
		lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)beta.hi);
		lis_vector_scaleex_mm(tmp,u);
		lis_vector_axpyex_mmm(qsi,map,u);
		lis_vector_axpyex_mmm(eta,my,u);
		lis_matvec(A,u,au);

		/* z = qsi*mr + eta*z - alpha*u */
		/* alpha is negated in place here and stays negative through the y update */
		lis_vector_scaleex_mm(eta,z);
		lis_vector_axpyex_mmm(qsi,mr,z);
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,u,z);

		/* y  = qsi*amr + eta*y - alpha*au */
		/* my = M^-1 * y */
		lis_vector_scaleex_mm(eta,y);
		lis_vector_axpyex_mmm(qsi,amr,y);
		lis_vector_axpyex_mmm(alpha,au,y);
		time = lis_wtime();
		lis_psolve(solver, y, my);
		ptime += lis_wtime()-time;

		/* x = x + alpha*p + z */
		/* alpha back to +alpha; "one" flipped to +1 for the z term */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,p,x);
		lis_quad_minus((LIS_QUAD *)one.hi);
		lis_vector_axpyex_mmm(one,z,x);
		
		/* r = r - alpha*ap - y */
		/* alpha becomes -alpha again (and stays so for the mr update below);
		   "one" returns to -1 */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_quad_minus((LIS_QUAD *)one.hi);
		lis_vector_axpyex_mmm(alpha,ap,r);
		lis_vector_axpyex_mmm(one,y,r);
		
		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* mr  = mr - alpha*map - my */
		/* amr = A * mr              */
		/* rho = <rtld,amr> */
		lis_vector_axpyex_mmm(alpha,map,mr);
		lis_vector_axpyex_mmm(one,my,mr);
		lis_matvec(A,mr,amr);
		lis_vector_dotex_mmm(rtld,amr,&rho);
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) * (alpha / qsi) */
		/* alpha is restored to its positive value before being used in beta */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)qsi.hi);
		lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmp.hi);


		/* p  = mr + beta*(p - u)    */
		/* ap = amr + beta*(ap - au) */
		lis_vector_axpyex_mmm(one,u,p);
		lis_vector_xpayex_mmm(mr,beta,p);
		lis_vector_axpyex_mmm(one,au,ap);
		lis_vector_xpayex_mmm(amr,beta,ap);

		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}

	/* iteration budget exhausted (iter is maxiter+1 at this point) */
	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
示例#13
0
/*
 * lis_gs
 *
 * Stationary iteration tagged LIS_SOLVER_GS (Gauss-Seidel).  Each sweep
 * computes r = b - A*M^-1*x, solves the lower-triangular system for the
 * correction t (lis_matrix_solve with LIS_MATRIX_LOWER) and adds it to x.
 * The iterate is mapped through lis_psolve() one last time before returning.
 *
 * The matrix is split with lis_matrix_split(); the inverted diagonal is
 * cached in A->WD and rebuilt only when A->use_wd is not LIS_SOLVER_GS.
 *
 * Return value
 *   LIS_SUCCESS - ||r|| / ||b|| dropped to tol or below.
 *   LIS_MAXITER - maxiter sweeps were done without reaching tol.
 *   otherwise   - the error code returned by lis_matrix_split() or
 *                 lis_matrix_diag_duplicate().
 *
 * solver->retcode, ->iter, ->resid and ->ptime are stored on both the
 * converged and the max-iteration exit.
 */
LIS_INT lis_gs(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,t,s;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,output;
	double time,ptime;

	LIS_INT err;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	b       = solver->b;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	tol     = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	ptime   = 0.0;
	/* Gives solver->resid a defined value if the loop never runs (maxiter < 1). */
	nrm2    = 0.0;

	r       = solver->work[0];
	t       = solver->work[1];
	s       = solver->work[2];

	/* Residuals are reported relative to ||b||.  For b == 0 the reciprocal
	   would be infinite, so fall back to the unscaled residual, matching
	   the guard in lis_solver_get_initial_residual(). */
	lis_vector_nrm2(b,&bnrm2);
	if( bnrm2 == 0.0 )
	{
		bnrm2 = 1.0;
	}
	else
	{
		bnrm2 = 1.0 / bnrm2;
	}

	err = lis_matrix_split(A);
	if( err )
	{
		LIS_DEBUG_FUNC_OUT;
		return err;
	}
	if( A->use_wd!=LIS_SOLVER_GS )
	{
		/* (Re)build the cached inverse diagonal for this solver */
		if( !A->WD )
		{
			err = lis_matrix_diag_duplicate(A->D,&A->WD);
			if( err )
			{
				LIS_DEBUG_FUNC_OUT;
				return err;
			}
		}
		lis_matrix_diag_copy(A->D,A->WD);
		lis_matrix_diag_inverse(A->WD);
		A->use_wd = LIS_SOLVER_GS;
	}

	for( iter=1; iter<=maxiter; iter++ )
	{
		/* x += (D-L)^{-1}(b - Ax) */
		time = lis_wtime();
		lis_psolve(solver,x,s);
		ptime += lis_wtime() - time;
		lis_matvec(A,s,t);
		lis_vector_axpyz(-1,t,b,r);
		lis_vector_nrm2(r,&nrm2);
		lis_matrix_solve(A,r,t,LIS_MATRIX_LOWER);
		lis_vector_axpy(1,t,x);

		/* convergence check */
		nrm2 = nrm2 * bnrm2;

		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( tol >= nrm2 )
		{
			/* Map the iterate back through the preconditioner before returning */
			time = lis_wtime();
			lis_psolve(solver,x,s);
			ptime += lis_wtime() - time;
			lis_vector_copy(s,x);
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}
	}

	/* Iteration budget exhausted: same final mapping as the converged exit,
	   now timed and recorded as well. */
	time = lis_wtime();
	lis_psolve(solver,x,s);
	ptime += lis_wtime() - time;
	lis_vector_copy(s,x);
	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	solver->ptime     = ptime;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_idrs
 *
 * IDR(s) iteration (per the function name) on solver->A / solver->x, with
 * s = solver->options[LIS_OPTIONS_IDRS_RESTART].
 *
 * Work-vector layout in solver->work:
 *   [0..3]         r, t, v, av
 *   [4   .. 4+s)   dX[0..s-1]
 *   [4+s .. 4+2s)  P[0..s-1]
 *   [4+2s .. )     dR[0..s-1]
 *
 * Heap scratch (released with lis_free2 on every exit):
 *   m, c : length s       M, MM : s*s (M is indexed column-major, M[k*s+i])
 * NOTE(review): the lis_malloc() results are not checked before use.
 *
 * Outline:
 *   1. Fill P with genrand_real1() values (generator seeded with a fixed key
 *      through init_by_array) and pass it through lis_idrs_orth().
 *   2. Do s start-up steps that build dX[k], dR[k] and the columns of
 *      M[k*s+i] = <P[i],dR[k]>, checking convergence after each step.
 *   3. Main loop: solve M c = m, form v = r - sum c[j]*dR[j], replace the
 *      "oldest" dX/dR pair, update x and r, then refresh m and M.
 *
 * NOTE(review): the preconditioned products are compiled only when PRE_RIGHT
 * is defined.  Without it dX[k], dR[k], av and t are used here without being
 * computed in this function - confirm that PRE_RIGHT is always set for this
 * build.
 *
 * Return value
 *   LIS_SUCCESS - lis_solver_get_initial_residual() returned non-zero, or
 *                 tol >= nrm2 after a step.
 *   LIS_MAXITER - iter exceeded maxiter.
 */
LIS_INT lis_idrs(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,t,v,av,*dX,*dR,*P;
	LIS_SCALAR om, h;
	LIS_SCALAR *M,*m,*c,*MM;
	LIS_REAL   bnrm2, nrm2, tol;
	LIS_REAL   angle;
	LIS_INT i,j,k,s,oldest;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes,tim;
    unsigned long init[4]={0x123, 0x234, 0x345, 0x456}, length=4;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	s       = solver->options[LIS_OPTIONS_IDRS_RESTART];
	ptimes  = 0.0;

	r       = solver->work[0];
	t       = solver->work[1];
	v       = solver->work[2];
	av      = solver->work[3];
	dX      = &solver->work[4];
	P       = &solver->work[4+s];
	dR      = &solver->work[4+2*s];

	/* assigned but not read anywhere else in this function */
	angle   = 0.7;

	/* NOTE(review): MM is tagged with the same label string as M. */
	m = (LIS_SCALAR *)lis_malloc(s*sizeof(LIS_SCALAR), "lis_idrs::m");
	c = (LIS_SCALAR *)lis_malloc(s*sizeof(LIS_SCALAR), "lis_idrs::c");
	M = (LIS_SCALAR *)lis_malloc(s*s*sizeof(LIS_SCALAR), "lis_idrs::M");
	MM = (LIS_SCALAR *)lis_malloc(s*s*sizeof(LIS_SCALAR),
"lis_idrs::M");



	/* Initial Residual */
	/* A non-zero return is treated as "nothing left to do" and reported as success. */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		lis_free2(4,m,c,M,MM);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	/* Fill the s vectors of P from the generator, seeded with a fixed key */
	init_by_array(init, length);
	for(k=0;k<s;k++)
	{
		for(i=0;i<n;i++)
		{
			P[k]->value[i] = genrand_real1();
		}
	}
	lis_idrs_orth(s,P);

	/* Start-up phase: s steps that populate dX, dR and M */
	for( k=0; k<s; k++ )
	{
		#ifdef PRE_RIGHT
			times = lis_wtime();
			lis_psolve(solver, r, dX[k]);
			ptimes += lis_wtime()-times;
			LIS_MATVEC(A,dX[k],dR[k]);
		#endif

		/* om = <dR[k],r> / <dR[k],dR[k]>; no guard against h == 0 */
		lis_vector_dot(dR[k],dR[k],&h);
		lis_vector_dot(dR[k],r,&om);
		om = om / h;
		lis_vector_scale(om,dX[k]);
		lis_vector_scale(-om,dR[k]);

		lis_vector_axpy(1.0,dX[k],x);
		lis_vector_axpy(1.0,dR[k],r);


		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);

		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[k+1] =
nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 )
printf("iter: %5d  residual = %e\n", k+1, nrm2);
		}

		if( tol >= nrm2 )
		{
			lis_free2(4,m,c,M,MM);

			solver->retcode    = LIS_SUCCESS;
			solver->iter       = k+1;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* column k of M: M[k*s+i] = <P[i],dR[k]> */
		for(i=0;i<s;i++)
		{
			lis_vector_dot(P[i],dR[k],&M[k*s+i]);
		}
	}

	iter = s;
	oldest = 0;
	/* m[i] = <P[i],r> */
	for(i=0;i<s;i++)
	{
		lis_vector_dot(P[i],r,&m[i]);
	}

	while( iter<=maxiter )
	{
		/* tim only feeds the commented-out printf at the bottom of the loop */
		tim = lis_wtime();
		lis_array_solve(s,M,m,c,MM); /* solve Mc=m */

		/* v = r - sum_j c[j]*dR[j] */
		lis_vector_copy(r,v);
		for(j=0;j<s;j++)
		{
			lis_vector_axpy(-c[j],dR[j],v);
		}

		/* Every (s+1)-th step recomputes om; iter starts at s, so this branch
		   is taken on the first pass and om is set before the else-branch
		   ever reads it. */
		if( (iter%(s+1))==s )
		{
			#ifdef PRE_RIGHT
				times = lis_wtime();
				lis_psolve(solver, v, av);
				ptimes += lis_wtime()-times;
				LIS_MATVEC(A,av,t);
			#endif

			/* om = <t,v> / <t,t>; no guard against h == 0 */
			lis_vector_dot(t,t,&h);
			lis_vector_dot(t,v,&om);
			om = om / h;
			#if 0
				lis_vector_scale(-om,t);
				for(j=0;j<s;j++)
				{
					lis_vector_axpy(-c[j],dR[j],t);
				}
				lis_vector_copy(t,dR[oldest]);
				lis_vector_scale(om,av);
				for(j=0;j<s;j++)
				{
					lis_vector_axpy(-c[j],dX[j],av);
				}
				lis_vector_copy(av,dX[oldest]);
			#else
				/* dX[oldest] = om*av - sum_j c[j]*dX[j], element by element
				   (h is reused as the accumulator) */
				for(i=0;i<n;i++)
				{
					h = om*av->value[i];
					for(j=0;j<s;j++)
					{
						h -= dX[j]->value[i] * c[j];
					}
					dX[oldest]->value[i] = h;
				}
				/* dR[oldest] = -om*t - sum_j c[j]*dR[j] */
				for(i=0;i<n;i++)
				{
					h = -om*t->value[i];
					for(j=0;j<s;j++)
					{
						h -= dR[j]->value[i] * c[j];
					}
					dR[oldest]->value[i] = h;
				}
			#endif
		}
		else
		{
			#ifdef PRE_RIGHT
				times = lis_wtime();
				lis_psolve(solver, v, av);
				ptimes += lis_wtime()-times;
			#endif

			#if 0
				lis_vector_scale(om,av);
				for(j=0;j<s;j++)
				{
					lis_vector_axpy(-c[j],dX[j],av);
				}
				lis_vector_copy(av,dX[oldest]);
			#else
				/* dX[oldest] = om*av - sum_j c[j]*dX[j], with om kept from the
				   most recent recomputation */
				for(i=0;i<n;i++)
				{
					h = om*av->value[i];
					for(j=0;j<s;j++)
					{
						h -= dX[j]->value[i] * c[j];
					}
					dX[oldest]->value[i] = h;
				}
			#endif

			/* dR[oldest] = -A*dX[oldest] */
			LIS_MATVEC(A,dX[oldest],dR[oldest]);
			lis_vector_scale(-1.0,dR[oldest]);
		}

		lis_vector_axpy(1.0,dR[oldest],r);
		lis_vector_axpy(1.0,dX[oldest],x);

		iter++;

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);

		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter]
= nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 )
printf("iter: %5d  residual = %e\n", iter, nrm2);
		}

		if( tol >= nrm2 )
		{
			lis_free2(4,m,c,M,MM);

			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* m += P^T dR[oldest]; column "oldest" of M is replaced by the same products */
		for(i=0;i<s;i++)
		{
			lis_vector_dot(P[i],dR[oldest],&h);
			m[i] += h;
			M[oldest*s+i] = h;
		}

		/* advance the slot index cyclically over 0..s-1 */
		oldest++;
		if( oldest==s ) oldest = 0;
		tim = lis_wtime() - tim;
		/*
		printf("update m,M: %e\n",tim);
		*/
	}
	lis_free2(4,m,c,M,MM);
	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_solver_get_initial_residual
 *
 * Forms the starting residual of an iterative solve and sets up the
 * scaling and tolerances used by the convergence tests.
 *
 *   solver - supplies A, b, x (xx in the quad-precision build), the options
 *            and the tolerance parameters; receives tol, tol_switch, bnrm
 *            and, if already converged, retcode/iter/resid.
 *   M      - when NULL the residual is written straight into r; otherwise
 *            it is written into t and r = M^-1 * t is computed at the end.
 *   t      - scratch vector, only used when M is not NULL.
 *   r      - receives the (optionally preconditioned) residual.
 *   bnrm2  - receives the reciprocal of the reference norm selected by the
 *            convergence condition (1.0 when that norm is zero).
 *
 * Returns LIS_FAILS when the scaled starting residual is already at or
 * below the LIS_PARAMS_RESID tolerance (callers treat a non-zero return as
 * "already converged"), LIS_SUCCESS otherwise.
 */
LIS_INT lis_solver_get_initial_residual(LIS_SOLVER solver, LIS_PRECON M, LIS_VECTOR t, LIS_VECTOR r, LIS_SCALAR *bnrm2)
{
	LIS_INT			output,conv;
	#ifdef USE_QUAD_PRECISION
		LIS_INT	i;
	#endif
	LIS_MATRIX	A;
	LIS_VECTOR	x,b,res,xx;
	LIS_SCALAR	rnorm;
	LIS_REAL	tol,tol_w,tol_switch;

	LIS_DEBUG_FUNC_IN;

	/* Solver settings */
	output     = solver->options[LIS_OPTIONS_OUTPUT];
	conv       = solver->options[LIS_OPTIONS_CONV_COND];
	tol        = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	tol_w      = solver->params[LIS_PARAMS_RESID_WEIGHT-LIS_OPTIONS_LEN];
	tol_switch = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];

	/* Problem data */
	A  = solver->A;
	b  = solver->b;
	x  = solver->x;
	xx = solver->xx;

	/* Without a preconditioner the residual goes directly into r,
	   otherwise into the scratch vector t. */
	res = ( M==NULL ) ? r : t;

	if( !solver->options[LIS_OPTIONS_INITGUESS_ZEROS] )
	{
		/* Non-zero starting guess: res = b - A*x */
		#ifdef USE_QUAD_PRECISION
		if( solver->precision!=LIS_PRECISION_DOUBLE )
		{
			/* quad path: use xx as the iterate and clear the low words */
			lis_matvec(A,xx,res);
			lis_vector_xpay(b,-1,res);

			#ifdef _OPENMP
			#pragma omp parallel for private(i)
			#endif
			for(i=0;i<A->n;i++)
			{
				res->value_lo[i] = 0.0;
			}
		}
		else
		#endif
		{
			lis_matvec(A,x,res);
			lis_vector_xpay(b,-1,res);
		}
	}
	else
	{
		/* Zero starting guess: the residual is b itself */
		#ifdef USE_QUAD_PRECISION
		if( solver->precision!=LIS_PRECISION_DOUBLE )
		{
			lis_vector_copyex_nm(b,res);
		}
		else
		#endif
		{
			lis_vector_copy(b,res);
		}
	}

	/* Residual norm, reference norm and tolerances for the chosen condition */
	if( conv==LIS_CONV_COND_NRM2_R || conv==LIS_CONV_COND_NRM2_B )
	{
		lis_vector_nrm2(res,&rnorm);
		if( conv==LIS_CONV_COND_NRM2_R )
		{
			/* reference norm is the starting residual itself */
			*bnrm2 = rnorm;
		}
		else
		{
			lis_vector_nrm2(b,bnrm2);
		}
		solver->tol        = tol;
		solver->tol_switch = tol_switch;
	}
	else if( conv==LIS_CONV_COND_NRM1_B )
	{
		lis_vector_nrm1(res,&rnorm);
		lis_vector_nrm1(b,bnrm2);
		solver->tol        = *bnrm2*tol_w + tol;
		solver->tol_switch = *bnrm2*tol_w + tol_switch;
	}

	/* Store the reciprocal of the reference norm; a zero norm maps to 1 */
	*bnrm2 = ( *bnrm2 == 0.0 ) ? 1.0 : 1.0 / *bnrm2;
	solver->bnrm = *bnrm2;
	rnorm = rnorm * *bnrm2;

	/* Record / print iteration 0 only for quad vectors outside switch mode */
	if( output && r->precision==LIS_PRECISION_QUAD && solver->precision!=LIS_PRECISION_SWITCH )
	{
		if( output & LIS_PRINT_MEM )
		{
			solver->residual[0] = rnorm;
		}
		if( (output & LIS_PRINT_OUT) && A->my_rank==0 )
		{
			printf("iter: %5d  residual = %e\n", 0, rnorm);
		}
	}

	/* Already converged: report it through the solver and a non-zero return */
	if( rnorm <= solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN] )
	{
		solver->retcode = LIS_SUCCESS;
		solver->iter    = 1;
		solver->resid   = rnorm;
		LIS_DEBUG_FUNC_OUT;
		return LIS_FAILS;
	}

	/* r = M^-1 * res when a preconditioner was supplied */
	if( M!=NULL )
	{
		lis_psolve(solver, res, r);
	}

	LIS_DEBUG_FUNC_OUT;
	return LIS_SUCCESS;
}
/*
 * lis_bicgstab_switch
 *
 * Two-phase BiCGSTAB (per the function name):
 *
 *   Phase 1 - at most maxiter2 (LIS_OPTIONS_SWITCH_MAXITER) iterations using
 *             only the .hi word of each scalar and the plain vector kernels,
 *             with the work vectors' precision set to LIS_PRECISION_DEFAULT.
 *             It stops as soon as nrm2 <= solver->tol_switch.
 *   Phase 2 - the work vectors are switched to LIS_PRECISION_QUAD, x is
 *             copied into solver->xx, the residual is rebuilt, and the
 *             iteration continues with the lis_quad_* / ...ex_* kernels
 *             from iter+1 up to maxiter against solver->tol.
 *
 * r and s both refer to solver->work[1], so "s = r - alpha*v" is done in
 * place on r.
 *
 * Return value
 *   LIS_SUCCESS   - the first lis_solver_get_initial_residual() returned
 *                   non-zero, or phase 2 reached tol > nrm2.
 *   LIS_BREAKDOWN - rho or omega became exactly zero in either phase.
 *   LIS_MAXITER   - phase 2 ran out of iterations.
 *
 * solver->iter holds the total iteration count and solver->iter2 the count
 * at which phase 1 ended.
 */
LIS_INT lis_bicgstab_switch(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,rtld, t,p,v, s, phat, shat;
	LIS_QUAD_PTR alpha, beta, omega, rho, rho_old, tmpdot1, tmpdot2;
	LIS_REAL   bnrm2, nrm2, tol, tol2;
	LIS_INT iter,maxiter,n,output,conv;
	LIS_INT iter2,maxiter2;
	double times,ptimes;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	M       = solver->precon;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter  = solver->options[LIS_OPTIONS_MAXITER];
	maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
	output   = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	tol      = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	tol2     = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
	ptimes  = 0.0;

	/* r and s intentionally share work[1] */
	rtld    = solver->work[0];
	r       = solver->work[1];
	s       = solver->work[1];
	t       = solver->work[2];
	p       = solver->work[3];
	v       = solver->work[4];
	phat    = solver->work[5];
	shat    = solver->work[6];

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
	LIS_QUAD_SCALAR_MALLOC(omega,6,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot2,7,1);
	rho_old.hi[0] = 1.0;
	rho_old.lo[0] = 0.0;
	alpha.hi[0] = 1.0;
	alpha.lo[0] = 0.0;
	omega.hi[0] = 1.0;
	omega.lo[0] = 0.0;

	lis_vector_set_allex_nm(0.0, p);
	lis_vector_set_allex_nm(0.0, phat);
	lis_vector_set_allex_nm(0.0, s);
	lis_vector_set_allex_nm(0.0, shat);

	/* Initial Residual */
	/* A non-zero return is treated as "nothing left to do" and reported as success. */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol2     = solver->tol_switch;

	lis_solver_set_shadowresidual(solver,r,rtld);

	/* ---- Phase 1: iterate with only the .hi words ---- */
	s->precision = LIS_PRECISION_DEFAULT;
	shat->precision = LIS_PRECISION_DEFAULT;
	p->precision = LIS_PRECISION_DEFAULT;
	phat->precision = LIS_PRECISION_DEFAULT;
	for( iter=1; iter<=maxiter2; iter++ )
	{
			/* rho = <rtld,r> */
			lis_vector_dot(rtld,r,&rho.hi[0]);

			/* test breakdown */
			/* NOTE(review): on iter==1 nrm2 has not been assigned yet, so
			   solver->resid receives an indeterminate value here. */
			if( rho.hi[0]==0.0 )
			{
				solver->retcode   = LIS_BREAKDOWN;
				solver->iter      = iter;
				solver->iter2     = iter;
				solver->resid     = nrm2;
				LIS_DEBUG_FUNC_OUT;
				return LIS_BREAKDOWN;
			}

			if( iter==1 )
			{
				lis_vector_copy(r,p);
			}
			else
			{
				/* beta = (rho / rho_old) * (alpha / omega) */
				beta.hi[0] = (rho.hi[0] / rho_old.hi[0]) * (alpha.hi[0] / omega.hi[0]);
		
				/* p = r + beta*(p - omega*v) */
				lis_vector_axpy(-omega.hi[0],v,p);
				lis_vector_xpay(r,beta.hi[0],p);
			}
			
			/* phat = M^-1 * p */
			times = lis_wtime();
			lis_psolve(solver, p, phat);
			ptimes += lis_wtime()-times;

			/* v = A * phat */
			LIS_MATVEC(A,phat,v);

			/* tmpdot1 = <rtld,v> */
			lis_vector_dot(rtld,v,&tmpdot1.hi[0]);
			/* test breakdown */
			/* */
			/* NOTE(review): the breakdown test announced above is not
			   implemented; tmpdot1 is used as a divisor without a zero check. */
			
			/* alpha = rho / tmpdot1 */
			alpha.hi[0] = rho.hi[0] / tmpdot1.hi[0];
			
			/* s = r - alpha*v */
			/* (written through r, which is the same vector as s) */
			lis_vector_axpy(-alpha.hi[0],v,r);

			/* Early check for tolerance */
			lis_solver_get_residual[conv](s,solver,&nrm2);
			if( nrm2 <= tol2 )
			{
				if( output )
				{
					if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
					if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
				}

				/* half-step update of x, then leave phase 1 */
				lis_vector_axpy(alpha.hi[0],phat,x);
				solver->iter       = iter;
				solver->iter2     = iter;
				solver->ptimes     = ptimes;
				break;
			}

			/* shat = M^-1 * s */
			times = lis_wtime();
			lis_psolve(solver, s, shat);
			ptimes += lis_wtime()-times;

			/* t = A * shat */
			LIS_MATVEC(A,shat,t);

			/* tmpdot1 = <t,s> */
			/* tmpdot2 = <t,t> */
			/* omega   = tmpdot1 / tmpdot2 */
			lis_vector_dot(t,s,&tmpdot1.hi[0]);
			lis_vector_dot(t,t,&tmpdot2.hi[0]);
			omega.hi[0]   = tmpdot1.hi[0] / tmpdot2.hi[0];

			/* x = x + alpha*phat + omega*shat */
			lis_vector_axpy(alpha.hi[0],phat,x);
			lis_vector_axpy(omega.hi[0],shat,x);
			
			/* r = s - omega*t */
			lis_vector_axpy(-omega.hi[0],t,r);
			
			/* convergence check */
			lis_solver_get_residual[conv](r,solver,&nrm2);
			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
			}

			if( nrm2 <= tol2 )
			{
				solver->iter       = iter;
				solver->iter2     = iter;
				solver->ptimes     = ptimes;
				break;
			}
			
			if( omega.hi[0]==0.0 )
			{
				solver->retcode   = LIS_BREAKDOWN;
				solver->iter      = iter;
				solver->iter2     = iter;
				solver->resid     = nrm2;
				LIS_DEBUG_FUNC_OUT;
				return LIS_BREAKDOWN;
			}
			rho_old.hi[0] = rho.hi[0];
	}

	/* ---- Phase 2: restart with the quad kernels ---- */
	s->precision = LIS_PRECISION_QUAD;
	shat->precision = LIS_PRECISION_QUAD;
	p->precision = LIS_PRECISION_QUAD;
	phat->precision = LIS_PRECISION_QUAD;

	/* Force the residual to be rebuilt from the current iterate (copied to xx) */
	solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
	lis_vector_copyex_mn(x,solver->xx);
	/* Only the .hi words are reset; the .lo words were set to 0.0 at the top
	   and phase 1 never writes them. */
	rho_old.hi[0] = 1.0;
	alpha.hi[0] = 1.0;
	omega.hi[0] = 1.0;

	lis_vector_set_allex_nm(0.0, p);
	lis_vector_set_allex_nm(0.0, phat);
	lis_vector_set_allex_nm(0.0, s);
	lis_vector_set_allex_nm(0.0, shat);

	/* Initial Residual */
	/* The return value (already-converged signal) is ignored here. */
	lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
	tol     = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	for( iter2=iter+1; iter2<=maxiter; iter2++ )
	{
			/* rho = <rtld,r> */
			lis_vector_dotex_mmm(rtld,r,&rho);

			/* test breakdown */
			if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
			{
				solver->retcode   = LIS_BREAKDOWN;
				solver->iter       = iter2;
				solver->iter2      = iter;
				solver->resid     = nrm2;
				LIS_DEBUG_FUNC_OUT;
				return LIS_BREAKDOWN;
			}

			/* NOTE(review): iter is at least 1 after phase 1, so iter2 starts
			   at 2 or more and this branch cannot be taken.  The first quad
			   iteration therefore goes through the beta update below with p
			   zeroed and v still holding its phase-1 contents - confirm that
			   this is intended. */
			if( iter2==1 )
			{
				lis_vector_copyex_mm(r,p);
			}
			else
			{
				/* beta = (rho / rho_old) * (alpha / omega) */
				lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
				lis_quad_div((LIS_QUAD *)tmpdot1.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)omega.hi);
				lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmpdot1.hi);
		
				/* p = r + beta*(p - omega*v) */
				/* omega is negated in place and not restored; it is
				   recomputed further down before its next use */
				lis_quad_minus((LIS_QUAD *)omega.hi);
				lis_vector_axpyex_mmm(omega,v,p);
				lis_vector_xpayex_mmm(r,beta,p);
			}
			
			/* phat = M^-1 * p */
			times = lis_wtime();
			lis_psolve(solver, p, phat);
			ptimes += lis_wtime()-times;

			/* v = A * phat */
			LIS_MATVEC(A,phat,v);

			/* tmpdot1 = <rtld,v> */
			lis_vector_dotex_mmm(rtld,v,&tmpdot1);
			/* test breakdown */
			/* */
			
			/* alpha = rho / tmpdot1 */
			lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);
			
			/* s = r - alpha*v */
			/* alpha is negated in place; it is flipped back before each x update */
			lis_quad_minus((LIS_QUAD *)alpha.hi);
			lis_vector_axpyex_mmm(alpha,v,r);

			/* Early check for tolerance */
			lis_solver_get_residual[conv](s,solver,&nrm2);
			if( tol > nrm2 )
			{
				if( output )
				{
					if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2;
					if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter2, nrm2);
				}

				lis_quad_minus((LIS_QUAD *)alpha.hi);
				lis_vector_axpyex_mmm(alpha,phat,x);
				solver->retcode    = LIS_SUCCESS;
				solver->iter       = iter2;
				solver->iter2      = iter;
				solver->resid      = nrm2;
				solver->ptimes     = ptimes;
				LIS_DEBUG_FUNC_OUT;
				return LIS_SUCCESS;
			}

			/* shat = M^-1 * s */
			times = lis_wtime();
			lis_psolve(solver, s, shat);
			ptimes += lis_wtime()-times;

			/* t = A * shat */
			LIS_MATVEC(A,shat,t);

			/* tmpdot1 = <t,s> */
			/* tmpdot2 = <t,t> */
			/* omega   = tmpdot1 / tmpdot2 */
			lis_vector_dotex_mmm(t,s,&tmpdot1);
			lis_vector_dotex_mmm(t,t,&tmpdot2);
			lis_quad_div((LIS_QUAD *)omega.hi,(LIS_QUAD *)tmpdot1.hi,(LIS_QUAD *)tmpdot2.hi);

			/* x = x + alpha*phat + omega*shat */
			lis_quad_minus((LIS_QUAD *)alpha.hi);
			lis_vector_axpyex_mmm(alpha,phat,x);
			lis_vector_axpyex_mmm(omega,shat,x);
			
			/* r = s - omega*t */
			/* omega is negated for the update and restored right after */
			lis_quad_minus((LIS_QUAD *)omega.hi);
			lis_vector_axpyex_mmm(omega,t,r);
			lis_quad_minus((LIS_QUAD *)omega.hi);
			
			/* convergence check */
			lis_solver_get_residual[conv](r,solver,&nrm2);
			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter2, nrm2);
			}

			if( tol > nrm2 )
			{
				solver->retcode    = LIS_SUCCESS;
				solver->iter       = iter2;
				solver->iter2      = iter;
				solver->resid      = nrm2;
				solver->ptimes     = ptimes;
				LIS_DEBUG_FUNC_OUT;
				return LIS_SUCCESS;
			}
			
			if( omega.hi[0]==0.0 && omega.lo[0]==0.0 )
			{
				solver->retcode   = LIS_BREAKDOWN;
				solver->iter       = iter2;
				solver->iter2      = iter;
				solver->resid     = nrm2;
				LIS_DEBUG_FUNC_OUT;
				return LIS_BREAKDOWN;
			}
			rho_old.hi[0] = rho.hi[0];
			rho_old.lo[0] = rho.lo[0];
	}

	/* iteration budget exhausted */
	solver->retcode   = LIS_MAXITER;
	solver->iter       = iter2;
	solver->iter2      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_bicg
 *
 * Preconditioned BiCG iteration (per the function name) on solver->A and
 * solver->x.  Each step applies the preconditioner and its transpose
 * (lis_psolve / lis_psolvet) and multiplies by A and by A^T
 * (LIS_MATVEC / LIS_MATVECT).
 *
 * Work vectors come from solver->work[0..5].  q shares storage with z
 * (work[2]) and qtld with ztld (work[3]); this is safe only because p and
 * ptld are updated from z / ztld BEFORE q / qtld are written, so the order
 * of those statements must be preserved.
 *
 * Return value
 *   LIS_SUCCESS   - lis_solver_get_initial_residual() returned non-zero, or
 *                   tol >= nrm2 after an iteration.
 *   LIS_BREAKDOWN - rho = <z,rtld> or <ptld,q> is exactly zero.
 *   LIS_MAXITER   - the loop finished without meeting the tolerance.
 *
 * solver->retcode, ->iter and ->resid are stored on the exits taken inside
 * or after the loop; solver->ptimes only on the converged exit.
 */
LIS_INT lis_bicg(LIS_SOLVER solver)
{
  LIS_MATRIX A,At;
  LIS_VECTOR x;
  LIS_VECTOR r,rtld, z,ztld,p, ptld, q, qtld;
  LIS_SCALAR alpha, beta, rho, rho_old, tmpdot1;
  LIS_REAL   bnrm2, nrm2, tol;
  LIS_INT iter,maxiter,output,conv;
  double times,ptimes;

  LIS_DEBUG_FUNC_IN;

  A       = solver->A;
  At      = solver->A;
  x       = solver->x;
  maxiter = solver->options[LIS_OPTIONS_MAXITER];
  output  = solver->options[LIS_OPTIONS_OUTPUT];
  conv    = solver->options[LIS_OPTIONS_CONV_COND];
  ptimes  = 0.0;

  r       = solver->work[0];
  rtld    = solver->work[1];
  z       = solver->work[2];
  ztld    = solver->work[3];
  p       = solver->work[4];
  ptld    = solver->work[5];
  q       = solver->work[2];   /* shares storage with z    */
  qtld    = solver->work[3];   /* shares storage with ztld */
  rho_old = (LIS_SCALAR)1.0;



  /* Initial Residual */
  /* A non-zero return is treated as "nothing left to do" and reported as success. */
  if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
  {
    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
  }
  tol     = solver->tol;

  /* Seed nrm2 from the starting residual.  The breakdown exits in the first
     iteration, and the max-iteration exit when maxiter < 1, copy nrm2 into
     solver->resid before the loop has had a chance to compute it. */
  lis_solver_get_residual[conv](r,solver,&nrm2);

  lis_solver_set_shadowresidual(solver,r,rtld);

  lis_vector_set_all(0, p);
  lis_vector_set_all(0, ptld);

  for( iter=1; iter<=maxiter; iter++ )
  {
    /* z    = M^-1 * r */
    /* ztld = M^-T * rtld */
    times = lis_wtime();
    lis_psolve(solver, r, z);
    lis_psolvet(solver, rtld, ztld);
    ptimes += lis_wtime()-times;

    /* rho = <z,rtld> */
    lis_vector_dot(z,rtld,&rho);

    /* test breakdown */
    if( rho==0.0 )
    {
      solver->retcode   = LIS_BREAKDOWN;
      solver->iter      = iter;
      solver->resid     = nrm2;
      LIS_DEBUG_FUNC_OUT;
      return LIS_BREAKDOWN;
    }

    /* beta = (rho / rho_old) */
    beta = rho / rho_old;

    /* p    = z    + beta*p    */
    /* q    = A   * p          (overwrites z) */
    lis_vector_xpay(z,beta,p);
    LIS_MATVEC(A,p,q);

    /* ptld = ztld + beta*ptld */
    /* qtld = A^T * ptld       (overwrites ztld) */
    lis_vector_xpay(ztld,beta,ptld);
    LIS_MATVECT(At,ptld,qtld);

    /* tmpdot1 = <ptld,q> */
    lis_vector_dot(ptld,q,&tmpdot1);

    /* test breakdown */
    if( tmpdot1==0.0 )
    {
      solver->retcode   = LIS_BREAKDOWN;
      solver->iter      = iter;
      solver->resid     = nrm2;
      LIS_DEBUG_FUNC_OUT;
      return LIS_BREAKDOWN;
    }

    /* alpha = rho / tmpdot1 */
    alpha = rho / tmpdot1;

    /* x = x + alpha*p */
    lis_vector_axpy(alpha,p,x);

    /* r    = r    - alpha*q    */
    lis_vector_axpy(-alpha,q,r);

    /* convergence check */
    lis_solver_get_residual[conv](r,solver,&nrm2);

    if( output )
    {
      if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
      if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
    }

    if( tol >= nrm2 )
    {
      solver->retcode    = LIS_SUCCESS;
      solver->iter       = iter;
      solver->resid      = nrm2;
      solver->ptimes     = ptimes;
      LIS_DEBUG_FUNC_OUT;
      return LIS_SUCCESS;
    }

    /* rtld = rtld - alpha*qtld */
    lis_vector_axpy(-alpha,qtld,rtld);

    rho_old = rho;
  }

  /* iteration budget exhausted (iter is maxiter+1 at this point) */
  solver->retcode   = LIS_MAXITER;
  solver->iter      = iter;
  solver->resid     = nrm2;
  LIS_DEBUG_FUNC_OUT;
  return LIS_MAXITER;
}
/*
 * lis_bicrstab - preconditioned BiCRSTAB iteration for the system attached
 * to `solver`.
 *
 * Reads the matrix, solution vector, options and work vectors from `solver`,
 * updates solver->x in place and records retcode / iter / resid in `solver`
 * (ptimes is recorded on the two convergence exits only).
 *
 * Returns
 *   LIS_SUCCESS   - lis_solver_get_initial_residual() returned non-zero, or
 *                   the residual measure of s (half step) or r (full step)
 *                   dropped to solver->tol;
 *   LIS_BREAKDOWN - a scalar that one of the recurrences divides by is
 *                   exactly zero (tmpdot1, tmpdot2, omega, rho or rho_old);
 *   LIS_MAXITER   - the iteration budget was used up.
 */
LIS_INT lis_bicrstab(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,rtld, p, s, ap, ms, map, ams, z;
	LIS_SCALAR alpha, beta, omega, rho, rho_old, tmpdot1, tmpdot2;
	LIS_REAL   bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes;

	LIS_DEBUG_FUNC_IN;

	/* M, b and n are not named again below; they are kept because the
	   LIS_* macros used here are opaque from this file. */
	A       = solver->A;
	M       = solver->precon;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptimes  = 0.0;

	rtld    = solver->work[0];
	r       = solver->work[1];
	s       = solver->work[2];
	ms      = solver->work[3];
	ams     = solver->work[4];
	p       = solver->work[5];
	ap      = solver->work[6];
	map     = solver->work[7];
	z       = solver->work[8];

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	/* nrm2 is reported by the breakdown and max-iteration exits.  Measure */
	/* the initial residual now so that none of them reads it unassigned   */
	/* (a breakdown can fire in iteration 1 before the first in-loop       */
	/* measurement, and the loop body is skipped when maxiter < 1).        */
	lis_solver_get_residual[conv](r,solver,&nrm2);

	/* p temporarily holds the shadow residual; rtld is the transposed */
	/* product of A with it                                             */
	lis_solver_set_shadowresidual(solver,r,p);

	LIS_MATVECT(A,p,rtld);
	/* z = M^-1 * r, p = z, rho_old = <rtld,z> */
	times = lis_wtime();
	lis_psolve(solver, r, z);
	ptimes += lis_wtime()-times;
	lis_vector_copy(z,p);
	lis_vector_dot(rtld,z,&rho_old);
	
	for( iter=1; iter<=maxiter; iter++ )
	{
		/* ap      = A * p             */
		/* map     = M^-1 * ap         */
		/* tmpdot1 = <rtld,map>        */
		LIS_MATVEC(A,p,ap);
		times = lis_wtime();
		lis_psolve(solver, ap, map);
		ptimes += lis_wtime()-times;
		lis_vector_dot(rtld,map,&tmpdot1);

		/* test breakdown: alpha divides by tmpdot1.  x and r are still */
		/* the pair that nrm2 was last measured for.                    */
		if( tmpdot1==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* alpha   = rho_old / tmpdot1 */
		/* s       = r - alpha*ap      */
		alpha = rho_old / tmpdot1;
		lis_vector_axpyz(-alpha,ap,r,s);

		/* Early check for tolerance */
		lis_solver_get_residual[conv](s,solver,&nrm2);
		if( nrm2 <= tol )
		{
			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
			}

			/* accept the half step: x = x + alpha*p */
			lis_vector_axpy(alpha,p,x);
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* ms      = z - alpha*map     */
		/* ams     = A * ms            */
		/* tmpdot1 = <ams,s>           */
		/* tmpdot2 = <ams,ams>         */
		lis_vector_axpyz(-alpha,map,z,ms);
		LIS_MATVEC(A,ms,ams);
		lis_vector_dot(ams,s,&tmpdot1);
		lis_vector_dot(ams,ams,&tmpdot2);

		/* test breakdown: omega divides by tmpdot2.  Keep the half step */
		/* so that x matches s, whose residual measure nrm2 holds.       */
		if( tmpdot2==0.0 )
		{
			lis_vector_axpy(alpha,p,x);
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* omega   = tmpdot1 / tmpdot2 */
		omega   = tmpdot1 / tmpdot2;

		/* x = x + alpha*p  + omega*ms  */
		/* r = s - omega*ams            */
		lis_vector_axpy(alpha,p,x);
		lis_vector_axpy(omega,ms,x);
		lis_vector_axpyz(-omega,ams,s,r);
		
		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}
		
		/* z   = M^-1 * r */
		/* rho = <rtld,z> */
		times = lis_wtime();
		lis_psolve(solver, r, z);
		ptimes += lis_wtime()-times;
		lis_vector_dot(rtld,z,&rho);

		/* test breakdown: beta divides by rho_old and omega, and a zero */
		/* rho would make the next alpha zero                            */
		if( rho==0.0 || rho_old==0.0 || omega==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) * (alpha / omega) */
		/* p    = z + beta*(p - omega*map)          */
		beta = (rho / rho_old) * (alpha / omega);
		lis_vector_axpy(-omega,map,p);
		lis_vector_xpay(z,beta,p);

		rho_old = rho;
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_bicr_quad - BiCR iteration whose scalars (alpha, beta, rho, ...) are
 * LIS_QUAD_PTR values carried as hi/lo pairs and whose vector updates go
 * through the *ex_mm / *ex_mmm routines.
 *
 * Reads the matrix, solution vector, options and work vectors from `solver`,
 * updates solver->x in place and records retcode / iter / resid in `solver`
 * (ptimes is recorded on the convergence exit only).
 *
 * Returns
 *   LIS_SUCCESS   - lis_solver_get_initial_residual() returned non-zero, or
 *                   the residual measure dropped to solver->tol;
 *   LIS_BREAKDOWN - <map,aptld> or <az,ztld> is exactly zero;
 *   LIS_MAXITER   - the iteration budget was used up.
 */
LIS_INT lis_bicr_quad(LIS_SOLVER solver)
{
  LIS_MATRIX A,At;
  LIS_PRECON M;
  LIS_VECTOR b,x;
  LIS_VECTOR r,rtld, z,ztld,p, ptld, ap, map, az, aptld;
  LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1;
  LIS_REAL   bnrm2, nrm2, tol;
  LIS_INT iter,maxiter,n,output,conv;
  double times,ptimes;

  LIS_DEBUG_FUNC_IN;

  /* At, M, b and n are not named again below; they are kept because the */
  /* LIS_* macros used here are opaque from this file.                   */
  A       = solver->A;
  At      = solver->A;
  M       = solver->precon;
  b       = solver->b;
  x       = solver->x;
  n       = A->n;
  maxiter = solver->options[LIS_OPTIONS_MAXITER];
  output  = solver->options[LIS_OPTIONS_OUTPUT];
  conv    = solver->options[LIS_OPTIONS_CONV_COND];
  ptimes  = 0.0;

  r       = solver->work[0];
  rtld    = solver->work[1];
  z       = solver->work[2];
  ztld    = solver->work[3];
  p       = solver->work[4];
  ptld    = solver->work[5];
  ap      = solver->work[6];
  az      = solver->work[7];
  map     = solver->work[8];
  aptld   = solver->work[9];

  /* presumably binds each hi/lo scalar to its own storage slot, selected */
  /* by the second argument - confirm against the macro definition        */
  LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
  LIS_QUAD_SCALAR_MALLOC(beta,1,1);
  LIS_QUAD_SCALAR_MALLOC(rho,2,1);
  LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
  LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);

  /* Initial Residual */
  if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
  {
    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
  }
  tol     = solver->tol;

  /* nrm2 is reported by the breakdown and max-iteration exits.  Measure */
  /* the initial residual now so that none of them reads it unassigned:  */
  /* the first breakdown test runs in iteration 1 before the in-loop     */
  /* measurement, and the loop body is skipped when maxiter < 1.         */
  lis_solver_get_residual[conv](r,solver,&nrm2);

  lis_solver_set_shadowresidual(solver,r,rtld);

  /* z = M^-1 * r, ztld = M^-T * rtld, p = z, ptld = ztld */
  /* ap = A * z, rho_old = <ap,ztld>                      */
  lis_psolve(solver, r, z);
  lis_psolvet(solver, rtld, ztld);
  lis_vector_copyex_mm(z,p);
  lis_vector_copyex_mm(ztld,ptld);
  LIS_MATVEC(A,z,ap);
  lis_vector_dotex_mmm(ap,ztld,&rho_old);

  for( iter=1; iter<=maxiter; iter++ )
  {
    /* aptld = A^T * ptld */
    /* map   = M^-1 * ap  */
    LIS_MATVECT(A,ptld,aptld);
    times = lis_wtime();
    lis_psolve(solver, ap, map);
    ptimes += lis_wtime()-times;

    /* tmpdot1 = <map,aptld> */
    lis_vector_dotex_mmm(map,aptld,&tmpdot1);
    /* test breakdown: alpha divides by tmpdot1; nrm2 still describes r */
    if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
    {
      solver->retcode   = LIS_BREAKDOWN;
      solver->iter      = iter;
      solver->resid     = nrm2;
      LIS_DEBUG_FUNC_OUT;
      return LIS_BREAKDOWN;
    }

    /* alpha = rho_old / tmpdot1 */
    /* x     = x + alpha*p   */
    /* r     = r - alpha*ap  */
    lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho_old.hi,(LIS_QUAD *)tmpdot1.hi);
    lis_vector_axpyex_mmm(alpha,p,x);
    /* alpha is negated in place here and stays negated until the next */
    /* iteration recomputes it                                         */
    lis_quad_minus((LIS_QUAD *)alpha.hi);
    lis_vector_axpyex_mmm(alpha,ap,r);
    /* convergence check */
    lis_solver_get_residual[conv](r,solver,&nrm2);

    if( output )
    {
      if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
      if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
    }

    if( tol >= nrm2 )
    {
      solver->retcode    = LIS_SUCCESS;
      solver->iter       = iter;
      solver->resid      = nrm2;
      solver->ptimes     = ptimes;
      LIS_DEBUG_FUNC_OUT;
      return LIS_SUCCESS;
    }
    
    /* rtld = rtld - alpha*aptld   (the variable alpha holds -alpha here) */
    /* z    = z - alpha*map      */
    /* ztld = M^-T * rtld        */
    /* az   = A * z              */
    /* rho = <az,ztld>           */
    lis_vector_axpyex_mmm(alpha,aptld,rtld);
    lis_vector_axpyex_mmm(alpha,map,z);
    times = lis_wtime();
    lis_psolvet(solver, rtld, ztld);
    ptimes += lis_wtime()-times;
    LIS_MATVEC(A,z,az);
    lis_vector_dotex_mmm(az,ztld,&rho);

    /* test breakdown */
    if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
    {
      solver->retcode   = LIS_BREAKDOWN;
      solver->iter      = iter;
      solver->resid     = nrm2;
      LIS_DEBUG_FUNC_OUT;
      return LIS_BREAKDOWN;
    }

    /* beta = rho / rho_old    */
    /* p    = z    + beta*p    */
    /* ptld = ztld + beta*ptld */
    /* ap   = az   + beta*ap   */
    lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
    lis_vector_xpayex_mmm(z,beta,p);
    lis_vector_xpayex_mmm(ztld,beta,ptld);
    lis_vector_xpayex_mmm(az,beta,ap);

    rho_old.hi[0] = rho.hi[0];
    rho_old.lo[0] = rho.lo[0];
  }

  solver->retcode   = LIS_MAXITER;
  solver->iter      = iter;
  solver->resid     = nrm2;
  LIS_DEBUG_FUNC_OUT;
  return LIS_MAXITER;
}
/*
 * lis_ecg - eigenpair iteration for the matrix attached to `esolver`.
 *
 * Each outer step forms the Rayleigh-type quotient evalue = <x,Ax>, the
 * residual r = Ax - evalue*x, and then picks the next x from the span of
 * {w, x, p} by iterating on the 3x3 matrices SA and SB built from the inner
 * products of those vectors.
 *
 * On every exit the function stores iter, resid, evalue[0] and retcode in
 * `esolver`, undoes the diagonal shift applied on entry, and releases the
 * inner solver, its preconditioner and the scratch arrays.
 *
 * Returns
 *   LIS_SUCCESS   - resid dropped below the tolerance;
 *   LIS_BREAKDOWN - the 3x3 iteration produced ievalue3 == 0;
 *   LIS_MAXITER   - emaxiter outer steps were taken without convergence.
 *
 * NOTE(review): resid and evalue are only assigned inside the outer loop, so
 * this still assumes emaxiter >= 1 - confirm that callers guarantee it.
 */
LIS_INT lis_ecg(LIS_ESOLVER esolver)
{
  LIS_MATRIX        A;
  LIS_VECTOR        x;
  LIS_SCALAR        evalue;
  LIS_INT           emaxiter;
  LIS_REAL          tol;
  LIS_INT           iter,iter3,output;
  LIS_INT           retcode;
  LIS_REAL          nrm2,resid,resid3;
  LIS_SCALAR        lshift;
  LIS_VECTOR        b,r,w,p,Aw,Ax,Ap;
  LIS_SCALAR        *SA, *SB, *v3, *SBv3, *z3, *q3, ievalue3;
  LIS_SOLVER        solver;
  LIS_PRECON        precon;
  double            times,itimes,ptimes,p_c_times,p_i_times;
  LIS_INT           nsol, precon_type;
  char              solvername[128], preconname[128];

  A = esolver->A;
  x = esolver->x;

  emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
  tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN];
  output  = esolver->options[LIS_EOPTIONS_OUTPUT];
  lshift = esolver->lshift;

  /* replaced by LIS_BREAKDOWN or by the final resid/tol comparison */
  retcode = LIS_MAXITER;

  if( A->my_rank==0 ) printf("local shift = %e\n", lshift);
  if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);

  /* 3x3 matrices and length-3 vectors used by the inner iteration */
  SA = (LIS_SCALAR *)lis_malloc(3*3*sizeof(LIS_SCALAR), "lis_ecg::SA");
  SB = (LIS_SCALAR *)lis_malloc(3*3*sizeof(LIS_SCALAR), "lis_ecg::SB");
  v3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::v3");
  SBv3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::SBv3");
  z3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::z3");
  q3 = (LIS_SCALAR *)lis_malloc(3*sizeof(LIS_SCALAR), "lis_ecg::q3");

  b = esolver->work[0];
  r = esolver->work[3];
  w = esolver->work[4];
  p = esolver->work[5];
  Aw = esolver->work[6];
  Ax = esolver->work[7];
  Ap = esolver->work[8];

  /* b = normalised all-ones vector; solve A x = b with a freshly created */
  /* solver and treat b as "Ax" from then on                              */
  lis_vector_set_all(1.0,b);
  lis_vector_nrm2(b, &nrm2);
  lis_vector_scale(1/nrm2, b);
  lis_solver_create(&solver);
  lis_solver_set_option("-i bicg -p ilu",solver);
  lis_solver_set_optionC(solver);
  lis_solver_get_solver(solver, &nsol);
  lis_solver_get_precon(solver, &precon_type);
  lis_get_solvername(nsol, solvername);
  lis_get_preconname(precon_type, preconname);
  printf("solver     : %s %d\n", solvername, nsol);
  printf("precon     : %s %d\n", preconname, precon_type);
  lis_solve(A, b, x, solver);
  lis_vector_copy(b,Ax);

  /* the norm computed here is not used; the call is kept as in the */
  /* original sequence                                              */
  lis_vector_nrm2(x, &nrm2);
  lis_vector_set_all(0.0,p);
  lis_vector_set_all(0.0,Ap);

  lis_precon_create(solver, &precon);
  solver->precon = precon;

  iter=0;

  while (iter<emaxiter)
    {
      iter = iter + 1;

      /* evalue = <x,Ax>, r = Ax - evalue*x, resid = |  ||r|| / evalue  | */
      lis_vector_dot(x,Ax,&evalue);
      lis_vector_axpyz(-(evalue),x,Ax,r);
      lis_vector_nrm2(r, &nrm2);
      resid = fabs(nrm2/(evalue));

      if( output )
        {
          if( output & LIS_EPRINT_MEM ) esolver->residual[iter] = resid;
          if( output & LIS_EPRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, resid);
        }

      if (resid<tol) break;

      /* w = preconditioner solve applied to x; Aw is set to a copy of x */
      lis_psolve(solver, x, w);
      lis_vector_copy(x,Aw);
      /* result unused; call kept as in the original sequence */
      lis_vector_nrm2(w, &nrm2);

      /* SA: symmetric 3x3 matrix of inner products against Aw, Ax, Ap */
      lis_vector_dot(w,Aw,&SA[0]);
      lis_vector_dot(x,Aw,&SA[3]);
      lis_vector_dot(p,Aw,&SA[6]);
      SA[1] = SA[3];
      lis_vector_dot(x,Ax,&SA[4]);
      lis_vector_dot(p,Ax,&SA[7]);
      SA[2] = SA[6];
      SA[5] = SA[7];
      lis_vector_dot(p,Ap,&SA[8]);

      /* SB: symmetric 3x3 Gram matrix of w, x, p */
      lis_vector_dot(w,w,&SB[0]);
      lis_vector_dot(x,w,&SB[3]);
      lis_vector_dot(p,w,&SB[6]);
      SB[1] = SB[3];
      lis_vector_dot(x,x,&SB[4]);
      lis_vector_dot(p,x,&SB[7]);
      SB[2] = SB[6];
      SB[5] = SB[7];
      lis_vector_dot(p,p,&SB[8]);

      lis_array_set_all(3, 1.0, v3);

      /* inner iteration on the 3x3 pair (SA,SB), capped at emaxiter steps */
      iter3=0;
      while (iter3<emaxiter)
        {
          iter3 = iter3 + 1;
          lis_array_nrm2(3, v3, &nrm2);
          lis_array_scale(3, 1/nrm2, v3);
          lis_array_matvec(3, SB, v3, SBv3, LIS_INS_VALUE);
          lis_array_invvec(3, SA, SBv3, z3);
          lis_array_dot2(3, SBv3, z3, &ievalue3);
          if (ievalue3==0)
            {
              /* resid3 divides by ievalue3, so stop here; the shared */
              /* teardown below records the state and the return code */
              printf("ievalue3 is zero\n");
              retcode = LIS_BREAKDOWN;
              goto cleanup;
            }
          lis_array_axpyz(3, -ievalue3, SBv3, z3, q3);
          lis_array_nrm2(3, q3, &resid3);
          resid3 = fabs(resid3 / ievalue3);
          if (resid3<1e-12) break;
          lis_array_copy(3,z3,v3);
        }

      /* combine with the 3-vector coefficients:      */
      /*   w <- v3[0]*w + v3[2]*p                      */
      /*   x <- w + v3[1]*x                            */
      /*   p <- w                                      */
      lis_vector_scale(v3[0],w);
      lis_vector_axpy(v3[2],p,w);
      lis_vector_xpay(w,v3[1],x);
      lis_vector_copy(w,p);

      /* the same combination applied to Aw, Ax, Ap */
      lis_vector_scale(v3[0],Aw);
      lis_vector_axpy(v3[2],Ap,Aw);
      lis_vector_xpay(Aw,v3[1],Ax);
      lis_vector_copy(Aw,Ap);

      /* normalise x (and Ax by the same factor) */
      lis_vector_nrm2(x,&nrm2);
      lis_vector_scale(1/nrm2,x);
      lis_vector_scale(1/nrm2,Ax);

      /* normalise p (and Ap by the same factor) */
      lis_vector_nrm2(p,&nrm2);
      lis_vector_scale(1/nrm2,p);
      lis_vector_scale(1/nrm2,Ap);

      lis_solver_get_timeex(solver,&times,&itimes,&ptimes,&p_c_times,&p_i_times);
      esolver->ptimes += solver->ptimes;
      esolver->itimes += solver->itimes;
      esolver->p_c_times += solver->p_c_times;
      esolver->p_i_times += solver->p_i_times;

    }

cleanup:
  /* single teardown path shared by every exit */
  lis_precon_destroy(precon);
  lis_solver_destroy(solver);

  esolver->iter       = iter;
  esolver->resid      = resid;
  esolver->evalue[0] = evalue;

  if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);

  lis_free(SA);
  lis_free(SB);
  lis_free(v3);
  lis_free(SBv3);
  lis_free(z3);
  lis_free(q3);

  if (retcode != LIS_BREAKDOWN)
    {
      retcode = (resid<tol) ? LIS_SUCCESS : LIS_MAXITER;
    }
  esolver->retcode = retcode;
  return retcode;
}
/* Example #21 */
/* 0 */
/*
 * lis_gmres - restarted GMRES iteration for the system attached to `solver`.
 *
 * The restart length m comes from options[LIS_OPTIONS_RESTART]; each restart
 * cycle builds up to m basis vectors v[0..m] (taken from solver->work[3...]),
 * orthogonalises every new vector against the earlier ones, and keeps the
 * Hessenberg matrix triangular with one plane rotation per column.
 *
 * Storage layout of h (all offsets in units of LIS_SCALAR):
 *   column c of H     starts at  c*h_dim           (h_dim = m+1)
 *   rotation cosines  start  at  cs = (m+1)*h_dim
 *   rotation sines    start  at  sn = (m+2)*h_dim
 *
 * Updates solver->x in place and records retcode / iter / resid in `solver`
 * (ptime is recorded on the convergence exit only).  h is released on every
 * return path.
 *
 * Returns LIS_SUCCESS when lis_solver_get_initial_residual() returns non-zero
 * or when the residual estimate drops to solver->tol, LIS_MAXITER otherwise.
 *
 * NOTE(review): the result of lis_malloc() is used without a NULL check, and
 * nrm2 is only assigned inside the loop, so the LIS_MAXITER exit appears to
 * read it unassigned when maxiter < 1 - confirm.
 */
LIS_INT lis_gmres(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR r,s,z,*v;
	LIS_SCALAR *h;
	LIS_SCALAR aa,bb,rr,a2,b2,t;
	LIS_REAL tnrm2;

	LIS_REAL bnrm2,nrm2,tol;
	LIS_INT iter,maxiter,n,output;
	double time,ptime;

	LIS_REAL rnorm;
	LIS_INT i,j,k,m;
	LIS_INT ii,i1,iiv,i1v,iih,jj;
	LIS_INT h_dim;
	LIS_INT cs,sn;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	m       = solver->options[LIS_OPTIONS_RESTART];
	h_dim   = m+1;
	ptime   = 0.0;

	s       = solver->work[0];
	r       = solver->work[1];
	z       = solver->work[2];
	v       = &solver->work[3];

	/* one allocation holds H plus the cosine and sine columns (see header) */
	h       = (LIS_SCALAR *)lis_malloc( sizeof(LIS_SCALAR)*(h_dim+1)*(h_dim+2),"lis_gmres::h" );
	cs      = (m+1)*h_dim;
	sn      = (m+2)*h_dim;

	/* r = M^-1 * (b - A * x) */
	lis_matvec(A,x,z);
	lis_vector_xpay(b,-1.0,z);
	lis_psolve(solver,z,v[0]);
	
	/* Initial Residual */
	/* v[0] is also passed as the residual output vector of this call */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
	{
		lis_free(h);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;


	iter=0;
	/* one pass of this loop is one restart cycle */
	while( iter<maxiter )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_nrm2(v[0],&rnorm);
		lis_vector_scale(1.0/rnorm,v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_all(0,s);
		s->value[0] = rnorm;

		i = 0;
		do
		{
			iter++;
			i++;
			/* ii / iiv: index of the current column and basis vector      */
			/* i1 / i1v: index of the next row and basis vector            */
			/* iih:      offset of the current column inside h             */
			ii  = i-1;
			i1  = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;


			/* z = M^-1 * v */
			time = lis_wtime();
			lis_psolve(solver,v[iiv],z);
			ptime += lis_wtime()-time;

			/* w = A * z */
			lis_matvec(A,z,v[i1v]);

			/* orthogonalise w against v[0..i-1], one vector at a time */
			for(k=0;k<i;k++)
			{
				/* h[k,i]   = <w,v[k]>          */
				/* w        = w - h[k,i] * v[k] */
				lis_vector_dot(v[i1v],v[k],&t);
				h[k+iih] = t;
				lis_vector_axpy(-t,v[k],v[i1v]);
			}
			/* h[i+1,i] = ||w||          */
			/* v[i+1]   = w / h[i+1,i]   */
			lis_vector_nrm2(v[i1v],&tnrm2);
			h[i1+iih] = tnrm2;
			lis_vector_scale(1.0/tnrm2,v[i1v]);

			/* apply the rotations stored for earlier columns to this column */
			for(k=1;k<=ii;k++)
			{
				jj  =  k-1;
				t   =  h[jj+iih];
				aa  =  h[jj+cs]*t;
				aa +=  h[jj+sn]*h[k+iih];
				bb  = -h[jj+sn]*t;
				bb +=  h[jj+cs]*h[k+iih];
				h[jj+iih] = aa;
				h[k+iih] = bb;
			}
			/* new rotation from the diagonal (aa) and subdiagonal (bb) entries */
			aa = h[ii+iih];
			bb = h[i1+iih];
			a2 = aa*aa;
			b2 = bb*bb;
			rr = sqrt(a2+b2);
			/* avoid dividing by an exact zero */
			if( rr==0.0 ) rr=1.0e-17;
			h[ii+cs] = aa/rr;
			h[ii+sn] = bb/rr;
			/* rotate the right-hand side s with the same cosine / sine */
			s->value[i1] = -h[ii+sn]*s->value[ii];
			s->value[ii] =  h[ii+cs]*s->value[ii];

			/* rotated diagonal entry; the subdiagonal entry is not used again */
			aa  =  h[ii+cs]*h[ii+iih];
			aa +=  h[ii+sn]*h[i1+iih];
			h[ii+iih] = aa;

			/* convergence check */
			/* residual estimate: |last entry of s| scaled by bnrm2 */
			nrm2 = sabs(s->value[i1])*bnrm2;

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
			}

			if( tol >= nrm2 ) break;
		} while( i<m && iter <maxiter );

		/* Solve H * Y = S by back substitution on the rotated H; */
		/* the solution y overwrites s[0..ii]                     */
		s->value[ii] = s->value[ii]/h[ii+iih];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t  = s->value[jj];
			for(j=jj+1;j<=ii;j++)
			{
				t -= h[jj+j*h_dim]*s->value[j];
			}
			s->value[jj] = t/h[jj+jj*h_dim];
		}
		/* z = sum over j of y[j] * v[j]: the j=0 term is written directly, */
		/* the remaining terms are accumulated with axpy                    */
		#ifdef _OPENMP
		#pragma omp parallel for private(k)
		#endif
		for(k=0;k<n;k++)
		{
			z->value[k] = s->value[0]*v[0]->value[k];
		}
		for(j=1;j<=ii;j++)
		{
			lis_vector_axpy(s->value[j],v[j],z);
		}

		/* r = M^-1 * z */
		time = lis_wtime();
		lis_psolve(solver,z,r);
		ptime += lis_wtime()-time;

		/* x = x + r */
		lis_vector_axpy(1,r,x);

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			lis_free(h);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* Not converged: prepare the restart.  Walk the stored rotations */
		/* backwards over s, starting from entry i1 ...                   */
		for(j=1;j<=i;j++)
		{
			jj = i1-j+1;
			s->value[jj-1] = -h[jj-1+sn]*s->value[jj];
			s->value[jj]   =  h[jj-1+cs]*s->value[jj];
		}

		/* ... then v[0] = sum over j of s[j] * v[j].  For j==0 the weight */
		/* is s[0]-1 because axpy adds onto the v[0] already in place.     */
		for(j=0;j<=i1;j++)
		{
			t = s->value[j];
			if( j==0 ) t = t-1.0;
			lis_vector_axpy(t,v[j],v[0]);
		}
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter+1;
	solver->resid     = nrm2;
	lis_free(h);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_fgmres - restarted flexible GMRES iteration for the system attached to
 * `solver`.
 *
 * Differs from the plain GMRES routine in this file in that every
 * preconditioned vector z[j] = M^-1 v[j] is kept (z[0..m-1] live in
 * solver->work[2...], v[0..m] in solver->work[m+2...]) and the solution is
 * updated directly as x += sum over j of y[j] * z[j].
 *
 * Storage layout of h (all offsets in units of LIS_SCALAR):
 *   column c of H     starts at  c*h_dim           (h_dim = m+1)
 *   rotation cosines  start  at  cs = (m+1)*h_dim
 *   rotation sines    start  at  sn = (m+2)*h_dim
 *
 * Updates solver->x in place and records retcode / iter / resid in `solver`
 * (ptimes is recorded on the convergence exit only).  h is released on every
 * return path.
 *
 * Returns LIS_SUCCESS when lis_solver_get_initial_residual() returns non-zero
 * or when the residual estimate drops to solver->tol, LIS_MAXITER otherwise.
 *
 * NOTE(review): the result of lis_malloc() is used without a NULL check, and
 * nrm2 is only assigned inside the loop, so the LIS_MAXITER exit appears to
 * read it unassigned when maxiter < 1 - confirm.
 */
LIS_INT lis_fgmres(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,s, *z, *v;
	LIS_SCALAR *h;
	LIS_SCALAR aa,bb,rr,a2,b2,t;

	LIS_REAL   bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes;

	LIS_REAL   rnorm;
	LIS_INT i,j,k,m;
	LIS_INT ii,i1,iiv,i1v,iih,i1h,jj;
	LIS_INT h_dim;
	LIS_INT cs,sn;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	M       = solver->precon;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	m       = solver->options[LIS_OPTIONS_RESTART];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	h_dim   = m+1;
	ptimes  = 0.0;

	s       = solver->work[0];
	r       = solver->work[1];
	z       = &solver->work[2];
	v       = &solver->work[m+2];

	/* one allocation holds H plus the cosine and sine columns (see header); */
	/* the tag string still reads "lis_gmres::h"                             */
	h       = (LIS_SCALAR *)lis_malloc( sizeof(LIS_SCALAR) * (h_dim+1) * (h_dim+2),"lis_gmres::h" );
	cs      = (m+1)*h_dim;
	sn      = (m+2)*h_dim;


	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
	{
		lis_free(h);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;
	/* bnrm2 is used below as the scale factor 1/||r||, rnorm as ||r|| */
	rnorm   = 1.0 / bnrm2;


	iter=0;
	/* one pass of this loop is one restart cycle */
	while( iter<maxiter )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_scale(bnrm2,v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_all(0,s);
		s->value[0] = rnorm;

		i = 0;
		do
		{
			iter++;
			i++;
			/* ii / iiv: index of the current column and basis vector      */
			/* i1 / i1v: index of the next row and basis vector            */
			/* iih:      offset of the current column inside h             */
			/* i1h:      offset of the next column (not read in this body) */
			ii  = i-1;
			i1  = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;
			i1h = i*h_dim;


			/* z = M^-1 v */
			/* stored per step so the update of x can reuse it */
			times = lis_wtime();
			lis_psolve(solver, v[iiv], z[iiv]);
			ptimes += lis_wtime()-times;

			/* v = Az */
			LIS_MATVEC(A,z[iiv], v[i1v]);

			/* orthogonalise w against v[0..i-1], one vector at a time */
			for(k=0;k<i;k++)
			{
				/* h[k,i]   = <w,v[k]>       */
				/* w        = w - h[k,i]v[k] */
				lis_vector_dot(v[i1v],v[k],&t);
				h[k + iih] = t;
				lis_vector_axpy(-t,v[k],v[i1v]);
			}
			/* h[i+1,i] = ||w||          */
			/* v[i+1]   = w / h[i+1,i]   */
			lis_vector_nrm2(v[i1v],&t);
			h[i1 + iih] = t;
			lis_vector_scale(1.0/t,v[i1v]);

			/* apply the rotations stored for earlier columns to this column */
			for(k=1;k<=ii;k++)
			{
				jj  = k-1;
				t   =  h[jj + iih];
				aa  =  h[jj + cs]*t;
				aa +=  h[jj + sn]*h[k  + iih];
				bb  = -h[jj + sn]*t;
				bb +=  h[jj + cs]*h[k  + iih];
				h[jj + iih] = aa;
				h[k  + iih] = bb;
			}
			/* new rotation from the diagonal (aa) and subdiagonal (bb) entries */
			aa = h[ii + iih];
			bb = h[i1 + iih];
			a2 = aa*aa;
			b2 = bb*bb;
			rr = sqrt(a2 + b2);
			/* avoid dividing by an exact zero */
			if( rr==0.0 ) rr=1.0e-17;
			h[ii + cs] = aa / rr;
			h[ii + sn] = bb / rr;
			/* rotate the right-hand side s with the same cosine / sine */
			s->value[i1] = -h[ii + sn]*s->value[ii];
			s->value[ii] =  h[ii + cs]*s->value[ii];

			/* rotated diagonal entry; the subdiagonal entry is not used again */
			aa  =  h[ii + cs]*h[ii + iih];
			aa +=  h[ii + sn]*h[i1 + iih];
			h[ii   + iih] = aa;

			/* convergence check */
			/* NOTE(review): the estimate is |last entry of s| with no    */
			/* further scaling, and conv is read above but not consulted  */
			/* here - confirm this matches the intended stopping rule     */
			nrm2 = fabs(s->value[i1]);

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
			}

			if( tol >= nrm2 ) break;
		} while( i<m && iter <maxiter );

		/* Solve H*Y =S by back substitution on the rotated H; */
		/* the solution y overwrites s[0..ii]                  */
		s->value[ii] = s->value[ii] / h[ii + iih];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t  = s->value[jj];
			for(j=jj+1;j<=ii;j++)
			{
				t -= h[jj + j*h_dim]*s->value[j];
			}
			s->value[jj] = t / h[jj + jj*h_dim];
		}
		/* x = x + zy */
		for(j=0;j<=ii;j++)
		{
			lis_vector_axpy(s->value[j],z[j],x);
		}

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			lis_free(h);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* Not converged: recompute the residual v[0] = b - A*x for the */
		/* restart and refresh rnorm = ||v[0]||, bnrm2 = 1/rnorm        */
		LIS_MATVEC(A,x,v[0]);
		lis_vector_xpay(b,-1.0,v[0]);
		lis_vector_nrm2(v[0],&rnorm);
		bnrm2 = 1.0 / rnorm;
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter+1;
	solver->resid     = nrm2;
	lis_free(h);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/* Example #23 */
/* 0 */
/*
 * lis_cgs_quad - CGS iteration whose scalars (alpha, beta, rho, ...) are
 * LIS_QUAD_PTR values carried as hi/lo pairs and whose vector updates go
 * through the *ex_* routines.
 *
 * Reads the matrix, solution vector, options and work vectors from `solver`,
 * updates solver->x in place and records retcode / iter / resid in `solver`
 * (ptime is recorded on the convergence exit only).
 *
 * Work-vector sharing: qhat and u both name work[5], uhat and vhat both name
 * work[6]; the statement order below never needs both members of a pair at
 * the same time.
 *
 * Returns
 *   LIS_SUCCESS   - lis_solver_get_initial_residual() returned non-zero, or
 *                   the residual measure dropped below solver->tol;
 *   LIS_BREAKDOWN - <rtld,r> or <rtld,vhat> is exactly zero;
 *   LIS_MAXITER   - the iteration budget was used up.
 */
LIS_INT lis_cgs_quad(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR r,rtld, p,phat, q, qhat, u, uhat, vhat;
	LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1, one;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,output,conv;
	double time,ptime;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptime   = 0.0;

	r       = solver->work[0];
	rtld    = solver->work[1];
	p       = solver->work[2];
	phat    = solver->work[3];
	q       = solver->work[4];
	qhat    = solver->work[5];
	u       = solver->work[5];
	uhat    = solver->work[6];
	vhat    = solver->work[6];

	/* presumably binds each hi/lo scalar to its own storage slot, selected */
	/* by the second argument - confirm against the macro definition        */
	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
	LIS_QUAD_SCALAR_MALLOC(one,6,1);
	rho_old.hi[0] = 1.0;
	rho_old.lo[0] = 0.0;
	alpha.hi[0]   = 1.0;
	alpha.lo[0]   = 0.0;
	one.hi[0]   = 1.0;
	one.lo[0]   = 0.0;


	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	/* nrm2 is reported by the breakdown and max-iteration exits.  Measure */
	/* the initial residual now so that none of them reads it unassigned:  */
	/* both breakdown tests run in iteration 1 before the in-loop          */
	/* measurement, and the loop body is skipped when maxiter < 1.         */
	lis_solver_get_residual[conv](r,solver,&nrm2);

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0, q);
	lis_vector_set_allex_nm(0.0, p);


	for( iter=1; iter<=maxiter; iter++ )
	{
		/* rho = <rtld,r> */
		lis_vector_dotex_mmm(rtld,r,&rho);

		/* test breakdown */
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) */
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);

		/* u = r + beta*q */
		lis_vector_axpyzex_mmmm(beta,q,r,u);

		/* p = u + beta*(q + beta*p) */
		lis_vector_xpayex_mmm(q,beta,p);
		lis_vector_xpayex_mmm(u,beta,p);
		
		/* phat = M^-1 * p */
		time = lis_wtime();
		lis_psolve(solver, p, phat);
		ptime += lis_wtime()-time;

		/* v = A * phat */
		lis_matvec(A,phat,vhat);
		
		/* tmpdot1 = <rtld,vhat> */
		lis_vector_dotex_mmm(rtld,vhat,&tmpdot1);
		/* test breakdown: alpha divides by tmpdot1; x and r are untouched */
		/* so far in this iteration, so nrm2 still describes them          */
		if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}
		
		/* alpha = rho / tmpdot1 */
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);
		
		/* q = u - alpha*vhat */
		/* (alpha is negated in place; each lis_quad_minus below flips it) */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyzex_mmmm(alpha,vhat,u,q);

		/* phat = u + q          */
		/* uhat = M^-1 * (u + q) */
		lis_vector_axpyzex_mmmm(one,u,q,phat);
		time = lis_wtime();
		lis_psolve(solver, phat, uhat);
		ptime += lis_wtime()-time;

		/* x = x + alpha*uhat */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,uhat,x);

		/* qhat = A * uhat */
		lis_matvec(A,uhat,qhat);

		/* r = r - alpha*qhat */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,qhat,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}
		
		if( tol > nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}
		
		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
LIS_INT lis_gmres_quad(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,s, z, *v;
	LIS_QUAD *h;
	LIS_QUAD_PTR aa,bb,rr,a2,b2,t,one,tmp;
	LIS_QUAD_PTR rnorm;

	LIS_REAL   bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes;

	LIS_INT i,j,k,m;
	LIS_INT ii,i1,iiv,i1v,iih,i1h,jj;
	LIS_INT h_dim;
	LIS_INT cs,sn;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	M       = solver->precon;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	m       = solver->options[LIS_OPTIONS_RESTART];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	h_dim   = m+1;
	ptimes  = 0.0;

	s       = solver->work[0];
	r       = solver->work[1];
	z       = solver->work[2];
	v       = &solver->work[3];

	LIS_QUAD_SCALAR_MALLOC(aa,0,1);
	LIS_QUAD_SCALAR_MALLOC(bb,1,1);
	LIS_QUAD_SCALAR_MALLOC(rr,2,1);
	LIS_QUAD_SCALAR_MALLOC(a2,3,1);
	LIS_QUAD_SCALAR_MALLOC(b2,4,1);
	LIS_QUAD_SCALAR_MALLOC(t,5,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
	LIS_QUAD_SCALAR_MALLOC(one,7,1);
	LIS_QUAD_SCALAR_MALLOC(rnorm,8,1);

	h       = (LIS_QUAD *)lis_malloc( sizeof(LIS_QUAD) * (h_dim+1) * (h_dim+2),"lis_gmres_quad::h" );
	cs      = (m+1)*h_dim;
	sn      = (m+2)*h_dim;
	one.hi[0]   = 1.0;
	one.lo[0]   = 0.0;

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,v[0],&bnrm2) )
	{
		lis_free(h);
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;


	iter=0;
	while( iter<maxiter )
	{
		/* first column of V */
		/* v = r / ||r||_2 */
		lis_vector_nrm2ex_mm(v[0],&rnorm);
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)one.hi,(LIS_QUAD *)rnorm.hi);
		lis_vector_scaleex_mm(tmp,v[0]);

		/* s = ||r||_2 e_1 */
		lis_vector_set_allex_nm(0.0,s);
		s->value[0]    = rnorm.hi[0];
		s->value_lo[0] = rnorm.lo[0];

		i = 0;
		do
		{
			iter++;
			i++;
			ii  = i-1;
			i1  = i;
			iiv = i-1;
			i1v = i;
			iih = (i-1)*h_dim;
			i1h = i*h_dim;


			/* z = M^-1 v */
			times = lis_wtime();
			lis_psolve(solver, v[iiv], z);
			ptimes += lis_wtime()-times;

			/* v = Az */
			LIS_MATVEC(A,z, v[i1v]);

			for(k=0;k<i;k++)
			{
				/* h[k,i]   = <w,v[k]>       */
				/* w        = w - h[k,i]v[k] */
				lis_vector_dotex_mmm(v[i1v],v[k],&t);
				h[k + iih].hi = t.hi[0];
				h[k + iih].lo = t.lo[0];
				lis_quad_minus((LIS_QUAD *)t.hi);
				lis_vector_axpyex_mmm(t,v[k],v[i1v]);
			}
			/* h[i+1,i] = ||w||          */
			/* v[i+1]   = w / h[i+1,i]   */
			lis_vector_nrm2ex_mm(v[i1v],&t);
			h[i1 + iih].hi = t.hi[0];
			h[i1 + iih].lo = t.lo[0];
			lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)one.hi,(LIS_QUAD *)t.hi);
			lis_vector_scaleex_mm(tmp,v[i1v]);

			for(k=1;k<=ii;k++)
			{
				jj  = k-1;
				t.hi[0]   =  h[jj + iih].hi;
				t.lo[0]   =  h[jj + iih].lo;
				lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)t.hi);
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)&h[k+iih]);
				lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
				lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[jj+sn],(LIS_QUAD *)t.hi);
				lis_quad_minus((LIS_QUAD *)bb.hi);
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj+cs],(LIS_QUAD *)&h[k+iih]);
				lis_quad_add((LIS_QUAD *)bb.hi,(LIS_QUAD *)bb.hi,(LIS_QUAD *)tmp.hi);
				h[jj + iih].hi = aa.hi[0];
				h[jj + iih].lo = aa.lo[0];
				h[k  + iih].hi = bb.hi[0];
				h[k  + iih].lo = bb.lo[0];
			}
			aa.hi[0] = h[ii + iih].hi;
			aa.lo[0] = h[ii + iih].lo;
			bb.hi[0] = h[i1 + iih].hi;
			bb.lo[0] = h[i1 + iih].lo;
			lis_quad_sqr((LIS_QUAD *)a2.hi,(LIS_QUAD *)aa.hi);
			lis_quad_sqr((LIS_QUAD *)b2.hi,(LIS_QUAD *)bb.hi);
			lis_quad_add((LIS_QUAD *)rr.hi,(LIS_QUAD *)a2.hi,(LIS_QUAD *)b2.hi);
			lis_quad_sqrt((LIS_QUAD *)rr.hi,(LIS_QUAD *)rr.hi);
			if( rr.hi[0]==0.0 )
			{
				rr.hi[0]=1.0e-17;
				rr.lo[0]=0.0;
			}
			lis_quad_div((LIS_QUAD *)&h[ii + cs],(LIS_QUAD *)aa.hi,(LIS_QUAD *)rr.hi);
			lis_quad_div((LIS_QUAD *)&h[ii + sn],(LIS_QUAD *)bb.hi,(LIS_QUAD *)rr.hi);
			tmp.hi[0] = s->value[ii];
			tmp.lo[0] = s->value_lo[ii];
			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii + sn],(LIS_QUAD *)tmp.hi);
			lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)&h[ii + cs],(LIS_QUAD *)tmp.hi);
			lis_quad_minus((LIS_QUAD *)aa.hi);
			s->value[i1] = aa.hi[0];
			s->value_lo[i1] = aa.lo[0];
			s->value[ii] = bb.hi[0];
			s->value_lo[ii] = bb.lo[0];

			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)&h[ii+cs],(LIS_QUAD *)&h[ii+iih]);
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii+sn],(LIS_QUAD *)&h[i1+iih]);
			lis_quad_add((LIS_QUAD *)aa.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi);
			h[ii   + iih].hi = aa.hi[0];
			h[ii   + iih].lo = aa.lo[0];

			/* convergence check */
			nrm2 = fabs(s->value[i1]) * bnrm2;

			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
			}

			if( tol >= nrm2 ) break;
		} while( i<m && iter <maxiter );

		/* Solve H*Y =S for upper triangular H */
		tmp.hi[0] = s->value[ii];
		tmp.lo[0] = s->value_lo[ii];
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[ii + iih]);
		s->value[ii] = tmp.hi[0];
		s->value_lo[ii] = tmp.lo[0];
		for(k=1;k<=ii;k++)
		{
			jj = ii-k;
			t.hi[0]  = s->value[jj];
			t.lo[0]  = s->value_lo[jj];
			for(j=jj+1;j<=ii;j++)
			{
				tmp.hi[0] = s->value[j];
				tmp.lo[0] = s->value_lo[j];
				lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj + j*h_dim]);
				lis_quad_sub((LIS_QUAD *)t.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)tmp.hi);
			}
			lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)&h[jj + jj*h_dim]);
			s->value[jj] = tmp.hi[0];
			s->value_lo[jj] = tmp.lo[0];
		}
		/* x = x + yv */
		for(k=0;k<n;k++)
		{
			aa.hi[0] = s->value[0];
			aa.lo[0] = s->value_lo[0];
			bb.hi[0] = v[0]->value[k];
			bb.lo[0] = v[0]->value_lo[k];
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)aa.hi,(LIS_QUAD *)bb.hi);
			z->value[k] = tmp.hi[0];
			z->value_lo[k] = tmp.lo[0];
		}
		for(j=1;j<=ii;j++)
		{
			aa.hi[0] = s->value[j];
			aa.lo[0] = s->value_lo[j];
			lis_vector_axpyex_mmm(aa,v[j],z);
		}
		/* r = M^-1 z */
		times = lis_wtime();
		lis_psolve(solver, z, r);
		ptimes += lis_wtime()-times;

		/* x = x + r */
		lis_vector_axpyex_mmm(one,r,x);

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->iter2      = 0;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			lis_free(h);
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		for(j=1;j<=i;j++)
		{
			jj = i1-j+1;
			tmp.hi[0] = s->value[jj];
			tmp.lo[0] = s->value_lo[jj];
			lis_quad_mul((LIS_QUAD *)aa.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj-1 + sn]);
			lis_quad_mul((LIS_QUAD *)bb.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)&h[jj-1 + cs]);
			lis_quad_minus((LIS_QUAD *)aa.hi);
			s->value[jj-1] = aa.hi[0];
			s->value_lo[jj-1] = aa.lo[0];
			s->value[jj] = bb.hi[0];
			s->value_lo[jj] = bb.lo[0];
		}

		for(j=0;j<=i1;j++)
		{
			t.hi[0] = s->value[j];
			t.lo[0] = s->value_lo[j];
			if( j==0 )
			{
				lis_quad_sub((LIS_QUAD *)t.hi,(LIS_QUAD *)t.hi,(LIS_QUAD *)one.hi);
			}
			lis_vector_axpyex_mmm(t,v[j],v[0]);
		}
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter+1;
	solver->iter2     = 0;
	solver->resid     = nrm2;
	lis_free(h);
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_cgs - preconditioned CGS iteration on the system attached to `solver`.
 *
 * Inputs are taken from `solver`: the matrix (solver->A), the solution
 * vector (solver->x, updated in place), the iteration limit, output flags
 * and convergence-condition index from solver->options, and the work
 * vectors solver->work[0..6].
 *
 * Work-vector sharing: u and qhat both live in work[5], uhat and vhat both
 * live in work[6].  Inside one iteration vhat is last read before uhat is
 * written and u is last read before qhat is written, so the two members of
 * a pair are never needed at the same time.
 *
 * Returns
 *   LIS_SUCCESS   - lis_solver_get_initial_residual() returned non-zero
 *                   (this function then returns at once without writing any
 *                   result field itself), or the residual reported by
 *                   lis_solver_get_residual[conv] is <= solver->tol;
 *   LIS_BREAKDOWN - <rtld,r> or <rtld,vhat> is exactly zero;
 *   LIS_MAXITER   - maxiter iterations were performed without converging.
 * On the last three outcomes solver->retcode, solver->iter, solver->resid
 * and solver->ptimes (time spent inside lis_psolve) are filled in.
 */
LIS_INT lis_cgs(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,rtld, p,phat, q, qhat, u, uhat, vhat;
	LIS_SCALAR alpha, beta, rho, rho_old, tmpdot1;
	LIS_REAL   bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	M       = solver->precon;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptimes  = 0.0;

	r       = solver->work[0];
	rtld    = solver->work[1];
	p       = solver->work[2];
	phat    = solver->work[3];
	q       = solver->work[4];
	qhat    = solver->work[5];
	u       = solver->work[5];	/* shares storage with qhat */
	uhat    = solver->work[6];
	vhat    = solver->work[6];	/* shares storage with uhat */
	alpha   = (LIS_SCALAR)1.0;
	rho_old = (LIS_SCALAR)1.0;


	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	/* Give nrm2 a defined value before the loop.  It is copied into
	   solver->resid when a breakdown is detected in the first iteration
	   or when maxiter < 1, i.e. before the in-loop convergence check has
	   ever assigned it. */
	lis_solver_get_residual[conv](r,solver,&nrm2);

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_all(0,q);
	lis_vector_set_all(0,p);


	for( iter=1; iter<=maxiter; iter++ )
	{
		/* rho = <rtld,r> */
		lis_vector_dot(rtld,r,&rho);

		/* test breakdown: rho is the numerator of alpha and beta */
		if( rho==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			solver->ptimes    = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old); rho_old starts at 1 so the first
		   pass uses beta = rho with q = p = 0 */
		beta = (rho / rho_old);

		/* u = r + beta*q */
		lis_vector_axpyz(beta,q,r,u);

		/* p = u + beta*(q + beta*p) */
		lis_vector_xpay(q,beta,p);
		lis_vector_xpay(u,beta,p);

		/* phat = M^-1 * p */
		times = lis_wtime();
		lis_psolve(solver, p, phat);
		ptimes += lis_wtime()-times;

		/* v = A * phat */
		LIS_MATVEC(A,phat,vhat);

		/* tmpdot1 = <rtld,vhat> */
		lis_vector_dot(rtld,vhat,&tmpdot1);
		/* test breakdown: tmpdot1 is the divisor of alpha */
		if( tmpdot1==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			solver->ptimes    = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* alpha = rho / tmpdot1 */
		alpha = rho / tmpdot1;

		/* q = u - alpha*vhat */
		lis_vector_axpyz(-alpha,vhat,u,q);

		/* phat = u + q          */
		/* uhat = M^-1 * (u + q) (overwrites vhat, no longer needed) */
		lis_vector_axpyz(1,u,q,phat);
		times = lis_wtime();
		lis_psolve(solver, phat, uhat);
		ptimes += lis_wtime()-times;

		/* x = x + alpha*uhat */
		lis_vector_axpy(alpha,uhat,x);

		/* qhat = A * uhat (overwrites u, recomputed next iteration) */
		LIS_MATVEC(A,uhat,qhat);

		/* r = r - alpha*qhat */
		lis_vector_axpy(-alpha,qhat,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		rho_old = rho;
	}

	/* iteration limit reached; iter is maxiter+1 here (or 1 if maxiter < 1) */
	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	solver->ptimes    = ptimes;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/* Example #26 */
/* 0 */
/*
 * lis_jacobi - diagonal-scaling stationary iteration on the system attached
 * to `solver`.
 *
 * Each pass computes s = lis_psolve(x), the residual r = b - A*s, its
 * 2-norm, and then updates x += d .* r, where d holds the result of
 * lis_vector_reciprocal() applied to the diagonal of A (presumably the
 * element-wise reciprocal).  The iterate kept in x is therefore the variable
 * *before* preconditioning; on exit x is replaced by lis_psolve(x).
 *
 * The convergence measure is ||r||_2 / ||b||_2 compared against
 * solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN].  When b is the zero
 * vector the absolute norm ||r||_2 is used instead, because the relative
 * measure would divide by zero.
 *
 * Returns LIS_SUCCESS on convergence or LIS_MAXITER when the iteration limit
 * is reached; solver->retcode, solver->iter, solver->resid and solver->ptime
 * (time spent in lis_psolve) are set on both exits.
 */
LIS_INT lis_jacobi(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR b,x;
	LIS_VECTOR d,r,t,s;
	LIS_REAL bnrm2,nrm2,tol;
	LIS_INT iter,maxiter,output;
	double time,ptime;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	b       = solver->b;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	tol     = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	ptime   = 0.0;
	/* defined placeholder: reported only if maxiter < 1, in which case
	   no residual has been measured at all */
	nrm2    = 0.0;

	r       = solver->work[0];
	t       = solver->work[1];
	s       = solver->work[2];
	d       = solver->work[3];

	/* bnrm2 becomes the scale factor 1/||b||; fall back to 1 (absolute
	   residual) for a zero right-hand side instead of dividing by zero,
	   which would turn every residual into inf/NaN and prevent
	   convergence from ever being detected */
	lis_vector_nrm2(b,&bnrm2);
	if( bnrm2==0.0 )
	{
		bnrm2 = 1.0;
	}
	else
	{
		bnrm2 = 1.0 / bnrm2;
	}

	lis_matrix_get_diagonal(A,d);
	lis_vector_reciprocal(d);

	for( iter=1; iter<=maxiter; iter++ )
	{
		/* x += D^{-1}(b - A*M^{-1}x) */
		time = lis_wtime();
		lis_psolve(solver,x,s);
		ptime += lis_wtime() - time;
		lis_matvec(A,s,t);
		lis_vector_axpyz(-1,t,b,r);
		/* the norm is taken before x is updated, so nrm2 describes
		   the iterate that entered this pass */
		lis_vector_nrm2(r,&nrm2);
		lis_vector_pmul(r,d,r);
		lis_vector_axpy(1,r,x);

		/* convergence check */
		nrm2 = nrm2 * bnrm2;

		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( tol >= nrm2 )
		{
			/* map the iterate back through the preconditioner */
			time = lis_wtime();
			lis_psolve(solver,x,s);
			ptime += lis_wtime() - time;
			lis_vector_copy(s,x);
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}
	}

	/* not converged: still hand back the preconditioned iterate, and
	   account for this lis_psolve like every other one */
	time = lis_wtime();
	lis_psolve(solver,x,s);
	ptime += lis_wtime() - time;
	lis_vector_copy(s,x);
	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	solver->ptime     = ptime;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_crs_quad - CRS iteration using the library's double-double ("quad")
 * scalar and vector kernels (the *ex_* routines and lis_quad_* helpers).
 *
 * Inputs come from `solver`: matrix solver->A, solution vector solver->x
 * (updated in place), options for the iteration limit, output flags and
 * convergence condition, and work vectors solver->work[0..5].
 *
 * Work-vector sharing (each slot is reused once its previous content has
 * been consumed inside the iteration):
 *   work[3]: z, u, uq      work[4]: q, ap      work[5]: map, auq
 *
 * Returns
 *   LIS_SUCCESS   - lis_solver_get_initial_residual() returned non-zero
 *                   (immediate return, no result field written here), or
 *                   the residual from lis_solver_get_residual[conv] is
 *                   <= solver->tol;
 *   LIS_BREAKDOWN - <rtld,z> or <rtld,map> is exactly zero in both the hi
 *                   and lo parts;
 *   LIS_MAXITER   - iteration limit reached.
 * On the last three outcomes solver->retcode, solver->iter, solver->resid
 * and solver->ptimes (time spent in lis_psolve) are filled in.
 */
LIS_INT lis_crs_quad(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,rtld, p, q, u, z, ap, map, uq, auq;
	LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1, one;
	LIS_REAL   bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	M       = solver->precon;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptimes  = 0.0;

	r       = solver->work[0];
	rtld    = solver->work[1];
	p       = solver->work[2];
	z       = solver->work[3];
	u       = solver->work[3];
	uq      = solver->work[3];
	q       = solver->work[4];
	ap      = solver->work[4];
	map     = solver->work[5];
	auq     = solver->work[5];
	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
	LIS_QUAD_SCALAR_MALLOC(one,6,1);

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	/* Give nrm2 a defined value before the loop.  It is copied into
	   solver->resid when a breakdown is detected in the first iteration
	   or when maxiter < 1, before the in-loop convergence check has run. */
	lis_solver_get_residual[conv](r,solver,&nrm2);

	/* shadow residual goes to p temporarily; rtld = A^T * (shadow residual) */
	lis_solver_set_shadowresidual(solver,r,p);

	LIS_MATVECT(A,p,rtld);
	lis_vector_set_allex_nm(0.0,q);
	lis_vector_set_allex_nm(0.0,p);
	rho_old.hi[0] = 1.0;
	rho_old.lo[0] = 0.0;
	one.hi[0]   = 1.0;
	one.lo[0]   = 0.0;


	for( iter=1; iter<=maxiter; iter++ )
	{
		/* z   = M^-1 * r  */
		/* rho = <rtld,z>  */
		times = lis_wtime();
		lis_psolve(solver, r, z);
		ptimes += lis_wtime()-times;
		lis_vector_dotex_mmm(rtld,z,&rho);

		/* test breakdown */
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			solver->ptimes    = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta    = rho / rho_old         */
		/* u       = z + beta*q            */
		/* p       = u + beta*(q + beta*p) */
		/* ap      = A * p                 (overwrites q) */
		/* map     = M^-1 * ap             */
		/* tmpdot1 = <rtld,map>            */
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
		lis_vector_axpyzex_mmmm(beta,q,z,u);
		lis_vector_xpayex_mmm(q,beta,p);
		lis_vector_xpayex_mmm(u,beta,p);
		LIS_MATVEC(A,p,ap);
		times = lis_wtime();
		lis_psolve(solver, ap, map);
		ptimes += lis_wtime()-times;
		lis_vector_dotex_mmm(rtld,map,&tmpdot1);
		/* test breakdown */
		if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			solver->ptimes    = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* alpha = rho / tmpdot1 */
		/* q     = u - alpha*map */
		/* uq    = u + q         */
		/* auq   = A * uq        */
		/* x     = x + alpha*uq  */
		/* r     = r - alpha*auq */
		/* alpha is negated in place between the updates so that the
		   add-only axpy kernels realise the signs listed above */
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyzex_mmmm(alpha,map,u,q);
		lis_vector_axpyzex_mmmm(one,u,q,uq);
		LIS_MATVEC(A,uq,auq);
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,uq,x);
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,auq,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			/* print from rank 0 only, as lis_cgs does, so parallel
			   runs do not emit one copy of the line per process */
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	solver->ptimes    = ptimes;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
/*
 * lis_esi_quad - subspace eigen-iteration using the library's double-double
 * ("quad") dot/axpy kernels.
 *
 * For j = 1 .. ss (ss = esolver->options[LIS_EOPTIONS_SUBSPACE]) the routine
 * starts from a normalised all-ones vector (or the result of the previous
 * mode), removes the components along the already computed v[1..j-1], and
 * applies the inner method selected by LIS_EOPTIONS_INNER_ESOLVER:
 *   LIS_ESOLVER_PI  : r = A * v[j]
 *   LIS_ESOLVER_II  : r = lis_solve_kernel(A, v[j])
 *   LIS_ESOLVER_AII : r = lis_psolve(v[j])
 *   LIS_ESOLVER_RQI : r = lis_solve_kernel(A - mu*I, v[j]), the shift being
 *                     applied and undone via lis_matrix_shift_diagonal
 * until resid = ||r - <v[j],r> v[j]|| / |<v[j],r>| drops below the tolerance
 * or emaxiter iterations have been done.
 *
 * Outputs: esolver->evalue[j-1] and esolver->evector[j-1] for every mode,
 * esolver->x (copy of the vector selected by LIS_EOPTIONS_MODE), and for the
 * first mode esolver->iter, esolver->resid and the accumulated timings.
 *
 * Returns LIS_SUCCESS, or the error code of lis_precon_create() if building
 * the preconditioner fails.
 *
 * NOTE(review): v points at work[2], so v[1] is work[3] - the same slot as
 * Ax.  In the RQI branch lis_matvec(A, x, Ax) therefore overwrites v[1]
 * right after it has been filled for j == 1.  Confirm whether this is
 * intended before relying on the RQI start vector.
 * NOTE(review): the lis_precon_create() error return leaves the diagonal
 * shift applied to A and does not destroy `solver`; verify against callers.
 */
LIS_INT lis_esi_quad(LIS_ESOLVER esolver)
{
  LIS_MATRIX        A;
  LIS_VECTOR        x, Ax;
  LIS_SCALAR        xAx, xx, mu, lshift;
  LIS_INT               ss;
  LIS_INT               emaxiter;
  LIS_REAL          tol;
  LIS_INT               i,j,k;
  LIS_SCALAR        evalue,dotvr;
  LIS_INT               iter,giter,output,niesolver;
  LIS_INT               nprocs,my_rank;
  LIS_REAL          nrm2,dot,resid,resid0;
  LIS_QUAD_PTR      qdot_vv, qdot_vr;
  LIS_VECTOR        *v,r,q;
  LIS_SOLVER        solver;
  LIS_PRECON        precon;
  double            times,itimes,ptimes,p_c_times,p_i_times;
  LIS_INT           err;
  LIS_INT           nsol, precon_type;
  char              solvername[128], preconname[128];

  LIS_DEBUG_FUNC_IN;

  A = esolver->A;
  x = esolver->x;

  ss = esolver->options[LIS_EOPTIONS_SUBSPACE];
  emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
  tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN];
  lshift = esolver->lshift;
  output  = esolver->options[LIS_EOPTIONS_OUTPUT];
  niesolver = esolver->options[LIS_EOPTIONS_INNER_ESOLVER];

  r = esolver->work[0];
  q = esolver->work[1];
  v = &esolver->work[2];
  Ax = esolver->work[3];

  /* mu is accumulated (mu = mu + 1/dotvr) for every inner solver but only
     assigned beforehand in the RQI branch; start it at zero so the other
     branches never read an uninitialised value.  dotvr and resid are given
     defined values as well because they are read after the inner loop even
     when emaxiter < 1 and the loop body never ran. */
  mu    = 0.0;
  dotvr = 0.0;
  resid = 0.0;

  LIS_QUAD_SCALAR_MALLOC(qdot_vv,0,1);
  LIS_QUAD_SCALAR_MALLOC(qdot_vr,1,1);

  /* start vector: all ones, scaled to unit 2-norm */
  lis_vector_set_all(1.0,r);
  lis_vector_nrm2(r, &nrm2);
  lis_vector_scale(1/nrm2,r);

  /* create and configure the inner linear solver (none for PI) */
  switch ( niesolver )
    {
    case LIS_ESOLVER_II:
      lis_solver_create(&solver);
      lis_solver_set_option("-i bicg -p ilu -precision quad",solver);
      lis_solver_set_optionC(solver);
      lis_solver_get_solver(solver, &nsol);
      lis_solver_get_precon(solver, &precon_type);
      lis_get_solvername(nsol, solvername);
      lis_get_preconname(precon_type, preconname);
      printf("solver     : %s %d\n", solvername, nsol);
      printf("precon     : %s %d\n", preconname, precon_type);
      if( A->my_rank==0 ) printf("local shift = %e\n", lshift);
      if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);
      break;
    case LIS_ESOLVER_AII:
      lis_solver_create(&solver);
      lis_solver_set_option("-i bicg -p ilu -precision quad",solver);
      lis_solver_set_optionC(solver);
      lis_solver_get_solver(solver, &nsol);
      lis_solver_get_precon(solver, &precon_type);
      lis_get_solvername(nsol, solvername);
      lis_get_preconname(precon_type, preconname);
      printf("solver     : %s %d\n", solvername, nsol);
      printf("precon     : %s %d\n", preconname, precon_type);
      if( A->my_rank==0 ) printf("local shift = %e\n", lshift);
      if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);
      /* one solve with an all-ones right-hand side, then build the
         preconditioner once; it is reused for every lis_psolve below */
      lis_vector_set_all(1.0,q);
      lis_solve(A, q, x, solver);
      lis_precon_create(solver, &precon);
      solver->precon = precon;
      break;
    case LIS_ESOLVER_RQI:
      lis_solver_create(&solver);
      lis_solver_set_option("-p ilu -precision quad -maxiter 10",solver);
      lis_solver_set_optionC(solver);
      lis_solver_get_solver(solver, &nsol);
      lis_solver_get_precon(solver, &precon_type);
      lis_get_solvername(nsol, solvername);
      lis_get_preconname(precon_type, preconname);
      printf("solver     : %s %d\n", solvername, nsol);
      printf("precon     : %s %d\n", preconname, precon_type);
      if( A->my_rank==0 ) printf("local shift = %e\n", lshift);
      if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);
      break;
    }

  giter=0;
  j=0;
  while (j<ss)
    {
      lis_vector_duplicate(A,&esolver->evector[j]);
      /* from here on j is 1-based: v[j] is the working vector of this
         mode, results go to index j-1 */
      j = j+1;
      lis_vector_copy(r, v[j]);

      if (niesolver==LIS_ESOLVER_II || niesolver==LIS_ESOLVER_RQI)
	{
	  /* create preconditioner */
	  solver->A = A;
	  err = lis_precon_create(solver, &precon);
	  if( err )
	    {
	      lis_solver_work_destroy(solver);
	      solver->retcode = err;
	      LIS_DEBUG_FUNC_OUT;
	      return err;
	    }
	}

      if (niesolver==LIS_ESOLVER_RQI)
	{
	  /* initial shift: Rayleigh quotient of the normalised x */
	  lis_vector_nrm2(x, &nrm2);
	  lis_vector_scale(1/nrm2, x);
	  lis_matvec(A, x, Ax);
	  lis_vector_dot(x, Ax, &xAx);
	  lis_vector_dot(x, x, &xx);
	  mu = xAx / xx;
	}

      iter = 0;
      while (iter<emaxiter)
	{
	  /* remove from v[j] its components along v[1..j-1] */
	  iter = iter+1;
	  giter = giter+1;
	  for (k=1;k<j;k++)
	    {
	      lis_vector_dotex_mmm(v[j], v[k], &qdot_vv);
	      lis_quad_minus((LIS_QUAD *)qdot_vv.hi);
	      lis_vector_axpyex_mmm(qdot_vv,v[k],v[j]);
	    }

	  /* apply the selected operator: r = Op(v[j]) */
	  switch( niesolver )
	    {
	    case LIS_ESOLVER_PI:
	      lis_matvec(A,v[j],r);
	      break;
	    case LIS_ESOLVER_II:
	      lis_solve_kernel(A, v[j], r, solver, precon);
	      break;
	    case LIS_ESOLVER_AII:
	      lis_psolve(solver, v[j], r);
	      break;
	    case LIS_ESOLVER_RQI:
	      lis_vector_nrm2(v[j], &nrm2);
	      lis_vector_scale(1/nrm2, v[j]);
	      lis_matrix_shift_diagonal(A, -mu);
	      lis_solve_kernel(A, v[j], r, solver, precon);
	      lis_matrix_shift_diagonal(A, mu);
	      break;
	    }

	  /* timings are accumulated for the first mode only */
	  if ( j==1 && ( niesolver==LIS_ESOLVER_II || niesolver==LIS_ESOLVER_AII || niesolver==LIS_ESOLVER_RQI ))
	    {
	      lis_solver_get_timeex(solver,&times,&itimes,&ptimes,&p_c_times,&p_i_times);
	      esolver->ptimes += solver->ptimes;
	      esolver->itimes += solver->itimes;
	      esolver->p_c_times += solver->p_c_times;
	      esolver->p_i_times += solver->p_i_times;
	    }

	  /* q = r - <v[j],r> v[j]; qdot_vr is negated for the axpyz call
	     and restored afterwards so dotvr holds <v[j],r> itself */
	  lis_vector_nrm2(r, &nrm2);
	  lis_vector_dotex_mmm(v[j], r, &qdot_vr);
	  lis_quad_minus((LIS_QUAD *)qdot_vr.hi);
	  lis_vector_axpyzex_mmmm(qdot_vr,v[j],r,q);
	  lis_quad_minus((LIS_QUAD *)qdot_vr.hi);
	  dotvr = qdot_vr.hi[0];
	  mu = mu + 1/dotvr;

	  lis_vector_nrm2(q, &resid);
	  resid = fabs(resid / dotvr);
	  /* next iterate: r scaled to unit 2-norm */
	  lis_vector_scale(1/nrm2,r);
	  lis_vector_copy(r, v[j]);
	  if ( j==1 )
	    {
	      if( output & LIS_PRINT_MEM ) esolver->residual[iter] = resid;
	      if( output & LIS_PRINT_OUT ) printf("iter: %5d  residual = %e\n", iter, resid);
	      esolver->iter = iter;
	      esolver->resid = resid;
	    }
	  if (tol>resid) break;
	}

      if (niesolver==LIS_ESOLVER_II || niesolver==LIS_ESOLVER_RQI)
	{
	  lis_precon_destroy(precon);
	}

      /* convert the last <v[j],r> into the eigenvalue estimate */
      switch ( niesolver )
	{
	case LIS_ESOLVER_PI:
	  esolver->evalue[j-1] = dotvr;
	  break;
	case LIS_ESOLVER_II:
	  esolver->evalue[j-1] = 1/dotvr;
	  break;
	case LIS_ESOLVER_AII:
	  esolver->evalue[j-1] = 1/dotvr;
	  break;
	case LIS_ESOLVER_RQI:
	  esolver->evalue[j-1] = mu;
	  break;
	}
      lis_vector_copy(v[j], esolver->evector[j-1]);

      if (A->my_rank==0 && ss>1)
	{
#ifdef _LONGLONG
	  printf("Subspace: mode number              = %lld\n", j-1);
#else
	  printf("Subspace: mode number              = %d\n", j-1);
#endif
	  printf("Subspace: eigenvalue               = %e\n", esolver->evalue[j-1]);
#ifdef _LONGLONG
	  printf("Subspace: number of iterations     = %lld\n",iter);
#else
	  printf("Subspace: number of iterations     = %d\n",iter);
#endif
	  printf("Subspace: relative residual 2-norm = %e\n",resid);
	}
    }

  lis_vector_copy(esolver->evector[esolver->options[LIS_EOPTIONS_MODE]], esolver->x);

  /* undo the diagonal shift and release the inner solver */
  switch ( niesolver )
    {
    case LIS_ESOLVER_II:
      if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
      lis_solver_destroy(solver);
      break;
    case LIS_ESOLVER_AII:
      if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
      lis_precon_destroy(precon);
      lis_solver_destroy(solver);
      break;
    case LIS_ESOLVER_RQI:
      if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
      lis_solver_destroy(solver);
      break;
    }

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
}
/* Example #29 */
/* 0 */
/*
 * lis_minres - preconditioned MINRES iteration on the system attached to
 * `solver`.
 *
 * Inputs come from `solver`: matrix solver->A, right-hand side solver->b,
 * solution vector solver->x (updated in place), the tolerance
 * solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN], the iteration limit and
 * output flags, and work vectors solver->work[0..6].
 *
 * The residual norm is not recomputed from x; it is advanced by the
 * recurrence r_euc *= |sigma3| and compared, relative to the initial
 * preconditioned residual norm r0_euc, against the tolerance.
 *
 * Returns LIS_SUCCESS when the relative residual is <= tol (including the
 * case of an exactly zero initial residual, reported with iter = 0) and
 * LIS_MAXITER otherwise.  solver->retcode, solver->iter, solver->resid and
 * solver->ptime (time spent in lis_psolve) are set on every exit.
 *
 * The work vectors belong to `solver`; this routine only borrows them and
 * must not destroy them.
 */
LIS_INT lis_minres(LIS_SOLVER solver)
{
  LIS_Comm comm;  
  LIS_MATRIX A;
  LIS_VECTOR b,x;
  LIS_VECTOR v1,v2,v3,v4,w0,w1,w2;
  LIS_REAL nrm2,tol;
  LIS_SCALAR alpha;
  LIS_REAL beta2,beta3;
  LIS_SCALAR gamma1,gamma2,gamma3;
  LIS_SCALAR delta,eta;
  LIS_SCALAR sigma1,sigma2,sigma3;
  LIS_SCALAR rho1,rho2,rho3;
  LIS_REAL r0_euc,r_euc; 
  LIS_INT iter,maxiter,output;
  double time,ptime;

  LIS_DEBUG_FUNC_IN;

  comm = LIS_COMM_WORLD;
  
  A       = solver->A;
  b       = solver->b;
  x       = solver->x;
  tol     = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
  maxiter = solver->options[LIS_OPTIONS_MAXITER];
  output  = solver->options[LIS_OPTIONS_OUTPUT];
  ptime   = 0.0;

  v1       = solver->work[0];
  v2       = solver->work[1];
  v3       = solver->work[2];
  v4       = solver->work[3];
  w0       = solver->work[4];
  w1       = solver->work[5];
  w2       = solver->work[6];

  /* Lanczos algorithm: v2 = M^-1 (b - A x) */
  lis_matvec(A,x,v2); 
  lis_vector_xpay(b,-1.0,v2);

  time = lis_wtime();
  lis_psolve(solver,v2,v3);
  ptime += lis_wtime()-time;
  lis_vector_copy(v3,v2);

  /* Compute elements of Hermitian tridiagonal matrix */
  lis_vector_nrm2(v2,&r_euc); 

  /* An exactly zero initial residual means x already solves the
     (preconditioned) system.  Continuing would evaluate r_euc / r0_euc as
     0/0 and scale v2 by 1/0, filling x with NaN, so report success now. */
  if( r_euc == 0.0 )
    {
      solver->retcode    = LIS_SUCCESS;
      solver->iter       = 0;
      solver->resid      = 0.0;
      solver->ptime      = ptime;
      LIS_DEBUG_FUNC_OUT;
      return LIS_SUCCESS;
    }

  eta = beta2 = r0_euc = r_euc; 
  gamma2 = gamma1 = 1.0; 
  sigma2 = sigma1 = 0.0;

  lis_vector_set_all(0.0,v1); 
  lis_vector_set_all(0.0,w0); 
  lis_vector_set_all(0.0,w1);

  nrm2 = r_euc / r0_euc; 

  for(iter=1;iter<=maxiter;iter++)
    {

      /* Lanczos algorithm: normalise v2, then
         v4 = M^-1 A v2 - alpha*v2 - beta2*v1 with alpha = <v2, M^-1 A v2> */
      lis_vector_scale(1.0 / beta2,v2); 

      lis_matvec(A,v2,v3); 
      time = lis_wtime();

      lis_psolve(solver,v3,v4);
      ptime += lis_wtime()-time;

      lis_vector_dot(v2,v4,&alpha);
      lis_vector_axpy(-alpha,v2,v4);
      lis_vector_axpy(-beta2,v1,v4);
      lis_vector_nrm2(v4,&beta3);

      /* Compute elements of Hermitian tridiagonal matrix */
      delta = gamma2 * alpha - gamma1 * sigma2 * beta2;
      rho1 = sqrt(delta * delta + beta3 * beta3); 
      rho2 = sigma2 * alpha + gamma1 * gamma2 * beta2; 
      rho3 = sigma1 * beta2;
      gamma3 = delta / rho1; 
      sigma3 = beta3 / rho1;

      /* w2 = (v2 - rho3*w0 - rho2*w1) / rho1 */
      lis_vector_axpyz(-rho3,w0,v2,w2); 
      lis_vector_axpy(-rho2,w1,w2); 
      lis_vector_scale(1.0 / rho1,w2);

      lis_vector_axpy(gamma3 * eta,w2,x);

      /* convergence check */
      r_euc *= fabs(sigma3);
      nrm2 = r_euc / r0_euc;
      
      if( output )
	{
	  if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
	  if( output & LIS_PRINT_OUT ) lis_print_rhistory(comm,iter,nrm2);
	}
      
      if( nrm2 <= tol )
	{ 
	  solver->retcode    = LIS_SUCCESS;
	  solver->iter       = iter;
	  solver->resid      = nrm2;
	  solver->ptime      = ptime;
	  LIS_DEBUG_FUNC_OUT;
	  return LIS_SUCCESS;
	}

      eta *= -sigma3;

      /* shift the three-term recurrences by one step */
      lis_vector_copy(v2,v1); 
      lis_vector_copy(v4,v2);
      lis_vector_copy(w1,w0); 
      lis_vector_copy(w2,w1);

      beta2 = beta3;
      gamma1 = gamma2; 
      gamma2 = gamma3; 
      sigma1 = sigma2; 
      sigma2 = sigma3;

    }

  /* v1..v4 and w0..w2 are aliases of solver->work[0..6].  They are not
     destroyed here: the success path leaves them alive as well, and
     destroying them would leave dangling pointers in solver->work for
     whoever releases the solver's work area. */

  solver->retcode   = LIS_MAXITER;
  solver->iter      = iter;
  solver->resid     = nrm2;
  solver->ptime     = ptime;
  LIS_DEBUG_FUNC_OUT;
  return LIS_MAXITER;
}
/* Example #30 */
/* 0 */
/*
 * lis_bicr - preconditioned BiCR iteration on the system attached to
 * `solver`.
 *
 * Inputs come from `solver`: matrix solver->A, solution vector solver->x
 * (updated in place), options for the iteration limit, output flags and
 * convergence condition, and work vectors solver->work[0..9].  Both the
 * preconditioner (lis_psolve) and its transpose (lis_psolvet), and both
 * A (lis_matvec) and A^T (lis_matvect), are applied.
 *
 * Returns
 *   LIS_SUCCESS   - lis_solver_get_initial_residual() returned non-zero
 *                   (immediate return, no result field written here), or
 *                   the residual from lis_solver_get_residual[conv] is
 *                   <= solver->tol;
 *   LIS_BREAKDOWN - <map,aptld> or <az,ztld> is exactly zero;
 *   LIS_MAXITER   - iteration limit reached.
 * On the last three outcomes solver->retcode, solver->iter, solver->resid
 * and solver->ptime (time spent in lis_psolve / lis_psolvet) are filled in.
 */
LIS_INT lis_bicr(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR r,rtld, z,ztld,p, ptld, ap, map, az, aptld;
	LIS_SCALAR alpha, beta, rho, rho_old, tmpdot1;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,output,conv;
	double time,ptime;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptime   = 0.0;

	r       = solver->work[0];
	rtld    = solver->work[1];
	z       = solver->work[2];
	ztld    = solver->work[3];
	p       = solver->work[4];
	ptld    = solver->work[5];
	ap      = solver->work[6];
	az      = solver->work[7];
	map     = solver->work[8];
	aptld   = solver->work[9];



	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	/* Give nrm2 a defined value before the loop.  It is copied into
	   solver->resid when <map,aptld> is zero in the first iteration or
	   when maxiter < 1, before the in-loop convergence check has run. */
	lis_solver_get_residual[conv](r,solver,&nrm2);

	lis_solver_set_shadowresidual(solver,r,rtld);

	/* z = M^-1 r, ztld = M^-T rtld; timed like the in-loop applications
	   so ptime covers every preconditioner call */
	time = lis_wtime();
	lis_psolve(solver, r, z);
	lis_psolvet(solver, rtld, ztld);
	ptime += lis_wtime()-time;
	lis_vector_copy(z,p);
	lis_vector_copy(ztld,ptld);
	/* ap = A z (= A p, since p == z here); rho_old = <ap,ztld> */
	lis_matvec(A,z,ap);
	lis_vector_dot(ap,ztld,&rho_old);

	for( iter=1; iter<=maxiter; iter++ )
	{
		/* aptld = A^T * ptld */
		/* map   = M^-1 * ap  */
		lis_matvect(A,ptld,aptld);
		time = lis_wtime();
		lis_psolve(solver, ap, map);
		ptime += lis_wtime()-time;

		/* tmpdot1 = <map,aptld> */
		lis_vector_dot(map,aptld,&tmpdot1);
		/* test breakdown: tmpdot1 is the divisor of alpha */
		if( tmpdot1==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			solver->ptime     = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* alpha = rho_old / tmpdot1 */
		/* x     = x + alpha*p   */
		/* r     = r - alpha*ap  */
		alpha = rho_old / tmpdot1;
		lis_vector_axpy(alpha,p,x);
		lis_vector_axpy(-alpha,ap,r);
		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);

		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}
		
		/* rtld = rtld - alpha*aptld */
		/* z    = z - alpha*map      */
		/* ztld = M^-T * rtld        */
		/* az   = A * z              */
		/* rho = <az,ztld>           */
		lis_vector_axpy(-alpha,aptld,rtld);
		lis_vector_axpy(-alpha,map,z);
		time = lis_wtime();
		lis_psolvet(solver, rtld, ztld);
		ptime += lis_wtime()-time;
		lis_matvec(A,z,az);
		lis_vector_dot(az,ztld,&rho);

		/* test breakdown: rho becomes the next rho_old, i.e. the
		   divisor of the next beta */
		if( rho==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			solver->ptime     = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = rho / rho_old    */
		/* p    = z    + beta*p    */
		/* ptld = ztld + beta*ptld */
		/* ap   = az   + beta*ap   */
		beta = rho / rho_old;
		lis_vector_xpay(z,beta,p);
		lis_vector_xpay(ztld,beta,ptld);
		lis_vector_xpay(az,beta,ap);

		rho_old = rho;
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	solver->ptime     = ptime;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}