コード例 #1
0
LIS_INT lis_crs_quad(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,rtld, p, q, u, z, ap, map, uq, auq;
	LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1, one;
	LIS_REAL   bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	M       = solver->precon;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptimes  = 0.0;

	r       = solver->work[0];
	rtld    = solver->work[1];
	p       = solver->work[2];
	z       = solver->work[3];
	u       = solver->work[3];
	uq      = solver->work[3];
	q       = solver->work[4];
	ap      = solver->work[4];
	map     = solver->work[5];
	auq     = solver->work[5];
	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
	LIS_QUAD_SCALAR_MALLOC(one,6,1);

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	lis_solver_set_shadowresidual(solver,r,p);

	LIS_MATVECT(A,p,rtld);
	lis_vector_set_allex_nm(0.0,q);
	lis_vector_set_allex_nm(0.0,p);
	rho_old.hi[0] = 1.0;
	rho_old.lo[0] = 0.0;
	one.hi[0]   = 1.0;
	one.lo[0]   = 0.0;

	
	for( iter=1; iter<=maxiter; iter++ )
	{
		/* z   = M^-1 * r  */
		/* rho = <rtld,z>  */
		times = lis_wtime();
		lis_psolve(solver, r, z);
		ptimes += lis_wtime()-times;
		lis_vector_dotex_mmm(rtld,z,&rho);

		/* test breakdown */
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta    = rho / rho_old         */
		/* u       = z + beta*q            */
		/* p       = u + beta*(q + beta*p) */
		/* ap      = A * p                 */
		/* map     = M^-1 * ap             */
		/* tmpdot1 = <rtld,map>            */
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
		lis_vector_axpyzex_mmmm(beta,q,z,u);
		lis_vector_xpayex_mmm(q,beta,p);
		lis_vector_xpayex_mmm(u,beta,p);
		LIS_MATVEC(A,p,ap);
		times = lis_wtime();
		lis_psolve(solver, ap, map);
		ptimes += lis_wtime()-times;
		lis_vector_dotex_mmm(rtld,map,&tmpdot1);
		/* test breakdown */
		if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}
		
		/* alpha = rho / tmpdot1 */
		/* q     = u - alpha*map */
		/* uq    = u + q         */
		/* auq   = A * uq        */
		/* x     = x + alpha*uq  */
		/* r     = r - alpha*auq */
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyzex_mmmm(alpha,map,u,q);
		lis_vector_axpyzex_mmmm(one,u,q,uq);
		LIS_MATVEC(A,uq,auq);
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,uq,x);
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,auq,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT ) printf("iter: %5d  residual = %e\n", iter, nrm2);
		}
		
		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}
		
		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
コード例 #2
0
ファイル: lis_solver_cgs.c プロジェクト: florianl/lis
LIS_INT lis_cgs_quad(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR r,rtld, p,phat, q, qhat, u, uhat, vhat;
	LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1, one;
	LIS_REAL bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,output,conv;
	double time,ptime;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	x       = solver->x;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptime   = 0.0;

	r       = solver->work[0];
	rtld    = solver->work[1];
	p       = solver->work[2];
	phat    = solver->work[3];
	q       = solver->work[4];
	qhat    = solver->work[5];
	u       = solver->work[5];
	uhat    = solver->work[6];
	vhat    = solver->work[6];

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
	LIS_QUAD_SCALAR_MALLOC(one,6,1);
	rho_old.hi[0] = 1.0;
	rho_old.lo[0] = 0.0;
	alpha.hi[0]   = 1.0;
	alpha.lo[0]   = 0.0;
	one.hi[0]   = 1.0;
	one.lo[0]   = 0.0;


	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0, q);
	lis_vector_set_allex_nm(0.0, p);


	for( iter=1; iter<=maxiter; iter++ )
	{
		/* rho = <rtld,r> */
		lis_vector_dotex_mmm(rtld,r,&rho);

		/* test breakdown */
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) */
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);

		/* u = r + beta*q */
		lis_vector_axpyzex_mmmm(beta,q,r,u);

		/* p = u + beta*(q + beta*p) */
		lis_vector_xpayex_mmm(q,beta,p);
		lis_vector_xpayex_mmm(u,beta,p);
		
		/* phat = M^-1 * p */
		time = lis_wtime();
		lis_psolve(solver, p, phat);
		ptime += lis_wtime()-time;

		/* v = A * phat */
		lis_matvec(A,phat,vhat);
		
		/* tmpdot1 = <rtld,vhat> */
		lis_vector_dotex_mmm(rtld,vhat,&tmpdot1);
		/* test breakdown */
		if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}
		
		/* alpha = rho / tmpdot1 */
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);
		
		/* q = u - alpha*vhat */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyzex_mmmm(alpha,vhat,u,q);

		/* phat = u + q          */
		/* uhat = M^-1 * (u + q) */
		lis_vector_axpyzex_mmmm(one,u,q,phat);
		time = lis_wtime();
		lis_psolve(solver, phat, uhat);
		ptime += lis_wtime()-time;

		/* x = x + alpha*uhat */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,uhat,x);

		/* qhat = A * uhat */
		lis_matvec(A,uhat,qhat);

		/* r = r - alpha*qhat */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,qhat,r);

		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}
		
		if( tol > nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}
		
		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
コード例 #3
0
LIS_INT lis_cgs_switch(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,rtld, p,phat, q, qhat, u, uhat, vhat;
	LIS_QUAD_PTR alpha, beta, rho, rho_old, tmpdot1, one;
	LIS_REAL   bnrm2, nrm2, tol, tol2;
	LIS_INT iter,maxiter,n,output,conv;
	LIS_INT iter2,maxiter2;
	double times,ptimes;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	M       = solver->precon;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter  = solver->options[LIS_OPTIONS_MAXITER];
	maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
	output   = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	tol      = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	tol2     = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
	ptimes  = 0.0;

	r       = solver->work[0];
	rtld    = solver->work[1];
	p       = solver->work[2];
	phat    = solver->work[3];
	q       = solver->work[4];
	qhat    = solver->work[5];
	u       = solver->work[5];
	uhat    = solver->work[6];
	vhat    = solver->work[6];

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
	LIS_QUAD_SCALAR_MALLOC(one,6,1);
	rho_old.hi[0] = 1.0;
	rho_old.lo[0] = 0.0;
	alpha.hi[0]   = 1.0;
	alpha.lo[0]   = 0.0;
	one.hi[0]   = 1.0;
	one.lo[0]   = 0.0;

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol2     = solver->tol_switch;

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0, q);
	lis_vector_set_allex_nm(0.0, p);

	uhat->precision = LIS_PRECISION_DEFAULT;
	p->precision = LIS_PRECISION_DEFAULT;
	phat->precision = LIS_PRECISION_DEFAULT;

	for( iter=1; iter<=maxiter2; iter++ )
	{
			/* rho = <rtld,r> */
			lis_vector_dot(rtld,r,&rho.hi[0]);

			/* test breakdown */
			if( rho.hi[0]==0.0 )
			{
				solver->retcode   = LIS_BREAKDOWN;
				solver->iter      = iter;
				solver->iter2     = iter;
				solver->resid     = nrm2;
				LIS_DEBUG_FUNC_OUT;
				return LIS_BREAKDOWN;
			}

			/* beta = (rho / rho_old) */
			beta.hi[0] = (rho.hi[0] / rho_old.hi[0]);

			/* u = r + beta*q */
			lis_vector_axpyz(beta.hi[0],q,r,u);

			/* p = u + beta*(q + beta*p) */
			lis_vector_xpay(q,beta.hi[0],p);
			lis_vector_xpay(u,beta.hi[0],p);
			
			/* phat = M^-1 * p */
			times = lis_wtime();
			lis_psolve(solver, p, phat);
			ptimes += lis_wtime()-times;

			/* v = A * phat */
			LIS_MATVEC(A,phat,vhat);
			
			/* tmpdot1 = <rtld,vhat> */
			lis_vector_dot(rtld,vhat,&tmpdot1.hi[0]);
			/* test breakdown */
			if( tmpdot1.hi[0]==0.0 )
			{
				solver->retcode   = LIS_BREAKDOWN;
				solver->iter      = iter;
				solver->iter2     = iter;
				solver->resid     = nrm2;
				LIS_DEBUG_FUNC_OUT;
				return LIS_BREAKDOWN;
			}
			
			/* alpha = rho / tmpdot1 */
			alpha.hi[0] = rho.hi[0] / tmpdot1.hi[0];
			
			/* q = u - alpha*vhat */
			lis_vector_axpyz(-alpha.hi[0],vhat,u,q);

			/* phat = u + q          */
			/* uhat = M^-1 * (u + q) */
			lis_vector_axpyz(1.0,u,q,phat);
			times = lis_wtime();
			lis_psolve(solver, phat, uhat);
			ptimes += lis_wtime()-times;

			/* x = x + alpha*uhat */
			lis_vector_axpy(alpha.hi[0],uhat,x);

			/* qhat = A * uhat */
			LIS_MATVEC(A,uhat,qhat);

			/* r = r - alpha*qhat */
			lis_vector_axpy(-alpha.hi[0],qhat,r);

			/* convergence check */
			lis_solver_get_residual[conv](r,solver,&nrm2);
			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
			}

			if( nrm2 <= tol2 )
			{
				solver->iter       = iter;
				solver->iter2     = iter;
				solver->ptimes     = ptimes;
				break;
			}
			
			rho_old.hi[0] = rho.hi[0];
	}

	uhat->precision = LIS_PRECISION_QUAD;
	p->precision = LIS_PRECISION_QUAD;
	phat->precision = LIS_PRECISION_QUAD;

	solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
	lis_vector_copyex_mn(x,solver->xx);
	rho_old.hi[0] = 1.0;

	lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
	tol     = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0, q);
	lis_vector_set_allex_nm(0.0, p);


	for( iter2=iter+1; iter2<=maxiter; iter2++ )
	{
			/* rho = <rtld,r> */
			lis_vector_dotex_mmm(rtld,r,&rho);

			/* test breakdown */
			if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
			{
				solver->retcode   = LIS_BREAKDOWN;
				solver->iter       = iter2;
				solver->iter2      = iter;
				solver->resid     = nrm2;
				LIS_DEBUG_FUNC_OUT;
				return LIS_BREAKDOWN;
			}

			/* beta = (rho / rho_old) */
			lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);

			/* u = r + beta*q */
			lis_vector_axpyzex_mmmm(beta,q,r,u);

			/* p = u + beta*(q + beta*p) */
			lis_vector_xpayex_mmm(q,beta,p);
			lis_vector_xpayex_mmm(u,beta,p);
			
			/* phat = M^-1 * p */
			times = lis_wtime();
			lis_psolve(solver, p, phat);
			ptimes += lis_wtime()-times;

			/* v = A * phat */
			LIS_MATVEC(A,phat,vhat);
			
			/* tmpdot1 = <rtld,vhat> */
			lis_vector_dotex_mmm(rtld,vhat,&tmpdot1);
			/* test breakdown */
			if( tmpdot1.hi[0]==0.0 && tmpdot1.lo[0]==0.0 )
			{
				solver->retcode   = LIS_BREAKDOWN;
				solver->iter       = iter2;
				solver->iter2      = iter;
				solver->resid     = nrm2;
				LIS_DEBUG_FUNC_OUT;
				return LIS_BREAKDOWN;
			}
			
			/* alpha = rho / tmpdot1 */
			lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot1.hi);
			
			/* q = u - alpha*vhat */
			lis_quad_minus((LIS_QUAD *)alpha.hi);
			lis_vector_axpyzex_mmmm(alpha,vhat,u,q);

			/* phat = u + q          */
			/* uhat = M^-1 * (u + q) */
			lis_vector_axpyzex_mmmm(one,u,q,phat);
			times = lis_wtime();
			lis_psolve(solver, phat, uhat);
			ptimes += lis_wtime()-times;

			/* x = x + alpha*uhat */
			lis_quad_minus((LIS_QUAD *)alpha.hi);
			lis_vector_axpyex_mmm(alpha,uhat,x);

			/* qhat = A * uhat */
			LIS_MATVEC(A,uhat,qhat);

			/* r = r - alpha*qhat */
			lis_quad_minus((LIS_QUAD *)alpha.hi);
			lis_vector_axpyex_mmm(alpha,qhat,r);

			/* convergence check */
			lis_solver_get_residual[conv](r,solver,&nrm2);
			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter2] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter2, nrm2);
			}
			
			if( tol > nrm2 )
			{
				solver->retcode    = LIS_SUCCESS;
				solver->iter       = iter2;
				solver->iter2      = iter;
				solver->resid      = nrm2;
				solver->ptimes     = ptimes;
				LIS_DEBUG_FUNC_OUT;
				return LIS_SUCCESS;
			}
			
			rho_old.hi[0] = rho.hi[0];
			rho_old.lo[0] = rho.lo[0];
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter2;
	solver->iter2     = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
コード例 #4
0
LIS_INT lis_esi_quad(LIS_ESOLVER esolver)
{
  LIS_MATRIX        A;
  LIS_VECTOR        x, Ax;
  LIS_SCALAR        xAx, xx, mu, lshift;
  LIS_INT               ss;
  LIS_INT               emaxiter;
  LIS_REAL          tol;
  LIS_INT               i,j,k;
  LIS_SCALAR        evalue,dotvr;
  LIS_INT               iter,giter,output,niesolver;
  LIS_INT               nprocs,my_rank;
  LIS_REAL          nrm2,dot,resid,resid0;
  LIS_QUAD_PTR      qdot_vv, qdot_vr;
  LIS_VECTOR        *v,r,q;
  LIS_SOLVER        solver;
  LIS_PRECON        precon;
  double	    times,itimes,ptimes,p_c_times,p_i_times;
  LIS_INT		    err;
  LIS_INT           nsol, precon_type;
  char              solvername[128], preconname[128];

  LIS_DEBUG_FUNC_IN;

  A = esolver->A;
  x = esolver->x;

  ss = esolver->options[LIS_EOPTIONS_SUBSPACE];
  emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
  tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]; 
  lshift = esolver->lshift;
  output  = esolver->options[LIS_EOPTIONS_OUTPUT];
  niesolver = esolver->options[LIS_EOPTIONS_INNER_ESOLVER];

  r = esolver->work[0];
  q = esolver->work[1];
  v = &esolver->work[2];
  Ax = esolver->work[3];

  LIS_QUAD_SCALAR_MALLOC(qdot_vv,0,1);
  LIS_QUAD_SCALAR_MALLOC(qdot_vr,1,1);

  lis_vector_set_all(1.0,r);
  lis_vector_nrm2(r, &nrm2);
  lis_vector_scale(1/nrm2,r);
	  
  switch ( niesolver )
    {
    case LIS_ESOLVER_II:
      lis_solver_create(&solver);
      lis_solver_set_option("-i bicg -p ilu -precision quad",solver);
      lis_solver_set_optionC(solver);
      lis_solver_get_solver(solver, &nsol);
      lis_solver_get_precon(solver, &precon_type);
      lis_get_solvername(nsol, solvername);
      lis_get_preconname(precon_type, preconname);
      printf("solver     : %s %d\n", solvername, nsol);
      printf("precon     : %s %d\n", preconname, precon_type);
      if( A->my_rank==0 ) printf("local shift = %e\n", lshift);
      if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);
      break;
    case LIS_ESOLVER_AII:
      lis_solver_create(&solver);
      lis_solver_set_option("-i bicg -p ilu -precision quad",solver);
      lis_solver_set_optionC(solver);
      lis_solver_get_solver(solver, &nsol);
      lis_solver_get_precon(solver, &precon_type);
      lis_get_solvername(nsol, solvername);
      lis_get_preconname(precon_type, preconname);
      printf("solver     : %s %d\n", solvername, nsol);
      printf("precon     : %s %d\n", preconname, precon_type);
      if( A->my_rank==0 ) printf("local shift = %e\n", lshift);
      if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);
      lis_vector_set_all(1.0,q);
      lis_solve(A, q, x, solver);
      lis_precon_create(solver, &precon);
      solver->precon = precon;
      break;
    case LIS_ESOLVER_RQI:
      lis_solver_create(&solver);
      lis_solver_set_option("-p ilu -precision quad -maxiter 10",solver);
      lis_solver_set_optionC(solver);
      lis_solver_get_solver(solver, &nsol);
      lis_solver_get_precon(solver, &precon_type);
      lis_get_solvername(nsol, solvername);
      lis_get_preconname(precon_type, preconname);
      printf("solver     : %s %d\n", solvername, nsol);
      printf("precon     : %s %d\n", preconname, precon_type);
      if( A->my_rank==0 ) printf("local shift = %e\n", lshift);
      if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);
      break;
    }

  giter=0;
  j=0;
  while (j<ss)
    {
      lis_vector_duplicate(A,&esolver->evector[j]); 
      j = j+1;
      lis_vector_copy(r, v[j]);

      if (niesolver==LIS_ESOLVER_II || niesolver==LIS_ESOLVER_RQI)
	{
	  /* create preconditioner */
	  solver->A = A;
	  err = lis_precon_create(solver, &precon);
	  if( err )
	    {
	      lis_solver_work_destroy(solver);
	      solver->retcode = err;
	      return err;
	    }
	}

      if (niesolver==LIS_ESOLVER_RQI)
	{
	  lis_vector_nrm2(x, &nrm2);
	  lis_vector_scale(1/nrm2, x);
	  lis_matvec(A, x, Ax);
	  lis_vector_dot(x, Ax, &xAx);
	  lis_vector_dot(x, x, &xx);
	  mu = xAx / xx;
	}

      iter = 0;
      while (iter<emaxiter)
	{
	  /* diagonalization */
	  iter = iter+1;
	  giter = giter+1;
	  for (k=1;k<j;k++)
	    { 
	      lis_vector_dotex_mmm(v[j], v[k], &qdot_vv);
	      lis_quad_minus((LIS_QUAD *)qdot_vv.hi);
	      lis_vector_axpyex_mmm(qdot_vv,v[k],v[j]);
	    }

	  switch( niesolver )
	    {
	    case LIS_ESOLVER_PI:
	      lis_matvec(A,v[j],r); 
	      break;
	    case LIS_ESOLVER_II:
	      lis_solve_kernel(A, v[j], r, solver, precon);
	      break;
	    case LIS_ESOLVER_AII:
	      lis_psolve(solver, v[j], r); 
	      break;
	    case LIS_ESOLVER_RQI:
	      lis_vector_nrm2(v[j], &nrm2);
	      lis_vector_scale(1/nrm2, v[j]);
	      lis_matrix_shift_diagonal(A, -mu);
	      lis_solve_kernel(A, v[j], r, solver, precon);
	      lis_matrix_shift_diagonal(A, mu);
	      break;
	    }

	  if ( j==1 && ( niesolver==LIS_ESOLVER_II || niesolver==LIS_ESOLVER_AII || niesolver==LIS_ESOLVER_RQI ))
	    {
	      lis_solver_get_timeex(solver,&times,&itimes,&ptimes,&p_c_times,&p_i_times);
	      esolver->ptimes += solver->ptimes;
	      esolver->itimes += solver->itimes;
	      esolver->p_c_times += solver->p_c_times;
	      esolver->p_i_times += solver->p_i_times;
	    }

	  lis_vector_nrm2(r, &nrm2);
	  lis_vector_dotex_mmm(v[j], r, &qdot_vr);
	  lis_quad_minus((LIS_QUAD *)qdot_vr.hi);
	  lis_vector_axpyzex_mmmm(qdot_vr,v[j],r,q);
	  lis_quad_minus((LIS_QUAD *)qdot_vr.hi);	  
	  dotvr = qdot_vr.hi[0];
	  mu = mu + 1/dotvr;

	  lis_vector_nrm2(q, &resid);
	  resid = fabs(resid / dotvr);
	  lis_vector_scale(1/nrm2,r);
	  lis_vector_copy(r, v[j]);
	  if ( j==1 ) 
	    {
	      if( output & LIS_PRINT_MEM ) esolver->residual[iter] = resid; 
	      if( output & LIS_PRINT_OUT ) printf("iter: %5d  residual = %e\n", iter, resid);
	      esolver->iter = iter;
	      esolver->resid = resid;
	    }
	  if (tol>resid) break;
	}

      if (niesolver==LIS_ESOLVER_II || niesolver==LIS_ESOLVER_RQI)
	{
	  lis_precon_destroy(precon);
	}

      switch ( niesolver )
	{
	case LIS_ESOLVER_PI:
	  esolver->evalue[j-1] = dotvr;
	  break;
	case LIS_ESOLVER_II:
	  esolver->evalue[j-1] = 1/dotvr;
	  break;
	case LIS_ESOLVER_AII:
	  esolver->evalue[j-1] = 1/dotvr;
	  break;
	case LIS_ESOLVER_RQI:
	  esolver->evalue[j-1] = mu;
	  break;
	}
      lis_vector_copy(v[j], esolver->evector[j-1]);  

      if (A->my_rank==0 && ss>1)
	{
#ifdef _LONGLONG
	  printf("Subspace: mode number              = %lld\n", j-1);
#else
	  printf("Subspace: mode number              = %d\n", j-1);
#endif
	  printf("Subspace: eigenvalue               = %e\n", esolver->evalue[j-1]);
#ifdef _LONGLONG
	  printf("Subspace: number of iterations     = %lld\n",iter);
#else
	  printf("Subspace: number of iterations     = %d\n",iter);
#endif
	  printf("Subspace: relative residual 2-norm = %e\n",resid);
	}
    }
  
  lis_vector_copy(esolver->evector[esolver->options[LIS_EOPTIONS_MODE]], esolver->x);

  switch ( niesolver )
    {
    case LIS_ESOLVER_II:
      if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
      lis_solver_destroy(solver);
      break;
    case LIS_ESOLVER_AII:
      if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
      lis_precon_destroy(precon);
      lis_solver_destroy(solver);
      break;
    case LIS_ESOLVER_RQI:
      if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
      lis_solver_destroy(solver);
      break;
    }

  LIS_DEBUG_FUNC_OUT;
  return LIS_SUCCESS;
}
コード例 #5
0
LIS_INT lis_bicrstab_quad(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_PRECON M;
	LIS_VECTOR b,x;
	LIS_VECTOR r,rtld, p, s, ap, ms, map, ams, z;
	LIS_QUAD_PTR alpha, beta, omega, rho, rho_old, tmpdot1, tmpdot2;
	LIS_REAL   bnrm2, nrm2, tol;
	LIS_INT iter,maxiter,n,output,conv;
	double times,ptimes;

	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	M       = solver->precon;
	b       = solver->b;
	x       = solver->x;
	n       = A->n;
	maxiter = solver->options[LIS_OPTIONS_MAXITER];
	output  = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	ptimes  = 0.0;

	rtld    = solver->work[0];
	r       = solver->work[1];
	s       = solver->work[2];
	ms      = solver->work[3];
	ams     = solver->work[4];
	p       = solver->work[5];
	ap      = solver->work[6];
	map     = solver->work[7];
	z       = solver->work[8];
	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot1,4,1);
	LIS_QUAD_SCALAR_MALLOC(omega,6,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot2,7,1);

	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol     = solver->tol;

	lis_solver_set_shadowresidual(solver,r,p);

	LIS_MATVECT(A,p,rtld);
	times = lis_wtime();
	lis_psolve(solver, r, z);
	ptimes += lis_wtime()-times;
	lis_vector_copyex_mm(z,p);
	lis_vector_dotex_mmm(rtld,z,&rho_old);
	
	for( iter=1; iter<=maxiter; iter++ )
	{
		/* ap      = A * p             */
		/* map     = M^-1 * ap         */
		/* tmpdot1 = <rtld,map>        */
		/* alpha   = rho_old / tmpdot1 */
		/* s       = r - alpha*ap      */
		LIS_MATVEC(A,p,ap);
		times = lis_wtime();
		lis_psolve(solver, ap, map);
		ptimes += lis_wtime()-times;
		lis_vector_dotex_mmm(rtld,map,&tmpdot1);
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho_old.hi,(LIS_QUAD *)tmpdot1.hi);
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyzex_mmmm(alpha,ap,r,s);

		/* Early check for tolerance */
		lis_solver_get_residual[conv](s,solver,&nrm2);
		if( nrm2 <= tol )
		{
			if( output )
			{
				if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
				if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
			}

			lis_quad_minus((LIS_QUAD *)alpha.hi);
			lis_vector_axpyex_mmm(alpha,p,x);
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		/* ms      = z - alpha*map     */
		/* ams     = A * ms            */
		/* tmpdot1 = <ams,s>           */
		/* tmpdot2 = <ams,ams>         */
		/* omega   = tmpdot1 / tmpdot2 */
		lis_vector_axpyzex_mmmm(alpha,map,z,ms);
		LIS_MATVEC(A,ms,ams);
		lis_vector_dotex_mmm(ams,s,&tmpdot1);
		lis_vector_dotex_mmm(ams,ams,&tmpdot2);
		lis_quad_div((LIS_QUAD *)omega.hi,(LIS_QUAD *)tmpdot1.hi,(LIS_QUAD *)tmpdot2.hi);

		/* x = x + alpha*p  + omega*ms  */
		/* r = s - omega*ams            */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyex_mmm(alpha,p,x);
		lis_vector_axpyex_mmm(omega,ms,x);
		lis_quad_minus((LIS_QUAD *)omega.hi);
		lis_vector_axpyzex_mmmm(omega,ams,s,r);
		
		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->residual[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) printf("iter: %5d  residual = %e\n", iter, nrm2);
		}

		if( tol >= nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter;
			solver->resid      = nrm2;
			solver->ptimes     = ptimes;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}
		
		/* z   = M^-1 * r */
		/* rho = <rtld,z> */
		times = lis_wtime();
		lis_psolve(solver, r, z);
		ptimes += lis_wtime()-times;
		lis_vector_dotex_mmm(rtld,z,&rho);
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) * (alpha / omega) */
		/* p    = z + beta*(p - omega*map)          */
		lis_quad_minus((LIS_QUAD *)omega.hi);
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
		lis_quad_div((LIS_QUAD *)tmpdot1.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)omega.hi);
		lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmpdot1.hi);
		lis_quad_minus((LIS_QUAD *)omega.hi);
		lis_vector_axpyex_mmm(omega,map,p);
		lis_vector_xpayex_mmm(z,beta,p);

		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}

	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
コード例 #6
0
ファイル: lis_solver_bicgsafe.c プロジェクト: florianl/lis
LIS_INT lis_bicgsafe_switch(LIS_SOLVER solver)
{
	LIS_MATRIX A;
	LIS_VECTOR x;
	LIS_VECTOR r, rtld, rhat, p, ptld, phat;
	LIS_VECTOR t, ttld, that, t0, t0hat;
	LIS_VECTOR y, w, u, z;
	LIS_QUAD_PTR alpha, beta, rho, rho_old;
	LIS_QUAD_PTR qsi, eta, one;
	LIS_QUAD_PTR tmp, tmpdot[5];
	LIS_REAL bnrm2, nrm2, tol, tol2;
	LIS_INT iter,maxiter,output,conv;
	LIS_INT iter2,maxiter2;
	double time,ptime;


	LIS_DEBUG_FUNC_IN;

	A       = solver->A;
	x       = solver->x;
	maxiter  = solver->options[LIS_OPTIONS_MAXITER];
	maxiter2 = solver->options[LIS_OPTIONS_SWITCH_MAXITER];
	output   = solver->options[LIS_OPTIONS_OUTPUT];
	conv    = solver->options[LIS_OPTIONS_CONV_COND];
	tol      = solver->params[LIS_PARAMS_RESID-LIS_OPTIONS_LEN];
	tol2     = solver->params[LIS_PARAMS_SWITCH_RESID-LIS_OPTIONS_LEN];
	ptime   = 0.0;

	rtld    = solver->work[0];
	r       = solver->work[1];
	rhat    = solver->work[2];
	p       = solver->work[3];
	ptld    = solver->work[4];
	phat    = solver->work[5];
	t       = solver->work[6];
	ttld    = solver->work[7];
	that    = solver->work[8];
	t0      = solver->work[9];
	t0hat   = solver->work[10];
	y       = solver->work[11];
	w       = solver->work[12];
	u       = solver->work[13];
	z       = solver->work[14];

	LIS_QUAD_SCALAR_MALLOC(alpha,0,1);
	LIS_QUAD_SCALAR_MALLOC(beta,1,1);
	LIS_QUAD_SCALAR_MALLOC(rho,2,1);
	LIS_QUAD_SCALAR_MALLOC(rho_old,3,1);
	LIS_QUAD_SCALAR_MALLOC(qsi,4,1);
	LIS_QUAD_SCALAR_MALLOC(eta,5,1);
	LIS_QUAD_SCALAR_MALLOC(tmp,6,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[0],7,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[1],8,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[2],9,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[3],10,1);
	LIS_QUAD_SCALAR_MALLOC(tmpdot[4],11,1);
	LIS_QUAD_SCALAR_MALLOC(one,13,1);

	rho_old.hi[0] = 1.0;
	rho_old.lo[0] = 0.0;
	alpha.hi[0] = 1.0;
	alpha.lo[0] = 0.0;
	qsi.hi[0] = 1.0;
	qsi.lo[0] = 0.0;
	one.hi[0] = -1.0;
	one.lo[0] = 0.0;


	/* Initial Residual */
	if( lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2) )
	{
		LIS_DEBUG_FUNC_OUT;
		return LIS_SUCCESS;
	}
	tol2     = solver->tol_switch;

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0, ttld);
	lis_vector_set_allex_nm(0.0, ptld);
	lis_vector_set_allex_nm(0.0, p);
	lis_vector_set_allex_nm(0.0, u);
	lis_vector_set_allex_nm(0.0, t);
	lis_vector_set_allex_nm(0.0, t0);

	for( iter=1; iter<=maxiter2; iter++ )
	{
		/* rho = <rtld,r> */
		lis_vector_dot(rtld,r,&rho.hi[0]);

		/* test breakdown */
		if( rho.hi[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter;
			solver->iter2     = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) * (alpha / qsi) */
		beta.hi[0] = (rho.hi[0] / rho_old.hi[0]) * (alpha.hi[0] / qsi.hi[0]);

		/* w = ttld + beta*ptld */
		lis_vector_axpyz(beta.hi[0],ptld,ttld,w);

		/* rhat = M^-1 * r */
		time = lis_wtime();
		lis_psolve(solver, r, rhat);
		ptime += lis_wtime()-time;

		/* p = rhat + beta*(p - u) */
		lis_vector_axpy(-1,u,p);
		lis_vector_xpay(rhat,beta.hi[0],p);
		
		/* ptld = A * p */
		lis_matvec(A,p,ptld);

		/* tmpdot[0] = <rtld,ptld> */
		lis_vector_dot(rtld,ptld,&tmpdot[0].hi[0]);
		/* test breakdown */
		/* */
		
		/* alpha = rho / tmpdot[0] */
		alpha.hi[0] = rho.hi[0] / tmpdot[0].hi[0];

		/* y = t - r + alpha*(-w + ptld) */
		lis_vector_axpyz(-1,w,ptld,y);
		lis_vector_xpay(t,alpha.hi[0],y);
		lis_vector_axpy(-1,r,y);

		/* t = r - alpha*ptld */
		lis_vector_axpyz(-alpha.hi[0],ptld,r,t);

		/* that  = M^-1 * t */
		/* phat  = M^-1 * ptld */
		/* t0hat = M^-1 * t0 */
		time = lis_wtime();
		lis_psolve(solver, t, that);
		lis_psolve(solver, ptld, phat);
		lis_psolve(solver, t0, t0hat);
		ptime += lis_wtime()-time;

		/* ttld = A * that */
		lis_matvec(A,that,ttld);

		/* tmpdot[0] = <y,y>       */
		/* tmpdot[1] = <ttld,t>    */
		/* tmpdot[2] = <y,t>       */
		/* tmpdot[3] = <ttld,y>    */
		/* tmpdot[4] = <ttld,ttld> */
		lis_vector_dot(y,y,&tmpdot[0].hi[0]);
		lis_vector_dot(ttld,t,&tmpdot[1].hi[0]);
		lis_vector_dot(y,t,&tmpdot[2].hi[0]);
		lis_vector_dot(ttld,y,&tmpdot[3].hi[0]);
		lis_vector_dot(ttld,ttld,&tmpdot[4].hi[0]);
		if(iter==1)
		{
			qsi.hi[0] = tmpdot[1].hi[0] / tmpdot[4].hi[0];
			eta.hi[0] = 0.0;
		}
		else
		{
			tmp.hi[0] = tmpdot[4].hi[0]*tmpdot[0].hi[0]  - tmpdot[3].hi[0]*tmpdot[3].hi[0];
			qsi.hi[0] = (tmpdot[0].hi[0]*tmpdot[1].hi[0] - tmpdot[2].hi[0]*tmpdot[3].hi[0]) / tmp.hi[0];
			eta.hi[0] = (tmpdot[4].hi[0]*tmpdot[2].hi[0] - tmpdot[3].hi[0]*tmpdot[1].hi[0]) / tmp.hi[0];
		}

		/* u = qsi*phat + eta*(t0hat - rhat + beta*u) */
		lis_vector_xpay(t0hat,beta.hi[0],u);
		lis_vector_axpy(-1,rhat,u);
		lis_vector_scale(eta.hi[0],u);
		lis_vector_axpy(qsi.hi[0],phat,u);

		/* z = qsi*rhat + eta*z - alpha*u */
		lis_vector_scale(eta.hi[0],z);
		lis_vector_axpy(qsi.hi[0],rhat,z);
		lis_vector_axpy(-alpha.hi[0],u,z);

		/* x = x + alpha*p + z */
		lis_vector_axpy(alpha.hi[0],p,x);
		lis_vector_axpy(1,z,x);
		
		/* r = t - eta*y - qsi*ttld */
		lis_vector_axpyz(-eta.hi[0],y,t,r);
		lis_vector_axpy(-qsi.hi[0],ttld,r);
		
		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( tol2 >= nrm2 )
		{
			solver->iter       = iter;
			solver->iter2      = iter;
			solver->ptime      = ptime;
			break;
		}

		lis_vector_copy(t,t0);
		rho_old.hi[0] = rho.hi[0];
	}

	r->precision = LIS_PRECISION_QUAD;
	p->precision = LIS_PRECISION_QUAD;
	t->precision = LIS_PRECISION_QUAD;
	t0->precision = LIS_PRECISION_QUAD;
	ptld->precision = LIS_PRECISION_QUAD;
	that->precision = LIS_PRECISION_QUAD;

	solver->options[LIS_OPTIONS_INITGUESS_ZEROS] = LIS_FALSE;
	lis_vector_copyex_mn(x,solver->xx);

	rho_old.hi[0] = 1.0;
	alpha.hi[0] = 1.0;
	qsi.hi[0] = 1.0;
	one.hi[0] = -1.0;

	/* Initial Residual */
	lis_solver_get_initial_residual(solver,NULL,NULL,r,&bnrm2);
	tol     = solver->tol;

	lis_solver_set_shadowresidual(solver,r,rtld);

	lis_vector_set_allex_nm(0.0, ttld);
	lis_vector_set_allex_nm(0.0, ptld);
	lis_vector_set_allex_nm(0.0, p);
	lis_vector_set_allex_nm(0.0, u);
	lis_vector_set_allex_nm(0.0, t);
	lis_vector_set_allex_nm(0.0, t0);

	for( iter2=iter+1; iter2<=maxiter; iter2++ )
	{
		/* rho = <rtld,r> */
		lis_vector_dotex_mmm(rtld,r,&rho);

		/* test breakdown */
		if( rho.hi[0]==0.0 && rho.lo[0]==0.0 )
		{
			solver->retcode   = LIS_BREAKDOWN;
			solver->iter      = iter2;
			solver->iter2     = iter;
			solver->resid     = nrm2;
			LIS_DEBUG_FUNC_OUT;
			return LIS_BREAKDOWN;
		}

		/* beta = (rho / rho_old) * (alpha / qsi) */
		lis_quad_div((LIS_QUAD *)beta.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)rho_old.hi);
		lis_quad_div((LIS_QUAD *)tmp.hi,(LIS_QUAD *)alpha.hi,(LIS_QUAD *)qsi.hi);
		lis_quad_mul((LIS_QUAD *)beta.hi,(LIS_QUAD *)beta.hi,(LIS_QUAD *)tmp.hi);

		/* w = ttld + beta*ptld */
		lis_vector_axpyzex_mmmm(beta,ptld,ttld,w);

		/* rhat = M^-1 * r */
		time = lis_wtime();
		lis_psolve(solver, r, rhat);
		ptime += lis_wtime()-time;

		/* p = rhat + beta*(p - u) */
		lis_vector_axpyex_mmm(one,u,p);
		lis_vector_xpayex_mmm(rhat,beta,p);
		
		/* ptld = A * p */
		lis_matvec(A,p,ptld);

		/* tmpdot[0] = <rtld,ptld> */
		lis_vector_dotex_mmm(rtld,ptld,&tmpdot[0]);
		/* test breakdown */
		/* */
		
		/* alpha = rho / tmpdot[0] */
		lis_quad_div((LIS_QUAD *)alpha.hi,(LIS_QUAD *)rho.hi,(LIS_QUAD *)tmpdot[0].hi);

		/* y = t - r + alpha*(-w + ptld) */
		lis_vector_axpyzex_mmmm(one,w,ptld,y);
		lis_vector_xpayex_mmm(t,alpha,y);
		lis_vector_axpyex_mmm(one,r,y);

		/* t = r - alpha*ptld */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_vector_axpyzex_mmmm(alpha,ptld,r,t);

		/* that  = M^-1 * t */
		/* phat  = M^-1 * ptld */
		/* t0hat = M^-1 * t0 */
		time = lis_wtime();
		lis_psolve(solver, t, that);
		lis_psolve(solver, ptld, phat);
		lis_psolve(solver, t0, t0hat);
		ptime += lis_wtime()-time;

		/* ttld = A * that */
		lis_matvec(A,that,ttld);

		/* tmpdot[0] = <y,y>       */
		/* tmpdot[1] = <ttld,t>    */
		/* tmpdot[2] = <y,t>       */
		/* tmpdot[3] = <ttld,y>    */
		/* tmpdot[4] = <ttld,ttld> */
		lis_vector_dotex_mmm(y,y,&tmpdot[0]);
		lis_vector_dotex_mmm(ttld,t,&tmpdot[1]);
		lis_vector_dotex_mmm(y,t,&tmpdot[2]);
		lis_vector_dotex_mmm(ttld,y,&tmpdot[3]);
		lis_vector_dotex_mmm(ttld,ttld,&tmpdot[4]);
		if(iter==1)
		{
			lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[1].hi,(LIS_QUAD *)tmpdot[4].hi);
			eta.hi[0] = 0.0;
			eta.lo[0] = 0.0;
		}
		else
		{
			lis_quad_mul((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[0].hi);
			lis_quad_sqr((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[3].hi);
			lis_quad_sub((LIS_QUAD *)tmp.hi,(LIS_QUAD *)tmp.hi,(LIS_QUAD *)qsi.hi);

			lis_quad_mul((LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[1].hi);
			lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[2].hi,(LIS_QUAD *)tmpdot[3].hi);
			lis_quad_sub((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)eta.hi);
			lis_quad_div((LIS_QUAD *)qsi.hi,(LIS_QUAD *)qsi.hi,(LIS_QUAD *)tmp.hi);

			lis_quad_mul((LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[4].hi,(LIS_QUAD *)tmpdot[2].hi);
			lis_quad_mul((LIS_QUAD *)tmpdot[0].hi,(LIS_QUAD *)tmpdot[3].hi,(LIS_QUAD *)tmpdot[1].hi);
			lis_quad_sub((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmpdot[0].hi);
			lis_quad_div((LIS_QUAD *)eta.hi,(LIS_QUAD *)eta.hi,(LIS_QUAD *)tmp.hi);
		}

		/* u = qsi*phat + eta*(t0hat - rhat + beta*u) */
		lis_vector_xpayex_mmm(t0hat,beta,u);
		lis_vector_axpyex_mmm(one,rhat,u);
		lis_vector_scaleex_mm(eta,u);
		lis_vector_axpyex_mmm(qsi,phat,u);

		/* z = qsi*rhat + eta*z - alpha*u */
		lis_vector_scaleex_mm(eta,z);
		lis_vector_axpyex_mmm(qsi,rhat,z);
		lis_vector_axpyex_mmm(alpha,u,z);

		/* x = x + alpha*p + z */
		lis_quad_minus((LIS_QUAD *)alpha.hi);
		lis_quad_minus((LIS_QUAD *)one.hi);
		lis_vector_axpyex_mmm(alpha,p,x);
		lis_vector_axpyex_mmm(one,z,x);
		lis_quad_minus((LIS_QUAD *)one.hi);
		
		/* r = t - eta*y - qsi*ttld */
		lis_quad_minus((LIS_QUAD *)eta.hi);
		lis_quad_minus((LIS_QUAD *)qsi.hi);
		lis_vector_axpyzex_mmmm(eta,y,t,r);
		lis_vector_axpyex_mmm(qsi,ttld,r);
		lis_quad_minus((LIS_QUAD *)eta.hi);
		lis_quad_minus((LIS_QUAD *)qsi.hi);
		
		/* convergence check */
		lis_solver_get_residual[conv](r,solver,&nrm2);
		if( output )
		{
			if( output & LIS_PRINT_MEM ) solver->rhistory[iter2] = nrm2;
			if( output & LIS_PRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,nrm2);
		}

		if( tol > nrm2 )
		{
			solver->retcode    = LIS_SUCCESS;
			solver->iter       = iter2;
			solver->iter2      = iter;
			solver->resid      = nrm2;
			solver->ptime      = ptime;
			LIS_DEBUG_FUNC_OUT;
			return LIS_SUCCESS;
		}

		lis_vector_copyex_mm(t,t0);
		rho_old.hi[0] = rho.hi[0];
		rho_old.lo[0] = rho.lo[0];
	}
	solver->retcode   = LIS_MAXITER;
	solver->iter      = iter;
	solver->iter2     = iter2;
	solver->resid     = nrm2;
	LIS_DEBUG_FUNC_OUT;
	return LIS_MAXITER;
}
コード例 #7
0
LIS_INT lis_epi_quad(LIS_ESOLVER esolver)
{
  LIS_MATRIX        A;
  LIS_VECTOR        x;
  LIS_SCALAR        evalue;
  LIS_INT               emaxiter;
  LIS_REAL          tol;
  LIS_INT               iter,output;
  LIS_INT               nprocs,my_rank;
  LIS_REAL          nrm2,resid;
  LIS_QUAD_PTR      qdot_xz;
  LIS_VECTOR        z,q;
  double            times, ptimes;

  LIS_DEBUG_FUNC_IN;

  emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
  tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]; 
  output  = esolver->options[LIS_EOPTIONS_OUTPUT];

  A = esolver->A;
  x = esolver->x;
  if (esolver->options[LIS_EOPTIONS_INITGUESS_ONES] ) 
    {
      lis_vector_set_all(1.0,x);
    }
  z = esolver->work[0];
  q = esolver->work[1];

  LIS_QUAD_SCALAR_MALLOC(qdot_xz,0,1);

  iter=0;
  while (iter<emaxiter)
    {
      iter = iter+1;

      /* x = x / ||x||_2 */
      lis_vector_nrm2(x, &nrm2);
      lis_vector_scale(1/nrm2, x);

      /* z = A * x */
      lis_matvec(A,x,z);

      /* evalue = <x,z> */
      lis_vector_dotex_mmm(x, z, &qdot_xz);
      lis_quad_minus((LIS_QUAD *)qdot_xz.hi);

      /* resid = ||z - evalue * x||_2 / |evalue| */
      lis_vector_axpyzex_mmmm(qdot_xz,x,z,q);
      lis_quad_minus((LIS_QUAD *)qdot_xz.hi);
      lis_vector_nrm2(q, &resid); 
      evalue = qdot_xz.hi[0];
      resid = fabs(resid / evalue);

      /* x = z */
      lis_vector_copy(z, x);

      /* convergence check */
      if( output )
  {
    if( output & LIS_EPRINT_MEM ) esolver->residual[iter] = resid;
    if( output & LIS_EPRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,resid);
  }

      if( tol >= resid )
  {
    esolver->retcode    = LIS_SUCCESS;
    esolver->iter       = iter;
    esolver->resid      = resid;
    esolver->evalue[0] = evalue;
    LIS_DEBUG_FUNC_OUT;
    return LIS_SUCCESS;
  }
    }
  esolver->retcode    = LIS_MAXITER;
  esolver->iter      = iter;
  esolver->resid     = resid;
  esolver->evalue[0] = evalue;
  LIS_DEBUG_FUNC_OUT;
  return LIS_MAXITER;
}
コード例 #8
0
ファイル: lis_esolver_ii.c プロジェクト: rwl/lis
LIS_INT lis_eii_quad(LIS_ESOLVER esolver)
{
  LIS_MATRIX A;
  LIS_VECTOR x;
  LIS_SCALAR evalue, ievalue;
  LIS_SCALAR lshift;
  LIS_INT emaxiter;
  LIS_REAL tol;
  LIS_INT iter,iter2,output;
  LIS_REAL nrm2,resid;
  LIS_QUAD_PTR qdot_xz;
  LIS_VECTOR z,q;
  LIS_SOLVER solver;
  double time,itime,ptime,p_c_time,p_i_time;
  LIS_INT err;
  LIS_PRECON precon;
  LIS_INT nsol, precon_type;
  char solvername[128], preconname[128];

  LIS_DEBUG_FUNC_IN;

  emaxiter = esolver->options[LIS_EOPTIONS_MAXITER];
  tol = esolver->params[LIS_EPARAMS_RESID - LIS_EOPTIONS_LEN]; 
  lshift = esolver->lshift;
  output  = esolver->options[LIS_EOPTIONS_OUTPUT];

  A = esolver->A;
  x = esolver->x;
  if (esolver->options[LIS_EOPTIONS_INITGUESS_ONES] ) 
    {
      lis_vector_set_all(1.0,x);
    }
  evalue = 1.0;
  z = esolver->work[0];
  q = esolver->work[1];

  LIS_QUAD_SCALAR_MALLOC(qdot_xz,0,1);

  iter=0;
  ievalue = 1/(evalue);
#ifdef _LONG__DOUBLE
  if( output & (A->my_rank==0) ) printf("local shift           : %Le\n", lshift);
#else
  if( output & (A->my_rank==0) ) printf("local shift           : %e\n", lshift);
#endif
  if (lshift != 0) lis_matrix_shift_diagonal(A, lshift);
  lis_solver_create(&solver);
  lis_solver_set_option("-i bicg -p none -precision quad",solver);
  lis_solver_set_optionC(solver);
  lis_solver_get_solver(solver, &nsol);
  lis_solver_get_precon(solver, &precon_type);
  lis_solver_get_solvername(nsol, solvername);
  lis_solver_get_preconname(precon_type, preconname);
  if( output & (A->my_rank==0) ) printf("linear solver         : %s\n", solvername);
  if( output & (A->my_rank==0) ) printf("preconditioner        : %s\n", preconname);

  /* create preconditioner */
  solver->A = A;
  err = lis_precon_create(solver, &precon);
  if( err )
    {
      lis_solver_work_destroy(solver);
      solver->retcode = err;
      return err;
    }

  while (iter<emaxiter)
    {
      iter = iter+1;

      /* x = x / ||x||_2 */
      lis_vector_nrm2(x, &nrm2);
      lis_vector_scale(1/nrm2, x);

      /* z = (A - lshift I)^-1 * x */
      lis_solve_kernel(A, x, z, solver, precon);
      lis_solver_get_iter(solver,&iter2);

      /* 1/evalue = <x,z> */
      lis_vector_dotex_mmm(x, z, &qdot_xz);
      lis_quad_minus((LIS_QUAD *)qdot_xz.hi);
      lis_vector_axpyzex_mmmm(qdot_xz,x,z,q);
      lis_quad_minus((LIS_QUAD *)qdot_xz.hi);
      ievalue = qdot_xz.hi[0];

      /* resid = ||z - 1/evalue * x||_2 / |1/evalue| */
      lis_vector_nrm2(q, &resid);
      resid = fabs(resid/ievalue);

      /* x = z */
      lis_vector_copy(z,x);

      /* convergence check */
      lis_solver_get_timeex(solver,&time,&itime,&ptime,&p_c_time,&p_i_time);
      esolver->ptime += solver->ptime;
      esolver->itime += solver->itime;
      esolver->p_c_time += solver->p_c_time;
      esolver->p_i_time += solver->p_i_time;

      if( output )
	{
	  if( output & LIS_EPRINT_MEM ) esolver->rhistory[iter] = resid;
	  if( output & LIS_EPRINT_OUT && A->my_rank==0 ) lis_print_rhistory(iter,resid);
	}

      if( tol >= resid ) 
	{
	  esolver->retcode    = LIS_SUCCESS;
	  esolver->iter[0]    = iter;
	  esolver->resid[0]   = resid;
	  esolver->evalue[0]  = 1/ievalue;
	  lis_vector_nrm2(x, &nrm2);
	  lis_vector_scale(1/nrm2, x);
	  if (lshift != 0) lis_matrix_shift_diagonal(A, -lshift);
	  lis_precon_destroy(precon);
	  lis_solver_destroy(solver); 
	  LIS_DEBUG_FUNC_OUT;
	  return LIS_SUCCESS;
	}
    }

  lis_precon_destroy(precon);

  esolver->retcode    = LIS_MAXITER;
  esolver->iter[0]    = iter;
  esolver->resid[0]   = resid;
  esolver->evalue[0]  = 1/ievalue;
  lis_vector_nrm2(x, &nrm2);
  lis_vector_scale(1/nrm2, x);
  if (lshift != 0) 
    {
      lis_matrix_shift_diagonal(A, -lshift);
    }
  lis_solver_destroy(solver); 
  LIS_DEBUG_FUNC_OUT;
  return LIS_MAXITER;
}